From c81ed19cc5936abaa0b2a3c7738164d444b283eb Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Wed, 29 Oct 2025 21:04:52 +0900 Subject: [PATCH] Report oldest xmin source when autovacuum cannot remove tuples Autovacuum logging now tells which backend or mechanism kept OldestXmin from advancing when dead tuples remain, improving diagnosis of bloating and wraparound hazards. ProcArray now records an OldestXminSource for each computed horizon, and VACUUM retrieves it through the new GetOldestNonRemovableTransactionIdWithReason() helper. The log output names active transactions (with pid), hot standby feedback, prepared transactions, replication slots, or otherwise labels the cause as "other". Author: Shinya Kato Reviewed-by: Discussion: https://postgr.es/m/ --- src/backend/access/heap/vacuumlazy.c | 37 ++++ src/backend/commands/vacuum.c | 4 +- src/backend/storage/ipc/procarray.c | 165 +++++++++++++--- src/include/commands/vacuum.h | 4 + src/include/storage/procarray.h | 21 ++ src/test/modules/test_misc/meson.build | 1 + .../t/010_autovacuum_oldest_xmin_reason.pl | 186 ++++++++++++++++++ 7 files changed, 388 insertions(+), 30 deletions(-) create mode 100644 src/test/modules/test_misc/t/010_autovacuum_oldest_xmin_reason.pl diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 65bb0568a867..0c760a6c7ac4 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1047,6 +1047,43 @@ heap_vacuum_rel(Relation rel, const VacuumParams params, appendStringInfo(&buf, _("removable cutoff: %u, which was %d XIDs old when operation ended\n"), vacrel->cutoffs.OldestXmin, diff); + /* + * If there are dead tuples not yet removable, report what + * determined the OldestXmin horizon to aid diagnostics. + */ + if (vacrel->recently_dead_tuples > 0) + { + int pid = vacrel->cutoffs.oldest_xmin_info.backend_pid; + bool include_pid = pid > 0; + + switch (vacrel->cutoffs.oldest_xmin_info.source) + { + case OLDESTXMIN_SOURCE_ACTIVE_TRANSACTION: + msgfmt = include_pid ? + _("oldest xmin source: active transaction (pid=%d)\n") : + _("oldest xmin source: active transaction\n"); + break; + case OLDESTXMIN_SOURCE_HOT_STANDBY_FEEDBACK: + msgfmt = include_pid ? + _("oldest xmin source: hot standby feedback (pid=%d)\n") : + _("oldest xmin source: hot standby feedback\n"); + break; + case OLDESTXMIN_SOURCE_PREPARED_TRANSACTION: + msgfmt = _("oldest xmin source: prepared transaction\n"); + break; + case OLDESTXMIN_SOURCE_REPLICATION_SLOT: + msgfmt = _("oldest xmin source: replication slot\n"); + break; + default: + msgfmt = _("oldest xmin source: other\n"); + break; + } + + if (include_pid) + appendStringInfo(&buf, msgfmt, pid); + else + appendStringInfoString(&buf, msgfmt); + } if (frozenxid_updated) { diff = (int32) (vacrel->NewRelfrozenXid - diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 0528d1b6ecbe..7be66d18fab4 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1133,7 +1133,9 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams params, * that only one vacuum process can be working on a particular table at * any time, and that each vacuum is always an independent transaction. */ - cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel); + cutoffs->OldestXmin = + GetOldestNonRemovableTransactionIdWithReason(rel, + &cutoffs->oldest_xmin_info); Assert(TransactionIdIsNormal(cutoffs->OldestXmin)); diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 200f72c6e256..5fe266234b1c 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -241,6 +241,12 @@ typedef struct ComputeXidHorizonsResult * session's temporary tables. */ TransactionId temp_oldest_nonremovable; + + /* Identify what forced each of the horizons above. */ + OldestXminInfo shared_oldest_nonremovable_info; + OldestXminInfo catalog_oldest_nonremovable_info; + OldestXminInfo data_oldest_nonremovable_info; + OldestXminInfo temp_oldest_nonremovable_info; } ComputeXidHorizonsResult; /* @@ -1622,6 +1628,46 @@ TransactionIdIsInProgress(TransactionId xid) return false; } +/* + * Store horizon provenance in *info if caller requested it. + * + * Callers pass NULL when they are not interested in tracking the source. + */ +static inline void +OldestXminInfoSet(OldestXminInfo *info, OldestXminSource source, + int backend_pid) +{ + if (info == NULL) + return; + + info->source = source; + info->backend_pid = backend_pid; +} + +/* + * Update a tracked OldestXmin horizon with a candidate xid and source. + * + * If the candidate is older than *target, adopt it and remember why. + */ +static inline void +UpdateOldestXmin(TransactionId *target, OldestXminInfo *info, + TransactionId candidate, OldestXminSource source, + int backend_pid) +{ + TransactionId old; + TransactionId new_horizon; + + if (!TransactionIdIsValid(candidate)) + return; + + old = *target; + new_horizon = TransactionIdOlder(old, candidate); + if (TransactionIdEquals(new_horizon, old)) + return; + + *target = new_horizon; + OldestXminInfoSet(info, source, backend_pid); +} /* * Determine XID horizons. @@ -1689,6 +1735,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) /* inferred after ProcArrayLock is released */ h->catalog_oldest_nonremovable = InvalidTransactionId; + OldestXminInfoSet(&h->catalog_oldest_nonremovable_info, + OLDESTXMIN_SOURCE_OTHER, 0); LWLockAcquire(ProcArrayLock, LW_SHARED); @@ -1710,6 +1758,10 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) h->oldest_considered_running = initial; h->shared_oldest_nonremovable = initial; h->data_oldest_nonremovable = initial; + OldestXminInfoSet(&h->shared_oldest_nonremovable_info, + OLDESTXMIN_SOURCE_OTHER, 0); + OldestXminInfoSet(&h->data_oldest_nonremovable_info, + OLDESTXMIN_SOURCE_OTHER, 0); /* * Only modifications made by this backend affect the horizon for @@ -1724,9 +1776,17 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) * latestCompletedXid. */ if (TransactionIdIsValid(MyProc->xid)) + { h->temp_oldest_nonremovable = MyProc->xid; + OldestXminInfoSet(&h->temp_oldest_nonremovable_info, + OLDESTXMIN_SOURCE_ACTIVE_TRANSACTION, MyProcPid); + } else + { h->temp_oldest_nonremovable = initial; + OldestXminInfoSet(&h->temp_oldest_nonremovable_info, + OLDESTXMIN_SOURCE_OTHER, 0); + } } /* @@ -1744,6 +1804,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) int8 statusFlags = ProcGlobal->statusFlags[index]; TransactionId xid; TransactionId xmin; + OldestXminSource candidate_source; + int candidate_pid; /* Fetch xid just once - see GetNewTransactionId */ xid = UINT32_ACCESS_ONCE(other_xids[index]); @@ -1768,8 +1830,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) * backends are protected even without this check, it can't hurt to * include them here as well.. */ - h->oldest_considered_running = - TransactionIdOlder(h->oldest_considered_running, xmin); + UpdateOldestXmin(&h->oldest_considered_running, NULL, xmin, + OLDESTXMIN_SOURCE_OTHER, 0); /* * Skip over backends either vacuuming (which is ok with rows being @@ -1780,8 +1842,17 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) continue; /* shared tables need to take backends in all databases into account */ - h->shared_oldest_nonremovable = - TransactionIdOlder(h->shared_oldest_nonremovable, xmin); + candidate_pid = proc->pid; + if (proc->pid == 0) + candidate_source = OLDESTXMIN_SOURCE_PREPARED_TRANSACTION; + else if (statusFlags & PROC_AFFECTS_ALL_HORIZONS) + candidate_source = OLDESTXMIN_SOURCE_HOT_STANDBY_FEEDBACK; + else + candidate_source = OLDESTXMIN_SOURCE_ACTIVE_TRANSACTION; + + UpdateOldestXmin(&h->shared_oldest_nonremovable, + &h->shared_oldest_nonremovable_info, + xmin, candidate_source, candidate_pid); /* * Normally sessions in other databases are ignored for anything but @@ -1807,8 +1878,9 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) (statusFlags & PROC_AFFECTS_ALL_HORIZONS) || in_recovery) { - h->data_oldest_nonremovable = - TransactionIdOlder(h->data_oldest_nonremovable, xmin); + UpdateOldestXmin(&h->data_oldest_nonremovable, + &h->data_oldest_nonremovable_info, + xmin, candidate_source, candidate_pid); } } @@ -1827,12 +1899,14 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) if (in_recovery) { - h->oldest_considered_running = - TransactionIdOlder(h->oldest_considered_running, kaxmin); - h->shared_oldest_nonremovable = - TransactionIdOlder(h->shared_oldest_nonremovable, kaxmin); - h->data_oldest_nonremovable = - TransactionIdOlder(h->data_oldest_nonremovable, kaxmin); + UpdateOldestXmin(&h->oldest_considered_running, NULL, + kaxmin, OLDESTXMIN_SOURCE_OTHER, 0); + UpdateOldestXmin(&h->shared_oldest_nonremovable, + &h->shared_oldest_nonremovable_info, + kaxmin, OLDESTXMIN_SOURCE_OTHER, 0); + UpdateOldestXmin(&h->data_oldest_nonremovable, + &h->data_oldest_nonremovable_info, + kaxmin, OLDESTXMIN_SOURCE_OTHER, 0); /* temp relations cannot be accessed in recovery */ } @@ -1844,10 +1918,12 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) /* * Check whether there are replication slots requiring an older xmin. */ - h->shared_oldest_nonremovable = - TransactionIdOlder(h->shared_oldest_nonremovable, h->slot_xmin); - h->data_oldest_nonremovable = - TransactionIdOlder(h->data_oldest_nonremovable, h->slot_xmin); + UpdateOldestXmin(&h->shared_oldest_nonremovable, + &h->shared_oldest_nonremovable_info, + h->slot_xmin, OLDESTXMIN_SOURCE_REPLICATION_SLOT, 0); + UpdateOldestXmin(&h->data_oldest_nonremovable, + &h->data_oldest_nonremovable_info, + h->slot_xmin, OLDESTXMIN_SOURCE_REPLICATION_SLOT, 0); /* * The only difference between catalog / data horizons is that the slot's @@ -1857,13 +1933,16 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) * that also can contain catalogs. */ h->shared_oldest_nonremovable_raw = h->shared_oldest_nonremovable; - h->shared_oldest_nonremovable = - TransactionIdOlder(h->shared_oldest_nonremovable, - h->slot_catalog_xmin); + UpdateOldestXmin(&h->shared_oldest_nonremovable, + &h->shared_oldest_nonremovable_info, + h->slot_catalog_xmin, + OLDESTXMIN_SOURCE_REPLICATION_SLOT, 0); h->catalog_oldest_nonremovable = h->data_oldest_nonremovable; - h->catalog_oldest_nonremovable = - TransactionIdOlder(h->catalog_oldest_nonremovable, - h->slot_catalog_xmin); + h->catalog_oldest_nonremovable_info = h->data_oldest_nonremovable_info; + UpdateOldestXmin(&h->catalog_oldest_nonremovable, + &h->catalog_oldest_nonremovable_info, + h->slot_catalog_xmin, + OLDESTXMIN_SOURCE_REPLICATION_SLOT, 0); /* * It's possible that slots backed up the horizons further than @@ -1951,25 +2030,53 @@ GlobalVisHorizonKindForRel(Relation rel) */ TransactionId GetOldestNonRemovableTransactionId(Relation rel) +{ + /* Delegate to the WithReason variant to avoid duplicated logic */ + return GetOldestNonRemovableTransactionIdWithReason(rel, NULL); +} + +/* + * Return horizon like GetOldestNonRemovableTransactionId(), and also classify + * the reason that determined that horizon at the time of computation. + */ +TransactionId +GetOldestNonRemovableTransactionIdWithReason(Relation rel, + OldestXminInfo *out_info) { ComputeXidHorizonsResult horizons; + TransactionId target = InvalidTransactionId; + GlobalVisHorizonKind kind; + const OldestXminInfo *source_info = NULL; + + if (out_info != NULL) + OldestXminInfoSet(out_info, OLDESTXMIN_SOURCE_OTHER, 0); ComputeXidHorizons(&horizons); - switch (GlobalVisHorizonKindForRel(rel)) + kind = GlobalVisHorizonKindForRel(rel); + switch (kind) { case VISHORIZON_SHARED: - return horizons.shared_oldest_nonremovable; + target = horizons.shared_oldest_nonremovable; + source_info = &horizons.shared_oldest_nonremovable_info; + break; case VISHORIZON_CATALOG: - return horizons.catalog_oldest_nonremovable; + target = horizons.catalog_oldest_nonremovable; + source_info = &horizons.catalog_oldest_nonremovable_info; + break; case VISHORIZON_DATA: - return horizons.data_oldest_nonremovable; + target = horizons.data_oldest_nonremovable; + source_info = &horizons.data_oldest_nonremovable_info; + break; case VISHORIZON_TEMP: - return horizons.temp_oldest_nonremovable; + target = horizons.temp_oldest_nonremovable; + source_info = &horizons.temp_oldest_nonremovable_info; + break; } - /* just to prevent compiler warnings */ - return InvalidTransactionId; + if (out_info != NULL && TransactionIdIsValid(target) && source_info != NULL) + *out_info = *source_info; + return target; } /* diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 1f3290c7fbfb..025c22eabd23 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -24,6 +24,7 @@ #include "parser/parse_node.h" #include "storage/buf.h" #include "storage/lock.h" +#include "storage/procarray.h" #include "utils/relcache.h" /* @@ -288,6 +289,9 @@ struct VacuumCutoffs */ TransactionId FreezeLimit; MultiXactId MultiXactCutoff; + + /* What decided OldestXmin at acquisition time */ + OldestXminInfo oldest_xmin_info; }; /* diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 2f4ae06c2793..fb459f9b6c92 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -20,6 +20,25 @@ #include "utils/snapshot.h" +/* + * Identifies what determined a relation's OldestXmin horizon. + * Used by autovacuum to report why dead tuples were not removable. + */ +typedef enum OldestXminSource +{ + OLDESTXMIN_SOURCE_ACTIVE_TRANSACTION, + OLDESTXMIN_SOURCE_HOT_STANDBY_FEEDBACK, + OLDESTXMIN_SOURCE_PREPARED_TRANSACTION, + OLDESTXMIN_SOURCE_REPLICATION_SLOT, + OLDESTXMIN_SOURCE_OTHER +} OldestXminSource; + +typedef struct OldestXminInfo +{ + OldestXminSource source; + int backend_pid; +} OldestXminInfo; + extern Size ProcArrayShmemSize(void); extern void ProcArrayShmemInit(void); extern void ProcArrayAdd(PGPROC *proc); @@ -54,6 +73,8 @@ extern RunningTransactions GetRunningTransactionData(void); extern bool TransactionIdIsInProgress(TransactionId xid); extern TransactionId GetOldestNonRemovableTransactionId(Relation rel); +extern TransactionId GetOldestNonRemovableTransactionIdWithReason(Relation rel, + OldestXminInfo *info); extern TransactionId GetOldestTransactionIdConsideredRunning(void); extern TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs); diff --git a/src/test/modules/test_misc/meson.build b/src/test/modules/test_misc/meson.build index f258bf1ccd94..b42fed911848 100644 --- a/src/test/modules/test_misc/meson.build +++ b/src/test/modules/test_misc/meson.build @@ -18,6 +18,7 @@ tests += { 't/007_catcache_inval.pl', 't/008_replslot_single_user.pl', 't/009_log_temp_files.pl', + 't/010_autovacuum_oldest_xmin_reason.pl', ], }, } diff --git a/src/test/modules/test_misc/t/010_autovacuum_oldest_xmin_reason.pl b/src/test/modules/test_misc/t/010_autovacuum_oldest_xmin_reason.pl new file mode 100644 index 000000000000..5818f143182b --- /dev/null +++ b/src/test/modules/test_misc/t/010_autovacuum_oldest_xmin_reason.pl @@ -0,0 +1,186 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group +# +# Validate that autovacuum logs explain why dead tuples could not be removed. + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Set up a cluster +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init(allows_streaming => 'logical'); +$node->append_conf('postgresql.conf', qq[ +max_prepared_transactions = 5 +autovacuum_naptime = 1s +autovacuum_vacuum_cost_delay = 0 +log_autovacuum_min_duration = 0 +]); +$node->start; + +# Helper to wait for autovacuum to complete +sub wait_for_autovacuum +{ + my ($relname) = @_; + my $vacuumed = $node->poll_query_until( + 'postgres', qq[ + SELECT last_autovacuum IS NOT NULL + FROM pg_stat_user_tables + WHERE relname = '$relname'; + ], 't'); + ok($vacuumed, "autovacuum completed on $relname") + or BAIL_OUT("timed out waiting for autovacuum on $relname"); +} + +# +# Active transaction +# +my $active_table = 'av_oldestxmin_active'; +$node->safe_psql('postgres', qq[ +CREATE TABLE $active_table(id int) + WITH (autovacuum_vacuum_scale_factor = 0, + autovacuum_vacuum_threshold = 1); +INSERT INTO $active_table SELECT generate_series(1, 10); +]); + +my $offset = -s $node->logfile; + +my $blocker = $node->background_psql('postgres'); +$blocker->query_safe(qq[ +BEGIN; +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SELECT * FROM $active_table; +]); + +my $blocker_pid = $blocker->query_safe('SELECT pg_backend_pid();'); +chomp($blocker_pid); + +$node->safe_psql('postgres', "DELETE FROM $active_table;"); + +wait_for_autovacuum($active_table); + +ok( $node->wait_for_log( + qr/automatic vacuum of table "\Qpostgres.public.$active_table\E":.*oldest xmin source: active transaction \(pid=$blocker_pid\)/s, + $offset), + 'autovacuum reported active transaction as oldest xmin source'); + +$blocker->query_safe('ROLLBACK;'); +$blocker->quit; +$node->safe_psql('postgres', "DROP TABLE $active_table;"); + +# +# Prepared transaction +# +my $prepared_table = 'av_oldestxmin_prepared'; +$node->safe_psql('postgres', qq[ +CREATE TABLE $prepared_table(id int) + WITH (autovacuum_vacuum_scale_factor = 0, + autovacuum_vacuum_threshold = 1); +INSERT INTO $prepared_table SELECT generate_series(1, 10); +]); + +$node->safe_psql('postgres', qq[ +BEGIN; +PREPARE TRANSACTION 'gx_autovac_xmin'; +]); + +$offset = -s $node->logfile; + +$node->safe_psql('postgres', "DELETE FROM $prepared_table;"); + +wait_for_autovacuum($prepared_table); + +ok( $node->wait_for_log( + qr/automatic vacuum of table "\Qpostgres.public.$prepared_table\E":.*oldest xmin source: prepared transaction/s, + $offset), + 'autovacuum reported prepared transaction as oldest xmin source'); + +$node->safe_psql('postgres', "ROLLBACK PREPARED 'gx_autovac_xmin';"); +$node->safe_psql('postgres', "DROP TABLE $prepared_table;"); + +# +# Replication slot +# +my $slot_table = 'av_oldestxmin_slot'; +$node->safe_psql('postgres', qq[ +CREATE TABLE $slot_table(id int); +]); + +$node->safe_psql('postgres',q[ +SELECT pg_create_logical_replication_slot('autovac_oldestxmin_slot', 'test_decoding'); +]); +$node->safe_psql('postgres', "DROP TABLE $slot_table;"); + +my $slotstderr = ''; +$node->psql('postgres', + 'VACUUM (VERBOSE) pg_catalog.pg_class;', + stderr => \$slotstderr +); +like( + $slotstderr, + qr/oldest xmin source: replication slot/, + 'VACUUM VERBOSE reported replication slot as oldest xmin source'); + +$node->safe_psql('postgres', q[ +SELECT pg_drop_replication_slot('autovac_oldestxmin_slot'); +]); + +# +# Hot standby feedback +# +my $hs_table = 'av_oldestxmin_hotstandby'; +$node->safe_psql('postgres', qq[ +CREATE TABLE $hs_table(id int) + WITH (autovacuum_vacuum_scale_factor = 0, + autovacuum_vacuum_threshold = 1); +INSERT INTO $hs_table SELECT generate_series(1, 10); +]); + +# Set up a hot standby +$node->backup('oldestxmin_hotstandby_bkp'); +my $standby = PostgreSQL::Test::Cluster->new('oldestxmin_standby'); +$standby->init_from_backup($node, 'oldestxmin_hotstandby_bkp', + has_streaming => 1); +$standby->append_conf('postgresql.conf', qq[ +hot_standby_feedback = on +wal_receiver_status_interval = 1s +]); +$standby->start; +$node->wait_for_catchup($standby, 'replay', $node->lsn('flush')); + +my $standby_reader = $standby->background_psql('postgres'); +$standby_reader->query_safe('BEGIN;'); + +$node->poll_query_until('postgres', q[ +SELECT bool_or(backend_xmin IS NOT NULL) +FROM pg_stat_replication; +], 't'); + +my $hs_pid = $node->safe_psql('postgres', q[ +SELECT pid FROM pg_stat_replication +WHERE backend_xmin IS NOT NULL +ORDER BY pid +LIMIT 1; +]); +chomp($hs_pid); + +$offset = -s $node->logfile; + +$node->safe_psql('postgres', "DELETE FROM $hs_table;"); + +wait_for_autovacuum($hs_table); + +ok( $node->wait_for_log( + qr/oldest xmin source: hot standby feedback \(pid=$hs_pid\)/, + $offset), + 'autovacuum reported hot standby feedback as oldest xmin source'); + +$standby_reader->query_safe('ROLLBACK;'); +$standby_reader->quit; +$node->safe_psql('postgres', "DROP TABLE $hs_table;"); + +$standby->stop; +$node->stop; +done_testing();