diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 8f4b282c6b1b..110ab1755275 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -23,6 +23,7 @@ #include "fe_utils/archive.h" #include "filemap.h" #include "pg_rewind.h" +#include "storage/standbydefs.h" /* * RmgrNames is an array of the built-in resource manager names, to make error @@ -50,6 +51,10 @@ typedef struct XLogPageReadPrivate int tliIndex; } XLogPageReadPrivate; +static ControlFileData ControlFile_target; +static ControlFileData ControlFile_source; + + static int SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf); @@ -161,10 +166,71 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex, return endptr; } +/* + * is_shutdown_only_sequence(xlogreader, endptr) + * + * An XLogReaderState positioned at the end-of-wal (endptr), + * walk backwards a small number of records to determine whether the + * tail consists solely of zero-or-more RUNNING_XACTS (RM_STANDBY_ID) + * optionally followed by a single CHECKPOINT_SHUTDOWN (RM_XLOG_ID). + * + * Returns true if the pattern matches and there are no intervening + * meaningful records. + */ + +static bool +is_shutdown_only_sequence(XLogReaderState *xlogreader, XLogRecPtr endptr) +{ + XLogRecPtr searchptr = endptr; + int seen_running_xacts = 0; + int max_steps = 8; + + while (max_steps-- > 0) + { + char *errormsg; + XLogRecord *record; + + XLogBeginRead(xlogreader, searchptr); + record = XLogReadRecord(xlogreader, &errormsg); + + if (record == NULL) + return false; + + /* fetch RMID and info */ + uint8 rmid; + uint8 info; + rmid = XLogRecGetRmid(xlogreader); + info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK; + + if (rmid == RM_STANDBY_ID && info == XLOG_RUNNING_XACTS) + { + seen_running_xacts++; + searchptr = record->xl_prev; + continue; + } + + if (rmid == RM_XLOG_ID && + (info == XLOG_CHECKPOINT_SHUTDOWN || info == XLOG_CHECKPOINT_ONLINE)) + { + /* Only shutdown checkpoints with preceding RUNNING_XACTS allowed */ + if (info == XLOG_CHECKPOINT_SHUTDOWN) + return true; + else + return false; /* online checkpoint → meaningful */ + } + + /* any other record type means there were changes */ + return false; + } + + /* we hit step cap without decisive answer → play safe */ + return false; +} + /* * Find the previous checkpoint preceding given WAL location. */ -void +WalEndStat findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo, const char *restoreCommand) @@ -210,13 +276,18 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, if (record == NULL) { - if (errormsg) - pg_fatal("could not find previous WAL record at %X/%08X: %s", - LSN_FORMAT_ARGS(searchptr), - errormsg); - else - pg_fatal("could not find previous WAL record at %X/%08X", - LSN_FORMAT_ARGS(searchptr)); + /* Check for benign shutdown checkpoint difference */ + if (control_diff_is_benign(&ControlFile_source, &ControlFile_target)) + { + fprintf(stderr, + "pg_rewind: benign shutdown checkpoint difference detected, skipping rewind\n"); + return InvalidXLogRecPtr; /* or other appropriate return to skip rewind */ + } + + /* Otherwise, fatal error */ + pg_fatal("could not find previous WAL record at %X/%X: %s", + LSN_FORMAT_ARGS(searchptr), + errormsg ? errormsg : "unknown error"); } /* Detect if a new WAL file has been opened */ diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index e9364d04f76e..045115ea8a60 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -116,6 +116,44 @@ usage(const char *progname) printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); } +/* + * Compare two ControlFileData structs. + * Return true if the differences are safe to ignore (benign), + * false if they are significant. + * + */ +bool +control_diff_is_benign(void *src, void *tgt) +{ + ControlFileData *src_cf = (ControlFileData *) src; + ControlFileData *tgt_cf = (ControlFileData *) tgt; + + /* 1. Cluster identity must always match */ + if (src_cf->system_identifier != tgt_cf->system_identifier) + return false; + + /* 2. Check timeline consistency */ + if (src_cf->checkPointCopy.ThisTimeLineID != tgt_cf->checkPointCopy.ThisTimeLineID) + return false; + + /* 3. Transaction ID progression */ + if (src_cf->checkPointCopy.nextXid.value != tgt_cf->checkPointCopy.nextXid.value) + return false; + + /* 4. OID progression */ + if (src_cf->checkPointCopy.nextOid != tgt_cf->checkPointCopy.nextOid) + return false; + + /* 5. MultiXact progression */ + if (src_cf->checkPointCopy.nextMulti != tgt_cf->checkPointCopy.nextMulti) + return false; + + /* 6. Checkpoint LSN should not go backwards */ + if (src_cf->checkPoint < tgt_cf->checkPoint) + return false; + + return true; +} int main(int argc, char **argv) @@ -438,21 +476,71 @@ main(int argc, char **argv) } /* - * Check for the possibility that the target is in fact a direct - * ancestor of the source. In that case, there is no divergent history - * in the target that needs rewinding. - */ - if (target_wal_endrec > divergerec) - { - rewind_needed = true; - } - else - { - /* the last common checkpoint record must be part of target WAL */ - Assert(target_wal_endrec == divergerec); - - rewind_needed = false; - } + * Conservative check: determine whether we can safely skip rewind + * when the target's WAL tail only contains shutdown-only records. + */ + + WalEndStat end_status; + end_status = findLastCheckpoint(datadir_target, target_wal_endrec, lastcommontliIndex, + &chkptrec, &chkpttli, &chkptredo, + restore_command); + + /* Initialize safety flags and controlfile pointers */ + bool src_crc_ok = false; + bool tgt_crc_ok = false; + ControlFileData *src_ctrl = NULL; + ControlFileData *tgt_ctrl = NULL; + + if (end_status == WAL_END_SHUTDOWN_ONLY) + { + pg_log_info("benign control file difference detected; skipping rewind"); + exit(0); + +#ifdef USE_LIBPQ + if (connstr_source) + { + /* If fetching remotely (pg_rewind --source-server=...) */ + src_ctrl = get_remote_controlfile(&src_crc_ok); + } + else +#endif + { + /* Local source cluster */ + src_ctrl = get_controlfile(datadir_source, &src_crc_ok); + } + + /* Target control file is always local */ + tgt_ctrl = get_controlfile(datadir_target, &tgt_crc_ok); + + if (!src_ctrl || !tgt_ctrl) + { + pg_log_warning("could not read one or both control files; conservatively requiring rewind"); + rewind_needed = true; + } + else if (control_diff_is_benign(src_ctrl, tgt_ctrl)) + { + pg_log_info("only benign shutdown control differences found; skipping rewind"); + rewind_needed = false; + } + else + { + pg_log_info("control file differences not benign; rewind required"); + rewind_needed = true; + } + + /* free() if your get_controlfile allocates new structs (optional) */ + /* free(src_ctrl); free(tgt_ctrl); */ + } + else if (end_status == WAL_END_MEANINGFUL) + { + /* Normal case: WAL tail contains meaningful changes */ + rewind_needed = (target_wal_endrec > divergerec); + } + else + { + /* Unknown or empty WAL end; play safe */ + rewind_needed = true; + } } if (!rewind_needed) diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h index 9cea144d2b24..94b75ea9bfa3 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -35,7 +35,15 @@ extern uint64 fetch_done; extern void extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint, const char *restoreCommand); -extern void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, + +typedef enum WalEndStat +{ + WAL_END_MEANINGFUL, + WAL_END_SHUTDOWN_ONLY, + WAL_END_EMPTY +} WalEndStat; + +extern WalEndStat findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo, @@ -43,6 +51,8 @@ extern void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex, const char *restoreCommand); +bool control_diff_is_benign(void *src, void *tgt); + /* in pg_rewind.c */ extern void progress_report(bool finished);