summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHeikki Linnakangas2025-12-15 11:30:17 +0000
committerHeikki Linnakangas2025-12-15 11:30:17 +0000
commitecb553ae8211e3d135e0c9d42b90cc22be51d27c (patch)
tree41930d6a908fabd483d7593ae43bcb0fe2ff6faf
parent77038d6d0b49622e719fc00ed46db0ab47d2b747 (diff)
Improve sanity checks on multixid members lengthHEADmaster
In the server, check explicitly for multixids with zero members. We used to have an assertion for it, but commit d4b7bde418 replaced it with more extensive runtime checks, but it missed the original case of zero members. In the upgrade code, a negative length never makes sense, so better check for it explicitly. Commit d4b7bde418 added a similar sanity check to the corresponding server code on master, and in backbranches, the 'length' is passed to palloc which would fail with "invalid memory alloc request size" error. Clarify the comments on what kind of invalid entries are tolerated by the upgrade code and which ones are reported as fatal errors. Coverity complained about 'length' in the upgrade code being tainted. That's bogus because we trust the data on disk at least to some extent, but hopefully this will silence the complaint. If not, I'll dismiss it manually. Discussion: https://www.postgresql.org/message-id/7b505284-c6e9-4c80-a7ee-816493170abc@iki.fi
-rw-r--r--src/backend/access/transam/multixact.c5
-rw-r--r--src/bin/pg_upgrade/multixact_read_v18.c44
2 files changed, 38 insertions, 11 deletions
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index f4fab7edfee..34956a5a663 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1262,6 +1262,11 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("MultiXact %u has invalid next offset", multi)));
+ if (nextMXOffset == offset)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
+ multi, offset)));
if (nextMXOffset < offset)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/src/bin/pg_upgrade/multixact_read_v18.c b/src/bin/pg_upgrade/multixact_read_v18.c
index f7ed97f702b..dc4ed1b3ae6 100644
--- a/src/bin/pg_upgrade/multixact_read_v18.c
+++ b/src/bin/pg_upgrade/multixact_read_v18.c
@@ -146,11 +146,14 @@ AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
* - Because there's no concurrent activity, we don't need to worry about
* locking and some corner cases.
*
- * - Don't bail out on invalid entries. If the server crashes, it can leave
- * invalid or half-written entries on disk. Such multixids won't appear
- * anywhere else on disk, so the server will never try to read them. During
- * upgrade, however, we scan through all multixids in order, and will
- * encounter such invalid but unreferenced multixids too.
+ * - Don't bail out on invalid entries that could've been left behind after a
+ * server crash. Such multixids won't appear anywhere else on disk, so the
+ * server will never try to read them. During upgrade, however, we scan
+ * through all multixids in order, and will encounter such invalid but
+ * unreferenced multixids too. We try to distinguish between entries that
+ * are invalid because of missed disk writes, like entries with zeros in
+ * offsets or members, and entries that look corrupt in other ways that
+ * should not happen even on a server crash.
*
* Returns true on success, false if the multixact was invalid.
*/
@@ -211,7 +214,7 @@ GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
if (offset == 0)
{
- /* Invalid entry */
+ /* Invalid entry. These can be left behind on a server crash. */
return false;
}
@@ -247,11 +250,29 @@ GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
if (nextMXOffset == 0)
{
- /* Invalid entry */
+ /* Invalid entry. These can be left behind on a server crash. */
return false;
}
length = nextMXOffset - offset;
+ if (length < 0)
+ {
+ /*
+ * This entry is corrupt. We should not see these even after a server
+ * crash.
+ */
+ pg_fatal("multixact %u has an invalid length (%d)", multi, length);
+ }
+ if (length == 0)
+ {
+ /*
+ * Invalid entry. The server never writes multixids with zero
+ * members, but it's not clear if a server crash or using pg_resetwal
+ * could leave them behind. Seems best to accept them.
+ */
+ return false;
+ }
+
/* read the members */
prev_pageno = -1;
for (int i = 0; i < length; i++, offset++)
@@ -284,10 +305,11 @@ GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
/*
* Otherwise this is an invalid entry that should not be
- * referenced from anywhere in the heap. We could return 'false'
- * here, but we prefer to continue reading the members and
- * converting them the best we can, to preserve evidence in case
- * this is corruption that should not happen.
+ * referenced from anywhere in the heap. These can be left behind
+ * on a server crash. We could return 'false' here, but we prefer
+ * to continue reading the members and converting them the best we
+ * can, to preserve evidence in case this is corruption that
+ * should not have happened.
*/
}