From a1dd9b3e2e13c22cad5f65244d51b94ab01bcbf5 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 28 Mar 2025 16:49:04 +0100 Subject: [PATCH 1/6] Fix grammar in GIN README Author: Kirill Reshke Discussion: https://postgr.es/m/CALdSSPgu9uAhVYojQ0yjG%3Dq5MaqmiSLUJPhz%2B-u7cA6K6Mc9UA%40mail.gmail.com --- src/backend/access/gin/README | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index b08073162128..742bcbad499f 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -237,10 +237,10 @@ GIN packs keys and downlinks into tuples in a different way. P_i is grouped with K_{i+1}. -Inf key is not needed. -There are couple of additional notes regarding K_{n+1} key. -1) In entry tree rightmost page, a key coupled with P_n doesn't really matter. +There are a couple of additional notes regarding K_{n+1} key. +1) In the entry tree on the rightmost page, a key coupled with P_n doesn't really matter. Highkey is assumed to be infinity. -2) In posting tree, a key coupled with P_n always doesn't matter. Highkey for +2) In the posting tree, a key coupled with P_n always doesn't matter. Highkey for non-rightmost pages is stored separately and accessed via GinDataPageGetRightBound(). From 43f209a1fdba74ee76e56fc556d08898647b93c2 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 28 Mar 2025 15:40:09 +0100 Subject: [PATCH 2/6] amcheck: Move common routines into a separate module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before performing checks on an index, we need to take some safety measures that apply to all index AMs. This includes: * verifying that the index can be checked - Only selected AMs are supported by amcheck (right now only B-Tree). The index has to be valid and not a temporary index from another session. * changing (and then restoring) user's security context * obtaining proper locks on the index (and table, if needed) * discarding GUC changes from the index functions Until now this was implemented in the B-Tree amcheck module, but it's something every AM will have to do. So relocate the code into a new module verify_common for reuse. The shared steps are implemented by amcheck_lock_relation_and_check(), receiving the AM-specific verification as a callback. Custom parameters may be supplied using a pointer. Author: Andrey Borodin Reviewed-By: José Villanova Reviewed-By: Aleksander Alekseev Reviewed-By: Nikolay Samokhvalov Reviewed-By: Andres Freund Reviewed-By: Tomas Vondra Reviewed-By: Mark Dilger Reviewed-By: Peter Geoghegan Reviewed-By: Kirill Reshke Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru --- contrib/amcheck/Makefile | 1 + contrib/amcheck/expected/check_btree.out | 4 +- contrib/amcheck/meson.build | 1 + contrib/amcheck/verify_common.c | 191 ++++++++++++++++ contrib/amcheck/verify_common.h | 31 +++ contrib/amcheck/verify_nbtree.c | 267 ++++++----------------- src/tools/pgindent/typedefs.list | 1 + 7 files changed, 297 insertions(+), 199 deletions(-) create mode 100644 contrib/amcheck/verify_common.c create mode 100644 contrib/amcheck/verify_common.h diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile index 5e9002d25018..c3d70f3369cb 100644 --- a/contrib/amcheck/Makefile +++ b/contrib/amcheck/Makefile @@ -3,6 +3,7 @@ MODULE_big = amcheck OBJS = \ $(WIN32RES) \ + verify_common.o \ verify_heapam.o \ verify_nbtree.o diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out index e7fb5f551574..c6f4b16c5561 100644 --- a/contrib/amcheck/expected/check_btree.out +++ b/contrib/amcheck/expected/check_btree.out @@ -57,8 +57,8 @@ ERROR: could not open relation with OID 17 BEGIN; CREATE INDEX bttest_a_brin_idx ON bttest_a USING brin(id); SELECT bt_index_parent_check('bttest_a_brin_idx'); -ERROR: only B-Tree indexes are supported as targets for verification -DETAIL: Relation "bttest_a_brin_idx" is not a B-Tree index. +ERROR: expected "btree" index as targets for verification +DETAIL: Relation "bttest_a_brin_idx" is a brin index. ROLLBACK; -- normal check outside of xact SELECT bt_index_check('bttest_a_idx'); diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index 61d7eaf2305d..67a4ac8518d1 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ -1,6 +1,7 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group amcheck_sources = files( + 'verify_common.c', 'verify_heapam.c', 'verify_nbtree.c', ) diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c new file mode 100644 index 000000000000..d095e62ce551 --- /dev/null +++ b/contrib/amcheck/verify_common.c @@ -0,0 +1,191 @@ +/*------------------------------------------------------------------------- + * + * verify_common.c + * Utility functions common to all access methods. + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_common.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "verify_common.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablecmds.h" +#include "utils/guc.h" +#include "utils/syscache.h" + +static bool amcheck_index_mainfork_expected(Relation rel); + + +/* + * Check if index relation should have a file for its main relation fork. + * Verification uses this to skip unlogged indexes when in hot standby mode, + * where there is simply nothing to verify. + * + * NB: Caller should call index_checkable() before calling here. + */ +static bool +amcheck_index_mainfork_expected(Relation rel) +{ + if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || + !RecoveryInProgress()) + return true; + + ereport(NOTICE, + (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), + errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", + RelationGetRelationName(rel)))); + + return false; +} + +/* +* Amcheck main workhorse. +* Given index relation OID, lock relation. +* Next, take a number of standard actions: +* 1) Make sure the index can be checked +* 2) change the context of the user, +* 3) keep track of GUCs modified via index functions +* 4) execute callback function to verify integrity. +*/ +void +amcheck_lock_relation_and_check(Oid indrelid, + Oid am_id, + IndexDoCheckCallback check, + LOCKMODE lockmode, + void *state) +{ + Oid heapid; + Relation indrel; + Relation heaprel; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + + /* + * We must lock table before index to avoid deadlocks. However, if the + * passed indrelid isn't an index then IndexGetRelation() will fail. + * Rather than emitting a not-very-helpful error message, postpone + * complaining, expecting that the is-it-an-index test below will fail. + * + * In hot standby mode this will raise an error when parentcheck is true. + */ + heapid = IndexGetRelation(indrelid, true); + if (OidIsValid(heapid)) + { + heaprel = table_open(heapid, lockmode); + + /* + * Switch to the table owner's userid, so that any index functions are + * run as that user. Also lock down security-restricted operations + * and arrange to make GUC variable changes local to this command. + */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(heaprel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + } + else + { + heaprel = NULL; + /* Set these just to suppress "uninitialized variable" warnings */ + save_userid = InvalidOid; + save_sec_context = -1; + save_nestlevel = -1; + } + + /* + * Open the target index relations separately (like relation_openrv(), but + * with heap relation locked first to prevent deadlocking). In hot + * standby mode this will raise an error when parentcheck is true. + * + * There is no need for the usual indcheckxmin usability horizon test + * here, even in the heapallindexed case, because index undergoing + * verification only needs to have entries for a new transaction snapshot. + * (If this is a parentcheck verification, there is no question about + * committed or recently dead heap tuples lacking index entries due to + * concurrent activity.) + */ + indrel = index_open(indrelid, lockmode); + + /* + * Since we did the IndexGetRelation call above without any lock, it's + * barely possible that a race against an index drop/recreation could have + * netted us the wrong table. + */ + if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("could not open parent table of index \"%s\"", + RelationGetRelationName(indrel)))); + + /* Check that relation suitable for checking */ + if (index_checkable(indrel, am_id)) + check(indrel, heaprel, state, lockmode == ShareLock); + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* + * Release locks early. That's ok here because nothing in the called + * routines will trigger shared cache invalidations to be sent, so we can + * relax the usual pattern of only releasing locks after commit. + */ + index_close(indrel, lockmode); + if (heaprel) + table_close(heaprel, lockmode); +} + +/* + * Basic checks about the suitability of a relation for checking as an index. + * + * + * NB: Intentionally not checking permissions, the function is normally not + * callable by non-superusers. If granted, it's useful to be able to check a + * whole cluster. + */ +bool +index_checkable(Relation rel, Oid am_id) +{ + if (rel->rd_rel->relkind != RELKIND_INDEX || + rel->rd_rel->relam != am_id) + { + HeapTuple amtup; + HeapTuple amtuprel; + + amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(am_id)); + amtuprel = SearchSysCache1(AMOID, ObjectIdGetDatum(rel->rd_rel->relam)); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("expected \"%s\" index as targets for verification", NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)), + errdetail("Relation \"%s\" is a %s index.", + RelationGetRelationName(rel), NameStr(((Form_pg_am) GETSTRUCT(amtuprel))->amname)))); + } + + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"), + errdetail("Index \"%s\" is associated with temporary relation.", + RelationGetRelationName(rel)))); + + if (!rel->rd_index->indisvalid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot check index \"%s\"", + RelationGetRelationName(rel)), + errdetail("Index is not valid."))); + + return amcheck_index_mainfork_expected(rel); +} diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h new file mode 100644 index 000000000000..b2565bfbbab8 --- /dev/null +++ b/contrib/amcheck/verify_common.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * amcheck.h + * Shared routines for amcheck verifications. + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/amcheck.h + * + *------------------------------------------------------------------------- + */ +#include "storage/bufpage.h" +#include "storage/lmgr.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" +#include "miscadmin.h" + +/* Typedefs for callback functions for amcheck_lock_relation */ +typedef void (*IndexCheckableCallback) (Relation index); +typedef void (*IndexDoCheckCallback) (Relation rel, + Relation heaprel, + void *state, + bool readonly); + +extern void amcheck_lock_relation_and_check(Oid indrelid, + Oid am_id, + IndexDoCheckCallback check, + LOCKMODE lockmode, void *state); + +extern bool index_checkable(Relation rel, Oid am_id); diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index d56eb7637d39..f11c43a0ed79 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -30,6 +30,7 @@ #include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" +#include "verify_common.h" #include "catalog/index.h" #include "catalog/pg_am.h" #include "catalog/pg_opfamily_d.h" @@ -159,14 +160,22 @@ typedef struct BtreeLastVisibleEntry ItemPointer tid; /* Heap tid */ } BtreeLastVisibleEntry; +/* + * arguments for the bt_index_check_callback callback + */ +typedef struct BTCallbackState +{ + bool parentcheck; + bool heapallindexed; + bool rootdescend; + bool checkunique; +} BTCallbackState; + PG_FUNCTION_INFO_V1(bt_index_check); PG_FUNCTION_INFO_V1(bt_index_parent_check); -static void bt_index_check_internal(Oid indrelid, bool parentcheck, - bool heapallindexed, bool rootdescend, - bool checkunique); -static inline void btree_index_checkable(Relation rel); -static inline bool btree_index_mainfork_expected(Relation rel); +static void bt_index_check_callback(Relation indrel, Relation heaprel, + void *state, bool readonly); static void bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, bool readonly, bool heapallindexed, bool rootdescend, bool checkunique); @@ -241,15 +250,21 @@ Datum bt_index_check(PG_FUNCTION_ARGS) { Oid indrelid = PG_GETARG_OID(0); - bool heapallindexed = false; - bool checkunique = false; + BTCallbackState args; + + args.heapallindexed = false; + args.rootdescend = false; + args.parentcheck = false; + args.checkunique = false; if (PG_NARGS() >= 2) - heapallindexed = PG_GETARG_BOOL(1); - if (PG_NARGS() == 3) - checkunique = PG_GETARG_BOOL(2); + args.heapallindexed = PG_GETARG_BOOL(1); + if (PG_NARGS() >= 3) + args.checkunique = PG_GETARG_BOOL(2); - bt_index_check_internal(indrelid, false, heapallindexed, false, checkunique); + amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID, + bt_index_check_callback, + AccessShareLock, &args); PG_RETURN_VOID(); } @@ -267,18 +282,23 @@ Datum bt_index_parent_check(PG_FUNCTION_ARGS) { Oid indrelid = PG_GETARG_OID(0); - bool heapallindexed = false; - bool rootdescend = false; - bool checkunique = false; + BTCallbackState args; + + args.heapallindexed = false; + args.rootdescend = false; + args.parentcheck = true; + args.checkunique = false; if (PG_NARGS() >= 2) - heapallindexed = PG_GETARG_BOOL(1); + args.heapallindexed = PG_GETARG_BOOL(1); if (PG_NARGS() >= 3) - rootdescend = PG_GETARG_BOOL(2); - if (PG_NARGS() == 4) - checkunique = PG_GETARG_BOOL(3); + args.rootdescend = PG_GETARG_BOOL(2); + if (PG_NARGS() >= 4) + args.checkunique = PG_GETARG_BOOL(3); - bt_index_check_internal(indrelid, true, heapallindexed, rootdescend, checkunique); + amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID, + bt_index_check_callback, + ShareLock, &args); PG_RETURN_VOID(); } @@ -287,193 +307,46 @@ bt_index_parent_check(PG_FUNCTION_ARGS) * Helper for bt_index_[parent_]check, coordinating the bulk of the work. */ static void -bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, - bool rootdescend, bool checkunique) +bt_index_check_callback(Relation indrel, Relation heaprel, void *state, bool readonly) { - Oid heapid; - Relation indrel; - Relation heaprel; - LOCKMODE lockmode; - Oid save_userid; - int save_sec_context; - int save_nestlevel; - - if (parentcheck) - lockmode = ShareLock; - else - lockmode = AccessShareLock; - - /* - * We must lock table before index to avoid deadlocks. However, if the - * passed indrelid isn't an index then IndexGetRelation() will fail. - * Rather than emitting a not-very-helpful error message, postpone - * complaining, expecting that the is-it-an-index test below will fail. - * - * In hot standby mode this will raise an error when parentcheck is true. - */ - heapid = IndexGetRelation(indrelid, true); - if (OidIsValid(heapid)) - { - heaprel = table_open(heapid, lockmode); - - /* - * Switch to the table owner's userid, so that any index functions are - * run as that user. Also lock down security-restricted operations - * and arrange to make GUC variable changes local to this command. - */ - GetUserIdAndSecContext(&save_userid, &save_sec_context); - SetUserIdAndSecContext(heaprel->rd_rel->relowner, - save_sec_context | SECURITY_RESTRICTED_OPERATION); - save_nestlevel = NewGUCNestLevel(); - RestrictSearchPath(); - } - else - { - heaprel = NULL; - /* Set these just to suppress "uninitialized variable" warnings */ - save_userid = InvalidOid; - save_sec_context = -1; - save_nestlevel = -1; - } + BTCallbackState *args = (BTCallbackState *) state; + bool heapkeyspace, + allequalimage; - /* - * Open the target index relations separately (like relation_openrv(), but - * with heap relation locked first to prevent deadlocking). In hot - * standby mode this will raise an error when parentcheck is true. - * - * There is no need for the usual indcheckxmin usability horizon test - * here, even in the heapallindexed case, because index undergoing - * verification only needs to have entries for a new transaction snapshot. - * (If this is a parentcheck verification, there is no question about - * committed or recently dead heap tuples lacking index entries due to - * concurrent activity.) - */ - indrel = index_open(indrelid, lockmode); - - /* - * Since we did the IndexGetRelation call above without any lock, it's - * barely possible that a race against an index drop/recreation could have - * netted us the wrong table. - */ - if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("could not open parent table of index \"%s\"", + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" lacks a main relation fork", RelationGetRelationName(indrel)))); - /* Relation suitable for checking as B-Tree? */ - btree_index_checkable(indrel); - - if (btree_index_mainfork_expected(indrel)) + /* Extract metadata from metapage, and sanitize it in passing */ + _bt_metaversion(indrel, &heapkeyspace, &allequalimage); + if (allequalimage && !heapkeyspace) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", + RelationGetRelationName(indrel)))); + if (allequalimage && !_bt_allequalimage(indrel, false)) { - bool heapkeyspace, - allequalimage; + bool has_interval_ops = false; - if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" lacks a main relation fork", - RelationGetRelationName(indrel)))); - - /* Extract metadata from metapage, and sanitize it in passing */ - _bt_metaversion(indrel, &heapkeyspace, &allequalimage); - if (allequalimage && !heapkeyspace) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", - RelationGetRelationName(indrel)))); - if (allequalimage && !_bt_allequalimage(indrel, false)) - { - bool has_interval_ops = false; - - for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++) - if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID) - has_interval_ops = true; - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", - RelationGetRelationName(indrel)), - has_interval_ops - ? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.") - : 0)); - } - - /* Check index, possibly against table it is an index on */ - bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck, - heapallindexed, rootdescend, checkunique); + for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++) + if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID) + { + has_interval_ops = true; + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", + RelationGetRelationName(indrel)), + has_interval_ops + ? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.") + : 0)); + } } - /* Roll back any GUC changes executed by index functions */ - AtEOXact_GUC(false, save_nestlevel); - - /* Restore userid and security context */ - SetUserIdAndSecContext(save_userid, save_sec_context); - - /* - * Release locks early. That's ok here because nothing in the called - * routines will trigger shared cache invalidations to be sent, so we can - * relax the usual pattern of only releasing locks after commit. - */ - index_close(indrel, lockmode); - if (heaprel) - table_close(heaprel, lockmode); -} - -/* - * Basic checks about the suitability of a relation for checking as a B-Tree - * index. - * - * NB: Intentionally not checking permissions, the function is normally not - * callable by non-superusers. If granted, it's useful to be able to check a - * whole cluster. - */ -static inline void -btree_index_checkable(Relation rel) -{ - if (rel->rd_rel->relkind != RELKIND_INDEX || - rel->rd_rel->relam != BTREE_AM_OID) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("only B-Tree indexes are supported as targets for verification"), - errdetail("Relation \"%s\" is not a B-Tree index.", - RelationGetRelationName(rel)))); - - if (RELATION_IS_OTHER_TEMP(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot access temporary tables of other sessions"), - errdetail("Index \"%s\" is associated with temporary relation.", - RelationGetRelationName(rel)))); - - if (!rel->rd_index->indisvalid) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot check index \"%s\"", - RelationGetRelationName(rel)), - errdetail("Index is not valid."))); -} - -/* - * Check if B-Tree index relation should have a file for its main relation - * fork. Verification uses this to skip unlogged indexes when in hot standby - * mode, where there is simply nothing to verify. We behave as if the - * relation is empty. - * - * NB: Caller should call btree_index_checkable() before calling here. - */ -static inline bool -btree_index_mainfork_expected(Relation rel) -{ - if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || - !RecoveryInProgress()) - return true; - - ereport(DEBUG1, - (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), - errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", - RelationGetRelationName(rel)))); - - return false; + /* Check index, possibly against table it is an index on */ + bt_check_every_level(indrel, heaprel, heapkeyspace, readonly, + args->heapallindexed, args->rootdescend, args->checkunique); } /* diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 1279b69422a5..b66affbea56e 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -194,6 +194,7 @@ BOOLEAN BOX BTArrayKeyInfo BTBuildState +BTCallbackState BTCycleId BTDedupInterval BTDedupState From bf5e2a058e3e4800b6472c345990776af5f8f22e Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 28 Mar 2025 16:08:05 +0100 Subject: [PATCH 3/6] amcheck: Add gin_index_check() to verify GIN index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new functions validates two kinds of invariants on a GIN index: - parent-child consistency: Paths in a GIN graph have to contain consistent keys: tuples on parent pages consistently include tuples from children pages. That is, parent tuples must not require any adjustments. - balanced-tree / graph: Each internal page has at least one downlink, and can reference either only leaf pages or only internal pages. The GIN verification is based on work by Grigory Kryachko, reworked by Heikki Linnakangas and with various improvements by Andrey Borodin. Investigation and fixes for a couple bugs by Kirill Reshke. Author: Grigory Kryachko Author: Heikki Linnakangas Author: Andrey Borodin Reviewed-By: José Villanova Reviewed-By: Aleksander Alekseev Reviewed-By: Nikolay Samokhvalov Reviewed-By: Andres Freund Reviewed-By: Tomas Vondra Reviewed-By: Kirill Reshke Reviewed-By: Mark Dilger Reviewed-By: Peter Geoghegan Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru --- contrib/amcheck/Makefile | 6 +- contrib/amcheck/amcheck--1.4--1.5.sql | 14 + contrib/amcheck/amcheck.control | 2 +- contrib/amcheck/expected/check_gin.out | 64 ++ contrib/amcheck/meson.build | 3 + contrib/amcheck/sql/check_gin.sql | 40 ++ contrib/amcheck/verify_gin.c | 798 +++++++++++++++++++++++++ doc/src/sgml/amcheck.sgml | 20 + src/tools/pgindent/typedefs.list | 2 + 9 files changed, 946 insertions(+), 3 deletions(-) create mode 100644 contrib/amcheck/amcheck--1.4--1.5.sql create mode 100644 contrib/amcheck/expected/check_gin.out create mode 100644 contrib/amcheck/sql/check_gin.sql create mode 100644 contrib/amcheck/verify_gin.c diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile index c3d70f3369cb..1b7a63cbaa40 100644 --- a/contrib/amcheck/Makefile +++ b/contrib/amcheck/Makefile @@ -4,14 +4,16 @@ MODULE_big = amcheck OBJS = \ $(WIN32RES) \ verify_common.o \ + verify_gin.o \ verify_heapam.o \ verify_nbtree.o EXTENSION = amcheck -DATA = amcheck--1.3--1.4.sql amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql +DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \ + amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql PGFILEDESC = "amcheck - function for verifying relation integrity" -REGRESS = check check_btree check_heap +REGRESS = check check_btree check_gin check_heap EXTRA_INSTALL = contrib/pg_walinspect TAP_TESTS = 1 diff --git a/contrib/amcheck/amcheck--1.4--1.5.sql b/contrib/amcheck/amcheck--1.4--1.5.sql new file mode 100644 index 000000000000..445c48ccb7d7 --- /dev/null +++ b/contrib/amcheck/amcheck--1.4--1.5.sql @@ -0,0 +1,14 @@ +/* contrib/amcheck/amcheck--1.4--1.5.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.5'" to load this file. \quit + + +-- gin_index_check() +-- +CREATE FUNCTION gin_index_check(index regclass) +RETURNS VOID +AS 'MODULE_PATHNAME', 'gin_index_check' +LANGUAGE C STRICT; + +REVOKE ALL ON FUNCTION gin_index_check(regclass) FROM PUBLIC; diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control index e67ace01c995..c8ba6d7c9bc3 100644 --- a/contrib/amcheck/amcheck.control +++ b/contrib/amcheck/amcheck.control @@ -1,5 +1,5 @@ # amcheck extension comment = 'functions for verifying relation integrity' -default_version = '1.4' +default_version = '1.5' module_pathname = '$libdir/amcheck' relocatable = true diff --git a/contrib/amcheck/expected/check_gin.out b/contrib/amcheck/expected/check_gin.out new file mode 100644 index 000000000000..bbcde80e6279 --- /dev/null +++ b/contrib/amcheck/expected/check_gin.out @@ -0,0 +1,64 @@ +-- Test of index bulk load +SELECT setseed(1); + setseed +--------- + +(1 row) + +CREATE TABLE "gin_check"("Column1" int[]); +-- posting trees (frequently used entries) +INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves (sparse entries) +INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100; +CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1"); +SELECT gin_index_check('gin_check_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check; +-- Test index inserts +SELECT setseed(1); + setseed +--------- + +(1 row) + +CREATE TABLE "gin_check"("Column1" int[]); +CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1"); +ALTER INDEX gin_check_idx SET (fastupdate = false); +-- posting trees +INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves +INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100; +SELECT gin_index_check('gin_check_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check; +-- Test GIN over text array +SELECT setseed(1); + setseed +--------- + +(1 row) + +CREATE TABLE "gin_check_text_array"("Column1" text[]); +-- posting trees +INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves +INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100; +CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1"); +SELECT gin_index_check('gin_check_text_array_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check_text_array; diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index 67a4ac8518d1..b33e8c9b062f 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ -2,6 +2,7 @@ amcheck_sources = files( 'verify_common.c', + 'verify_gin.c', 'verify_heapam.c', 'verify_nbtree.c', ) @@ -25,6 +26,7 @@ install_data( 'amcheck--1.1--1.2.sql', 'amcheck--1.2--1.3.sql', 'amcheck--1.3--1.4.sql', + 'amcheck--1.4--1.5.sql', kwargs: contrib_data_args, ) @@ -36,6 +38,7 @@ tests += { 'sql': [ 'check', 'check_btree', + 'check_gin', 'check_heap', ], }, diff --git a/contrib/amcheck/sql/check_gin.sql b/contrib/amcheck/sql/check_gin.sql new file mode 100644 index 000000000000..bbd9b9f8281d --- /dev/null +++ b/contrib/amcheck/sql/check_gin.sql @@ -0,0 +1,40 @@ +-- Test of index bulk load +SELECT setseed(1); +CREATE TABLE "gin_check"("Column1" int[]); +-- posting trees (frequently used entries) +INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves (sparse entries) +INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100; +CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1"); +SELECT gin_index_check('gin_check_idx'); + +-- cleanup +DROP TABLE gin_check; + +-- Test index inserts +SELECT setseed(1); +CREATE TABLE "gin_check"("Column1" int[]); +CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1"); +ALTER INDEX gin_check_idx SET (fastupdate = false); +-- posting trees +INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves +INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100; + +SELECT gin_index_check('gin_check_idx'); + +-- cleanup +DROP TABLE gin_check; + +-- Test GIN over text array +SELECT setseed(1); +CREATE TABLE "gin_check_text_array"("Column1" text[]); +-- posting trees +INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves +INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100; +CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1"); +SELECT gin_index_check('gin_check_text_array_idx'); + +-- cleanup +DROP TABLE gin_check_text_array; diff --git a/contrib/amcheck/verify_gin.c b/contrib/amcheck/verify_gin.c new file mode 100644 index 000000000000..670f53637d47 --- /dev/null +++ b/contrib/amcheck/verify_gin.c @@ -0,0 +1,798 @@ +/*------------------------------------------------------------------------- + * + * verify_gin.c + * Verifies the integrity of GIN indexes based on invariants. + * + * + * GIN index verification checks a number of invariants: + * + * - consistency: Paths in GIN graph have to contain consistent keys: tuples + * on parent pages consistently include tuples from children pages. + * + * - graph invariants: Each internal page must have at least one downlink, and + * can reference either only leaf pages or only internal pages. + * + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_gin.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/nbtree.h" +#include "catalog/pg_am.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "verify_common.h" +#include "string.h" + +/* + * GinScanItem represents one item of depth-first scan of the index. + */ +typedef struct GinScanItem +{ + int depth; + IndexTuple parenttup; + BlockNumber parentblk; + XLogRecPtr parentlsn; + BlockNumber blkno; + struct GinScanItem *next; +} GinScanItem; + +/* + * GinPostingTreeScanItem represents one item of a depth-first posting tree scan. + */ +typedef struct GinPostingTreeScanItem +{ + int depth; + ItemPointerData parentkey; + BlockNumber parentblk; + BlockNumber blkno; + struct GinPostingTreeScanItem *next; +} GinPostingTreeScanItem; + + +PG_FUNCTION_INFO_V1(gin_index_check); + +static void gin_check_parent_keys_consistency(Relation rel, + Relation heaprel, + void *callback_state, bool readonly); +static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo); +static IndexTuple gin_refind_parent(Relation rel, + BlockNumber parentblkno, + BlockNumber childblkno, + BufferAccessStrategy strategy); +static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block, Page page, + OffsetNumber offset); + +/* + * gin_index_check(index regclass) + * + * Verify integrity of GIN index. + * + * Acquires AccessShareLock on heap & index relations. + */ +Datum +gin_index_check(PG_FUNCTION_ARGS) +{ + Oid indrelid = PG_GETARG_OID(0); + + amcheck_lock_relation_and_check(indrelid, + GIN_AM_OID, + gin_check_parent_keys_consistency, + AccessShareLock, + NULL); + + PG_RETURN_VOID(); +} + +/* + * Read item pointers from leaf entry tuple. + * + * Returns a palloc'd array of ItemPointers. The number of items is returned + * in *nitems. + */ +static ItemPointer +ginReadTupleWithoutState(IndexTuple itup, int *nitems) +{ + Pointer ptr = GinGetPosting(itup); + int nipd = GinGetNPosting(itup); + ItemPointer ipd; + int ndecoded; + + if (GinItupIsCompressed(itup)) + { + if (nipd > 0) + { + ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded); + if (nipd != ndecoded) + elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded", + nipd, ndecoded); + } + else + ipd = palloc(0); + } + else + { + ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd); + memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd); + } + *nitems = nipd; + return ipd; +} + +/* + * Scans through a posting tree (given by the root), and verifies that the keys + * on a child keys are consistent with the parent. + * + * Allocates a separate memory context and scans through posting tree graph. + */ +static void +gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting_tree_root) +{ + BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD); + GinPostingTreeScanItem *stack; + MemoryContext mctx; + MemoryContext oldcontext; + + int leafdepth; + + mctx = AllocSetContextCreate(CurrentMemoryContext, + "posting tree check context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(mctx); + + /* + * We don't know the height of the tree yet, but as soon as we encounter a + * leaf page, we will set 'leafdepth' to its depth. + */ + leafdepth = -1; + + /* Start the scan at the root page */ + stack = (GinPostingTreeScanItem *) palloc0(sizeof(GinPostingTreeScanItem)); + stack->depth = 0; + ItemPointerSetInvalid(&stack->parentkey); + stack->parentblk = InvalidBlockNumber; + stack->blkno = posting_tree_root; + + elog(DEBUG3, "processing posting tree at blk %u", posting_tree_root); + + while (stack) + { + GinPostingTreeScanItem *stack_next; + Buffer buffer; + Page page; + OffsetNumber i, + maxoff; + BlockNumber rightlink; + + CHECK_FOR_INTERRUPTS(); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, + RBM_NORMAL, strategy); + LockBuffer(buffer, GIN_SHARE); + page = (Page) BufferGetPage(buffer); + + Assert(GinPageIsData(page)); + + /* Check that the tree has the same height in all branches */ + if (GinPageIsLeaf(page)) + { + ItemPointerData minItem; + int nlist; + ItemPointerData *list; + char tidrange_buf[MAXPGPATH]; + + ItemPointerSetMin(&minItem); + + elog(DEBUG1, "page blk: %u, type leaf", stack->blkno); + + if (leafdepth == -1) + leafdepth = stack->depth; + else if (stack->depth != leafdepth) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u", + RelationGetRelationName(rel), stack->blkno))); + list = GinDataLeafPageGetItems(page, &nlist, minItem); + + if (nlist > 0) + snprintf(tidrange_buf, sizeof(tidrange_buf), + "%d tids (%u, %u) - (%u, %u)", + nlist, + ItemPointerGetBlockNumberNoCheck(&list[0]), + ItemPointerGetOffsetNumberNoCheck(&list[0]), + ItemPointerGetBlockNumberNoCheck(&list[nlist - 1]), + ItemPointerGetOffsetNumberNoCheck(&list[nlist - 1])); + else + snprintf(tidrange_buf, sizeof(tidrange_buf), "0 tids"); + + if (stack->parentblk != InvalidBlockNumber) + elog(DEBUG3, "blk %u: parent %u highkey (%u, %u), %s", + stack->blkno, + stack->parentblk, + ItemPointerGetBlockNumberNoCheck(&stack->parentkey), + ItemPointerGetOffsetNumberNoCheck(&stack->parentkey), + tidrange_buf); + else + elog(DEBUG3, "blk %u: root leaf, %s", + stack->blkno, + tidrange_buf); + + if (stack->parentblk != InvalidBlockNumber && + ItemPointerGetOffsetNumberNoCheck(&stack->parentkey) != InvalidOffsetNumber && + nlist > 0 && ItemPointerCompare(&stack->parentkey, &list[nlist - 1]) < 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": tid exceeds parent's high key in postingTree leaf on block %u", + RelationGetRelationName(rel), stack->blkno))); + } + else + { + LocationIndex pd_lower; + ItemPointerData bound; + int lowersize; + + /* + * Check that tuples in each page are properly ordered and + * consistent with parent high key + */ + maxoff = GinPageGetOpaque(page)->maxoff; + rightlink = GinPageGetOpaque(page)->rightlink; + + elog(DEBUG1, "page blk: %u, type data, maxoff %d", stack->blkno, maxoff); + + if (stack->parentblk != InvalidBlockNumber) + elog(DEBUG3, "blk %u: internal posting tree page with %u items, parent %u highkey (%u, %u)", + stack->blkno, maxoff, stack->parentblk, + ItemPointerGetBlockNumberNoCheck(&stack->parentkey), + ItemPointerGetOffsetNumberNoCheck(&stack->parentkey)); + else + elog(DEBUG3, "blk %u: root internal posting tree page with %u items", + stack->blkno, maxoff); + + /* + * A GIN posting tree internal page stores PostingItems in the + * 'lower' part of the page. The 'upper' part is unused. The + * number of elements is stored in the opaque area (maxoff). Make + * sure the size of the 'lower' part agrees with 'maxoff' + * + * We didn't set pd_lower until PostgreSQL version 9.4, so if this + * check fails, it could also be because the index was + * binary-upgraded from an earlier version. That was a long time + * ago, though, so let's warn if it doesn't match. + */ + pd_lower = ((PageHeader) page)->pd_lower; + lowersize = pd_lower - MAXALIGN(SizeOfPageHeaderData); + if ((lowersize - MAXALIGN(sizeof(ItemPointerData))) / sizeof(PostingItem) != maxoff) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has unexpected pd_lower %u in posting tree block %u with maxoff %u)", + RelationGetRelationName(rel), pd_lower, stack->blkno, maxoff))); + + /* + * Before the PostingItems, there's one ItemPointerData in the + * 'lower' part that stores the page's high key. + */ + bound = *GinDataPageGetRightBound(page); + + /* + * Gin page right bound has a sane value only when not a highkey on + * the rightmost page (at a given level). For the rightmost page does + * not store the highkey explicitly, and the value is infinity. + */ + if (ItemPointerIsValid(&stack->parentkey) && + rightlink != InvalidBlockNumber && + !ItemPointerEquals(&stack->parentkey, &bound)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": posting tree page's high key (%u, %u) doesn't match the downlink on block %u (parent blk %u, key (%u, %u))", + RelationGetRelationName(rel), + ItemPointerGetBlockNumberNoCheck(&bound), + ItemPointerGetOffsetNumberNoCheck(&bound), + stack->blkno, stack->parentblk, + ItemPointerGetBlockNumberNoCheck(&stack->parentkey), + ItemPointerGetOffsetNumberNoCheck(&stack->parentkey)))); + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + GinPostingTreeScanItem *ptr; + PostingItem *posting_item = GinDataPageGetPostingItem(page, i); + + /* ItemPointerGetOffsetNumber expects a valid pointer */ + if (!(i == maxoff && + rightlink == InvalidBlockNumber)) + elog(DEBUG3, "key (%u, %u) -> %u", + ItemPointerGetBlockNumber(&posting_item->key), + ItemPointerGetOffsetNumber(&posting_item->key), + BlockIdGetBlockNumber(&posting_item->child_blkno)); + else + elog(DEBUG3, "key (%u, %u) -> %u", + 0, 0, BlockIdGetBlockNumber(&posting_item->child_blkno)); + + if (i == maxoff && rightlink == InvalidBlockNumber) + { + /* + * The rightmost item in the tree level has (0, 0) as the + * key + */ + if (ItemPointerGetBlockNumberNoCheck(&posting_item->key) != 0 || + ItemPointerGetOffsetNumberNoCheck(&posting_item->key) != 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": rightmost posting tree page (blk %u) has unexpected last key (%u, %u)", + RelationGetRelationName(rel), + stack->blkno, + ItemPointerGetBlockNumberNoCheck(&posting_item->key), + ItemPointerGetOffsetNumberNoCheck(&posting_item->key)))); + } + else if (i != FirstOffsetNumber) + { + PostingItem *previous_posting_item = GinDataPageGetPostingItem(page, i - 1); + + if (ItemPointerCompare(&posting_item->key, &previous_posting_item->key) < 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has wrong tuple order in posting tree, block %u, offset %u", + RelationGetRelationName(rel), stack->blkno, i))); + } + + /* + * Check if this tuple is consistent with the downlink in the + * parent. + */ + if (stack->parentblk != InvalidBlockNumber && i == maxoff && + ItemPointerCompare(&stack->parentkey, &posting_item->key) < 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": posting item exceeds parent's high key in postingTree internal page on block %u offset %u", + RelationGetRelationName(rel), + stack->blkno, i))); + + /* This is an internal page, recurse into the child. */ + ptr = (GinPostingTreeScanItem *) palloc(sizeof(GinPostingTreeScanItem)); + ptr->depth = stack->depth + 1; + + /* + * Set rightmost parent key to invalid iterm pointer. Its + * value is 'Infinity' and not explicitly stored. + */ + if (rightlink == InvalidBlockNumber) + ItemPointerSetInvalid(&ptr->parentkey); + else + ptr->parentkey = posting_item->key; + + ptr->parentblk = stack->blkno; + ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno); + ptr->next = stack->next; + stack->next = ptr; + } + } + LockBuffer(buffer, GIN_UNLOCK); + ReleaseBuffer(buffer); + + /* Step to next item in the queue */ + stack_next = stack->next; + pfree(stack); + stack = stack_next; + } + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(mctx); +} + +/* + * Main entry point for GIN checks. + * + * Allocates memory context and scans through the whole GIN graph. + */ +static void +gin_check_parent_keys_consistency(Relation rel, + Relation heaprel, + void *callback_state, + bool readonly) +{ + BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD); + GinScanItem *stack; + MemoryContext mctx; + MemoryContext oldcontext; + GinState state; + int leafdepth; + + mctx = AllocSetContextCreate(CurrentMemoryContext, + "amcheck consistency check context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(mctx); + initGinState(&state, rel); + + /* + * We don't know the height of the tree yet, but as soon as we encounter a + * leaf page, we will set 'leafdepth' to its depth. + */ + leafdepth = -1; + + /* Start the scan at the root page */ + stack = (GinScanItem *) palloc0(sizeof(GinScanItem)); + stack->depth = 0; + stack->parenttup = NULL; + stack->parentblk = InvalidBlockNumber; + stack->parentlsn = InvalidXLogRecPtr; + stack->blkno = GIN_ROOT_BLKNO; + + while (stack) + { + GinScanItem *stack_next; + Buffer buffer; + Page page; + OffsetNumber i, + maxoff, + prev_attnum; + XLogRecPtr lsn; + IndexTuple prev_tuple; + BlockNumber rightlink; + + CHECK_FOR_INTERRUPTS(); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, + RBM_NORMAL, strategy); + LockBuffer(buffer, GIN_SHARE); + page = (Page) BufferGetPage(buffer); + lsn = BufferGetLSNAtomic(buffer); + maxoff = PageGetMaxOffsetNumber(page); + rightlink = GinPageGetOpaque(page)->rightlink; + + /* Do basic sanity checks on the page headers */ + check_index_page(rel, buffer, stack->blkno); + + elog(DEBUG3, "processing entry tree page at blk %u, maxoff: %u", stack->blkno, maxoff); + + /* + * It's possible that the page was split since we looked at the + * parent, so that we didn't missed the downlink of the right sibling + * when we scanned the parent. If so, add the right sibling to the + * stack now. + */ + if (stack->parenttup != NULL) + { + GinNullCategory parent_key_category; + Datum parent_key = gintuple_get_key(&state, + stack->parenttup, + &parent_key_category); + ItemId iid = PageGetItemIdCareful(rel, stack->blkno, + page, maxoff); + IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); + OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple); + GinNullCategory page_max_key_category; + Datum page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category); + + if (rightlink != InvalidBlockNumber && + ginCompareEntries(&state, attnum, page_max_key, + page_max_key_category, parent_key, + parent_key_category) > 0) + { + /* split page detected, install right link to the stack */ + GinScanItem *ptr; + + elog(DEBUG3, "split detected for blk: %u, parent blk: %u", stack->blkno, stack->parentblk); + + ptr = (GinScanItem *) palloc(sizeof(GinScanItem)); + ptr->depth = stack->depth; + ptr->parenttup = CopyIndexTuple(stack->parenttup); + ptr->parentblk = stack->parentblk; + ptr->parentlsn = stack->parentlsn; + ptr->blkno = rightlink; + ptr->next = stack->next; + stack->next = ptr; + } + } + + /* Check that the tree has the same height in all branches */ + if (GinPageIsLeaf(page)) + { + if (leafdepth == -1) + leafdepth = stack->depth; + else if (stack->depth != leafdepth) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u", + RelationGetRelationName(rel), stack->blkno))); + } + + /* + * Check that tuples in each page are properly ordered and consistent + * with parent high key + */ + prev_tuple = NULL; + prev_attnum = InvalidAttrNumber; + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i); + IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); + OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple); + GinNullCategory prev_key_category; + Datum prev_key; + GinNullCategory current_key_category; + Datum current_key; + + if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u", + RelationGetRelationName(rel), stack->blkno, i))); + + current_key = gintuple_get_key(&state, idxtuple, ¤t_key_category); + + /* + * First block is metadata, skip order check. Also, never check + * for high key on rightmost page, as this key is not really + * stored explicitly. + * + * Also make sure to not compare entries for different attnums, which + * may be stored on the same page. + */ + if (i != FirstOffsetNumber && attnum == prev_attnum && stack->blkno != GIN_ROOT_BLKNO && + !(i == maxoff && rightlink == InvalidBlockNumber)) + { + prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category); + if (ginCompareEntries(&state, attnum, prev_key, + prev_key_category, current_key, + current_key_category) >= 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has wrong tuple order on entry tree page, block %u, offset %u, rightlink %u", + RelationGetRelationName(rel), stack->blkno, i, rightlink))); + } + + /* + * Check if this tuple is consistent with the downlink in the + * parent. + */ + if (stack->parenttup && + i == maxoff) + { + GinNullCategory parent_key_category; + Datum parent_key = gintuple_get_key(&state, + stack->parenttup, + &parent_key_category); + + if (ginCompareEntries(&state, attnum, current_key, + current_key_category, parent_key, + parent_key_category) > 0) + { + /* + * There was a discrepancy between parent and child + * tuples. We need to verify it is not a result of + * concurrent call of gistplacetopage(). So, lock parent + * and try to find downlink for current page. It may be + * missing due to concurrent page split, this is OK. + */ + pfree(stack->parenttup); + stack->parenttup = gin_refind_parent(rel, stack->parentblk, + stack->blkno, strategy); + + /* We found it - make a final check before failing */ + if (!stack->parenttup) + elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split", + stack->blkno, stack->parentblk); + else + { + parent_key = gintuple_get_key(&state, + stack->parenttup, + &parent_key_category); + + /* + * Check if it is properly adjusted. If succeed, + * procced to the next key. + */ + if (ginCompareEntries(&state, attnum, current_key, + current_key_category, parent_key, + parent_key_category) > 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has inconsistent records on page %u offset %u", + RelationGetRelationName(rel), stack->blkno, i))); + } + } + } + + /* If this is an internal page, recurse into the child */ + if (!GinPageIsLeaf(page)) + { + GinScanItem *ptr; + + ptr = (GinScanItem *) palloc(sizeof(GinScanItem)); + ptr->depth = stack->depth + 1; + /* last tuple in layer has no high key */ + if (i != maxoff && !GinPageGetOpaque(page)->rightlink) + ptr->parenttup = CopyIndexTuple(idxtuple); + else + ptr->parenttup = NULL; + ptr->parentblk = stack->blkno; + ptr->blkno = GinGetDownlink(idxtuple); + ptr->parentlsn = lsn; + ptr->next = stack->next; + stack->next = ptr; + } + /* If this item is a pointer to a posting tree, recurse into it */ + else if (GinIsPostingTree(idxtuple)) + { + BlockNumber rootPostingTree = GinGetPostingTree(idxtuple); + + gin_check_posting_tree_parent_keys_consistency(rel, rootPostingTree); + } + else + { + ItemPointer ipd; + int nipd; + + ipd = ginReadTupleWithoutState(idxtuple, &nipd); + + for (int j = 0; j < nipd; j++) + { + if (!OffsetNumberIsValid(ItemPointerGetOffsetNumber(&ipd[j]))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": posting list contains invalid heap pointer on block %u", + RelationGetRelationName(rel), stack->blkno))); + } + pfree(ipd); + } + + prev_tuple = CopyIndexTuple(idxtuple); + prev_attnum = attnum; + } + + LockBuffer(buffer, GIN_UNLOCK); + ReleaseBuffer(buffer); + + /* Step to next item in the queue */ + stack_next = stack->next; + if (stack->parenttup) + pfree(stack->parenttup); + pfree(stack); + stack = stack_next; + } + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(mctx); +} + +/* + * Verify that a freshly-read page looks sane. + */ +static void +check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo) +{ + Page page = BufferGetPage(buffer); + + /* + * ReadBuffer verifies that every newly-read page passes + * PageHeaderIsValid, which means it either contains a reasonably sane + * page header or is all-zero. We have to defend against the all-zero + * case, however. + */ + if (PageIsNew(page)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" contains unexpected zero page at block %u", + RelationGetRelationName(rel), + BufferGetBlockNumber(buffer)), + errhint("Please REINDEX it."))); + + /* + * Additionally check that the special area looks sane. + */ + if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" contains corrupted page at block %u", + RelationGetRelationName(rel), + BufferGetBlockNumber(buffer)), + errhint("Please REINDEX it."))); + + if (GinPageIsDeleted(page)) + { + if (!GinPageIsLeaf(page)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has deleted internal page %d", + RelationGetRelationName(rel), blockNo))); + if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has deleted page %d with tuples", + RelationGetRelationName(rel), blockNo))); + } + else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has page %d with exceeding count of tuples", + RelationGetRelationName(rel), blockNo))); +} + +/* + * Try to re-find downlink pointing to 'blkno', in 'parentblkno'. + * + * If found, returns a palloc'd copy of the downlink tuple. Otherwise, + * returns NULL. + */ +static IndexTuple +gin_refind_parent(Relation rel, BlockNumber parentblkno, + BlockNumber childblkno, BufferAccessStrategy strategy) +{ + Buffer parentbuf; + Page parentpage; + OffsetNumber o, + parent_maxoff; + IndexTuple result = NULL; + + parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL, + strategy); + + LockBuffer(parentbuf, GIN_SHARE); + parentpage = BufferGetPage(parentbuf); + + if (GinPageIsLeaf(parentpage)) + { + UnlockReleaseBuffer(parentbuf); + return result; + } + + parent_maxoff = PageGetMaxOffsetNumber(parentpage); + for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o)) + { + ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o); + IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid); + + if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno) + { + /* Found it! Make copy and return it */ + result = CopyIndexTuple(itup); + break; + } + } + + UnlockReleaseBuffer(parentbuf); + + return result; +} + +static ItemId +PageGetItemIdCareful(Relation rel, BlockNumber block, Page page, + OffsetNumber offset) +{ + ItemId itemid = PageGetItemId(page, offset); + + if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) > + BLCKSZ - MAXALIGN(sizeof(GinPageOpaqueData))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("line pointer points past end of tuple space in index \"%s\"", + RelationGetRelationName(rel)), + errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.", + block, offset, ItemIdGetOffset(itemid), + ItemIdGetLength(itemid), + ItemIdGetFlags(itemid)))); + + /* + * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED or LP_DEAD, + * since GIN never uses all three. Verify that line pointer has storage, + * too. + */ + if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) || + ItemIdIsDead(itemid) || ItemIdGetLength(itemid) == 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("invalid line pointer storage in index \"%s\"", + RelationGetRelationName(rel)), + errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.", + block, offset, ItemIdGetOffset(itemid), + ItemIdGetLength(itemid), + ItemIdGetFlags(itemid)))); + + return itemid; +} diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml index a12aa3abf01a..98f836e15e79 100644 --- a/doc/src/sgml/amcheck.sgml +++ b/doc/src/sgml/amcheck.sgml @@ -188,6 +188,26 @@ ORDER BY c.relpages DESC LIMIT 10; + + + + gin_index_check(index regclass) returns void + + gin_index_check + + + + + + gin_index_check tests that its target GIN index + has consistent parent-child tuples relations (no parent tuples + require tuple adjustement) and page graph respects balanced-tree + invariants (internal pages reference only leaf page or only internal + pages). + + + + diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index b66affbea56e..b66cecd87991 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1053,8 +1053,10 @@ GinPageOpaque GinPageOpaqueData GinPlaceToPageRC GinPostingList +GinPostingTreeScanItem GinQualCounts GinScanEntry +GinScanItem GinScanKey GinScanOpaque GinScanOpaqueData From 9cfeaa74b6b9cfad83fb2265c99d4b78fba6f437 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 28 Mar 2025 16:53:31 +0100 Subject: [PATCH 4/6] amcheck: Add a test with GIN index on JSONB data Extend the existing test of GIN checks to also include an index on JSONB data, using the jsonb_path_ops opclass. This is a common enough usage of GIN that it makes sense to have better test coverage for it. Author: Mark Dilger Reviewed-By: Tomas Vondra Reviewed-By: Kirill Reshke Discussion: https://postgr.es/m/BC221A56-977C-418E-A1B8-9EFC881D80C5%40enterprisedb.com --- contrib/amcheck/expected/check_gin.out | 14 ++++++++++++++ contrib/amcheck/sql/check_gin.sql | 12 ++++++++++++ 2 files changed, 26 insertions(+) diff --git a/contrib/amcheck/expected/check_gin.out b/contrib/amcheck/expected/check_gin.out index bbcde80e6279..93147de0ef11 100644 --- a/contrib/amcheck/expected/check_gin.out +++ b/contrib/amcheck/expected/check_gin.out @@ -62,3 +62,17 @@ SELECT gin_index_check('gin_check_text_array_idx'); -- cleanup DROP TABLE gin_check_text_array; +-- Test GIN over jsonb +CREATE TABLE "gin_check_jsonb"("j" jsonb); +INSERT INTO gin_check_jsonb values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}'); +INSERT INTO gin_check_jsonb values ('[[14,2,3]]'); +INSERT INTO gin_check_jsonb values ('[1,[14,2,3]]'); +CREATE INDEX "gin_check_jsonb_idx" on gin_check_jsonb USING GIN("j" jsonb_path_ops); +SELECT gin_index_check('gin_check_jsonb_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check_jsonb; diff --git a/contrib/amcheck/sql/check_gin.sql b/contrib/amcheck/sql/check_gin.sql index bbd9b9f8281d..92ddbbc7a891 100644 --- a/contrib/amcheck/sql/check_gin.sql +++ b/contrib/amcheck/sql/check_gin.sql @@ -38,3 +38,15 @@ SELECT gin_index_check('gin_check_text_array_idx'); -- cleanup DROP TABLE gin_check_text_array; + +-- Test GIN over jsonb +CREATE TABLE "gin_check_jsonb"("j" jsonb); +INSERT INTO gin_check_jsonb values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}'); +INSERT INTO gin_check_jsonb values ('[[14,2,3]]'); +INSERT INTO gin_check_jsonb values ('[1,[14,2,3]]'); +CREATE INDEX "gin_check_jsonb_idx" on gin_check_jsonb USING GIN("j" jsonb_path_ops); + +SELECT gin_index_check('gin_check_jsonb_idx'); + +-- cleanup +DROP TABLE gin_check_jsonb; From b43ee5a8a8a5e6f8099b4077f5a4e0b6ae85551b Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 28 Mar 2025 17:05:18 +0100 Subject: [PATCH 5/6] amcheck: Add a GIN index to the CREATE INDEX CONCURRENTLY tests The existing CREATE INDEX CONCURRENTLY tests checking only B-Tree, but can be cheaply extended to also check GIN. This helps increasing test coverage for GIN amcheck, especially related to handling concurrent page splits and posting list trees. This already helped to identify several issues during development of the GIN amcheck support. Author: Mark Dilger Reviewed-By: Tomas Vondra Reviewed-By: Kirill Reshke Discussion: https://postgr.es/m/BC221A56-977C-418E-A1B8-9EFC881D80C5%40enterprisedb.com --- contrib/amcheck/t/002_cic.pl | 10 +++++--- contrib/amcheck/t/003_cic_2pc.pl | 40 ++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/contrib/amcheck/t/002_cic.pl b/contrib/amcheck/t/002_cic.pl index 0b6a5a9e4641..6a0c4f611258 100644 --- a/contrib/amcheck/t/002_cic.pl +++ b/contrib/amcheck/t/002_cic.pl @@ -21,8 +21,9 @@ 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); -$node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); +$node->safe_psql('postgres', q(CREATE TABLE tbl(i int, j jsonb))); $node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i))); +$node->safe_psql('postgres', q(CREATE INDEX ginidx ON tbl USING gin(j))); # # Stress CIC with pgbench. @@ -40,13 +41,13 @@ { '002_pgbench_concurrent_transaction' => q( BEGIN; - INSERT INTO tbl VALUES(0); + INSERT INTO tbl VALUES(0, '{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}'); COMMIT; ), '002_pgbench_concurrent_transaction_savepoints' => q( BEGIN; SAVEPOINT s1; - INSERT INTO tbl VALUES(0); + INSERT INTO tbl VALUES(0, '[[14,2,3]]'); COMMIT; ), '002_pgbench_concurrent_cic' => q( @@ -54,7 +55,10 @@ \if :gotlock DROP INDEX CONCURRENTLY idx; CREATE INDEX CONCURRENTLY idx ON tbl(i); + DROP INDEX CONCURRENTLY ginidx; + CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j); SELECT bt_index_check('idx',true); + SELECT gin_index_check('ginidx'); SELECT pg_advisory_unlock(42); \endif ) diff --git a/contrib/amcheck/t/003_cic_2pc.pl b/contrib/amcheck/t/003_cic_2pc.pl index 9134487f3b49..00a446a381fa 100644 --- a/contrib/amcheck/t/003_cic_2pc.pl +++ b/contrib/amcheck/t/003_cic_2pc.pl @@ -25,7 +25,7 @@ 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); -$node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); +$node->safe_psql('postgres', q(CREATE TABLE tbl(i int, j jsonb))); # @@ -41,7 +41,7 @@ $main_h->query_safe( q( BEGIN; -INSERT INTO tbl VALUES(0); +INSERT INTO tbl VALUES(0, '[[14,2,3]]'); )); my $cic_h = $node->background_psql('postgres'); @@ -50,6 +50,7 @@ qr/start/, q( \echo start CREATE INDEX CONCURRENTLY idx ON tbl(i); +CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j); )); $main_h->query_safe( @@ -60,7 +61,7 @@ $main_h->query_safe( q( BEGIN; -INSERT INTO tbl VALUES(0); +INSERT INTO tbl VALUES(0, '[[14,2,3]]'); )); $node->safe_psql('postgres', q(COMMIT PREPARED 'a';)); @@ -69,7 +70,7 @@ q( PREPARE TRANSACTION 'b'; BEGIN; -INSERT INTO tbl VALUES(0); +INSERT INTO tbl VALUES(0, '"mary had a little lamb"'); )); $node->safe_psql('postgres', q(COMMIT PREPARED 'b';)); @@ -86,6 +87,9 @@ $result = $node->psql('postgres', q(SELECT bt_index_check('idx',true))); is($result, '0', 'bt_index_check after overlapping 2PC'); +$result = $node->psql('postgres', q(SELECT gin_index_check('ginidx'))); +is($result, '0', 'gin_index_check after overlapping 2PC'); + # # Server restart shall not change whether prepared xact blocks CIC @@ -94,7 +98,7 @@ $node->safe_psql( 'postgres', q( BEGIN; -INSERT INTO tbl VALUES(0); +INSERT INTO tbl VALUES(0, '{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}'); PREPARE TRANSACTION 'spans_restart'; BEGIN; CREATE TABLE unused (); @@ -108,12 +112,16 @@ \echo start DROP INDEX CONCURRENTLY idx; CREATE INDEX CONCURRENTLY idx ON tbl(i); +DROP INDEX CONCURRENTLY ginidx; +CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j); )); $node->safe_psql('postgres', "COMMIT PREPARED 'spans_restart'"); $reindex_h->quit; $result = $node->psql('postgres', q(SELECT bt_index_check('idx',true))); is($result, '0', 'bt_index_check after 2PC and restart'); +$result = $node->psql('postgres', q(SELECT gin_index_check('ginidx'))); +is($result, '0', 'gin_index_check after 2PC and restart'); # @@ -136,14 +144,14 @@ { '003_pgbench_concurrent_2pc' => q( BEGIN; - INSERT INTO tbl VALUES(0); + INSERT INTO tbl VALUES(0,'null'); PREPARE TRANSACTION 'c:client_id'; COMMIT PREPARED 'c:client_id'; ), '003_pgbench_concurrent_2pc_savepoint' => q( BEGIN; SAVEPOINT s1; - INSERT INTO tbl VALUES(0); + INSERT INTO tbl VALUES(0,'[false, "jnvaba", -76, 7, {"_": [1]}, 9]'); PREPARE TRANSACTION 'c:client_id'; COMMIT PREPARED 'c:client_id'; ), @@ -163,7 +171,25 @@ SELECT bt_index_check('idx',true); SELECT pg_advisory_unlock(42); \endif + ), + '005_pgbench_concurrent_cic' => q( + SELECT pg_try_advisory_lock(42)::integer AS gotginlock \gset + \if :gotginlock + DROP INDEX CONCURRENTLY ginidx; + CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j); + SELECT gin_index_check('ginidx'); + SELECT pg_advisory_unlock(42); + \endif + ), + '006_pgbench_concurrent_ric' => q( + SELECT pg_try_advisory_lock(42)::integer AS gotginlock \gset + \if :gotginlock + REINDEX INDEX CONCURRENTLY ginidx; + SELECT gin_index_check('ginidx'); + SELECT pg_advisory_unlock(42); + \endif ) + }); $node->stop; From 48143f4840be193cf92e9842abfc00820c3da786 Mon Sep 17 00:00:00 2001 From: Mark Dilger Date: Fri, 21 Feb 2025 12:11:07 -0800 Subject: [PATCH 6/6] Stress test verify_gin() using pgbench Add a tap test which inserts, updates, deletes, and checks in parallel. Like all pgbench based tap tests, this test contains race conditions between the operations, so Your Mileage May Vary. For me, on my laptop, I got failures like: index "ginidx" has wrong tuple order on entry tree page which I have not yet investigated. The test is included here for anybody interested in debugging this failure. --- contrib/amcheck/t/006_gin_concurrency.pl | 196 +++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 contrib/amcheck/t/006_gin_concurrency.pl diff --git a/contrib/amcheck/t/006_gin_concurrency.pl b/contrib/amcheck/t/006_gin_concurrency.pl new file mode 100644 index 000000000000..afc67940d4dd --- /dev/null +++ b/contrib/amcheck/t/006_gin_concurrency.pl @@ -0,0 +1,196 @@ + +# Copyright (c) 2021-2025, PostgreSQL Global Development Group + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More; + +my $node; + +# +# Test set-up +# +$node = PostgreSQL::Test::Cluster->new('test'); +$node->init; +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); +$node->start; +$node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); +$node->safe_psql('postgres', q(CREATE TABLE tbl(i integer[], j jsonb, k jsonb))); +$node->safe_psql('postgres', q(CREATE INDEX ginidx ON tbl USING gin(i, j, k))); +$node->safe_psql('postgres', q(CREATE TABLE jsondata (i serial, j jsonb))); +$node->safe_psql('postgres', q(INSERT INTO jsondata (j) VALUES + ('1'), + ('91'), + ('[5]'), + ('true'), + ('"zxI"'), + ('[1, 7]'), + ('["", 4]'), + ('"utDFBz"'), + ('[[9], ""]'), + ('"eCvxKPML"'), + ('["1VMQNQM"]'), + ('{"": "562c"}'), + ('[58, 8, null]'), + ('{"": {"": 62}}'), + ('["", 6, 19, ""]'), + ('{"ddfWTQ": true}'), + ('["", 734.2, 9, 5]'), + ('"GMV27mjtuuqmlltw"'), + ('{"dabe": -5, "": 6}'), + ('"hgihykirQGIYTcCA30"'), + ('[9, {"Utrn": -6}, ""]'), + ('"BJTZUMST1_WWEgyqgka_"'), + ('["", -4, "", [-2], -47]'), + ('{"": [3], "": {"": "y"}}'), + ('{"myuijj": "YUWIUZXXLGS"}'), + ('{"3": false, "C": "1sHTX"}'), + ('"ZGUORVDE_ACF1QXJ_hipgwrks"'), + ('{"072": [3, -4], "oh": "eL"}'), + ('[{"de": 9, "JWHPMRZJW": [0]}]'), + ('"EACJUZEBAFFBEE6706SZLWVGO635"'), + ('["P", {"TZW": [""]}, {"": [0]}]'), + ('{"": -6, "YMb": -22, "__": [""]}'), + ('{"659": [8], "bfc": [0], "V": ""}'), + ('{"8776": "1tryl", "Q": 2, "": 4.6}'), + ('[[1], "", 9, 0, [1, 0], -1, 0, "C"]'), + ('"635321pnpjlfFzhGTIYP9265iA_19D8260"'), + ('"klmxsoCFDtzxrhotsqlnmvmzlcbdde34twj"'), + ('"GZSXSZVS19ecbe_ZJJED0379c1j9_GSU9167"'), + ('{"F18s": {"": -84194}, "ececab2": [""]}'), + ('["", {"SVAvgg": "Q"}, 1, 9, "gypy", [1]]'), + ('[[""], {"": 5}, "GVZGGVGSWM", 2, ["", 8]]'), + ('{"V": 8, "TPNL": [826, null], "4": -9.729}'), + ('{"HTJP_DAptxn6": 9, "": "r", "hji4124": ""}'), + ('[1, ["9", 5, 6, ""], {"": "", "": "efb"}, 7]'), + ('{"": 6, "1251e_cajrgkyzuxBEDM017444EFD": 548}'), + ('{"853": -60, "TGLUG_jxmrggv": null, "pjx": ""}'), + ('[0, "wsgnnvCfJVV_KOMLVXOUIS9FIQLPXXBbbaohjrpj"]'), + ('"nizvkl36908OLW22ecbdeEBMHMiCEEACcikwkjpmu30X_m"'), + ('{"bD24eeVZWY": 1, "Bt": 9, "": 6052, "FT": ["h"]}'), + ('"CDBnouyzlAMSHJCtguxxizpzgkNYfaNLURVITNLYVPSNLYNy"'), + ('{"d": [[4, "N"], null, 6, true], "1PKV": 6, "9": 6}'), + ('[-7326, [83, 55], -63, [0, {"": 1}], {"ri0": false}]'), + ('{"": 117.38, "FCkx3608szztpvjolomzvlyrshyvrgz": -4.2}'), + ('["", 8, {"WXHNG": {"6": 4}}, [null], 7, 2, "", 299, 6]'), + ('[[-992.2, "TPm", "", "cedeff79BD8", "t", [1]], 0, [-7]]'), + ('[9, 34, ["LONuyiYGQZ"], [7, 88], ["c"], 1, 6, "", [[2]]]'), + ('[20, 5, null, "eLHTXRWNV", 8, ["pnpvrum", -3], "FINY", 3]'), + ('[{"": "", "b": 2, "d": "egu"}, "aPNK", 2, 9, {"": -79946}]'), + ('[1, {"769": 9}, 5, 9821, 22, 0, 2.7, 5, 4, 191, 54.599, 24]'), + ('["c", 77, "b_0lplvHJNLMxw", "VN76dhFadaafadfe5dfbco", false]'), + ('"TYIHXebbPK_86QMP_199bEEIS__8205986vdC_CFAEFBFCEFCJQRHYoqztv"'), + ('"cdmxxxzrhtxpwuyrxinmhb5577NSPHIHMTPQYTXSUVVGJPUUMCBEDb_1569e"'), + ('[[5, null, "C"], "ORNR", "mnCb", 1, -800, "6953", ["K", 0], ""]'), + ('"SSKLTHJxjxywwquhiwsde353eCIJJjkyvn9946c2cdVadcboiyZFAYMHJWGMMT"'), + ('"5185__D5AtvhizvmEVceF3jxtghlCF0789_owmsztJHRMOJ7rlowxqq51XLXJbF"'), + ('{"D": 565206, "xupqtmfedff": "ZGJN9", "9": 1, "glzv": -47, "": -8}'), + ('{"": 9, "": {"": [null], "ROP": 842}, "": ["5FFD", 7, 5, 1, 94, 1]}'), + ('{"JLn": ["8s"], "": "_ahxizrzhivyzvhr", "XSAt": 5, "P": 2838, "": 5}'), + ('[51, 3, {"": 9, "": -9, "": [[6]]}, 7, 7, {"": 0}, "TXLQL", 7.6, [7]]'), + ('[-38.7, "kre40", 5, {"": null}, "tvuv", 8, "", "", "uizygprwwvh", "1"]'), + ('"z934377_nxmzjnuqglgyukjteefeihjyot1irkvwnnrqinptlpzwjgmkjbQMUVxxwvbdz"'), + ('[165.9, "dAFD_60JQPYbafh", false, {"": 6, "": "fcfd"}, [[2], "c"], 4, 2]'), + ('"ffHOOPVSSACDqiyeecTNWJMWPNRXU283aHRXNUNZZZQPUGYSQTTQXQVJM5eeafcIPGIHcac"'), + ('[2, 8, -53, {"": 5}, "F9", 8, "SGUJPNVI", "7OLOZH", 9.84, {"": 6}, 207, 6]'), + ('"xqmqmyljhq__ZGWJVNefagsxrsktruhmlinhxloupuVQW0804901NKGGMNNSYYXWQOosz8938"'), + ('{"FEoLfaab1160167": {"L": [42, 0]}, "938": "FCCUPGYYYMQSQVZJKM", "knqmk": 2}'), + ('"0igyurmOMSXIYHSZQEAcxlvgqdxkhwtrbaabfaaMC138Z_BDRLrythpi30_MPRXMTOILRLswmoy"'), + ('"1129BBCABFFAACA9VGVKipnwohaccc9TSIMTOQKHmcGYVeFE_PWKLHmpyj60137672qugtsstugg"'), + ('"D3BDA069074174vx48A37IVHWVXLUP9382542ypsl1465pixtryzCBgrkkhrvCC_BDDFatkyXHLIe"'), + ('[{"esx7": -53, "ec60834YGVMYoXAAvgxmmqnojyzmiklhdovFipl": 2, "os": 66433}, 9.13]'), + ('{"": ["", 4, null, 5, null], "": "3", "5_GMMHTIhPB_F_vsebc1": "Er", "GY": 121.32}'), + ('["krTVPYDEd", 5, 8, [6, -6], [[-9], 3340, [[""]]], "", 5, [6, true], 3, "", 1, ""]'), + ('{"rBNPKN8446080wruOLeceaCBDCKWNUYYMONSJUlCDFExr": {"": "EE0", "6826": 5, "": 7496}}'), + ('[3, {"": -8}, "101dboMVSNKZLVPITLHLPorwwuxxjmjsh", "", "LSQPRVYKWVYK945imrh", 4, 51]'), + ('[["HY6"], "", "bcdB", [2, [85, 1], 3, 3, 3, [8]], "", ["_m"], "2", -33, 8, 3, "_xwj"]'), + ('["", 0, -3.7, 8, false, null, {"": 5}, 9, "06FccxFcdb283bbZGGVRSMWLJH2_PBAFpwtkbceto"]'), + ('[52, "", -39, -7, [1], "c", {"": 9, "": 45528, "G": {"": 7}}, 3, false, 0, "EB", 8, -6]'), + ('"qzrkvrlG78CCCEBCptzwwok808805243QXVSYed3efZSKLSNXPxhrS357KJMWSKgrfcFFDFDWKSXJJSIJ_yqJu"'), + ('[43, 8, {"": ""}, "uwtv__HURKGJLGGPPW", 9, 66, "yqrvghxuw", {"J": false}, false, 2, 0, 4]'), + ('[{"UVL": 7, "": 1}, false, [6, "H"], "boxlgqgm", 3, "znhm", [true], 0, ["e", 3.7], 9, 9.4]'), + ('{"825634870117somzqw": 1, "": [5], "gYH": "_XT", "b22412631709RZP": 3, "": "", "FDB": [""]}'), + ('[8, ["_bae"], "", "WN", 80, {"o": 2, "aff": 16}, false, true, 4, 6, {"nutzkzikolsxZRQ": 30}]'), + ('["588BD9c_xzsn", {"k": 0, "_Ecezlkslrwvjpwrukiqzl": 3, "Ej": "4"}, "TUXwghn1dTNRXJZpswmD", 5]'), + ('[{"dC": 7}, {"": 1, "4": 41, "": "", "": "adKS"}, {"": "ypv"}, 6, 9, 2, [-61.46], [1, 3.9], 2]'), + ('{"8": 8, "": -364, "855": -238.1, "zj": 9, "SNHJG413": 3, "UMNVI73": [60, 0], "iwvqse": -1.833}'), + ('"VTUKMLZKQPHIEniCFZ_cjrhvspxzulvxhqykjzmrw89OGOGISWdcrvpOPLOFALGK809896999xzqnkm63254_xrmcfcedb"'), + ('["", "USNQbcexyFDCdBAFWJIphloxwytplyZZR008400FmoiYXVYOHVGV79795644463Aug_aeoDDEjzoziisxoykuijhz"]'), + ('{"": 1, "5abB58gXVQVTTMWU3jSHXMMNV": "", "nv": 934, "kjsnhtj": 8, "": [{"xm": [71, 425]}], "": -9}'), + ('"__oliqCcbwwyqmtECsqivplcb1NTMOQRZTYRJONOIPWNHKWLJRIHKROMJNZLNGTTKRcedebccdbMTQXSzhynxmllqxuhnxBA_"'), + ('["thgACBWGNGMkFFEA", [0, -1349, {"18": "RM", "F3": 6, "dP": "_AF"}, 64, 0, {"f": [8]}], 5, [[0]], 2]') +)); + +# +# Stress gin with pgbench. +# +# Modify the table data, and hence the index data, from multiple process +# while from other processes run the index checking code. This should, +# if the index is large enough, result in the checks performing across +# concurrent page splits. +# +$node->pgbench( + '--no-vacuum --client=20 --transactions=5000', + 0, + [qr{actually processed}], + [qr{^$}], + 'concurrent DML and index checking', + { + '006_gin_concurrency_insert_1' => q( + INSERT INTO tbl (i, j, k) + (SELECT ARRAY[x.i, y.i, random(0,100000), random(0,100000)], x.j, y.j + FROM jsondata x, jsondata y + WHERE x.i = random(1,100) + AND y.i = random(1,100) + ) + ), + '006_gin_concurrency_insert_2' => q( + INSERT INTO tbl (i, j, k) + (SELECT gs.i, j.j, j.j || j.j + FROM jsondata j, + (SELECT array_agg(gs) AS i FROM generate_series(random(0,100), random(101,200)) gs) gs + WHERE j.i = random(1,100) + ) + ), + '006_gin_concurrency_insert_nulls' => q( + INSERT INTO tbl (i, j, k) VALUES + (null, null, null), + (null, null, '[]'), + (null, '[]', null), + (ARRAY[]::INTEGER[], null, null), + (null, '[]', '[]'), + (ARRAY[]::INTEGER[], '[]', null), + (ARRAY[]::INTEGER[], '[]', '[]') + ), + '006_gin_concurrency_update_i' => q( + UPDATE tbl + SET i = (SELECT i || i FROM tbl ORDER BY random() LIMIT 1) + WHERE j = (SELECT j FROM tbl ORDER BY random() LIMIT 1); + ), + '006_gin_concurrency_update_j' => q( + UPDATE tbl + SET j = (SELECT j || j FROM tbl ORDER BY random() LIMIT 1) + WHERE k = (SELECT k FROM tbl ORDER BY random() LIMIT 1); + ), + '006_gin_concurrency_update_k' => q( + UPDATE tbl + SET k = (SELECT k || k FROM tbl ORDER BY random() LIMIT 1) + WHERE i = (SELECT i FROM tbl ORDER BY random() LIMIT 1); + ), + '006_gin_concurrency_delete' => q( + DELETE FROM tbl + WHERE random(1,5) = 3; + ), + '006_gin_concurrency_gin_index_check' => q( + SELECT gin_index_check('ginidx'); + ) + }); + +$node->stop; +done_testing(); +