27 files changed, 723 insertions, 268 deletions
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index a6466f575fd..0b39d64a839 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -41,7 +41,8 @@ getlexeme(char *start, char *end, int *len)
 }
 
 bool
-compare_subnode(ltree_level *t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend)
+compare_subnode(ltree_level *t, char *qn, int len,
+                ltree_prefix_eq_func prefix_eq, bool anyend)
 {
     char *endt = t->name + t->len;
     char *endq = qn + len;
@@ -57,7 +58,7 @@ compare_subnode(ltree_level *t, char *qn, int len, int (*cmpptr) (const char *,
     while ((tn = getlexeme(tn, endt, &lent)) != NULL)
     {
         if ((lent == lenq || (lent > lenq && anyend)) &&
-            (*cmpptr) (qn, tn, lenq) == 0)
+            (*prefix_eq) (qn, lenq, tn, lent))
         {
 
             isok = true;
@@ -74,14 +75,29 @@ compare_subnode(ltree_level *t, char *qn, int len, int (*cmpptr) (const char *,
     return true;
 }
 
-int
-ltree_strncasecmp(const char *a, const char *b, size_t s)
+/*
+ * Check if 'a' is a prefix of 'b'.
+ */
+bool
+ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz)
+{
+    if (a_sz > b_sz)
+        return false;
+    else
+        return (strncmp(a, b, a_sz) == 0);
+}
+
+/*
+ * Case-insensitive check if 'a' is a prefix of 'b'.
+ */
+bool
+ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz)
 {
-    char *al = str_tolower(a, s, DEFAULT_COLLATION_OID);
-    char *bl = str_tolower(b, s, DEFAULT_COLLATION_OID);
-    int res;
+    char *al = str_tolower(a, a_sz, DEFAULT_COLLATION_OID);
+    char *bl = str_tolower(b, b_sz, DEFAULT_COLLATION_OID);
+    bool res;
 
-    res = strncmp(al, bl, s);
+    res = (strncmp(al, bl, a_sz) == 0);
 
     pfree(al);
     pfree(bl);
@@ -109,19 +125,19 @@ checkLevel(lquery_level *curq, ltree_level *curt)
 
     for (int i = 0; i < curq->numvar; i++)
     {
-        int (*cmpptr) (const char *, const char *, size_t);
+        ltree_prefix_eq_func prefix_eq;
 
-        cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
+        prefix_eq = (curvar->flag & LVAR_INCASE) ?
+            ltree_prefix_eq_ci : ltree_prefix_eq;
 
         if (curvar->flag & LVAR_SUBLEXEME)
         {
-            if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
+            if (compare_subnode(curt, curvar->name, curvar->len, prefix_eq,
                                 (curvar->flag & LVAR_ANYEND)))
                 return success;
         }
         else if ((curvar->len == curt->len ||
                   (curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) &&
-                 (*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
+                 (*prefix_eq) (curvar->name, curvar->len, curt->name, curt->len))
             return success;
 
         curvar = LVAR_NEXT(curvar);
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 5e0761641d3..78478dec173 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -157,6 +157,8 @@ typedef struct
     char data[FLEXIBLE_ARRAY_MEMBER];
 } ltxtquery;
 
+typedef bool (*ltree_prefix_eq_func) (const char *, size_t, const char *, size_t);
+
 #define HDRSIZEQT MAXALIGN(VARHDRSZ + sizeof(int32))
 #define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
 #define LTXTQUERY_TOO_BIG(size,lenofoperand) \
@@ -208,9 +210,10 @@ bool ltree_execute(ITEM *curitem, void *checkval,
 int ltree_compare(const ltree *a, const ltree *b);
 bool inner_isparent(const ltree *c, const ltree *p);
 bool compare_subnode(ltree_level *t, char *qn, int len,
-                     int (*cmpptr) (const char *, const char *, size_t), bool anyend);
+                     ltree_prefix_eq_func prefix_eq, bool anyend);
 ltree *lca_inner(ltree **a, int len);
-int ltree_strncasecmp(const char *a, const char *b, size_t s);
+bool ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz);
+bool ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz);
 
 /* fmgr macros for ltree objects */
 #define DatumGetLtreeP(X) ((ltree *) PG_DETOAST_DATUM(X))
diff --git a/contrib/ltree/ltxtquery_op.c b/contrib/ltree/ltxtquery_op.c
index 002102c9c75..3dcbab2c484 100644
--- a/contrib/ltree/ltxtquery_op.c
+++ b/contrib/ltree/ltxtquery_op.c
@@ -58,19 +58,19 @@ checkcondition_str(void *checkval, ITEM *val)
     ltree_level *level = LTREE_FIRST(((CHKVAL *) checkval)->node);
     int tlen = ((CHKVAL *) checkval)->node->numlevel;
     char *op = ((CHKVAL *) checkval)->operand + val->distance;
-    int (*cmpptr) (const char *, const char *, size_t);
+    ltree_prefix_eq_func prefix_eq;
 
-    cmpptr = (val->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
+    prefix_eq = (val->flag & LVAR_INCASE) ? ltree_prefix_eq_ci : ltree_prefix_eq;
 
     while (tlen > 0)
     {
         if (val->flag & LVAR_SUBLEXEME)
         {
-            if (compare_subnode(level, op, val->length, cmpptr, (val->flag & LVAR_ANYEND)))
+            if (compare_subnode(level, op, val->length, prefix_eq, (val->flag & LVAR_ANYEND)))
                 return true;
         }
         else if ((val->length == level->len ||
                   (level->len > val->length && (val->flag & LVAR_ANYEND))) &&
-                 (*cmpptr) (op, level->name, val->length) == 0)
+                 (*prefix_eq) (op, val->length, level->name, level->len))
             return true;
 
         tlen--;
diff --git a/src/backend/access/heap/README.tuplock b/src/backend/access/heap/README.tuplock
index 750684d3398..16f7d78b7d2 100644
--- a/src/backend/access/heap/README.tuplock
+++ b/src/backend/access/heap/README.tuplock
@@ -198,6 +198,36 @@ Inplace updates create an exception to the rule that tuple data won't change
 under a reader holding a pin. A reader of a heap_fetch() result tuple may
 witness a torn read. Current inplace-updated fields are aligned and are no
 wider than four bytes, and current readers don't need consistency across
-fields. Hence, they get by with just fetching each field once. XXX such a
-caller may also read a value that has not reached WAL; see
-systable_inplace_update_finish().
+fields. Hence, they get by with just fetching each field once.
+
+During logical decoding, caches reflect an inplace update no later than the
+next XLOG_XACT_INVALIDATIONS. That record witnesses the end of a command.
+Tuples of its cmin are then visible to decoding, as are inplace updates of any
+lower LSN. Inplace updates of a higher LSN may also be visible, even if those
+updates would have been invisible to a non-historic snapshot matching
+decoding's historic snapshot. (In other words, decoding may see inplace
+updates that were not visible to a similar snapshot taken during original
+transaction processing.) That's a consequence of inplace update violating
+MVCC: there are no snapshot-specific versions of inplace-updated values. This
+all makes it hard to reason about inplace-updated column reads during logical
+decoding, but the behavior does suffice for relhasindex. A relhasindex=t in
+CREATE INDEX becomes visible no later than the new pg_index row. While it may
+be visible earlier, that's harmless. Finding zero indexes despite
+relhasindex=t is normal in more cases than this, e.g. after DROP INDEX.
+Example of a case that meaningfully reacts to the inplace inval:
+
+CREATE TABLE cat (c int) WITH (user_catalog_table = true);
+CREATE TABLE normal (d int);
+...
+CREATE INDEX ON cat (c)\; INSERT INTO normal VALUES (1);
+
+If the output plugin reads "cat" during decoding of the INSERT, it's fair to
+want that read to see relhasindex=t and use the new index.
+
+An alternative would be to have decoding of XLOG_HEAP_INPLACE immediately
+execute its invals. That would behave more like invals during original
+transaction processing. It would remove the decoding-specific delay in e.g. a
+decoding plugin witnessing a relfrozenxid change. However, a good use case
+for that is unlikely, since the plugin would still witness relfrozenxid
+changes prematurely. Hence, inplace update takes the trivial approach of
+delegating to XLOG_XACT_INVALIDATIONS.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e6c83334de4..d977df4cec8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6158,6 +6158,19 @@ heap_inplace_lock(Relation relation,
 
     Assert(BufferIsValid(buffer));
 
+    /*
+     * Register shared cache invals if necessary. Other sessions may finish
+     * inplace updates of this tuple between this step and LockTuple(). Since
+     * inplace updates don't change cache keys, that's harmless.
+     *
+     * While it's tempting to register invals only after confirming we can
+     * return true, the following obstacle precludes reordering steps that
+     * way. Registering invals might reach a CatalogCacheInitializeCache()
+     * that locks "buffer". That would hang indefinitely if running after our
+     * own LockBuffer(). Hence, we must register invals before LockBuffer().
+     */
+    CacheInvalidateHeapTupleInplace(relation, oldtup_ptr);
+
     LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
     LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
@@ -6253,6 +6266,7 @@ heap_inplace_lock(Relation relation,
     if (!ret)
     {
         UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
+        ForgetInplace_Inval();
         InvalidateCatalogSnapshot();
     }
     return ret;
@@ -6274,6 +6288,8 @@ heap_inplace_update_and_unlock(Relation relation,
     HeapTupleHeader htup = oldtup->t_data;
     uint32 oldlen;
     uint32 newlen;
+    char *dst;
+    char *src;
 
     Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
     oldlen = oldtup->t_len - htup->t_hoff;
@@ -6281,15 +6297,28 @@ heap_inplace_update_and_unlock(Relation relation,
     if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
         elog(ERROR, "wrong tuple length");
 
-    /* NO EREPORT(ERROR) from here till changes are logged */
-    START_CRIT_SECTION();
+    dst = (char *) htup + htup->t_hoff;
+    src = (char *) tuple->t_data + tuple->t_data->t_hoff;
 
-    memcpy((char *) htup + htup->t_hoff,
-           (char *) tuple->t_data + tuple->t_data->t_hoff,
-           newlen);
+    /*
+     * Unlink relcache init files as needed. If unlinking, acquire
+     * RelCacheInitLock until after associated invalidations. By doing this
+     * in advance, if we checkpoint and then crash between inplace
+     * XLogInsert() and inval, we don't rely on StartupXLOG() ->
+     * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
+     * neglect to PANIC on EIO.
+     */
+    PreInplace_Inval();
 
     /*----------
-     * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
+     * NO EREPORT(ERROR) from here till changes are complete
+     *
+     * Our buffer lock won't stop a reader having already pinned and checked
+     * visibility for this tuple. Hence, we write WAL first, then mutate the
+     * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
+     * checkpoint delay makes that acceptable. With the usual order of
+     * changes, a crash after memcpy() and before XLogInsert() could allow
+     * datfrozenxid to overtake relfrozenxid:
     *
     * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
     * ["R" is a VACUUM tbl]
     * D: vac_update_datfrozenxid() -> ReadNewTransactionId()
@@ -6299,14 +6328,36 @@ heap_inplace_update_and_unlock(Relation relation,
     * D: raise pg_database.datfrozenxid, XLogInsert(), finish
     * [crash]
     * [recovery restores datfrozenxid w/o relfrozenxid]
-     */
-
-    MarkBufferDirty(buffer);
+     *
+     * Mimic MarkBufferDirtyHint() subroutine XLogSaveBufferForHint().
+     * Specifically, use DELAY_CHKPT_START, and copy the buffer to the stack.
+     * The stack copy facilitates a FPI of the post-mutation block before we
+     * accept other sessions seeing it. DELAY_CHKPT_START allows us to
+     * XLogInsert() before MarkBufferDirty(). Since XLogSaveBufferForHint()
+     * can operate under BUFFER_LOCK_SHARED, it can't avoid DELAY_CHKPT_START.
+     * This function, however, likely could avoid it with the following order
+     * of operations: MarkBufferDirty(), XLogInsert(), memcpy(). Opt to use
+     * DELAY_CHKPT_START here, too, as a way to have fewer distinct code
+     * patterns to analyze. Inplace update isn't so frequent that it should
+     * pursue the small optimization of skipping DELAY_CHKPT_START.
+     */
+    Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
+    START_CRIT_SECTION();
+    MyProc->delayChkptFlags |= DELAY_CHKPT_START;
 
     /* XLOG stuff */
     if (RelationNeedsWAL(relation))
     {
         xl_heap_inplace xlrec;
+        PGAlignedBlock copied_buffer;
+        char *origdata = (char *) BufferGetBlock(buffer);
+        Page page = BufferGetPage(buffer);
+        uint16 lower = ((PageHeader) page)->pd_lower;
+        uint16 upper = ((PageHeader) page)->pd_upper;
+        uintptr_t dst_offset_in_block;
+        RelFileLocator rlocator;
+        ForkNumber forkno;
+        BlockNumber blkno;
         XLogRecPtr recptr;
 
         xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
@@ -6314,27 +6365,47 @@ heap_inplace_update_and_unlock(Relation relation,
         XLogBeginInsert();
         XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
 
-        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
-        XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
+        /* register block matching what buffer will look like after changes */
+        memcpy(copied_buffer.data, origdata, lower);
+        memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
+        dst_offset_in_block = dst - origdata;
+        memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
+        BufferGetTag(buffer, &rlocator, &forkno, &blkno);
+        Assert(forkno == MAIN_FORKNUM);
+        XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
+                          REGBUF_STANDARD);
+        XLogRegisterBufData(0, src, newlen);
 
         /* inplace updates aren't decoded atm, don't log the origin */
 
         recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
 
-        PageSetLSN(BufferGetPage(buffer), recptr);
+        PageSetLSN(page, recptr);
     }
 
+    memcpy(dst, src, newlen);
+
+    MarkBufferDirty(buffer);
+
+    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+    /*
+     * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
+     * do this before UnlockTuple().
+     */
+    AtInplace_Inval();
+
+    MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
     END_CRIT_SECTION();
+    UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
 
-    heap_inplace_unlock(relation, oldtup, buffer);
+    AcceptInvalidationMessages();   /* local processing of just-sent inval */
 
     /*
-     * Send out shared cache inval if necessary. Note that because we only
-     * pass the new version of the tuple, this mustn't be used for any
-     * operations that could change catcache lookup keys. But we aren't
-     * bothering with index updates either, so that's true a fortiori.
-     *
-     * XXX ROLLBACK discards the invalidation. See test inplace-inval.spec.
+     * Queue a transactional inval, for logical decoding and for third-party
+     * code that might have been relying on it since long before inplace
+     * update adopted immediate invalidation. See README.tuplock section
+     * "Reading inplace-updated columns" for logical decoding details.
     */
     if (!IsBootstrapProcessingMode())
         CacheInvalidateHeapTuple(relation, tuple, NULL);
@@ -6349,6 +6420,7 @@ heap_inplace_unlock(Relation relation,
 {
     LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     UnlockTuple(relation, &oldtup->t_self, InplaceUpdateTupleLock);
+    ForgetInplace_Inval();
 }
 
 /*
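In the XLOG branch above, the stack copy is assembled with two block copies plus one tuple-data copy. Bytes between pd_lower and pd_upper are the free-space hole of a standard page, which is why the copy may skip them and why the block is registered with REGBUF_STANDARD. A condensed restatement of that assembly, using the hunk's own identifiers:

    /* A standard page: header and line pointers grow up to pd_lower, tuple
     * data grows down from pd_upper; bytes in [lower, upper) are unused. */
    memcpy(copied_buffer.data, origdata, lower);                /* header + line pointers */
    memcpy(copied_buffer.data + upper, origdata + upper,
           BLCKSZ - upper);                                     /* tuple data */
    memcpy(copied_buffer.data + (dst - origdata), src, newlen); /* the inplace change */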
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index e775ac7deb8..16a5b8ca47c 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -503,8 +503,12 @@ InitializeParallelDSM(ParallelContext *pcxt)
 void
 ReinitializeParallelDSM(ParallelContext *pcxt)
 {
+    MemoryContext oldcontext;
     FixedParallelState *fps;
 
+    /* We might be running in a very short-lived memory context. */
+    oldcontext = MemoryContextSwitchTo(TopTransactionContext);
+
     /* Wait for any old workers to exit. */
     if (pcxt->nworkers_launched > 0)
     {
@@ -542,6 +546,9 @@ ReinitializeParallelDSM(ParallelContext *pcxt)
             pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
         }
     }
+
+    /* Restore previous memory context. */
+    MemoryContextSwitchTo(oldcontext);
 }
 
 /*
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 4a2ea4adbaf..91dbfcc0d78 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1337,14 +1337,24 @@ RecordTransactionCommit(void)
 
         /*
         * Transactions without an assigned xid can contain invalidation
-         * messages (e.g. explicit relcache invalidations or catcache
-         * invalidations for inplace updates); standbys need to process those.
-         * We can't emit a commit record without an xid, and we don't want to
-         * force assigning an xid, because that'd be problematic for e.g.
-         * vacuum. Hence we emit a bespoke record for the invalidations. We
-         * don't want to use that in case a commit record is emitted, so they
-         * happen synchronously with commits (besides not wanting to emit more
-         * WAL records).
+         * messages. While inplace updates do this, this is not known to be
+         * necessary; see comment at inplace CacheInvalidateHeapTuple().
+         * Extensions might still rely on this capability, and standbys may
+         * need to process those invals. We can't emit a commit record
+         * without an xid, and we don't want to force assigning an xid,
+         * because that'd be problematic for e.g. vacuum. Hence we emit a
+         * bespoke record for the invalidations. We don't want to use that in
+         * case a commit record is emitted, so they happen synchronously with
+         * commits (besides not wanting to emit more WAL records).
+         *
+         * XXX Every known use of this capability is a defect. Since an XID
+         * isn't controlling visibility of the change that prompted invals,
+         * other sessions need the inval even if this transaction aborts.
+         *
+         * ON COMMIT DELETE ROWS does a nontransactional index_build(), which
+         * queues a relcache inval, including in transactions without an xid
+         * that had read the (empty) table. Standbys don't need any ON COMMIT
+         * DELETE ROWS invals, but we've not done the work to withhold them.
         */
         if (nmsgs != 0)
         {
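The ReinitializeParallelDSM() fix uses the standard memory-context discipline: switch to a context whose lifetime matches the allocations, then restore. A minimal sketch of the pattern, assuming TopTransactionContext is the right lifetime as in the hunk above:

    MemoryContext oldcontext;

    oldcontext = MemoryContextSwitchTo(TopTransactionContext);
    /* ... palloc()s here survive until end of transaction, not merely
     * until the caller's short-lived context is reset ... */
    MemoryContextSwitchTo(oldcontext);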
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index de49bd65c06..512abcc6ab7 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -813,6 +813,16 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
         }
         memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
         wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
+
+        /* Make sure that REDO location exists. */
+        if (checkPoint.redo < CheckPointLoc)
+        {
+            XLogPrefetcherBeginRead(xlogprefetcher, checkPoint.redo);
+            if (!ReadRecord(xlogprefetcher, LOG, false, checkPoint.ThisTimeLineID))
+                ereport(PANIC,
+                        errmsg("could not find redo location %X/%08X referenced by checkpoint record at %X/%08X",
+                               LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)));
+        }
     }
 
     /*
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index bc08ab66bd6..21b2e068bd4 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2931,12 +2931,19 @@ index_update_stats(Relation rel,
     if (dirty)
     {
         systable_inplace_update_finish(state, tuple);
-        /* the above sends a cache inval message */
+        /* the above sends transactional and immediate cache inval messages */
     }
     else
     {
         systable_inplace_update_cancel(state);
-        /* no need to change tuple, but force relcache inval anyway */
+
+        /*
+         * While we didn't change relhasindex, CREATE INDEX needs a
+         * transactional inval for when the new index's catalog rows become
+         * visible. Other CREATE INDEX and REINDEX code happens to also queue
+         * this inval, but keep this in case rare callers rely on this part of
+         * our API contract.
+         */
         CacheInvalidateRelcacheByTuple(tuple);
     }
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index db8b2c230c5..a591bf08e1f 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -523,20 +523,13 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
             /*
             * Inplace updates are only ever performed on catalog tuples and
             * can, per definition, not change tuple visibility. Since we
-             * don't decode catalog tuples, we're not interested in the
+             * also don't decode catalog tuples, we're not interested in the
             * record's contents.
-             *
-             * In-place updates can be used either by XID-bearing transactions
-             * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
-             * transactions (e.g. VACUUM). In the former case, the commit
-             * record will include cache invalidations, so we mark the
-             * transaction as catalog modifying here. Currently that's
-             * redundant because the commit will do that as well, but once we
-             * support decoding in-progress relations, this will be important.
             */
             if (!TransactionIdIsValid(xid))
                 break;
 
+            /* PostgreSQL 13 was the last to need these actions. */
             (void) SnapBuildProcessChange(builder, xid, buf->origptr);
             ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
             break;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index e007bd46e93..ef14791e2da 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -37,6 +37,9 @@
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
 #include "catalog/catalog.h"
+#ifdef USE_ASSERT_CHECKING
+#include "catalog/pg_tablespace_d.h"
+#endif
 #include "catalog/storage.h"
 #include "catalog/storage_xlog.h"
 #include "executor/instrument.h"
@@ -498,6 +501,10 @@ static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator,
                                            ForkNumber forkNum, bool permanent);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
+#ifdef USE_ASSERT_CHECKING
+static void AssertNotCatalogBufferLock(LWLock *lock, LWLockMode mode,
+                                       void *unused_context);
+#endif
 static int rlocator_comparator(const void *p1, const void *p2);
 static inline int buffertag_comparator(const BufferTag *ba, const BufferTag *bb);
 static inline int ckpt_buforder_comparator(const CkptSortItem *a, const CkptSortItem *b);
@@ -3225,6 +3232,66 @@ CheckForBufferLeaks(void)
 #endif
 }
 
+#ifdef USE_ASSERT_CHECKING
+/*
+ * Check for exclusive-locked catalog buffers. This is the core of
+ * AssertCouldGetRelation().
+ *
+ * A backend would self-deadlock on LWLocks if the catalog scan read the
+ * exclusive-locked buffer. The main threat is exclusive-locked buffers of
+ * catalogs used in relcache, because a catcache search on any catalog may
+ * build that catalog's relcache entry. We don't have an inventory of
+ * catalogs relcache uses, so just check buffers of most catalogs.
+ *
+ * It's better to minimize waits while holding an exclusive buffer lock, so it
+ * would be nice to broaden this check not to be catalog-specific. However,
+ * bttextcmp() accesses pg_collation, and non-core opclasses might similarly
+ * read tables. That is deadlock-free as long as there's no loop in the
+ * dependency graph: modifying table A may cause an opclass to read table B,
+ * but it must not cause a read of table A.
+ */
+void
+AssertBufferLocksPermitCatalogRead(void)
+{
+    ForEachLWLockHeldByMe(AssertNotCatalogBufferLock, NULL);
+}
+
+static void
+AssertNotCatalogBufferLock(LWLock *lock, LWLockMode mode,
+                           void *unused_context)
+{
+    BufferDesc *bufHdr;
+    BufferTag tag;
+    Oid relid;
+
+    if (mode != LW_EXCLUSIVE)
+        return;
+
+    if (!((BufferDescPadded *) lock > BufferDescriptors &&
+          (BufferDescPadded *) lock < BufferDescriptors + NBuffers))
+        return;                 /* not a buffer lock */
+
+    bufHdr = (BufferDesc *)
+        ((char *) lock - offsetof(BufferDesc, content_lock));
+    tag = bufHdr->tag;
+
+    /*
+     * This relNumber==relid assumption holds until a catalog experiences
+     * VACUUM FULL or similar. After a command like that, relNumber will be
+     * in the normal (non-catalog) range, and we lose the ability to detect
+     * hazardous access to that catalog. Calling RelidByRelfilenumber() would
+     * close that gap, but RelidByRelfilenumber() might then deadlock with a
+     * held lock.
+     */
+    relid = tag.relNumber;
+
+    Assert(!IsCatalogRelationOid(relid));
+    /* Shared rels are always catalogs: detect even after VACUUM FULL. */
+    Assert(tag.spcOid != GLOBALTABLESPACE_OID);
+}
+#endif
+
+
 /*
 * Helper routine to issue warnings when a buffer is unexpectedly pinned
 */
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 15c2f8a2ca9..3fae2a157ab 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -1911,6 +1911,21 @@ LWLockReleaseAll(void)
 
 
 /*
+ * ForEachLWLockHeldByMe - run a callback for each held lock
+ *
+ * This is meant as debug support only.
+ */
+void
+ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *),
+                      void *context)
+{
+    int i;
+
+    for (i = 0; i < num_held_lwlocks; i++)
+        callback(held_lwlocks[i].lock, held_lwlocks[i].mode, context);
+}
+
+/*
 * LWLockHeldByMe - test whether my process holds a lock in any mode
 *
 * This is meant as debug support only.
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 111a7888779..df0c84cf371 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -67,6 +67,7 @@
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_locale.h"
+#include "utils/relcache.h"
 #include "utils/syscache.h"
 
 #ifdef USE_ICU
@@ -1222,6 +1223,8 @@ lookup_collation_cache(Oid collation, bool set_flags)
     Assert(OidIsValid(collation));
     Assert(collation != DEFAULT_COLLATION_OID);
 
+    AssertCouldGetRelation();
+
     if (collation_cache == NULL)
     {
         /* First time through, initialize the hash table */
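ForEachLWLockHeldByMe() just walks the backend's held_lwlocks[] array; AssertNotCatalogBufferLock() above is its caller in this patch. A hypothetical second caller, with invented names, showing how the context pointer carries state:

    static void
    count_exclusive(LWLock *lock, LWLockMode mode, void *context)
    {
        if (mode == LW_EXCLUSIVE)
            (*(int *) context)++;
    }

    static int
    CountExclusiveLWLocksHeldByMe(void)
    {
        int n = 0;

        ForEachLWLockHeldByMe(count_exclusive, &n);
        return n;
    }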
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index d3527cbe3f3..5690a36755c 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -1000,11 +1000,40 @@ RehashCatCacheLists(CatCache *cp)
 }
 
 /*
+ * ConditionalCatalogCacheInitializeCache
+ *
+ * Call CatalogCacheInitializeCache() if not yet done.
+ */
+pg_attribute_always_inline
+static void
+ConditionalCatalogCacheInitializeCache(CatCache *cache)
+{
+#ifdef USE_ASSERT_CHECKING
+    /*
+     * TypeCacheRelCallback() runs outside transactions and relies on TYPEOID
+     * for hashing. This isn't ideal. Since lookup_type_cache() both
+     * registers the callback and searches TYPEOID, reaching trouble likely
+     * requires OOM at an unlucky moment.
+     *
+     * InvalidateAttoptCacheCallback() runs outside transactions and likewise
+     * relies on ATTNUM. InitPostgres() initializes ATTNUM, so it's reliable.
+     */
+    if (!(cache->id == TYPEOID || cache->id == ATTNUM) ||
+        IsTransactionState())
+        AssertCouldGetRelation();
+    else
+        Assert(cache->cc_tupdesc != NULL);
+#endif
+
+    if (unlikely(cache->cc_tupdesc == NULL))
+        CatalogCacheInitializeCache(cache);
+}
+
+/*
 * CatalogCacheInitializeCache
 *
 * This function does final initialization of a catcache: obtain the tuple
- * descriptor and set up the hash and equality function links. We assume
- * that the relcache entry can be opened at this point!
+ * descriptor and set up the hash and equality function links.
 */
 #ifdef CACHEDEBUG
 #define CatalogCacheInitializeCache_DEBUG1 \
@@ -1139,8 +1168,7 @@ CatalogCacheInitializeCache(CatCache *cache)
 void
 InitCatCachePhase2(CatCache *cache, bool touch_index)
 {
-    if (cache->cc_tupdesc == NULL)
-        CatalogCacheInitializeCache(cache);
+    ConditionalCatalogCacheInitializeCache(cache);
 
     if (touch_index &&
         cache->id != AMOID &&
@@ -1319,16 +1347,12 @@ SearchCatCacheInternal(CatCache *cache,
     dlist_head *bucket;
     CatCTup *ct;
 
-    /* Make sure we're in an xact, even if this ends up being a cache hit */
-    Assert(IsTransactionState());
-
     Assert(cache->cc_nkeys == nkeys);
 
     /*
     * one-time startup overhead for each cache
     */
-    if (unlikely(cache->cc_tupdesc == NULL))
-        CatalogCacheInitializeCache(cache);
+    ConditionalCatalogCacheInitializeCache(cache);
 
 #ifdef CATCACHE_STATS
     cache->cc_searches++;
@@ -1607,8 +1631,7 @@ GetCatCacheHashValue(CatCache *cache,
     /*
     * one-time startup overhead for each cache
     */
-    if (cache->cc_tupdesc == NULL)
-        CatalogCacheInitializeCache(cache);
+    ConditionalCatalogCacheInitializeCache(cache);
 
     /*
     * calculate the hash value
@@ -1659,8 +1682,7 @@ SearchCatCacheList(CatCache *cache,
     /*
     * one-time startup overhead for each cache
     */
-    if (unlikely(cache->cc_tupdesc == NULL))
-        CatalogCacheInitializeCache(cache);
+    ConditionalCatalogCacheInitializeCache(cache);
 
     Assert(nkeys > 0 && nkeys < cache->cc_nkeys);
 
@@ -2278,7 +2300,8 @@ void
 PrepareToInvalidateCacheTuple(Relation relation,
                               HeapTuple tuple,
                               HeapTuple newtuple,
-                              void (*function) (int, uint32, Oid))
+                              void (*function) (int, uint32, Oid, void *),
+                              void *context)
 {
     slist_iter iter;
     Oid reloid;
@@ -2313,13 +2336,12 @@ PrepareToInvalidateCacheTuple(Relation relation,
             continue;
 
         /* Just in case cache hasn't finished initialization yet... */
-        if (ccp->cc_tupdesc == NULL)
-            CatalogCacheInitializeCache(ccp);
+        ConditionalCatalogCacheInitializeCache(ccp);
 
         hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple);
         dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId;
 
-        (*function) (ccp->id, hashvalue, dbid);
+        (*function) (ccp->id, hashvalue, dbid, context);
 
         if (newtuple)
         {
@@ -2328,7 +2350,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
             newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, newtuple);
 
             if (newhashvalue != hashvalue)
-                (*function) (ccp->id, newhashvalue, dbid);
+                (*function) (ccp->id, newhashvalue, dbid, context);
         }
     }
 }
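The extra void *context parameter lets PrepareToInvalidateCacheTuple() callers thread state to the callback instead of relying on file-scope globals, which is what inval.c now does with its InvalidationInfo. A hypothetical callback with invented names, showing the new signature:

    typedef struct InvalTally
    {
        int ncalls;
        uint32 last_hash;
    } InvalTally;

    static void
    tally_inval(int cacheId, uint32 hashValue, Oid dbId, void *context)
    {
        InvalTally *tally = (InvalTally *) context;

        tally->ncalls++;
        tally->last_hash = hashValue;
    }

    /* usage: PrepareToInvalidateCacheTuple(rel, tuple, NULL, tally_inval, &tally); */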
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index a140a99f0a7..140848c2d73 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -94,6 +94,10 @@
 * worth trying to avoid sending such inval traffic in the future, if those
 * problems can be overcome cheaply.
 *
+ * When making a nontransactional change to a cacheable object, we must
+ * likewise send the invalidation immediately, before ending the change's
+ * critical section. This includes inplace heap updates, relmap, and smgr.
+ *
 * When wal_level=logical, write invalidations into WAL at each command end to
 * support the decoding of the in-progress transactions. See
 * CommandEndInvalidationMessages.
@@ -131,13 +135,15 @@
 
 /*
 * Pending requests are stored as ready-to-send SharedInvalidationMessages.
- * We keep the messages themselves in arrays in TopTransactionContext
- * (there are separate arrays for catcache and relcache messages). Control
- * information is kept in a chain of TransInvalidationInfo structs, also
- * allocated in TopTransactionContext. (We could keep a subtransaction's
- * TransInvalidationInfo in its CurTransactionContext; but that's more
- * wasteful not less so, since in very many scenarios it'd be the only
- * allocation in the subtransaction's CurTransactionContext.)
+ * We keep the messages themselves in arrays in TopTransactionContext (there
+ * are separate arrays for catcache and relcache messages). For transactional
+ * messages, control information is kept in a chain of TransInvalidationInfo
+ * structs, also allocated in TopTransactionContext. (We could keep a
+ * subtransaction's TransInvalidationInfo in its CurTransactionContext; but
+ * that's more wasteful not less so, since in very many scenarios it'd be the
+ * only allocation in the subtransaction's CurTransactionContext.) For
+ * inplace update messages, control information appears in an
+ * InvalidationInfo, allocated in CurrentMemoryContext.
 *
 * We can store the message arrays densely, and yet avoid moving data around
 * within an array, because within any one subtransaction we need only
@@ -148,7 +154,9 @@
 * struct. Similarly, we need distinguish messages of prior subtransactions
 * from those of the current subtransaction only until the subtransaction
 * completes, after which we adjust the array indexes in the parent's
- * TransInvalidationInfo to include the subtransaction's messages.
+ * TransInvalidationInfo to include the subtransaction's messages. Inplace
+ * invalidations don't need a concept of command or subtransaction boundaries,
+ * since we send them during the WAL insertion critical section.
 *
 * The ordering of the individual messages within a command's or
 * subtransaction's output is not considered significant, although this
@@ -201,7 +209,7 @@ typedef struct InvalidationMsgsGroup
 
 
 /*----------------
- * Invalidation messages are divided into two groups:
+ * Transactional invalidation messages are divided into two groups:
 *	1) events so far in current command, not yet reflected to caches.
 *	2) events in previous commands of current transaction; these have
 *	   been reflected to local caches, and must be either broadcast to
@@ -217,26 +225,36 @@ typedef struct InvalidationMsgsGroup
 *----------------
 */
 
-typedef struct TransInvalidationInfo
+/* fields common to both transactional and inplace invalidation */
+typedef struct InvalidationInfo
 {
-    /* Back link to parent transaction's info */
-    struct TransInvalidationInfo *parent;
-
-    /* Subtransaction nesting depth */
-    int my_level;
-
     /* Events emitted by current command */
     InvalidationMsgsGroup CurrentCmdInvalidMsgs;
 
+    /* init file must be invalidated? */
+    bool RelcacheInitFileInval;
+} InvalidationInfo;
+
+/* subclass adding fields specific to transactional invalidation */
+typedef struct TransInvalidationInfo
+{
+    /* Base class */
+    struct InvalidationInfo ii;
+
     /* Events emitted by previous commands of this (sub)transaction */
     InvalidationMsgsGroup PriorCmdInvalidMsgs;
 
-    /* init file must be invalidated? */
-    bool RelcacheInitFileInval;
+    /* Back link to parent transaction's info */
+    struct TransInvalidationInfo *parent;
+
+    /* Subtransaction nesting depth */
+    int my_level;
 } TransInvalidationInfo;
 
 static TransInvalidationInfo *transInvalInfo = NULL;
 
+static InvalidationInfo *inplaceInvalInfo = NULL;
+
 /* GUC storage */
 int debug_discard_caches = 0;
@@ -544,9 +562,12 @@ ProcessInvalidationMessagesMulti(InvalidationMsgsGroup *group,
 static void
 RegisterCatcacheInvalidation(int cacheId,
                              uint32 hashValue,
-                             Oid dbId)
+                             Oid dbId,
+                             void *context)
 {
-    AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
+    InvalidationInfo *info = (InvalidationInfo *) context;
+
+    AddCatcacheInvalidationMessage(&info->CurrentCmdInvalidMsgs,
                                    cacheId, hashValue, dbId);
 }
 
@@ -556,10 +577,9 @@ RegisterCatcacheInvalidation(int cacheId,
 * Register an invalidation event for all catcache entries from a catalog.
 */
 static void
-RegisterCatalogInvalidation(Oid dbId, Oid catId)
+RegisterCatalogInvalidation(InvalidationInfo *info, Oid dbId, Oid catId)
 {
-    AddCatalogInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                  dbId, catId);
+    AddCatalogInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, catId);
 }
 
 /*
@@ -568,10 +588,9 @@ RegisterCatalogInvalidation(Oid dbId, Oid catId)
 * As above, but register a relcache invalidation event.
 */
 static void
-RegisterRelcacheInvalidation(Oid dbId, Oid relId)
+RegisterRelcacheInvalidation(InvalidationInfo *info, Oid dbId, Oid relId)
 {
-    AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                   dbId, relId);
+    AddRelcacheInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, relId);
 
     /*
     * Most of the time, relcache invalidation is associated with system
@@ -588,7 +607,7 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
     * as well. Also zap when we are invalidating whole relcache.
     */
     if (relId == InvalidOid || RelationIdIsInInitFile(relId))
-        transInvalInfo->RelcacheInitFileInval = true;
+        info->RelcacheInitFileInval = true;
 }
 
 /*
@@ -598,10 +617,140 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
 * Only needed for catalogs that don't have catcaches.
 */
 static void
-RegisterSnapshotInvalidation(Oid dbId, Oid relId)
+RegisterSnapshotInvalidation(InvalidationInfo *info, Oid dbId, Oid relId)
+{
+    AddSnapshotInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, relId);
+}
+
+/*
+ * PrepareInvalidationState
+ *		Initialize inval data for the current (sub)transaction.
+ */
+static InvalidationInfo *
+PrepareInvalidationState(void)
+{
+    TransInvalidationInfo *myInfo;
+
+    /* PrepareToInvalidateCacheTuple() needs relcache */
+    AssertCouldGetRelation();
+    /* Can't queue transactional message while collecting inplace messages. */
+    Assert(inplaceInvalInfo == NULL);
+
+    if (transInvalInfo != NULL &&
+        transInvalInfo->my_level == GetCurrentTransactionNestLevel())
+        return (InvalidationInfo *) transInvalInfo;
+
+    myInfo = (TransInvalidationInfo *)
+        MemoryContextAllocZero(TopTransactionContext,
+                               sizeof(TransInvalidationInfo));
+    myInfo->parent = transInvalInfo;
+    myInfo->my_level = GetCurrentTransactionNestLevel();
+
+    /* Now, do we have a previous stack entry? */
+    if (transInvalInfo != NULL)
+    {
+        /* Yes; this one should be for a deeper nesting level. */
+        Assert(myInfo->my_level > transInvalInfo->my_level);
+
+        /*
+         * The parent (sub)transaction must not have any current (i.e.,
+         * not-yet-locally-processed) messages. If it did, we'd have a
+         * semantic problem: the new subtransaction presumably ought not be
+         * able to see those events yet, but since the CommandCounter is
+         * linear, that can't work once the subtransaction advances the
+         * counter. This is a convenient place to check for that, as well as
+         * being important to keep management of the message arrays simple.
+         */
+        if (NumMessagesInGroup(&transInvalInfo->ii.CurrentCmdInvalidMsgs) != 0)
+            elog(ERROR, "cannot start a subtransaction when there are unprocessed inval messages");
+
+        /*
+         * MemoryContextAllocZero set firstmsg = nextmsg = 0 in each group,
+         * which is fine for the first (sub)transaction, but otherwise we need
+         * to update them to follow whatever is already in the arrays.
+         */
+        SetGroupToFollow(&myInfo->PriorCmdInvalidMsgs,
+                         &transInvalInfo->ii.CurrentCmdInvalidMsgs);
+        SetGroupToFollow(&myInfo->ii.CurrentCmdInvalidMsgs,
+                         &myInfo->PriorCmdInvalidMsgs);
+    }
+    else
+    {
+        /*
+         * Here, we need only clear any array pointers left over from a prior
+         * transaction.
+         */
+        InvalMessageArrays[CatCacheMsgs].msgs = NULL;
+        InvalMessageArrays[CatCacheMsgs].maxmsgs = 0;
+        InvalMessageArrays[RelCacheMsgs].msgs = NULL;
+        InvalMessageArrays[RelCacheMsgs].maxmsgs = 0;
+    }
+
+    transInvalInfo = myInfo;
+    return (InvalidationInfo *) myInfo;
+}
+
+/*
+ * PrepareInplaceInvalidationState
+ *		Initialize inval data for an inplace update.
+ *
+ * See previous function for more background.
+ */
+static InvalidationInfo *
+PrepareInplaceInvalidationState(void)
 {
-    AddSnapshotInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                   dbId, relId);
+    InvalidationInfo *myInfo;
+
+    AssertCouldGetRelation();
+    /* limit of one inplace update under assembly */
+    Assert(inplaceInvalInfo == NULL);
+
+    /* gone after WAL insertion CritSection ends, so use current context */
+    myInfo = (InvalidationInfo *) palloc0(sizeof(InvalidationInfo));
+
+    /* Stash our messages past end of the transactional messages, if any. */
+    if (transInvalInfo != NULL)
+        SetGroupToFollow(&myInfo->CurrentCmdInvalidMsgs,
+                         &transInvalInfo->ii.CurrentCmdInvalidMsgs);
+    else
+    {
+        InvalMessageArrays[CatCacheMsgs].msgs = NULL;
+        InvalMessageArrays[CatCacheMsgs].maxmsgs = 0;
+        InvalMessageArrays[RelCacheMsgs].msgs = NULL;
+        InvalMessageArrays[RelCacheMsgs].maxmsgs = 0;
+    }
+
+    inplaceInvalInfo = myInfo;
+    return myInfo;
+}
+
+/* ----------------------------------------------------------------
+ *					public functions
+ * ----------------------------------------------------------------
+ */
+
+void
+InvalidateSystemCachesExtended(bool debug_discard)
+{
+    int i;
+
+    InvalidateCatalogSnapshot();
+    ResetCatalogCachesExt(debug_discard);
+    RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */
+
+    for (i = 0; i < syscache_callback_count; i++)
+    {
+        struct SYSCACHECALLBACK *ccitem = syscache_callback_list + i;
+
+        ccitem->function(ccitem->arg, ccitem->id, 0);
+    }
+
+    for (i = 0; i < relcache_callback_count; i++)
+    {
+        struct RELCACHECALLBACK *ccitem = relcache_callback_list + i;
+
+        ccitem->function(ccitem->arg, InvalidOid);
+    }
 }
 
 /*
@@ -704,36 +853,6 @@ InvalidateSystemCaches(void)
     InvalidateSystemCachesExtended(false);
 }
 
-void
-InvalidateSystemCachesExtended(bool debug_discard)
-{
-    int i;
-
-    InvalidateCatalogSnapshot();
-    ResetCatalogCachesExt(debug_discard);
-    RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */
-
-    for (i = 0; i < syscache_callback_count; i++)
-    {
-        struct SYSCACHECALLBACK *ccitem = syscache_callback_list + i;
-
-        ccitem->function(ccitem->arg, ccitem->id, 0);
-    }
-
-    for (i = 0; i < relcache_callback_count; i++)
-    {
-        struct RELCACHECALLBACK *ccitem = relcache_callback_list + i;
-
-        ccitem->function(ccitem->arg, InvalidOid);
-    }
-}
-
-
-/* ----------------------------------------------------------------
- *					public functions
- * ----------------------------------------------------------------
- */
-
 /*
 * AcceptInvalidationMessages
 *		Read and process invalidation messages from the shared invalidation
@@ -745,6 +864,12 @@ InvalidateSystemCachesExtended(bool debug_discard)
 void
 AcceptInvalidationMessages(void)
 {
+#ifdef USE_ASSERT_CHECKING
+    /* message handlers shall access catalogs only during transactions */
+    if (IsTransactionState())
+        AssertCouldGetRelation();
+#endif
+
     ReceiveSharedInvalidMessages(LocalExecuteInvalidationMessage,
                                  InvalidateSystemCaches);
 
@@ -788,68 +913,6 @@ AcceptInvalidationMessages(void)
 }
 
 /*
- * PrepareInvalidationState
- *		Initialize inval data for the current (sub)transaction.
- */
-static void
-PrepareInvalidationState(void)
-{
-    TransInvalidationInfo *myInfo;
-
-    if (transInvalInfo != NULL &&
-        transInvalInfo->my_level == GetCurrentTransactionNestLevel())
-        return;
-
-    myInfo = (TransInvalidationInfo *)
-        MemoryContextAllocZero(TopTransactionContext,
-                               sizeof(TransInvalidationInfo));
-    myInfo->parent = transInvalInfo;
-    myInfo->my_level = GetCurrentTransactionNestLevel();
-
-    /* Now, do we have a previous stack entry? */
-    if (transInvalInfo != NULL)
-    {
-        /* Yes; this one should be for a deeper nesting level. */
-        Assert(myInfo->my_level > transInvalInfo->my_level);
-
-        /*
-         * The parent (sub)transaction must not have any current (i.e.,
-         * not-yet-locally-processed) messages. If it did, we'd have a
-         * semantic problem: the new subtransaction presumably ought not be
-         * able to see those events yet, but since the CommandCounter is
-         * linear, that can't work once the subtransaction advances the
-         * counter. This is a convenient place to check for that, as well as
-         * being important to keep management of the message arrays simple.
-         */
-        if (NumMessagesInGroup(&transInvalInfo->CurrentCmdInvalidMsgs) != 0)
-            elog(ERROR, "cannot start a subtransaction when there are unprocessed inval messages");
-
-        /*
-         * MemoryContextAllocZero set firstmsg = nextmsg = 0 in each group,
-         * which is fine for the first (sub)transaction, but otherwise we need
-         * to update them to follow whatever is already in the arrays.
-         */
-        SetGroupToFollow(&myInfo->PriorCmdInvalidMsgs,
-                         &transInvalInfo->CurrentCmdInvalidMsgs);
-        SetGroupToFollow(&myInfo->CurrentCmdInvalidMsgs,
-                         &myInfo->PriorCmdInvalidMsgs);
-    }
-    else
-    {
-        /*
-         * Here, we need only clear any array pointers left over from a prior
-         * transaction.
-         */
-        InvalMessageArrays[CatCacheMsgs].msgs = NULL;
-        InvalMessageArrays[CatCacheMsgs].maxmsgs = 0;
-        InvalMessageArrays[RelCacheMsgs].msgs = NULL;
-        InvalMessageArrays[RelCacheMsgs].maxmsgs = 0;
-    }
-
-    transInvalInfo = myInfo;
-}
-
-/*
 * PostPrepare_Inval
 *		Clean up after successful PREPARE.
 *
@@ -904,7 +967,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
     * after we send the SI messages. However, we need not do anything unless
     * we committed.
     */
-    *RelcacheInitFileInval = transInvalInfo->RelcacheInitFileInval;
+    *RelcacheInitFileInval = transInvalInfo->ii.RelcacheInitFileInval;
 
     /*
     * Collect all the pending messages into a single contiguous array of
@@ -915,7 +978,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
     * not new ones.
     */
     nummsgs = NumMessagesInGroup(&transInvalInfo->PriorCmdInvalidMsgs) +
-        NumMessagesInGroup(&transInvalInfo->CurrentCmdInvalidMsgs);
+        NumMessagesInGroup(&transInvalInfo->ii.CurrentCmdInvalidMsgs);
 
     *msgs = msgarray = (SharedInvalidationMessage *)
         MemoryContextAlloc(CurTransactionContext,
@@ -928,7 +991,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
                                          msgs,
                                          n * sizeof(SharedInvalidationMessage)),
                                   nmsgs += n));
-    ProcessMessageSubGroupMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
+    ProcessMessageSubGroupMulti(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 CatCacheMsgs,
                                 (memcpy(msgarray + nmsgs,
                                         msgs,
@@ -940,7 +1003,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
                                          msgs,
                                          n * sizeof(SharedInvalidationMessage)),
                                   nmsgs += n));
-    ProcessMessageSubGroupMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
+    ProcessMessageSubGroupMulti(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 RelCacheMsgs,
                                 (memcpy(msgarray + nmsgs,
                                         msgs,
@@ -1027,7 +1090,9 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
 void
 AtEOXact_Inval(bool isCommit)
 {
-    /* Quick exit if no messages */
+    inplaceInvalInfo = NULL;
+
+    /* Quick exit if no transactional messages */
     if (transInvalInfo == NULL)
         return;
 
@@ -1041,16 +1106,16 @@ AtEOXact_Inval(bool isCommit)
         * after we send the SI messages. However, we need not do anything
         * unless we committed.
         */
-        if (transInvalInfo->RelcacheInitFileInval)
+        if (transInvalInfo->ii.RelcacheInitFileInval)
             RelationCacheInitFilePreInvalidate();
 
         AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
-                                   &transInvalInfo->CurrentCmdInvalidMsgs);
+                                   &transInvalInfo->ii.CurrentCmdInvalidMsgs);
 
         ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs,
                                          SendSharedInvalidMessages);
 
-        if (transInvalInfo->RelcacheInitFileInval)
+        if (transInvalInfo->ii.RelcacheInitFileInval)
             RelationCacheInitFilePostInvalidate();
     }
     else
@@ -1064,6 +1129,56 @@ AtEOXact_Inval(bool isCommit)
 }
 
 /*
+ * PreInplace_Inval
+ *		Process queued-up invalidation before inplace update critical section.
+ *
+ * Tasks belong here if they are safe even if the inplace update does not
+ * complete. Currently, this just unlinks a cache file, which can fail. The
+ * sum of this and AtInplace_Inval() mirrors AtEOXact_Inval(isCommit=true).
+ */
+void
+PreInplace_Inval(void)
+{
+    Assert(CritSectionCount == 0);
+
+    if (inplaceInvalInfo && inplaceInvalInfo->RelcacheInitFileInval)
+        RelationCacheInitFilePreInvalidate();
+}
+
+/*
+ * AtInplace_Inval
+ *		Process queued-up invalidations after inplace update buffer mutation.
+ */
+void
+AtInplace_Inval(void)
+{
+    Assert(CritSectionCount > 0);
+
+    if (inplaceInvalInfo == NULL)
+        return;
+
+    ProcessInvalidationMessagesMulti(&inplaceInvalInfo->CurrentCmdInvalidMsgs,
+                                     SendSharedInvalidMessages);
+
+    if (inplaceInvalInfo->RelcacheInitFileInval)
+        RelationCacheInitFilePostInvalidate();
+
+    inplaceInvalInfo = NULL;
+}
+
+/*
+ * ForgetInplace_Inval
+ *		Alternative to PreInplace_Inval()+AtInplace_Inval(): discard queued-up
+ *		invalidations. This lets inplace update enumerate invalidations
+ *		optimistically, before locking the buffer.
+ */
+void
+ForgetInplace_Inval(void)
+{
+    inplaceInvalInfo = NULL;
+}
+
+/*
 * AtEOSubXact_Inval
 *		Process queued-up invalidation messages at end of subtransaction.
 *
@@ -1085,9 +1200,20 @@ void
 AtEOSubXact_Inval(bool isCommit)
 {
     int my_level;
-    TransInvalidationInfo *myInfo = transInvalInfo;
+    TransInvalidationInfo *myInfo;
+
+    /*
+     * Successful inplace update must clear this, but we clear it on abort.
+     * Inplace updates allocate this in CurrentMemoryContext, which has
+     * lifespan <= subtransaction lifespan. Hence, don't free it explicitly.
+     */
+    if (isCommit)
+        Assert(inplaceInvalInfo == NULL);
+    else
+        inplaceInvalInfo = NULL;
 
-    /* Quick exit if no messages. */
+    /* Quick exit if no transactional messages. */
+    myInfo = transInvalInfo;
     if (myInfo == NULL)
         return;
 
@@ -1128,12 +1254,12 @@ AtEOSubXact_Inval(bool isCommit)
                                    &myInfo->PriorCmdInvalidMsgs);
 
         /* Must readjust parent's CurrentCmdInvalidMsgs indexes now */
-        SetGroupToFollow(&myInfo->parent->CurrentCmdInvalidMsgs,
+        SetGroupToFollow(&myInfo->parent->ii.CurrentCmdInvalidMsgs,
                          &myInfo->parent->PriorCmdInvalidMsgs);
 
         /* Pending relcache inval becomes parent's problem too */
-        if (myInfo->RelcacheInitFileInval)
-            myInfo->parent->RelcacheInitFileInval = true;
+        if (myInfo->ii.RelcacheInitFileInval)
+            myInfo->parent->ii.RelcacheInitFileInval = true;
 
         /* Pop the transaction state stack */
         transInvalInfo = myInfo->parent;
@@ -1180,7 +1306,7 @@ CommandEndInvalidationMessages(void)
     if (transInvalInfo == NULL)
         return;
 
-    ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
+    ProcessInvalidationMessages(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 LocalExecuteInvalidationMessage);
 
     /* WAL Log per-command invalidation messages for wal_level=logical */
@@ -1188,30 +1314,28 @@ CommandEndInvalidationMessages(void)
         LogLogicalInvalidations();
 
     AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
-                               &transInvalInfo->CurrentCmdInvalidMsgs);
+                               &transInvalInfo->ii.CurrentCmdInvalidMsgs);
 }
 
 /*
- * CacheInvalidateHeapTuple
- *		Register the given tuple for invalidation at end of command
- *		(ie, current command is creating or outdating this tuple).
- *		Also, detect whether a relcache invalidation is implied.
- *
- * For an insert or delete, tuple is the target tuple and newtuple is NULL.
- * For an update, we are called just once, with tuple being the old tuple
- * version and newtuple the new version. This allows avoidance of duplicate
- * effort during an update.
+ * CacheInvalidateHeapTupleCommon
+ *		Common logic for end-of-command and inplace variants.
 */
-void
-CacheInvalidateHeapTuple(Relation relation,
-                         HeapTuple tuple,
-                         HeapTuple newtuple)
+static void
+CacheInvalidateHeapTupleCommon(Relation relation,
+                               HeapTuple tuple,
+                               HeapTuple newtuple,
+                               InvalidationInfo *(*prepare_callback) (void))
 {
+    InvalidationInfo *info;
     Oid tupleRelId;
     Oid databaseId;
     Oid relationId;
 
+    /* PrepareToInvalidateCacheTuple() needs relcache */
+    AssertCouldGetRelation();
+
     /* Do nothing during bootstrap */
     if (IsBootstrapProcessingMode())
         return;
@@ -1231,11 +1355,8 @@ CacheInvalidateHeapTuple(Relation relation,
     if (IsToastRelation(relation))
         return;
 
-    /*
-     * If we're not prepared to queue invalidation messages for this
-     * subtransaction level, get ready now.
-     */
-    PrepareInvalidationState();
+    /* Allocate any required resources. */
+    info = prepare_callback();
 
     /*
     * First let the catcache do its thing
@@ -1244,11 +1365,12 @@ CacheInvalidateHeapTuple(Relation relation,
     if (RelationInvalidatesSnapshotsOnly(tupleRelId))
     {
         databaseId = IsSharedRelation(tupleRelId) ? InvalidOid : MyDatabaseId;
-        RegisterSnapshotInvalidation(databaseId, tupleRelId);
+        RegisterSnapshotInvalidation(info, databaseId, tupleRelId);
     }
     else
         PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
-                                      RegisterCatcacheInvalidation);
+                                      RegisterCatcacheInvalidation,
+                                      (void *) info);
 
     /*
     * Now, is this tuple one of the primary definers of a relcache entry? See
@@ -1321,7 +1443,48 @@ CacheInvalidateHeapTuple(Relation relation,
     /*
     * Yes. We need to register a relcache invalidation event.
     */
-    RegisterRelcacheInvalidation(databaseId, relationId);
+    RegisterRelcacheInvalidation(info, databaseId, relationId);
+}
+
+/*
+ * CacheInvalidateHeapTuple
+ *		Register the given tuple for invalidation at end of command
+ *		(ie, current command is creating or outdating this tuple) and end of
+ *		transaction. Also, detect whether a relcache invalidation is implied.
+ *
+ * For an insert or delete, tuple is the target tuple and newtuple is NULL.
+ * For an update, we are called just once, with tuple being the old tuple
+ * version and newtuple the new version. This allows avoidance of duplicate
+ * effort during an update.
+ */
+void
+CacheInvalidateHeapTuple(Relation relation,
+                         HeapTuple tuple,
+                         HeapTuple newtuple)
+{
+    CacheInvalidateHeapTupleCommon(relation, tuple, newtuple,
+                                   PrepareInvalidationState);
+}
+
+/*
+ * CacheInvalidateHeapTupleInplace
+ *		Register the given tuple for nontransactional invalidation pertaining
+ *		to an inplace update. Also, detect whether a relcache invalidation is
+ *		implied.
+ *
+ * Like CacheInvalidateHeapTuple(), but for inplace updates.
+ *
+ * Just before and just after the inplace update, the tuple's cache keys must
+ * match those in key_equivalent_tuple. Cache keys consist of catcache lookup
+ * key columns and columns referencing pg_class.oid values,
+ * e.g. pg_constraint.conrelid, which would trigger relcache inval.
+ */
+void
+CacheInvalidateHeapTupleInplace(Relation relation,
+                                HeapTuple key_equivalent_tuple)
+{
+    CacheInvalidateHeapTupleCommon(relation, key_equivalent_tuple, NULL,
+                                   PrepareInplaceInvalidationState);
 }
 
 /*
@@ -1340,14 +1503,13 @@ CacheInvalidateCatalog(Oid catalogId)
 {
     Oid databaseId;
 
-    PrepareInvalidationState();
-
     if (IsSharedRelation(catalogId))
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
 
-    RegisterCatalogInvalidation(databaseId, catalogId);
+    RegisterCatalogInvalidation(PrepareInvalidationState(),
+                                databaseId, catalogId);
 }
 
 /*
@@ -1365,15 +1527,14 @@ CacheInvalidateRelcache(Relation relation)
     Oid databaseId;
     Oid relationId;
 
-    PrepareInvalidationState();
-
     relationId = RelationGetRelid(relation);
     if (relation->rd_rel->relisshared)
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
 
-    RegisterRelcacheInvalidation(databaseId, relationId);
+    RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                 databaseId, relationId);
 }
 
 /*
@@ -1386,9 +1547,8 @@ CacheInvalidateRelcache(Relation relation)
 void
 CacheInvalidateRelcacheAll(void)
 {
-    PrepareInvalidationState();
-
-    RegisterRelcacheInvalidation(InvalidOid, InvalidOid);
+    RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                 InvalidOid, InvalidOid);
 }
 
 /*
@@ -1402,14 +1562,13 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
     Oid databaseId;
     Oid relationId;
 
-    PrepareInvalidationState();
-
     relationId = classtup->oid;
     if (classtup->relisshared)
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
-    RegisterRelcacheInvalidation(databaseId, relationId);
+    RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                 databaseId, relationId);
 }
 
 /*
@@ -1423,8 +1582,6 @@ CacheInvalidateRelcacheByRelid(Oid relid)
 {
     HeapTuple tup;
 
-    PrepareInvalidationState();
-
     tup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
     if (!HeapTupleIsValid(tup))
         elog(ERROR, "cache lookup failed for relation %u", relid);
@@ -1614,7 +1771,7 @@ LogLogicalInvalidations(void)
     if (transInvalInfo == NULL)
         return;
 
-    group = &transInvalInfo->CurrentCmdInvalidMsgs;
+    group = &transInvalInfo->ii.CurrentCmdInvalidMsgs;
     nmsgs = NumMessagesInGroup(group);
 
     if (nmsgs > 0)
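Together with the heapam.c changes, the three new entry points split what AtEOXact_Inval(isCommit=true) does into stages keyed to the inplace critical section. A condensed sketch of the caller contract; function names are from this patch, while can_proceed stands in for heap_inplace_lock()'s result:

    CacheInvalidateHeapTupleInplace(relation, oldtup);  /* enumerate invals before buffer lock */

    if (!can_proceed)                   /* heap_inplace_lock() returned false */
        ForgetInplace_Inval();          /* discard the optimistically queued invals */
    else
    {
        PreInplace_Inval();             /* failure-prone init file unlink, pre-critical-section */
        START_CRIT_SECTION();
        /* XLogInsert(), memcpy() into the buffer, MarkBufferDirty() */
        AtInplace_Inval();              /* send to shared queue before UnlockTuple() */
        END_CRIT_SECTION();
        AcceptInvalidationMessages();   /* process our own inval locally */
    }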
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 34c3cfa8e8c..3140f00ea44 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2028,6 +2028,23 @@ formrdesc(const char *relationName, Oid relationReltype,
     relation->rd_isvalid = true;
 }
 
+#ifdef USE_ASSERT_CHECKING
+/*
+ * AssertCouldGetRelation
+ *
+ * Check safety of calling RelationIdGetRelation().
+ *
+ * In code that reads catalogs in the event of a cache miss, call this
+ * before checking the cache.
+ */
+void
+AssertCouldGetRelation(void)
+{
+    Assert(IsTransactionState());
+    AssertBufferLocksPermitCatalogRead();
+}
+#endif
+
 
 /* ----------------------------------------------------------------
 *				 Relation Descriptor Lookup Interface
@@ -2055,8 +2072,7 @@ RelationIdGetRelation(Oid relationId)
 {
     Relation rd;
 
-    /* Make sure we're in an xact, even if this ends up being a cache hit */
-    Assert(IsTransactionState());
+    AssertCouldGetRelation();
 
     /*
     * first try to find reldesc in the cache
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 40517895d82..d55277e90e9 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -956,8 +956,7 @@ SearchSysCacheLocked1(int cacheId,
 
         /*
         * If an inplace update just finished, ensure we process the syscache
-         * inval. XXX this is insufficient: the inplace updater may not yet
-         * have reached AtEOXact_Inval(). See test at inplace-inval.spec.
+         * inval.
         *
         * If a heap_update() call just released its LOCKTAG_TUPLE, we'll
         * probably find the old tuple and reach "tuple concurrently updated".
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 1ccdf9da5b0..33ad7ae7774 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -39,6 +39,7 @@
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
+#include "utils/relcache.h"
 #include "utils/syscache.h"
 #include "varatt.h"
 
@@ -311,7 +312,7 @@ InitializeClientEncoding(void)
     {
         Oid utf8_to_server_proc;
 
-        Assert(IsTransactionState());
+        AssertCouldGetRelation();
         utf8_to_server_proc =
             FindDefaultConversionProc(PG_UTF8,
                                       current_server_encoding);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index b379c76e273..166524c50e0 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -205,6 +205,9 @@ extern Buffer ExtendBufferedRelTo(BufferManagerRelation bmr,
 
 extern void InitBufferPoolAccess(void);
 extern void AtEOXact_Buffers(bool isCommit);
+#ifdef USE_ASSERT_CHECKING
+extern void AssertBufferLocksPermitCatalogRead(void);
+#endif
 extern void PrintBufferLeakWarning(Buffer buffer);
 extern void CheckPointBuffers(int flags);
 extern BlockNumber BufferGetBlockNumber(Buffer buffer);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 34169e5889e..cab38447d48 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -131,6 +131,8 @@ extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode);
 extern void LWLockRelease(LWLock *lock);
 extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val);
 extern void LWLockReleaseAll(void);
+extern void ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *),
+                                  void *context);
 extern bool LWLockHeldByMe(LWLock *lock);
 extern bool LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride);
 extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode);
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index ea3ca9c48a6..e16ab4a2e58 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -109,10 +109,10 @@ struct XidCache
 * is inserted prior to the new redo point, the corresponding data changes will
 * also be flushed to disk before the checkpoint can complete. (In the
 * extremely common case where the data being modified is in shared buffers
- * and we acquire an exclusive content lock on the relevant buffers before
- * writing WAL, this mechanism is not needed, because phase 2 will block
- * until we release the content lock and then flush the modified data to
- * disk.)
+ * and we acquire an exclusive content lock and MarkBufferDirty() on the
+ * relevant buffers before writing WAL, this mechanism is not needed, because
+ * phase 2 will block until we release the content lock and then flush the
+ * modified data to disk. See transam/README and SyncOneBuffer().)
 *
 * Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
 * to phase 3. This is useful if we are performing a WAL-logged operation that
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index 84ca10704bc..e12bd608649 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -229,7 +229,8 @@ extern void CatCacheInvalidate(CatCache *cache, uint32 hashValue);
 extern void PrepareToInvalidateCacheTuple(Relation relation,
                                           HeapTuple tuple,
                                           HeapTuple newtuple,
-                                          void (*function) (int, uint32, Oid));
+                                          void (*function) (int, uint32, Oid, void *),
+                                          void *context);
 extern void PrintCatCacheLeakWarning(HeapTuple tuple);
 extern void PrintCatCacheListLeakWarning(CatCList *list);
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 14b4eac0630..ae4eb65bc59 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -28,6 +28,10 @@ extern void AcceptInvalidationMessages(void);
 
 extern void AtEOXact_Inval(bool isCommit);
 
+extern void PreInplace_Inval(void);
+extern void AtInplace_Inval(void);
+extern void ForgetInplace_Inval(void);
+
 extern void AtEOSubXact_Inval(bool isCommit);
 
 extern void PostPrepare_Inval(void);
@@ -37,6 +41,8 @@ extern void CommandEndInvalidationMessages(void);
 extern void CacheInvalidateHeapTuple(Relation relation,
                                      HeapTuple tuple,
                                      HeapTuple newtuple);
+extern void CacheInvalidateHeapTupleInplace(Relation relation,
+                                            HeapTuple key_equivalent_tuple);
 
 extern void CacheInvalidateCatalog(Oid catalogId);
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 38524641f47..10a91c986d1 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -37,6 +37,14 @@ typedef Relation *RelationPtr;
 /*
 * Routines to open (lookup) and close a relcache entry
 */
+#ifdef USE_ASSERT_CHECKING
+extern void AssertCouldGetRelation(void);
+#else
+static inline void
+AssertCouldGetRelation(void)
+{
+}
+#endif
 extern Relation RelationIdGetRelation(Oid relationId);
 extern void RelationClose(Relation relation);
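AssertCouldGetRelation() is intended to run before checking a cache, so assert builds catch unsafe call sites even when the lookup would have been a hit; the pg_locale.c and mbutils.c hunks follow that rule. A hypothetical miss-path pattern with invented names:

    static Relation cached_rel = NULL;

    static Relation
    my_cached_open(Oid relid)
    {
        /* before consulting the cache, not only on the miss path */
        AssertCouldGetRelation();

        if (cached_rel == NULL)
            cached_rel = RelationIdGetRelation(relid);  /* may read catalogs */
        return cached_rel;
    }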
diff --git a/src/test/isolation/expected/inplace-inval.out b/src/test/isolation/expected/inplace-inval.out
index e68eca5de98..c35895a8aa7 100644
--- a/src/test/isolation/expected/inplace-inval.out
+++ b/src/test/isolation/expected/inplace-inval.out
@@ -1,6 +1,6 @@
 Parsed test spec with 3 sessions
 
-starting permutation: cachefill3 cir1 cic2 ddl3
+starting permutation: cachefill3 cir1 cic2 ddl3 read1
 step cachefill3: TABLE newly_indexed;
 c
 -
@@ -9,6 +9,14 @@ c
 step cir1: BEGIN; CREATE INDEX i1 ON newly_indexed (c); ROLLBACK;
 step cic2: CREATE INDEX i2 ON newly_indexed (c);
 step ddl3: ALTER TABLE newly_indexed ADD extra int;
+step read1: 
+	SELECT relhasindex FROM pg_class WHERE oid = 'newly_indexed'::regclass;
+
+relhasindex
+-----------
+t
+(1 row)
+
 
 starting permutation: cir1 cic2 ddl3 read1
 step cir1: BEGIN; CREATE INDEX i1 ON newly_indexed (c); ROLLBACK;
diff --git a/src/test/isolation/specs/inplace-inval.spec b/src/test/isolation/specs/inplace-inval.spec
index 96954fd86c4..b99112ddb88 100644
--- a/src/test/isolation/specs/inplace-inval.spec
+++ b/src/test/isolation/specs/inplace-inval.spec
@@ -1,7 +1,7 @@
-# If a heap_update() caller retrieves its oldtup from a cache, it's possible
-# for that cache entry to predate an inplace update, causing loss of that
-# inplace update.  This arises because the transaction may abort before
-# sending the inplace invalidation message to the shared queue.
+# An inplace update had been able to abort before sending the inplace
+# invalidation message to the shared queue.  If a heap_update() caller then
+# retrieved its oldtup from a cache, the heap_update() could revert the
+# inplace update.
 
 setup
 {
@@ -27,14 +27,12 @@ step cachefill3	{ TABLE newly_indexed; }
 step ddl3		{ ALTER TABLE newly_indexed ADD extra int; }
 
 
-# XXX shows an extant bug.  Adding step read1 at the end would usually print
-# relhasindex=f (not wanted).  This does not reach the unwanted behavior under
-# -DCATCACHE_FORCE_RELEASE and friends.
 permutation
 	cachefill3	# populates the pg_class row in the catcache
 	cir1	# sets relhasindex=true; rollback discards cache inval
 	cic2	# sees relhasindex=true, skips changing it (so no inval)
 	ddl3	# cached row as the oldtup of an update, losing relhasindex
+	read1	# observe damage
 
 # without cachefill3, no bug
 permutation cir1 cic2 ddl3 read1
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index a51888802ae..c9fd3d95da1 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1206,6 +1206,7 @@ InternalGrant
 Interval
 IntoClause
 InvalMessageArray
+InvalidationInfo
 InvalidationMsgsGroup
 IpcMemoryId
 IpcMemoryKey