From: Andres Freund Date: Sun, 20 Jan 2019 22:13:41 +0000 (-0800) Subject: tableam: Introduce and use begin/endscan and do index lookups via AM. X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=143d9438d7f633d7e2ba608f3d4dff1c2f38f77e;p=users%2Fandresfreund%2Fpostgres.git tableam: Introduce and use begin/endscan and do index lookups via AM. --- diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 964200a767..4268f8506f 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -26,6 +26,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/nbtree.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/index.h" @@ -481,7 +482,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly, if (state->heapallindexed) { IndexInfo *indexinfo = BuildIndexInfo(state->rel); - HeapScanDesc scan; + TableScanDesc scan; /* Report on extra downlink checks performed in readonly case */ if (state->readonly) @@ -500,7 +501,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly, * * Note that IndexBuildHeapScan() calls heap_endscan() for us. */ - scan = heap_beginscan_strat(state->heaprel, /* relation */ + scan = table_beginscan_strat(state->heaprel, /* relation */ snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c index df2ad7f2c9..82b60d08cf 100644 --- a/contrib/pgrowlocks/pgrowlocks.c +++ b/contrib/pgrowlocks/pgrowlocks.c @@ -27,8 +27,10 @@ #include "access/heapam.h" #include "access/multixact.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/namespace.h" +#include "catalog/pg_am_d.h" #include "catalog/pg_authid.h" #include "funcapi.h" #include "miscadmin.h" @@ -55,7 +57,7 @@ PG_FUNCTION_INFO_V1(pgrowlocks); typedef struct { Relation rel; - HeapScanDesc scan; + TableScanDesc scan; int ncolumns; } MyData; @@ -70,7 +72,8 @@ Datum pgrowlocks(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; - HeapScanDesc scan; + TableScanDesc scan; + HeapScanDesc hscan; HeapTuple tuple; TupleDesc tupdesc; AttInMetadata *attinmeta; @@ -99,6 +102,10 @@ pgrowlocks(PG_FUNCTION_ARGS) relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -124,7 +131,8 @@ pgrowlocks(PG_FUNCTION_ARGS) aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), RelationGetRelationName(rel)); - scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL); + scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL); + hscan = (HeapScanDesc) scan; mydata = palloc(sizeof(*mydata)); mydata->rel = rel; mydata->scan = scan; @@ -138,6 +146,7 @@ pgrowlocks(PG_FUNCTION_ARGS) attinmeta = funcctx->attinmeta; mydata = (MyData *) funcctx->user_fctx; scan = mydata->scan; + hscan = (HeapScanDesc) scan; /* scan the relation */ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) @@ -147,11 +156,11 @@ pgrowlocks(PG_FUNCTION_ARGS) uint16 infomask; /* must hold a buffer lock to call HeapTupleSatisfiesUpdate */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, 
BUFFER_LOCK_SHARE); htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false), - scan->rs_cbuf); + hscan->rs_cbuf); xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); infomask = tuple->t_data->t_infomask; @@ -284,7 +293,7 @@ pgrowlocks(PG_FUNCTION_ARGS) BackendXidGetPid(xmax)); } - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); /* build a tuple */ tuple = BuildTupleFromCStrings(attinmeta, values); @@ -301,11 +310,11 @@ pgrowlocks(PG_FUNCTION_ARGS) } else { - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); } } - heap_endscan(scan); + table_endscan(scan); table_close(mydata->rel, AccessShareLock); SRF_RETURN_DONE(funcctx); diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index 2ac9863463..9bcb640884 100644 --- a/contrib/pgstattuple/pgstattuple.c +++ b/contrib/pgstattuple/pgstattuple.c @@ -29,6 +29,7 @@ #include "access/heapam.h" #include "access/nbtree.h" #include "access/relscan.h" +#include "access/tableam.h" #include "catalog/namespace.h" #include "catalog/pg_am.h" #include "funcapi.h" @@ -317,7 +318,8 @@ pgstat_relation(Relation rel, FunctionCallInfo fcinfo) static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo) { - HeapScanDesc scan; + TableScanDesc scan; + HeapScanDesc hscan; HeapTuple tuple; BlockNumber nblocks; BlockNumber block = 0; /* next block to count free space in */ @@ -327,20 +329,22 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) SnapshotData SnapshotDirty; /* Disable syncscan because we assume we scan from block zero upwards */ - scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false); + scan = table_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false); + hscan = (HeapScanDesc) scan; + InitDirtySnapshot(SnapshotDirty); - nblocks = scan->rs_nblocks; /* # blocks to be scanned */ + nblocks = hscan->rs_nblocks; /* # blocks to be scanned */ - /* scan the relation */ + /* scan the relation (will error if not heap) */ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { CHECK_FOR_INTERRUPTS(); /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - if (HeapTupleSatisfiesVisibility(tuple, &SnapshotDirty, scan->rs_cbuf)) + if (HeapTupleSatisfiesVisibility(tuple, &SnapshotDirty, hscan->rs_cbuf)) { stat.tuple_len += tuple->t_len; stat.tuple_count++; @@ -351,7 +355,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) stat.dead_tuple_count++; } - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); /* * To avoid physically reading the table twice, try to do the @@ -366,7 +370,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) CHECK_FOR_INTERRUPTS(); buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, - RBM_NORMAL, scan->rs_strategy); + RBM_NORMAL, hscan->rs_strategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); @@ -379,14 +383,14 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) CHECK_FOR_INTERRUPTS(); buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, - RBM_NORMAL, scan->rs_strategy); + RBM_NORMAL, hscan->rs_strategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); block++; } - heap_endscan(scan); + table_endscan(scan); relation_close(rel, 
AccessShareLock); stat.table_len = (uint64) nblocks * BLCKSZ; diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c index c92490f938..1d35ea3c53 100644 --- a/contrib/tsm_system_rows/tsm_system_rows.c +++ b/contrib/tsm_system_rows/tsm_system_rows.c @@ -209,7 +209,8 @@ static BlockNumber system_rows_nextsampleblock(SampleScanState *node) { SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state; - HeapScanDesc scan = node->ss.ss_currentScanDesc; + TableScanDesc scan = node->ss.ss_currentScanDesc; + HeapScanDesc hscan = (HeapScanDesc) scan; /* First call within scan? */ if (sampler->doneblocks == 0) @@ -221,14 +222,14 @@ system_rows_nextsampleblock(SampleScanState *node) SamplerRandomState randstate; /* If relation is empty, there's nothing to scan */ - if (scan->rs_nblocks == 0) + if (hscan->rs_nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ sampler_random_init_state(sampler->seed, randstate); /* Compute nblocks/firstblock/step only once per query */ - sampler->nblocks = scan->rs_nblocks; + sampler->nblocks = hscan->rs_nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ @@ -258,7 +259,7 @@ system_rows_nextsampleblock(SampleScanState *node) { /* Advance lb, using uint64 arithmetic to forestall overflow */ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks; - } while (sampler->lb >= scan->rs_nblocks); + } while (sampler->lb >= hscan->rs_nblocks); return sampler->lb; } @@ -278,7 +279,8 @@ system_rows_nextsampletuple(SampleScanState *node, OffsetNumber maxoffset) { SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state; - HeapScanDesc scan = node->ss.ss_currentScanDesc; + TableScanDesc scan = node->ss.ss_currentScanDesc; + HeapScanDesc hscan = (HeapScanDesc) scan; OffsetNumber tupoffset = sampler->lt; /* Quit if we've returned all needed tuples */ @@ -308,7 +310,7 @@ system_rows_nextsampletuple(SampleScanState *node, } /* Found a candidate? */ - if (SampleOffsetVisible(tupoffset, scan)) + if (SampleOffsetVisible(tupoffset, hscan)) { sampler->donetuples++; break; diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c index edeacf0b53..1cc7264e08 100644 --- a/contrib/tsm_system_time/tsm_system_time.c +++ b/contrib/tsm_system_time/tsm_system_time.c @@ -216,7 +216,8 @@ static BlockNumber system_time_nextsampleblock(SampleScanState *node) { SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state; - HeapScanDesc scan = node->ss.ss_currentScanDesc; + TableScanDesc scan = node->ss.ss_currentScanDesc; + HeapScanDesc hscan = (HeapScanDesc) scan; instr_time cur_time; /* First call within scan? 
*/ @@ -229,14 +230,14 @@ system_time_nextsampleblock(SampleScanState *node) SamplerRandomState randstate; /* If relation is empty, there's nothing to scan */ - if (scan->rs_nblocks == 0) + if (hscan->rs_nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ sampler_random_init_state(sampler->seed, randstate); /* Compute nblocks/firstblock/step only once per query */ - sampler->nblocks = scan->rs_nblocks; + sampler->nblocks = hscan->rs_nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ @@ -272,7 +273,7 @@ system_time_nextsampleblock(SampleScanState *node) { /* Advance lb, using uint64 arithmetic to forestall overflow */ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks; - } while (sampler->lb >= scan->rs_nblocks); + } while (sampler->lb >= hscan->rs_nblocks); return sampler->lb; } diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index a96ef5c3ac..b54d599162 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -561,7 +561,7 @@ getNextNearest(IndexScanDesc scan) if (GISTSearchItemIsHeap(*item)) { /* found a heap item at currently minimal distance */ - scan->xs_ctup.t_self = item->data.heap.heapPtr; + scan->xs_heaptid = item->data.heap.heapPtr; scan->xs_recheck = item->data.heap.recheck; index_store_float8_orderby_distances(scan, so->orderByTypes, @@ -650,7 +650,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir) so->pageData[so->curPageData - 1].offnum; } /* continuing to return tuples from a leaf page */ - scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr; + scan->xs_heaptid = so->pageData[so->curPageData].heapPtr; scan->xs_recheck = so->pageData[so->curPageData].recheck; /* in an index-only scan, also return the reconstructed tuple */ diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index ccd3fdceac..61c90e6bb7 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -119,7 +119,7 @@ _hash_next(IndexScanDesc scan, ScanDirection dir) /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; return true; } @@ -432,7 +432,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; /* if we're here, _hash_readpage found a valid tuples */ return true; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index dc3499349b..a8a8511b2d 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -40,6 +40,7 @@ #include "access/multixact.h" #include "access/parallel.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/transam.h" #include "access/tuptoaster.h" @@ -68,22 +69,6 @@ #include "utils/snapmgr.h" -/* GUC variable */ -bool synchronize_seqscans = true; - - -static HeapScanDesc heap_beginscan_internal(Relation relation, - Snapshot snapshot, - int nkeys, ScanKey key, - ParallelHeapScanDesc parallel_scan, - bool allow_strat, - bool allow_sync, - bool allow_pagemode, - bool is_bitmapscan, - bool is_samplescan, - bool temp_snap); -static void heap_parallelscan_startblock_init(HeapScanDesc scan); -static 
BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan); static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, @@ -207,6 +192,7 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] = static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) { + ParallelBlockTableScanDesc bpscan = NULL; bool allow_strat; bool allow_sync; @@ -221,10 +207,13 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * results for a non-MVCC snapshot, the caller must hold some higher-level * lock that ensures the interesting tuple(s) won't change.) */ - if (scan->rs_parallel != NULL) - scan->rs_nblocks = scan->rs_parallel->phs_nblocks; + if (scan->rs_scan.rs_parallel != NULL) + { + bpscan = (ParallelBlockTableScanDesc) scan->rs_scan.rs_parallel; + scan->rs_nblocks = bpscan->phs_nblocks; + } else - scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd); + scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_scan.rs_rd); /* * If the table is large relative to NBuffers, use a bulk-read access @@ -238,11 +227,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * Note that heap_parallelscan_initialize has a very similar test; if you * change this, consider changing that one, too. */ - if (!RelationUsesLocalBuffers(scan->rs_rd) && + if (!RelationUsesLocalBuffers(scan->rs_scan.rs_rd) && scan->rs_nblocks > NBuffers / 4) { - allow_strat = scan->rs_allow_strat; - allow_sync = scan->rs_allow_sync; + allow_strat = scan->rs_scan.rs_allow_strat; + allow_sync = scan->rs_scan.rs_allow_sync; } else allow_strat = allow_sync = false; @@ -260,10 +249,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) scan->rs_strategy = NULL; } - if (scan->rs_parallel != NULL) + if (scan->rs_scan.rs_parallel != NULL) { - /* For parallel scan, believe whatever ParallelHeapScanDesc says. */ - scan->rs_syncscan = scan->rs_parallel->phs_syncscan; + /* For parallel scan, believe whatever ParallelTableScanDesc says. */ + scan->rs_scan.rs_syncscan = scan->rs_scan.rs_parallel->phs_syncscan; } else if (keep_startblock) { @@ -272,16 +261,16 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * so that rewinding a cursor doesn't generate surprising results. * Reset the active syncscan setting, though. 
*/ - scan->rs_syncscan = (allow_sync && synchronize_seqscans); + scan->rs_scan.rs_syncscan = (allow_sync && synchronize_seqscans); } else if (allow_sync && synchronize_seqscans) { - scan->rs_syncscan = true; - scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks); + scan->rs_scan.rs_syncscan = true; + scan->rs_startblock = ss_get_location(scan->rs_scan.rs_rd, scan->rs_nblocks); } else { - scan->rs_syncscan = false; + scan->rs_scan.rs_syncscan = false; scan->rs_startblock = 0; } @@ -298,15 +287,15 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * copy the scan key, if appropriate */ if (key != NULL) - memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData)); + memcpy(scan->rs_scan.rs_key, key, scan->rs_scan.rs_nkeys * sizeof(ScanKeyData)); /* * Currently, we don't have a stats counter for bitmap heap scans (but the * underlying bitmap index scans will be counted) or sample scans (we only * update stats for tuple fetches there) */ - if (!scan->rs_bitmapscan && !scan->rs_samplescan) - pgstat_count_heap_scan(scan->rs_rd); + if (!scan->rs_scan.rs_bitmapscan && !scan->rs_scan.rs_samplescan) + pgstat_count_heap_scan(scan->rs_scan.rs_rd); } /* @@ -316,10 +305,12 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * numBlks is number of pages to scan (InvalidBlockNumber means "all") */ void -heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks) +heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks) { + HeapScanDesc scan = (HeapScanDesc) sscan; + Assert(!scan->rs_inited); /* else too late to change */ - Assert(!scan->rs_syncscan); /* else rs_startblock is significant */ + Assert(!scan->rs_scan.rs_syncscan); /* else rs_startblock is significant */ /* Check startBlk is valid (but allow case of zero blocks...) */ Assert(startBlk == 0 || startBlk < scan->rs_nblocks); @@ -336,8 +327,9 @@ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks) * which tuples on the page are visible. */ void -heapgetpage(HeapScanDesc scan, BlockNumber page) +heapgetpage(TableScanDesc sscan, BlockNumber page) { + HeapScanDesc scan = (HeapScanDesc) sscan; Buffer buffer; Snapshot snapshot; Page dp; @@ -364,20 +356,20 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) CHECK_FOR_INTERRUPTS(); /* read page using selected strategy */ - scan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM, page, + scan->rs_cbuf = ReadBufferExtended(scan->rs_scan.rs_rd, MAIN_FORKNUM, page, RBM_NORMAL, scan->rs_strategy); scan->rs_cblock = page; - if (!scan->rs_pageatatime) + if (!scan->rs_scan.rs_pageatatime) return; buffer = scan->rs_cbuf; - snapshot = scan->rs_snapshot; + snapshot = scan->rs_scan.rs_snapshot; /* * Prune and repair fragmentation for the whole page, if possible. 
*/ - heap_page_prune_opt(scan->rs_rd, buffer); + heap_page_prune_opt(scan->rs_scan.rs_rd, buffer); /* * We must hold share lock on the buffer content while examining tuple @@ -387,7 +379,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) LockBuffer(buffer, BUFFER_LOCK_SHARE); dp = BufferGetPage(buffer); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lines = PageGetMaxOffsetNumber(dp); ntup = 0; @@ -422,7 +414,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) HeapTupleData loctup; bool valid; - loctup.t_tableOid = RelationGetRelid(scan->rs_rd); + loctup.t_tableOid = RelationGetRelid(scan->rs_scan.rs_rd); loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); loctup.t_len = ItemIdGetLength(lpp); ItemPointerSet(&(loctup.t_self), page, lineoff); @@ -432,7 +424,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) else valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); - CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup, + CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, &loctup, buffer, snapshot); if (valid) @@ -476,7 +468,7 @@ heapgettup(HeapScanDesc scan, ScanKey key) { HeapTuple tuple = &(scan->rs_ctup); - Snapshot snapshot = scan->rs_snapshot; + Snapshot snapshot = scan->rs_scan.rs_snapshot; bool backward = ScanDirectionIsBackward(dir); BlockNumber page; bool finished; @@ -502,11 +494,14 @@ heapgettup(HeapScanDesc scan, tuple->t_data = NULL; return; } - if (scan->rs_parallel != NULL) + if (scan->rs_scan.rs_parallel != NULL) { - heap_parallelscan_startblock_init(scan); + ParallelBlockTableScanDesc pbscan = + (ParallelBlockTableScanDesc) scan->rs_scan.rs_parallel; + + table_block_parallelscan_startblock_init(scan->rs_scan.rs_rd, pbscan); - page = heap_parallelscan_nextpage(scan); + page = table_block_parallelscan_nextpage(scan->rs_scan.rs_rd, pbscan); /* Other processes might have already finished the scan. */ if (page == InvalidBlockNumber) @@ -518,7 +513,7 @@ heapgettup(HeapScanDesc scan, } else page = scan->rs_startblock; /* first page */ - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); lineoff = FirstOffsetNumber; /* first offnum */ scan->rs_inited = true; } @@ -533,7 +528,7 @@ heapgettup(HeapScanDesc scan, LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lines = PageGetMaxOffsetNumber(dp); /* page and lineoff now reference the physically next tid */ @@ -542,7 +537,7 @@ heapgettup(HeapScanDesc scan, else if (backward) { /* backward parallel scan not supported */ - Assert(scan->rs_parallel == NULL); + Assert(scan->rs_scan.rs_parallel == NULL); if (!scan->rs_inited) { @@ -562,13 +557,13 @@ heapgettup(HeapScanDesc scan, * time, and much more likely that we'll just bollix things for * forward scanners. 
*/ - scan->rs_syncscan = false; + scan->rs_scan.rs_syncscan = false; /* start from last page of the scan */ if (scan->rs_startblock > 0) page = scan->rs_startblock - 1; else page = scan->rs_nblocks - 1; - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); } else { @@ -579,7 +574,7 @@ heapgettup(HeapScanDesc scan, LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lines = PageGetMaxOffsetNumber(dp); if (!scan->rs_inited) @@ -610,11 +605,11 @@ heapgettup(HeapScanDesc scan, page = ItemPointerGetBlockNumber(&(tuple->t_self)); if (page != scan->rs_cblock) - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); /* Since the tuple was previously fetched, needn't lock page here */ dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); lpp = PageGetItemId(dp, lineoff); Assert(ItemIdIsNormal(lpp)); @@ -649,11 +644,11 @@ heapgettup(HeapScanDesc scan, snapshot, scan->rs_cbuf); - CheckForSerializableConflictOut(valid, scan->rs_rd, tuple, + CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, tuple, scan->rs_cbuf, snapshot); if (valid && key != NULL) - HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd), + HeapKeyTest(tuple, RelationGetDescr(scan->rs_scan.rs_rd), nkeys, key, valid); if (valid) @@ -696,9 +691,12 @@ heapgettup(HeapScanDesc scan, page = scan->rs_nblocks; page--; } - else if (scan->rs_parallel != NULL) + else if (scan->rs_scan.rs_parallel != NULL) { - page = heap_parallelscan_nextpage(scan); + ParallelBlockTableScanDesc pbscan = + (ParallelBlockTableScanDesc) scan->rs_scan.rs_parallel; + + page = table_block_parallelscan_nextpage(scan->rs_scan.rs_rd, pbscan); finished = (page == InvalidBlockNumber); } else @@ -721,8 +719,8 @@ heapgettup(HeapScanDesc scan, * a little bit backwards on every invocation, which is confusing. * We don't guarantee any specific ordering in general, though. */ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, page); + if (scan->rs_scan.rs_syncscan) + ss_report_location(scan->rs_scan.rs_rd, page); } /* @@ -739,12 +737,12 @@ heapgettup(HeapScanDesc scan, return; } - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lines = PageGetMaxOffsetNumber((Page) dp); linesleft = lines; if (backward) @@ -806,11 +804,14 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = NULL; return; } - if (scan->rs_parallel != NULL) + if (scan->rs_scan.rs_parallel != NULL) { - heap_parallelscan_startblock_init(scan); + ParallelBlockTableScanDesc pbscan = + (ParallelBlockTableScanDesc) scan->rs_scan.rs_parallel; + + table_block_parallelscan_startblock_init(scan->rs_scan.rs_rd, pbscan); - page = heap_parallelscan_nextpage(scan); + page = table_block_parallelscan_nextpage(scan->rs_scan.rs_rd, pbscan); /* Other processes might have already finished the scan. 
*/ if (page == InvalidBlockNumber) @@ -822,7 +823,7 @@ heapgettup_pagemode(HeapScanDesc scan, } else page = scan->rs_startblock; /* first page */ - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); lineindex = 0; scan->rs_inited = true; } @@ -834,7 +835,7 @@ heapgettup_pagemode(HeapScanDesc scan, } dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp); lines = scan->rs_ntuples; /* page and lineindex now reference the next visible tid */ @@ -843,7 +844,7 @@ heapgettup_pagemode(HeapScanDesc scan, else if (backward) { /* backward parallel scan not supported */ - Assert(scan->rs_parallel == NULL); + Assert(scan->rs_scan.rs_parallel == NULL); if (!scan->rs_inited) { @@ -863,13 +864,13 @@ heapgettup_pagemode(HeapScanDesc scan, * time, and much more likely that we'll just bollix things for * forward scanners. */ - scan->rs_syncscan = false; + scan->rs_scan.rs_syncscan = false; /* start from last page of the scan */ if (scan->rs_startblock > 0) page = scan->rs_startblock - 1; else page = scan->rs_nblocks - 1; - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); } else { @@ -878,7 +879,7 @@ heapgettup_pagemode(HeapScanDesc scan, } dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp); lines = scan->rs_ntuples; if (!scan->rs_inited) @@ -908,11 +909,11 @@ heapgettup_pagemode(HeapScanDesc scan, page = ItemPointerGetBlockNumber(&(tuple->t_self)); if (page != scan->rs_cblock) - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); /* Since the tuple was previously fetched, needn't lock page here */ dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp); lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); lpp = PageGetItemId(dp, lineoff); Assert(ItemIdIsNormal(lpp)); @@ -950,7 +951,7 @@ heapgettup_pagemode(HeapScanDesc scan, { bool valid; - HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd), + HeapKeyTest(tuple, RelationGetDescr(scan->rs_scan.rs_rd), nkeys, key, valid); if (valid) { @@ -986,9 +987,12 @@ heapgettup_pagemode(HeapScanDesc scan, page = scan->rs_nblocks; page--; } - else if (scan->rs_parallel != NULL) + else if (scan->rs_scan.rs_parallel != NULL) { - page = heap_parallelscan_nextpage(scan); + ParallelBlockTableScanDesc pbscan = + (ParallelBlockTableScanDesc) scan->rs_scan.rs_parallel; + + page = table_block_parallelscan_nextpage(scan->rs_scan.rs_rd, pbscan); finished = (page == InvalidBlockNumber); } else @@ -1011,8 +1015,8 @@ heapgettup_pagemode(HeapScanDesc scan, * a little bit backwards on every invocation, which is confusing. * We don't guarantee any specific ordering in general, though. 
*/ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, page); + if (scan->rs_scan.rs_syncscan) + ss_report_location(scan->rs_scan.rs_rd, page); } /* @@ -1029,10 +1033,10 @@ heapgettup_pagemode(HeapScanDesc scan, return; } - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp); lines = scan->rs_ntuples; linesleft = lines; if (backward) @@ -1095,86 +1099,16 @@ fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, */ -/* ---------------- - * heap_beginscan - begin relation scan - * - * heap_beginscan is the "standard" case. - * - * heap_beginscan_catalog differs in setting up its own temporary snapshot. - * - * heap_beginscan_strat offers an extended API that lets the caller control - * whether a nondefault buffer access strategy can be used, and whether - * syncscan can be chosen (possibly resulting in the scan not starting from - * block zero). Both of these default to true with plain heap_beginscan. - * - * heap_beginscan_bm is an alternative entry point for setting up a - * HeapScanDesc for a bitmap heap scan. Although that scan technology is - * really quite unlike a standard seqscan, there is just enough commonality - * to make it worth using the same data structure. - * - * heap_beginscan_sampling is an alternative entry point for setting up a - * HeapScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth - * using the same data structure although the behavior is rather different. - * In addition to the options offered by heap_beginscan_strat, this call - * also allows control of whether page-mode visibility checking is used. - * ---------------- - */ -HeapScanDesc +TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - true, true, true, false, false, false); -} - -HeapScanDesc -heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key) -{ - Oid relid = RelationGetRelid(relation); - Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); - - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - true, true, true, false, false, true); -} - -HeapScanDesc -heap_beginscan_strat(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - allow_strat, allow_sync, true, - false, false, false); -} - -HeapScanDesc -heap_beginscan_bm(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - false, false, true, true, false, false); -} - -HeapScanDesc -heap_beginscan_sampling(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - allow_strat, allow_sync, allow_pagemode, - false, true, false); -} - -static HeapScanDesc -heap_beginscan_internal(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - ParallelHeapScanDesc parallel_scan, - bool allow_strat, - bool allow_sync, - bool allow_pagemode, - bool is_bitmapscan, - bool is_samplescan, - bool temp_snap) + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool 
is_samplescan, + bool temp_snap) { HeapScanDesc scan; @@ -1192,21 +1126,21 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, */ scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData)); - scan->rs_rd = relation; - scan->rs_snapshot = snapshot; - scan->rs_nkeys = nkeys; - scan->rs_bitmapscan = is_bitmapscan; - scan->rs_samplescan = is_samplescan; + scan->rs_scan.rs_rd = relation; + scan->rs_scan.rs_snapshot = snapshot; + scan->rs_scan.rs_nkeys = nkeys; + scan->rs_scan.rs_bitmapscan = is_bitmapscan; + scan->rs_scan.rs_samplescan = is_samplescan; scan->rs_strategy = NULL; /* set in initscan */ - scan->rs_allow_strat = allow_strat; - scan->rs_allow_sync = allow_sync; - scan->rs_temp_snap = temp_snap; - scan->rs_parallel = parallel_scan; + scan->rs_scan.rs_allow_strat = allow_strat; + scan->rs_scan.rs_allow_sync = allow_sync; + scan->rs_scan.rs_temp_snap = temp_snap; + scan->rs_scan.rs_parallel = parallel_scan; /* * we can use page-at-a-time mode if it's an MVCC-safe snapshot */ - scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot); + scan->rs_scan.rs_pageatatime = allow_pagemode && snapshot && IsMVCCSnapshot(snapshot); /* * For a seqscan in a serializable transaction, acquire a predicate lock @@ -1219,7 +1153,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, * covering the predicate. But in that case we still have to lock any * matching heap tuples. */ - if (!is_bitmapscan) + if (!is_bitmapscan && snapshot) PredicateLockRelation(relation, snapshot); /* we only need to set this up once */ @@ -1230,13 +1164,13 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, * initscan() and we don't want to allocate memory again */ if (nkeys > 0) - scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); + scan->rs_scan.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else - scan->rs_key = NULL; + scan->rs_scan.rs_key = NULL; initscan(scan, key, false); - return scan; + return (TableScanDesc) scan; } /* ---------------- @@ -1244,9 +1178,18 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, * ---------------- */ void -heap_rescan(HeapScanDesc scan, - ScanKey key) +heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode) { + HeapScanDesc scan = (HeapScanDesc) sscan; + + if (set_params) + { + scan->rs_scan.rs_allow_strat = allow_strat; + scan->rs_scan.rs_allow_sync = allow_sync; + scan->rs_scan.rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_scan.rs_snapshot); + } + /* * unpin scan buffers */ @@ -1257,27 +1200,22 @@ heap_rescan(HeapScanDesc scan, * reinitialize scan descriptor */ initscan(scan, key, true); -} -/* ---------------- - * heap_rescan_set_params - restart a relation scan after changing params - * - * This call allows changing the buffer strategy, syncscan, and pagemode - * options before starting a fresh scan. Note that although the actual use - * of syncscan might change (effectively, enabling or disabling reporting), - * the previously selected startblock will be kept. - * ---------------- - */ -void -heap_rescan_set_params(HeapScanDesc scan, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode) -{ - /* adjust parameters */ - scan->rs_allow_strat = allow_strat; - scan->rs_allow_sync = allow_sync; - scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot); - /* ... 
and rescan */ - heap_rescan(scan, key); + /* + * reset parallel scan, if present + */ + if (scan->rs_scan.rs_parallel != NULL) + { + ParallelTableScanDesc parallel_scan; + + /* + * Caller is responsible for making sure that all workers have + * finished the scan before calling this. + */ + parallel_scan = scan->rs_scan.rs_parallel; + + table_block_parallelscan_reinitialize(sscan->rs_rd, parallel_scan); + } } /* ---------------- @@ -1288,8 +1226,10 @@ heap_rescan_set_params(HeapScanDesc scan, ScanKey key, * ---------------- */ void -heap_endscan(HeapScanDesc scan) +heap_endscan(TableScanDesc sscan) { + HeapScanDesc scan = (HeapScanDesc) sscan; + /* Note: no locking manipulations needed */ /* @@ -1301,221 +1241,20 @@ heap_endscan(HeapScanDesc scan) /* * decrement relation reference count and free scan descriptor storage */ - RelationDecrementReferenceCount(scan->rs_rd); + RelationDecrementReferenceCount(scan->rs_scan.rs_rd); - if (scan->rs_key) - pfree(scan->rs_key); + if (scan->rs_scan.rs_key) + pfree(scan->rs_scan.rs_key); if (scan->rs_strategy != NULL) FreeAccessStrategy(scan->rs_strategy); - if (scan->rs_temp_snap) - UnregisterSnapshot(scan->rs_snapshot); + if (scan->rs_scan.rs_temp_snap) + UnregisterSnapshot(scan->rs_scan.rs_snapshot); pfree(scan); } -/* ---------------- - * heap_parallelscan_estimate - estimate storage for ParallelHeapScanDesc - * - * Sadly, this doesn't reduce to a constant, because the size required - * to serialize the snapshot can vary. - * ---------------- - */ -Size -heap_parallelscan_estimate(Snapshot snapshot) -{ - Size sz = offsetof(ParallelHeapScanDescData, phs_snapshot_data); - - if (IsMVCCSnapshot(snapshot)) - sz = add_size(sz, EstimateSnapshotSpace(snapshot)); - else - Assert(snapshot == SnapshotAny); - - return sz; -} - -/* ---------------- - * heap_parallelscan_initialize - initialize ParallelHeapScanDesc - * - * Must allow as many bytes of shared memory as returned by - * heap_parallelscan_estimate. Call this just once in the leader - * process; then, individual workers attach via heap_beginscan_parallel. - * ---------------- - */ -void -heap_parallelscan_initialize(ParallelHeapScanDesc target, Relation relation, - Snapshot snapshot) -{ - target->phs_relid = RelationGetRelid(relation); - target->phs_nblocks = RelationGetNumberOfBlocks(relation); - /* compare phs_syncscan initialization to similar logic in initscan */ - target->phs_syncscan = synchronize_seqscans && - !RelationUsesLocalBuffers(relation) && - target->phs_nblocks > NBuffers / 4; - SpinLockInit(&target->phs_mutex); - target->phs_startblock = InvalidBlockNumber; - pg_atomic_init_u64(&target->phs_nallocated, 0); - if (IsMVCCSnapshot(snapshot)) - { - SerializeSnapshot(snapshot, target->phs_snapshot_data); - target->phs_snapshot_any = false; - } - else - { - Assert(snapshot == SnapshotAny); - target->phs_snapshot_any = true; - } -} - -/* ---------------- - * heap_parallelscan_reinitialize - reset a parallel scan - * - * Call this in the leader process. Caller is responsible for - * making sure that all workers have finished the scan beforehand. - * ---------------- - */ -void -heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan) -{ - pg_atomic_write_u64(¶llel_scan->phs_nallocated, 0); -} - -/* ---------------- - * heap_beginscan_parallel - join a parallel scan - * - * Caller must hold a suitable lock on the correct relation. 
- * ---------------- - */ -HeapScanDesc -heap_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan) -{ - Snapshot snapshot; - - Assert(RelationGetRelid(relation) == parallel_scan->phs_relid); - - if (!parallel_scan->phs_snapshot_any) - { - /* Snapshot was serialized -- restore it */ - snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data); - RegisterSnapshot(snapshot); - } - else - { - /* SnapshotAny passed by caller (not serialized) */ - snapshot = SnapshotAny; - } - - return heap_beginscan_internal(relation, snapshot, 0, NULL, parallel_scan, - true, true, true, false, false, - !parallel_scan->phs_snapshot_any); -} - -/* ---------------- - * heap_parallelscan_startblock_init - find and set the scan's startblock - * - * Determine where the parallel seq scan should start. This function may - * be called many times, once by each parallel worker. We must be careful - * only to set the startblock once. - * ---------------- - */ -static void -heap_parallelscan_startblock_init(HeapScanDesc scan) -{ - BlockNumber sync_startpage = InvalidBlockNumber; - ParallelHeapScanDesc parallel_scan; - - Assert(scan->rs_parallel); - parallel_scan = scan->rs_parallel; - -retry: - /* Grab the spinlock. */ - SpinLockAcquire(¶llel_scan->phs_mutex); - - /* - * If the scan's startblock has not yet been initialized, we must do so - * now. If this is not a synchronized scan, we just start at block 0, but - * if it is a synchronized scan, we must get the starting position from - * the synchronized scan machinery. We can't hold the spinlock while - * doing that, though, so release the spinlock, get the information we - * need, and retry. If nobody else has initialized the scan in the - * meantime, we'll fill in the value we fetched on the second time - * through. - */ - if (parallel_scan->phs_startblock == InvalidBlockNumber) - { - if (!parallel_scan->phs_syncscan) - parallel_scan->phs_startblock = 0; - else if (sync_startpage != InvalidBlockNumber) - parallel_scan->phs_startblock = sync_startpage; - else - { - SpinLockRelease(¶llel_scan->phs_mutex); - sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks); - goto retry; - } - } - SpinLockRelease(¶llel_scan->phs_mutex); -} - -/* ---------------- - * heap_parallelscan_nextpage - get the next page to scan - * - * Get the next page to scan. Even if there are no pages left to scan, - * another backend could have grabbed a page to scan and not yet finished - * looking at it, so it doesn't follow that the scan is done when the - * first backend gets an InvalidBlockNumber return. - * ---------------- - */ -static BlockNumber -heap_parallelscan_nextpage(HeapScanDesc scan) -{ - BlockNumber page; - ParallelHeapScanDesc parallel_scan; - uint64 nallocated; - - Assert(scan->rs_parallel); - parallel_scan = scan->rs_parallel; - - /* - * phs_nallocated tracks how many pages have been allocated to workers - * already. When phs_nallocated >= rs_nblocks, all blocks have been - * allocated. - * - * Because we use an atomic fetch-and-add to fetch the current value, the - * phs_nallocated counter will exceed rs_nblocks, because workers will - * still increment the value, when they try to allocate the next block but - * all blocks have been allocated already. The counter must be 64 bits - * wide because of that, to avoid wrapping around when rs_nblocks is close - * to 2^32. - * - * The actual page to return is calculated by adding the counter to the - * starting block number, modulo nblocks. 
- */ - nallocated = pg_atomic_fetch_add_u64(¶llel_scan->phs_nallocated, 1); - if (nallocated >= scan->rs_nblocks) - page = InvalidBlockNumber; /* all blocks have been allocated */ - else - page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks; - - /* - * Report scan location. Normally, we report the current page number. - * When we reach the end of the scan, though, we report the starting page, - * not the ending page, just so the starting positions for later scans - * doesn't slew backwards. We only report the position at the end of the - * scan once, though: subsequent callers will report nothing. - */ - if (scan->rs_syncscan) - { - if (page != InvalidBlockNumber) - ss_report_location(scan->rs_rd, page); - else if (nallocated == scan->rs_nblocks) - ss_report_location(scan->rs_rd, parallel_scan->phs_startblock); - } - - return page; -} - /* ---------------- * heap_update_snapshot * @@ -1523,13 +1262,15 @@ heap_parallelscan_nextpage(HeapScanDesc scan) * ---------------- */ void -heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot) +heap_update_snapshot(TableScanDesc sscan, Snapshot snapshot) { + HeapScanDesc scan = (HeapScanDesc) sscan; + Assert(IsMVCCSnapshot(snapshot)); RegisterSnapshot(snapshot); - scan->rs_snapshot = snapshot; - scan->rs_temp_snap = true; + scan->rs_scan.rs_snapshot = snapshot; + scan->rs_scan.rs_temp_snap = true; } /* ---------------- @@ -1557,17 +1298,29 @@ heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot) HeapTuple -heap_getnext(HeapScanDesc scan, ScanDirection direction) +heap_getnext(TableScanDesc sscan, ScanDirection direction) { + HeapScanDesc scan = (HeapScanDesc) sscan; + + /* + * This is still widely used outside directly, without going through the + * table AM, so add a safety check. It's possible we should at a later + * point downgrade this to an assert. 
+ */ + if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine())) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + /* Note: no locking manipulations needed */ HEAPDEBUG_1; /* heap_getnext( info ) */ - if (scan->rs_pageatatime) + if (scan->rs_scan.rs_pageatatime) heapgettup_pagemode(scan, direction, - scan->rs_nkeys, scan->rs_key); + scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key); else - heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key); + heapgettup(scan, direction, scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key); if (scan->rs_ctup.t_data == NULL) { @@ -1581,9 +1334,57 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) */ HEAPDEBUG_3; /* heap_getnext returning tuple */ - pgstat_count_heap_getnext(scan->rs_rd); + pgstat_count_heap_getnext(scan->rs_scan.rs_rd); + + return &scan->rs_ctup; +} + +#ifdef HEAPAMSLOTDEBUGALL +#define HEAPAMSLOTDEBUG_1 \ + elog(DEBUG2, "heapam_getnext([%s,nkeys=%d],dir=%d) called", \ + RelationGetRelationName(scan->rs_scan.rs_rd), scan->rs_scan.rs_nkeys, (int) direction) +#define HEAPAMSLOTDEBUG_2 \ + elog(DEBUG2, "heapam_getnext returning EOS") +#define HEAPAMSLOTDEBUG_3 \ + elog(DEBUG2, "heapam_getnext returning tuple") +#else +#define HEAPAMSLOTDEBUG_1 +#define HEAPAMSLOTDEBUG_2 +#define HEAPAMSLOTDEBUG_3 +#endif + +TupleTableSlot * +heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + + /* Note: no locking manipulations needed */ + + HEAPAMSLOTDEBUG_1; /* heap_getnext( info ) */ + + if (scan->rs_scan.rs_pageatatime) + heapgettup_pagemode(scan, direction, + scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key); + else + heapgettup(scan, direction, scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key); + + if (scan->rs_ctup.t_data == NULL) + { + HEAPAMSLOTDEBUG_2; /* heap_getnext returning EOS */ + ExecClearTuple(slot); + return NULL; + } + + /* + * if we get here it means we have a new current scan tuple, so point to + * the proper return buffer and return the tuple. + */ + HEAPAMSLOTDEBUG_3; /* heap_getnext returning tuple */ + + pgstat_count_heap_getnext(scan->rs_scan.rs_rd); - return &(scan->rs_ctup); + return ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, + scan->rs_cbuf); } /* @@ -1603,10 +1404,8 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) * If the tuple is found but fails the time qual check, then false is returned * but tuple->t_data is left pointing to the tuple. * - * keep_buf determines what is done with the buffer in the false-result cases. - * When the caller specifies keep_buf = true, we retain the pin on the buffer - * and return it in *userbuf (so the caller must eventually unpin it); when - * keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer. + * In the false-result cases the buffer pin is released and *userbuf is set to + * InvalidBuffer. * * stats_relation is the relation to charge the heap_fetch operation against * for statistical purposes. 
(This could be the heap rel itself, an diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 518d1df84a..9d06ba55b5 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -19,15 +19,168 @@ */ #include "postgres.h" +#include "access/genam.h" +#include "access/heapam.h" #include "access/tableam.h" +#include "catalog/pg_am_d.h" +#include "storage/bufmgr.h" #include "utils/builtins.h" static const TableAmRoutine heapam_methods; +/* ---------------------------------------------------------------- + * AM support routines for heapam + * ---------------------------------------------------------------- + */ + +static const TupleTableSlotOps * +heapam_slot_callbacks(Relation relation) +{ + return &TTSOpsBufferHeapTuple; +} + + +static IndexFetchTableData * +heapam_begin_index_fetch(Relation rel) +{ + IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData)); + + hscan->xs_base.rel = rel; + hscan->xs_cbuf = InvalidBuffer; + + return &hscan->xs_base; +} + + +static void +heapam_reset_index_fetch(IndexFetchTableData *scan) +{ + IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan; + + if (BufferIsValid(hscan->xs_cbuf)) + { + ReleaseBuffer(hscan->xs_cbuf); + hscan->xs_cbuf = InvalidBuffer; + } +} + +static void +heapam_end_index_fetch(IndexFetchTableData *scan) +{ + IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan; + + heapam_reset_index_fetch(scan); + + pfree(hscan); +} + + +static bool +heapam_fetch_follow(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead) +{ + IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan; + BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; + bool got_heap_tuple; + + Assert(TTS_IS_BUFFERTUPLE(slot)); + + /* We can skip the buffer-switching logic if we're in mid-HOT chain. */ + if (!*call_again) + { + /* Switch to correct buffer if we don't have it already */ + Buffer prev_buf = hscan->xs_cbuf; + + hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf, + hscan->xs_base.rel, + ItemPointerGetBlockNumber(tid)); + + /* + * Prune page, but only if we weren't already on this page + */ + if (prev_buf != hscan->xs_cbuf) + heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf); + } + + /* Obtain share-lock on the buffer so we can examine visibility */ + LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE); + got_heap_tuple = heap_hot_search_buffer(tid, + hscan->xs_base.rel, + hscan->xs_cbuf, + snapshot, + &bslot->base.tupdata, + all_dead, + !*call_again); + bslot->base.tupdata.t_self = *tid; + LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK); + + if (got_heap_tuple) + { + /* + * Only in a non-MVCC snapshot can more than one member of the HOT + * chain be visible. + */ + *call_again = !IsMVCCSnapshot(snapshot); + + slot->tts_tableOid = RelationGetRelid(scan->rel); + ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf); + } + else + { + /* We've reached the end of the HOT chain. */ + *call_again = false; + } + + return got_heap_tuple; +} + +static bool +heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot) +{ + BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; + bool res; + + Assert(TTS_IS_BUFFERTUPLE(slot)); + Assert(BufferIsValid(bslot->buffer)); + + /* + * We need buffer pin and lock to call HeapTupleSatisfiesVisibility. + * Caller should be holding pin, but not lock. 
+ */ + LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE); + res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot, + bslot->buffer); + LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK); + + return res; +} + + static const TableAmRoutine heapam_methods = { .type = T_TableAmRoutine, + + .slot_callbacks = heapam_slot_callbacks, + + .scan_begin = heap_beginscan, + .scan_end = heap_endscan, + .scan_rescan = heap_rescan, + .scan_update_snapshot = heap_update_snapshot, + + .parallelscan_estimate = table_block_parallelscan_estimate, + .parallelscan_initialize = table_block_parallelscan_initialize, + .parallelscan_reinitialize = table_block_parallelscan_reinitialize, + + .begin_index_fetch = heapam_begin_index_fetch, + .reset_index_fetch = heapam_reset_index_fetch, + .end_index_fetch = heapam_end_index_fetch, + + .tuple_fetch_follow = heapam_fetch_follow, + .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot, }; diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index e0a5ea42d5..eafa1fd274 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -22,6 +22,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "catalog/index.h" #include "lib/stringinfo.h" @@ -83,6 +84,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); scan->heapRelation = NULL; /* may be set later */ + scan->xs_heapfetch = NULL; scan->indexRelation = indexRelation; scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */ scan->numberOfKeys = nkeys; @@ -123,11 +125,6 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->xs_hitup = NULL; scan->xs_hitupdesc = NULL; - ItemPointerSetInvalid(&scan->xs_ctup.t_self); - scan->xs_ctup.t_data = NULL; - scan->xs_cbuf = InvalidBuffer; - scan->xs_continue_hot = false; - return scan; } @@ -335,6 +332,8 @@ systable_beginscan(Relation heapRelation, sysscan->heap_rel = heapRelation; sysscan->irel = irel; + sysscan->slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), + &TTSOpsBufferHeapTuple); if (snapshot == NULL) { @@ -384,9 +383,9 @@ systable_beginscan(Relation heapRelation, * disadvantage; and there are no compensating advantages, because * it's unlikely that such scans will occur in parallel. */ - sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, - nkeys, key, - true, false); + sysscan->scan = table_beginscan_strat(heapRelation, snapshot, + nkeys, key, + true, false); sysscan->iscan = NULL; } @@ -401,28 +400,45 @@ systable_beginscan(Relation heapRelation, * Note that returned tuple is a reference to data in a disk buffer; * it must not be modified, and should be presumed inaccessible after * next getnext() or endscan() call. + * + * XXX: It'd probably make sense to start offering a slot based interface. */ HeapTuple systable_getnext(SysScanDesc sysscan) { - HeapTuple htup; + HeapTuple htup = NULL; if (sysscan->irel) { - htup = index_getnext(sysscan->iscan, ForwardScanDirection); + if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot)) + { + bool shouldFree; - /* - * We currently don't need to support lossy index operators for any - * system catalog scan. 
It could be done here, using the scan keys to - * drive the operator calls, if we arranged to save the heap attnums - * during systable_beginscan(); this is practical because we still - * wouldn't need to support indexes on expressions. - */ - if (htup && sysscan->iscan->xs_recheck) - elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree); + Assert(!shouldFree); + + /* + * We currently don't need to support lossy index operators for + * any system catalog scan. It could be done here, using the scan + * keys to drive the operator calls, if we arranged to save the + * heap attnums during systable_beginscan(); this is practical + * because we still wouldn't need to support indexes on + * expressions. + */ + if (sysscan->iscan->xs_recheck) + elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); + } } else - htup = heap_getnext(sysscan->scan, ForwardScanDirection); + { + if (heap_getnextslot(sysscan->scan, ForwardScanDirection, sysscan->slot)) + { + bool shouldFree; + + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree); + Assert(!shouldFree); + } + } return htup; } @@ -446,37 +462,20 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) Snapshot freshsnap; bool result; + Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL)); + /* - * Trust that LockBuffer() and HeapTupleSatisfiesMVCC() do not themselves + * Trust that table_tuple_satisfies_snapshot() and its subsidiaries + * (commonly LockBuffer() and HeapTupleSatisfiesMVCC()) do not themselves * acquire snapshots, so we need not register the snapshot. Those * facilities are too low-level to have any business scanning tables. */ freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel)); - if (sysscan->irel) - { - IndexScanDesc scan = sysscan->iscan; - - Assert(IsMVCCSnapshot(scan->xs_snapshot)); - Assert(tup == &scan->xs_ctup); - Assert(BufferIsValid(scan->xs_cbuf)); - /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); - result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf); - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); - } - else - { - HeapScanDesc scan = sysscan->scan; - - Assert(IsMVCCSnapshot(scan->rs_snapshot)); - Assert(tup == &scan->rs_ctup); - Assert(BufferIsValid(scan->rs_cbuf)); - /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - } + result = table_tuple_satisfies_snapshot(sysscan->heap_rel, + sysscan->slot, + freshsnap); + return result; } @@ -488,13 +487,19 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) void systable_endscan(SysScanDesc sysscan) { + if (sysscan->slot) + { + ExecDropSingleTupleTableSlot(sysscan->slot); + sysscan->slot = NULL; + } + if (sysscan->irel) { index_endscan(sysscan->iscan); index_close(sysscan->irel, AccessShareLock); } else - heap_endscan(sysscan->scan); + table_endscan(sysscan->scan); if (sysscan->snapshot) UnregisterSnapshot(sysscan->snapshot); @@ -541,6 +546,8 @@ systable_beginscan_ordered(Relation heapRelation, sysscan->heap_rel = heapRelation; sysscan->irel = indexRelation; + sysscan->slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), + &TTSOpsBufferHeapTuple); if (snapshot == NULL) { @@ -586,10 +593,12 @@ 
systable_beginscan_ordered(Relation heapRelation, HeapTuple systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) { - HeapTuple htup; + HeapTuple htup = NULL; Assert(sysscan->irel); - htup = index_getnext(sysscan->iscan, direction); + if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot)) + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL); + /* See notes in systable_getnext */ if (htup && sysscan->iscan->xs_recheck) elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); @@ -603,6 +612,12 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) void systable_endscan_ordered(SysScanDesc sysscan) { + if (sysscan->slot) + { + ExecDropSingleTupleTableSlot(sysscan->slot); + sysscan->slot = NULL; + } + Assert(sysscan->irel); index_endscan(sysscan->iscan); if (sysscan->snapshot) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 4ad30186d9..6f4108155d 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -72,6 +72,7 @@ #include "access/amapi.h" #include "access/heapam.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xlog.h" #include "catalog/index.h" @@ -235,6 +236,8 @@ index_beginscan(Relation heapRelation, scan->heapRelation = heapRelation; scan->xs_snapshot = snapshot; + scan->xs_heapfetch = table_begin_index_fetch_table(heapRelation); + return scan; } @@ -318,16 +321,12 @@ index_rescan(IndexScanDesc scan, Assert(nkeys == scan->numberOfKeys); Assert(norderbys == scan->numberOfOrderBys); - /* Release any held pin on a heap page */ - if (BufferIsValid(scan->xs_cbuf)) - { - ReleaseBuffer(scan->xs_cbuf); - scan->xs_cbuf = InvalidBuffer; - } - - scan->xs_continue_hot = false; + /* Release resources (like buffer pins) for heap accesses */ + if (scan->xs_heapfetch) + table_reset_index_fetch_table(scan->xs_heapfetch); scan->kill_prior_tuple = false; /* for safety */ + scan->xs_heap_continue = false; scan->indexRelation->rd_indam->amrescan(scan, keys, nkeys, orderbys, norderbys); @@ -343,11 +342,11 @@ index_endscan(IndexScanDesc scan) SCAN_CHECKS; CHECK_SCAN_PROCEDURE(amendscan); - /* Release any held pin on a heap page */ - if (BufferIsValid(scan->xs_cbuf)) + /* Release resources (like buffer pins) for heap accesses */ + if (scan->xs_heapfetch) { - ReleaseBuffer(scan->xs_cbuf); - scan->xs_cbuf = InvalidBuffer; + table_end_index_fetch_table(scan->xs_heapfetch); + scan->xs_heapfetch = NULL; } /* End the AM's scan */ @@ -379,17 +378,16 @@ index_markpos(IndexScanDesc scan) /* ---------------- * index_restrpos - restore a scan position * - * NOTE: this only restores the internal scan state of the index AM. - * The current result tuple (scan->xs_ctup) doesn't change. See comments - * for ExecRestrPos(). - * - * NOTE: in the presence of HOT chains, mark/restore only works correctly - * if the scan's snapshot is MVCC-safe; that ensures that there's at most one - * returnable tuple in each HOT chain, and so restoring the prior state at the - * granularity of the index AM is sufficient. Since the only current user - * of mark/restore functionality is nodeMergejoin.c, this effectively means - * that merge-join plans only work for MVCC snapshots. This could be fixed - * if necessary, but for now it seems unimportant. + * NOTE: this only restores the internal scan state of the index AM. See + * comments for ExecRestrPos(). 
+ * + * NOTE: For heap, in the presence of HOT chains, mark/restore only works + * correctly if the scan's snapshot is MVCC-safe; that ensures that there's at + * most one returnable tuple in each HOT chain, and so restoring the prior + * state at the granularity of the index AM is sufficient. Since the only + * current user of mark/restore functionality is nodeMergejoin.c, this + * effectively means that merge-join plans only work for MVCC snapshots. This + * could be fixed if necessary, but for now it seems unimportant. * ---------------- */ void @@ -400,9 +398,12 @@ index_restrpos(IndexScanDesc scan) SCAN_CHECKS; CHECK_SCAN_PROCEDURE(amrestrpos); - scan->xs_continue_hot = false; + /* release resources (like buffer pins) for heap accesses */ + if (scan->xs_heapfetch) + table_reset_index_fetch_table(scan->xs_heapfetch); scan->kill_prior_tuple = false; /* for safety */ + scan->xs_heap_continue = false; scan->indexRelation->rd_indam->amrestrpos(scan); } @@ -483,6 +484,9 @@ index_parallelrescan(IndexScanDesc scan) { SCAN_CHECKS; + if (scan->xs_heapfetch) + table_reset_index_fetch_table(scan->xs_heapfetch); + /* amparallelrescan is optional; assume no-op if not provided by AM */ if (scan->indexRelation->rd_indam->amparallelrescan != NULL) scan->indexRelation->rd_indam->amparallelrescan(scan); @@ -513,6 +517,8 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys, scan->heapRelation = heaprel; scan->xs_snapshot = snapshot; + scan->xs_heapfetch = table_begin_index_fetch_table(heaprel); + return scan; } @@ -535,7 +541,7 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) /* * The AM's amgettuple proc finds the next index entry matching the scan - * keys, and puts the TID into scan->xs_ctup.t_self. It should also set + * keys, and puts the TID into scan->xs_heaptid. It should also set * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we * pay no attention to those fields here. */ @@ -543,23 +549,23 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) /* Reset kill flag immediately for safety */ scan->kill_prior_tuple = false; + scan->xs_heap_continue = false; /* If we're out of index entries, we're done */ if (!found) { - /* ... but first, release any held pin on a heap page */ - if (BufferIsValid(scan->xs_cbuf)) - { - ReleaseBuffer(scan->xs_cbuf); - scan->xs_cbuf = InvalidBuffer; - } + /* release resources (like buffer pins) for heap accesses */ + if (scan->xs_heapfetch) + table_reset_index_fetch_table(scan->xs_heapfetch); + return NULL; } + Assert(ItemPointerIsValid(&scan->xs_heaptid)); pgstat_count_index_tuples(scan->indexRelation, 1); /* Return the TID of the tuple we found. */ - return &scan->xs_ctup.t_self; + return &scan->xs_heaptid; } /* ---------------- @@ -580,53 +586,17 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) * enough information to do it efficiently in the general case. * ---------------- */ -HeapTuple -index_fetch_heap(IndexScanDesc scan) +bool +index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot) { - ItemPointer tid = &scan->xs_ctup.t_self; bool all_dead = false; - bool got_heap_tuple; - - /* We can skip the buffer-switching logic if we're in mid-HOT chain. 
*/ - if (!scan->xs_continue_hot) - { - /* Switch to correct buffer if we don't have it already */ - Buffer prev_buf = scan->xs_cbuf; - - scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf, - scan->heapRelation, - ItemPointerGetBlockNumber(tid)); + bool found; - /* - * Prune page, but only if we weren't already on this page - */ - if (prev_buf != scan->xs_cbuf) - heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf); - } + found = table_fetch_follow(scan->xs_heapfetch, &scan->xs_heaptid, scan->xs_snapshot, + slot, &scan->xs_heap_continue, &all_dead); - /* Obtain share-lock on the buffer so we can examine visibility */ - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); - got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation, - scan->xs_cbuf, - scan->xs_snapshot, - &scan->xs_ctup, - &all_dead, - !scan->xs_continue_hot); - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); - - if (got_heap_tuple) - { - /* - * Only in a non-MVCC snapshot can more than one member of the HOT - * chain be visible. - */ - scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot); + if (found) pgstat_count_heap_fetch(scan->indexRelation); - return &scan->xs_ctup; - } - - /* We've reached the end of the HOT chain. */ - scan->xs_continue_hot = false; /* * If we scanned a whole HOT chain and found only dead tuples, tell index @@ -638,50 +608,41 @@ index_fetch_heap(IndexScanDesc scan) if (!scan->xactStartedInRecovery) scan->kill_prior_tuple = all_dead; - return NULL; + return found; } /* ---------------- - * index_getnext - get the next heap tuple from a scan + * index_getnext_slot - get the next tuple from a scan * - * The result is the next heap tuple satisfying the scan keys and the - * snapshot, or NULL if no more matching tuples exist. + * The result is true if a tuple satisfying the scan keys and the snapshot was + * found, false otherwise. The tuple is stored in the specified slot. * * On success, the buffer containing the heap tup is pinned (the pin will be * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan - * call). + * call). XXX * * Note: caller must check scan->xs_recheck, and perform rechecking of the * scan keys if required. We do not do that here because we don't have * enough information to do it efficiently in the general case. * ---------------- */ -HeapTuple -index_getnext(IndexScanDesc scan, ScanDirection direction) +bool +index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot) { - HeapTuple heapTuple; - ItemPointer tid; - for (;;) { - if (scan->xs_continue_hot) - { - /* - * We are resuming scan of a HOT chain after having returned an - * earlier member. Must still hold pin on current heap page. - */ - Assert(BufferIsValid(scan->xs_cbuf)); - Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) == - BufferGetBlockNumber(scan->xs_cbuf)); - } - else + if (!scan->xs_heap_continue) { + ItemPointer tid; + /* Time to fetch the next TID from the index */ tid = index_getnext_tid(scan, direction); /* If we're out of index entries, we're done */ if (tid == NULL) break; + + Assert(ItemPointerEquals(tid, &scan->xs_heaptid)); } /* @@ -689,14 +650,15 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) * If we don't find anything, loop around and grab the next TID from * the index. 
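
Callers that used to consume HeapTuples from index_getnext() now let the scan fill a TupleTableSlot instead. A hedged sketch of the new caller-side loop; heapRel, indexRel, skeys/nkeys, snapshot and the two helper functions are placeholders, not part of this patch:

    TupleTableSlot *slot;
    IndexScanDesc scan;

    slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel),
                                    &TTSOpsBufferHeapTuple);
    scan = index_beginscan(heapRel, indexRel, snapshot, nkeys, 0);
    index_rescan(scan, skeys, nkeys, NULL, 0);

    while (index_getnext_slot(scan, ForwardScanDirection, slot))
    {
        /* with lossy index quals the caller must recheck the keys itself */
        if (scan->xs_recheck && !recheck_quals(slot))
            continue;

        process_visible_tuple(slot);
    }

    index_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
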
*/ - heapTuple = index_fetch_heap(scan); - if (heapTuple != NULL) - return heapTuple; + Assert(ItemPointerIsValid(&scan->xs_heaptid)); + if (index_fetch_heap(scan, slot)) + return true; } - return NULL; /* failure exit */ + return false; } + /* ---------------- * index_getbitmap - get all tuples at once from an index scan * diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 98917de2ef..60e0b90ccf 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -310,7 +310,7 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) if (_bt_first(scan, ForwardScanDirection)) { /* Save tuple ID, and continue scanning */ - heapTid = &scan->xs_ctup.t_self; + heapTid = &scan->xs_heaptid; tbm_add_tuples(tbm, heapTid, 1, false); ntids++; diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 92832237a8..af3da3aa5b 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1135,7 +1135,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) readcomplete: /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); @@ -1185,7 +1185,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); @@ -1964,7 +1964,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index dc398e1186..7542c29c79 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -61,6 +61,7 @@ #include "access/nbtree.h" #include "access/parallel.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/xact.h" #include "access/xlog.h" #include "access/xloginsert.h" @@ -158,9 +159,9 @@ typedef struct BTShared /* * This variable-sized field must come last. * - * See _bt_parallel_estimate_shared() and heap_parallelscan_estimate(). + * See _bt_parallel_estimate_shared() and table_parallelscan_estimate(). 
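
The leader/worker split for an AM-independent parallel scan then looks roughly as below; this assumes a heap relation (heap_getnext() is still the fetch routine in this patch), and pcxt, toc, heapRel, snapshot and PARALLEL_KEY_SCAN are placeholders:

    /* leader side: size and initialize the shared descriptor */
    Size        sz = table_parallelscan_estimate(heapRel, snapshot);
    ParallelTableScanDesc pscan;

    pscan = (ParallelTableScanDesc) shm_toc_allocate(pcxt->toc, sz);
    table_parallelscan_initialize(heapRel, pscan, snapshot);
    shm_toc_insert(pcxt->toc, PARALLEL_KEY_SCAN, pscan);

    /* worker side (and leader, if participating): attach and scan */
    TableScanDesc scan;
    HeapTuple   tuple;

    pscan = (ParallelTableScanDesc) shm_toc_lookup(toc, PARALLEL_KEY_SCAN, false);
    scan = table_beginscan_parallel(heapRel, pscan);
    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* ... feed the build callback, count tuples, etc. ... */
    }
    table_endscan(scan);
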
*/ - ParallelHeapScanDescData heapdesc; + ParallelTableScanDescData heapdesc; } BTShared; /* @@ -282,7 +283,7 @@ static void _bt_load(BTWriteState *wstate, static void _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request); static void _bt_end_parallel(BTLeader *btleader); -static Size _bt_parallel_estimate_shared(Snapshot snapshot); +static Size _bt_parallel_estimate_shared(Relation heap, Snapshot snapshot); static double _bt_parallel_heapscan(BTBuildState *buildstate, bool *brokenhotchain); static void _bt_leader_participate_as_worker(BTBuildState *buildstate); @@ -1275,7 +1276,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) * Estimate size for our own PARALLEL_KEY_BTREE_SHARED workspace, and * PARALLEL_KEY_TUPLESORT tuplesort workspace */ - estbtshared = _bt_parallel_estimate_shared(snapshot); + estbtshared = _bt_parallel_estimate_shared(btspool->heap, snapshot); shm_toc_estimate_chunk(&pcxt->estimator, estbtshared); estsort = tuplesort_estimate_shared(scantuplesortstates); shm_toc_estimate_chunk(&pcxt->estimator, estsort); @@ -1316,7 +1317,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) btshared->havedead = false; btshared->indtuples = 0.0; btshared->brokenhotchain = false; - heap_parallelscan_initialize(&btshared->heapdesc, btspool->heap, snapshot); + table_parallelscan_initialize(btspool->heap, &btshared->heapdesc, snapshot); /* * Store shared tuplesort-private state, for which we reserved space. @@ -1403,10 +1404,10 @@ _bt_end_parallel(BTLeader *btleader) * btree index build based on the snapshot its parallel scan will use. */ static Size -_bt_parallel_estimate_shared(Snapshot snapshot) +_bt_parallel_estimate_shared(Relation heap, Snapshot snapshot) { return add_size(offsetof(BTShared, heapdesc), - heap_parallelscan_estimate(snapshot)); + table_parallelscan_estimate(heap, snapshot)); } /* @@ -1617,7 +1618,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, { SortCoordinate coordinate; BTBuildState buildstate; - HeapScanDesc scan; + TableScanDesc scan; double reltuples; IndexInfo *indexInfo; @@ -1670,7 +1671,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, /* Join parallel scan */ indexInfo = BuildIndexInfo(btspool->index); indexInfo->ii_Concurrent = btshared->isconcurrent; - scan = heap_beginscan_parallel(btspool->heap, &btshared->heapdesc); + scan = table_beginscan_parallel(btspool->heap, &btshared->heapdesc); reltuples = IndexBuildHeapScan(btspool->heap, btspool->index, indexInfo, true, _bt_build_callback, (void *) &buildstate, scan); diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index dc0d63924d..9365bc57ad 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -927,7 +927,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir) if (so->iPtr < so->nPtrs) { /* continuing to return reported tuples */ - scan->xs_ctup.t_self = so->heapPtrs[so->iPtr]; + scan->xs_heaptid = so->heapPtrs[so->iPtr]; scan->xs_recheck = so->recheck[so->iPtr]; scan->xs_hitup = so->reconTups[so->iPtr]; diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 84851e4ff8..1d133d5c7a 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -11,8 +11,234 @@ */ #include "postgres.h" +#include "access/heapam.h" #include "access/tableam.h" +#include "access/xact.h" +#include "storage/bufmgr.h" +#include "storage/shmem.h" /* GUC variables */ char 
*default_table_access_method = DEFAULT_TABLE_ACCESS_METHOD;
+bool		synchronize_seqscans = true;
+
+
+/* ----------------
+ *	table_beginscan_parallel - join a parallel scan
+ *
+ *	Caller must hold a suitable lock on the correct relation.
+ * ----------------
+ */
+TableScanDesc
+table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
+{
+	Snapshot	snapshot;
+
+	Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
+
+	if (!parallel_scan->phs_snapshot_any)
+	{
+		/* Snapshot was serialized -- restore it */
+		snapshot = RestoreSnapshot((char *) parallel_scan +
+								   parallel_scan->phs_snapshot_off);
+		RegisterSnapshot(snapshot);
+	}
+	else
+	{
+		/* SnapshotAny passed by caller (not serialized) */
+		snapshot = SnapshotAny;
+	}
+
+	return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL, parallel_scan,
+											true, true, true, false, false, !parallel_scan->phs_snapshot_any);
+}
+
+/* ----------------
+ *	table_parallelscan_estimate - estimate shared-memory space for a scan
+ *
+ *	Returns the number of bytes of dynamic shared memory the leader must
+ *	allocate for a ParallelTableScanDesc, including room to serialize the
+ *	snapshot.
+ * ----------------
+ */
+Size
+table_parallelscan_estimate(Relation rel, Snapshot snapshot)
+{
+	Size		sz = 0;
+
+	if (IsMVCCSnapshot(snapshot))
+		sz = add_size(sz, EstimateSnapshotSpace(snapshot));
+	else
+		Assert(snapshot == SnapshotAny);
+
+	sz = add_size(sz, rel->rd_tableam->parallelscan_estimate(rel));
+
+	return sz;
+}
+
+/* ----------------
+ *	table_parallelscan_initialize - initialize ParallelTableScanDesc
+ *
+ *	Must allow as many bytes of shared memory as returned by
+ *	table_parallelscan_estimate.  Call this just once in the leader
+ *	process; then, individual workers attach via table_beginscan_parallel.
+ * ----------------
+ */
+void
+table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
+							  Snapshot snapshot)
+{
+	Size		snapshot_off = rel->rd_tableam->parallelscan_initialize(rel, pscan);
+
+	pscan->phs_snapshot_off = snapshot_off;
+
+	if (IsMVCCSnapshot(snapshot))
+	{
+		SerializeSnapshot(snapshot, (char *) pscan + pscan->phs_snapshot_off);
+		pscan->phs_snapshot_any = false;
+	}
+	else
+	{
+		Assert(snapshot == SnapshotAny);
+		pscan->phs_snapshot_any = true;
+	}
+}
+
+TableScanDesc
+table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Snapshot	snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
+
+	return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key, NULL,
+											true, true, true, false, false, true);
+}
+
+
+Size
+table_block_parallelscan_estimate(Relation rel)
+{
+	return sizeof(ParallelBlockTableScanDescData);
+}
+
+Size
+table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
+{
+	ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan;
+
+	bpscan->base.phs_relid = RelationGetRelid(rel);
+	bpscan->phs_nblocks = RelationGetNumberOfBlocks(rel);
+	/* compare phs_syncscan initialization to similar logic in initscan */
+	bpscan->base.phs_syncscan = synchronize_seqscans &&
+		!RelationUsesLocalBuffers(rel) &&
+		bpscan->phs_nblocks > NBuffers / 4;
+	SpinLockInit(&bpscan->phs_mutex);
+	bpscan->phs_startblock = InvalidBlockNumber;
+	pg_atomic_init_u64(&bpscan->phs_nallocated, 0);
+
+	return sizeof(ParallelBlockTableScanDescData);
+}
+
+void
+table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
+{
+	ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan;
+
pg_atomic_write_u64(&bpscan->phs_nallocated, 0); +} + +/* ---------------- + * table_parallelscan_startblock_init - find and set the scan's startblock + * + * Determine where the parallel seq scan should start. This function may + * be called many times, once by each parallel worker. We must be careful + * only to set the startblock once. + * ---------------- + */ +void +table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanDesc pbscan) +{ + BlockNumber sync_startpage = InvalidBlockNumber; + +retry: + /* Grab the spinlock. */ + SpinLockAcquire(&pbscan->phs_mutex); + + /* + * If the scan's startblock has not yet been initialized, we must do so + * now. If this is not a synchronized scan, we just start at block 0, but + * if it is a synchronized scan, we must get the starting position from + * the synchronized scan machinery. We can't hold the spinlock while + * doing that, though, so release the spinlock, get the information we + * need, and retry. If nobody else has initialized the scan in the + * meantime, we'll fill in the value we fetched on the second time + * through. + */ + if (pbscan->phs_startblock == InvalidBlockNumber) + { + if (!pbscan->base.phs_syncscan) + pbscan->phs_startblock = 0; + else if (sync_startpage != InvalidBlockNumber) + pbscan->phs_startblock = sync_startpage; + else + { + SpinLockRelease(&pbscan->phs_mutex); + sync_startpage = ss_get_location(rel, pbscan->phs_nblocks); + goto retry; + } + } + SpinLockRelease(&pbscan->phs_mutex); +} + +/* ---------------- + * table_block_parallelscan_nextpage - get the next page to scan + * + * Get the next page to scan. Even if there are no pages left to scan, + * another backend could have grabbed a page to scan and not yet finished + * looking at it, so it doesn't follow that the scan is done when the + * first backend gets an InvalidBlockNumber return. + * ---------------- + */ +BlockNumber +table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanDesc pbscan) +{ + BlockNumber page; + uint64 nallocated; + + /* + * phs_nallocated tracks how many pages have been allocated to workers + * already. When phs_nallocated >= rs_nblocks, all blocks have been + * allocated. + * + * Because we use an atomic fetch-and-add to fetch the current value, the + * phs_nallocated counter will exceed rs_nblocks, because workers will + * still increment the value, when they try to allocate the next block but + * all blocks have been allocated already. The counter must be 64 bits + * wide because of that, to avoid wrapping around when rs_nblocks is close + * to 2^32. + * + * The actual page to return is calculated by adding the counter to the + * starting block number, modulo nblocks. + */ + nallocated = pg_atomic_fetch_add_u64(&pbscan->phs_nallocated, 1); + if (nallocated >= pbscan->phs_nblocks) + page = InvalidBlockNumber; /* all blocks have been allocated */ + else + page = (nallocated + pbscan->phs_startblock) % pbscan->phs_nblocks; + + /* + * Report scan location. Normally, we report the current page number. + * When we reach the end of the scan, though, we report the starting page, + * not the ending page, just so the starting positions for later scans + * doesn't slew backwards. We only report the position at the end of the + * scan once, though: subsequent callers will report nothing. 
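
Concretely (numbers made up for illustration): with phs_nblocks = 8 and phs_startblock = 5, successive fetch-and-adds hand out tickets 0, 1, 2, ... and the workers scan blocks 5, 6, 7, 0, 1, 2, 3, 4; any ticket >= 8 maps to InvalidBlockNumber, so late arrivals simply fall out of the loop.
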
+ */ + if (pbscan->base.phs_syncscan) + { + if (page != InvalidBlockNumber) + ss_report_location(rel, page); + else if (nallocated == pbscan->phs_nblocks) + ss_report_location(rel, pbscan->phs_startblock); + } + + return page; +} diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c index 40db935ac1..6031e6b127 100644 --- a/src/backend/access/table/tableamapi.c +++ b/src/backend/access/table/tableamapi.c @@ -97,7 +97,7 @@ get_table_am_oid(const char *tableamname, bool missing_ok) { Oid result; Relation rel; - HeapScanDesc scandesc; + TableScanDesc scandesc; HeapTuple tuple; ScanKeyData entry[1]; @@ -112,7 +112,7 @@ get_table_am_oid(const char *tableamname, bool missing_ok) Anum_pg_am_amname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tableamname)); - scandesc = heap_beginscan_catalog(rel, 1, entry); + scandesc = table_beginscan_catalog(rel, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ @@ -122,7 +122,7 @@ get_table_am_oid(const char *tableamname, bool missing_ok) else result = InvalidOid; - heap_endscan(scandesc); + table_endscan(scandesc); heap_close(rel, AccessShareLock); if (!OidIsValid(result) && !missing_ok) diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c index 298e0ab4a0..fe62a73341 100644 --- a/src/backend/access/tablesample/system.c +++ b/src/backend/access/tablesample/system.c @@ -180,7 +180,8 @@ static BlockNumber system_nextsampleblock(SampleScanState *node) { SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; - HeapScanDesc scan = node->ss.ss_currentScanDesc; + TableScanDesc scan = node->ss.ss_currentScanDesc; + HeapScanDesc hscan = (HeapScanDesc) scan; BlockNumber nextblock = sampler->nextblock; uint32 hashinput[2]; @@ -199,7 +200,7 @@ system_nextsampleblock(SampleScanState *node) * Loop over block numbers until finding suitable block or reaching end of * relation. 
*/ - for (; nextblock < scan->rs_nblocks; nextblock++) + for (; nextblock < hscan->rs_nblocks; nextblock++) { uint32 hash; @@ -211,7 +212,7 @@ system_nextsampleblock(SampleScanState *node) break; } - if (nextblock < scan->rs_nblocks) + if (nextblock < hscan->rs_nblocks) { /* Found a suitable block; remember where we should start next time */ sampler->nextblock = nextblock + 1; diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 4d7ed8ad1a..d8776e192e 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -20,6 +20,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "bootstrap/bootstrap.h" @@ -594,7 +595,7 @@ boot_openrel(char *relname) int i; struct typmap **app; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; if (strlen(relname) >= NAMEDATALEN) @@ -604,16 +605,16 @@ boot_openrel(char *relname) { /* We can now load the pg_type data */ rel = table_open(TypeRelationId, NoLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); i = 0; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) ++i; - heap_endscan(scan); + table_endscan(scan); app = Typ = ALLOC(struct typmap *, i + 1); while (i-- > 0) *app++ = ALLOC(struct typmap, 1); *app = NULL; - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); app = Typ; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -623,7 +624,7 @@ boot_openrel(char *relname) sizeof((*app)->am_typ)); app++; } - heap_endscan(scan); + table_endscan(scan); table_close(rel, NoLock); } @@ -915,7 +916,7 @@ gettype(char *type) { int i; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; struct typmap **app; @@ -939,16 +940,16 @@ gettype(char *type) } elog(DEBUG4, "external type: %s", type); rel = table_open(TypeRelationId, NoLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); i = 0; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) ++i; - heap_endscan(scan); + table_endscan(scan); app = Typ = ALLOC(struct typmap *, i + 1); while (i-- > 0) *app++ = ALLOC(struct typmap, 1); *app = NULL; - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); app = Typ; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -957,7 +958,7 @@ gettype(char *type) (char *) GETSTRUCT(tup), sizeof((*app)->am_typ)); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, NoLock); return gettype(type); } diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 11ddce2a8b..a88d3e85b2 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -20,6 +20,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "catalog/binary_upgrade.h" @@ -821,7 +822,7 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames) ScanKeyData key[2]; int keycount; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; keycount = 0; @@ -843,7 +844,7 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames) CharGetDatum(PROKIND_PROCEDURE)); rel = table_open(ProcedureRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, keycount, key); + scan = 
table_beginscan_catalog(rel, keycount, key); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -852,7 +853,7 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames) objects = lappend_oid(objects, oid); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, AccessShareLock); } break; @@ -877,7 +878,7 @@ getRelationsInNamespace(Oid namespaceId, char relkind) List *relations = NIL; ScanKeyData key[2]; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; ScanKeyInit(&key[0], @@ -890,7 +891,7 @@ getRelationsInNamespace(Oid namespaceId, char relkind) CharGetDatum(relkind)); rel = table_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 2, key); + scan = table_beginscan_catalog(rel, 2, key); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -899,7 +900,7 @@ getRelationsInNamespace(Oid namespaceId, char relkind) relations = lappend_oid(relations, oid); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, AccessShareLock); return relations; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index c7b5ff62f9..e0345f96e7 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -35,6 +35,7 @@ #include "access/relation.h" #include "access/sysattr.h" #include "access/table.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "access/xlog.h" diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 1ee1ed2894..98fe78f4ce 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -27,6 +27,7 @@ #include "access/heapam.h" #include "access/multixact.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/transam.h" #include "access/visibilitymap.h" @@ -2138,7 +2139,7 @@ index_update_stats(Relation rel, ReindexIsProcessingHeap(RelationRelationId)) { /* don't assume syscache will work */ - HeapScanDesc pg_class_scan; + TableScanDesc pg_class_scan; ScanKeyData key[1]; ScanKeyInit(&key[0], @@ -2146,10 +2147,10 @@ index_update_stats(Relation rel, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid)); - pg_class_scan = heap_beginscan_catalog(pg_class, 1, key); + pg_class_scan = table_beginscan_catalog(pg_class, 1, key); tuple = heap_getnext(pg_class_scan, ForwardScanDirection); tuple = heap_copytuple(tuple); - heap_endscan(pg_class_scan); + table_endscan(pg_class_scan); } else { @@ -2431,7 +2432,7 @@ IndexBuildHeapScan(Relation heapRelation, bool allow_sync, IndexBuildCallback callback, void *callback_state, - HeapScanDesc scan) + TableScanDesc scan) { return IndexBuildHeapRangeScan(heapRelation, indexRelation, indexInfo, allow_sync, @@ -2460,8 +2461,9 @@ IndexBuildHeapRangeScan(Relation heapRelation, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, - HeapScanDesc scan) + TableScanDesc scan) { + HeapScanDesc hscan; bool is_system_catalog; bool checking_uniqueness; HeapTuple heapTuple; @@ -2540,12 +2542,12 @@ IndexBuildHeapRangeScan(Relation heapRelation, else snapshot = SnapshotAny; - scan = heap_beginscan_strat(heapRelation, /* relation */ - snapshot, /* snapshot */ - 0, /* number of keys */ - NULL, /* scan key */ - true, /* buffer access strategy OK */ - allow_sync); /* syncscan OK? */ + scan = table_beginscan_strat(heapRelation, /* relation */ + snapshot, /* snapshot */ + 0, /* number of keys */ + NULL, /* scan key */ + true, /* buffer access strategy OK */ + allow_sync); /* syncscan OK? 
*/ } else { @@ -2561,6 +2563,8 @@ IndexBuildHeapRangeScan(Relation heapRelation, snapshot = scan->rs_snapshot; } + hscan = (HeapScanDesc) scan; + /* * Must call GetOldestXmin() with SnapshotAny. Should never call * GetOldestXmin() with MVCC snapshot. (It's especially worth checking @@ -2618,15 +2622,15 @@ IndexBuildHeapRangeScan(Relation heapRelation, * tuple per HOT-chain --- else we could create more than one index * entry pointing to the same root tuple. */ - if (scan->rs_cblock != root_blkno) + if (hscan->rs_cblock != root_blkno) { - Page page = BufferGetPage(scan->rs_cbuf); + Page page = BufferGetPage(hscan->rs_cbuf); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); heap_get_root_tuples(page, root_offsets); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); - root_blkno = scan->rs_cblock; + root_blkno = hscan->rs_cblock; } if (snapshot == SnapshotAny) @@ -2643,7 +2647,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, * be conservative about it. (This remark is still correct even * with HOT-pruning: our pin on the buffer prevents pruning.) */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); /* * The criteria for counting a tuple as live in this block need to @@ -2652,7 +2656,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, * values, e.g. when there are many recently-dead tuples. */ switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin, - scan->rs_cbuf)) + hscan->rs_cbuf)) { case HEAPTUPLE_DEAD: /* Definitely dead, we can ignore it */ @@ -2733,7 +2737,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, /* * Must drop the lock on the buffer before we wait */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); XactLockTableWait(xwait, heapRelation, &heapTuple->t_self, XLTW_InsertIndexUnique); @@ -2800,7 +2804,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, /* * Must drop the lock on the buffer before we wait */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); XactLockTableWait(xwait, heapRelation, &heapTuple->t_self, XLTW_InsertIndexUnique); @@ -2852,7 +2856,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, break; } - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); if (!indexIt) continue; @@ -2931,7 +2935,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, } } - heap_endscan(scan); + table_endscan(scan); /* we can now forget our snapshot, if set and registered by us */ if (need_unregister_snapshot) @@ -2966,8 +2970,8 @@ IndexCheckExclusion(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo) { - HeapScanDesc scan; HeapTuple heapTuple; + TableScanDesc scan; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; ExprState *predicate; @@ -3003,12 +3007,12 @@ IndexCheckExclusion(Relation heapRelation, * Scan all live tuples in the base relation. 
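
The recurring idiom in these conversions: the generic TableScanDesc drives the loop, while code that still needs heap-specific state (the pinned buffer, the current block) downcasts to HeapScanDesc, which is only legitimate while these callers are known to scan heap relations. A sketch of the idiom under that assumption (heapRel, snapshot and allow_sync are placeholders):

    TableScanDesc scan = table_beginscan_strat(heapRel, snapshot,
                                               0, NULL, true, allow_sync);
    HeapScanDesc hscan = (HeapScanDesc) scan;   /* heap AM only */
    HeapTuple   tuple;

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* heap-only details live in hscan, e.g. the currently pinned buffer */
        LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
        /* ... visibility checks, HOT-root lookups, ... */
        LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    }

    table_endscan(scan);
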
*/ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan_strat(heapRelation, /* relation */ - snapshot, /* snapshot */ - 0, /* number of keys */ - NULL, /* scan key */ - true, /* buffer access strategy OK */ - true); /* syncscan OK */ + scan = table_beginscan_strat(heapRelation, /* relation */ + snapshot, /* snapshot */ + 0, /* number of keys */ + NULL, /* scan key */ + true, /* buffer access strategy OK */ + true); /* syncscan OK */ while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -3046,7 +3050,7 @@ IndexCheckExclusion(Relation heapRelation, estate, true); } - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(slot); @@ -3281,7 +3285,8 @@ validate_index_heapscan(Relation heapRelation, Snapshot snapshot, v_i_state *state) { - HeapScanDesc scan; + TableScanDesc scan; + HeapScanDesc hscan; HeapTuple heapTuple; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; @@ -3324,12 +3329,13 @@ validate_index_heapscan(Relation heapRelation, * here, because it's critical that we read from block zero forward to * match the sorted TIDs. */ - scan = heap_beginscan_strat(heapRelation, /* relation */ - snapshot, /* snapshot */ - 0, /* number of keys */ - NULL, /* scan key */ - true, /* buffer access strategy OK */ - false); /* syncscan not OK */ + scan = table_beginscan_strat(heapRelation, /* relation */ + snapshot, /* snapshot */ + 0, /* number of keys */ + NULL, /* scan key */ + true, /* buffer access strategy OK */ + false); /* syncscan not OK */ + hscan = (HeapScanDesc) scan; /* * Scan all tuples matching the snapshot. @@ -3358,17 +3364,17 @@ validate_index_heapscan(Relation heapRelation, * already-passed-over tuplesort output TIDs of the current page. We * clear that array here, when advancing onto a new heap page. 
*/ - if (scan->rs_cblock != root_blkno) + if (hscan->rs_cblock != root_blkno) { - Page page = BufferGetPage(scan->rs_cbuf); + Page page = BufferGetPage(hscan->rs_cbuf); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); heap_get_root_tuples(page, root_offsets); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); memset(in_index, 0, sizeof(in_index)); - root_blkno = scan->rs_cblock; + root_blkno = hscan->rs_cblock; } /* Convert actual tuple TID to root TID */ @@ -3493,7 +3499,7 @@ validate_index_heapscan(Relation heapRelation, } } - heap_endscan(scan); + table_endscan(scan); ExecDropSingleTupleTableSlot(slot); diff --git a/src/backend/catalog/pg_conversion.c b/src/backend/catalog/pg_conversion.c index a3bd8c2c15..b7c7e5e1a7 100644 --- a/src/backend/catalog/pg_conversion.c +++ b/src/backend/catalog/pg_conversion.c @@ -16,6 +16,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "catalog/catalog.h" #include "catalog/dependency.h" @@ -152,7 +153,7 @@ RemoveConversionById(Oid conversionOid) { Relation rel; HeapTuple tuple; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData scanKeyData; ScanKeyInit(&scanKeyData, @@ -163,14 +164,14 @@ RemoveConversionById(Oid conversionOid) /* open pg_conversion */ rel = table_open(ConversionRelationId, RowExclusiveLock); - scan = heap_beginscan_catalog(rel, 1, &scanKeyData); + scan = table_beginscan_catalog(rel, 1, &scanKeyData); /* search for the target tuple */ if (HeapTupleIsValid(tuple = heap_getnext(scan, ForwardScanDirection))) CatalogTupleDelete(rel, &tuple->t_self); else elog(ERROR, "could not find tuple for conversion %u", conversionOid); - heap_endscan(scan); + table_endscan(scan); table_close(rel, RowExclusiveLock); } diff --git a/src/backend/catalog/pg_db_role_setting.c b/src/backend/catalog/pg_db_role_setting.c index 5189c6f7a5..20acac2eea 100644 --- a/src/backend/catalog/pg_db_role_setting.c +++ b/src/backend/catalog/pg_db_role_setting.c @@ -13,6 +13,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" #include "catalog/pg_db_role_setting.h" @@ -169,7 +170,7 @@ void DropSetting(Oid databaseid, Oid roleid) { Relation relsetting; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData keys[2]; HeapTuple tup; int numkeys = 0; @@ -195,12 +196,12 @@ DropSetting(Oid databaseid, Oid roleid) numkeys++; } - scan = heap_beginscan_catalog(relsetting, numkeys, keys); + scan = table_beginscan_catalog(relsetting, numkeys, keys); while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) { CatalogTupleDelete(relsetting, &tup->t_self); } - heap_endscan(scan); + table_endscan(scan); table_close(relsetting, RowExclusiveLock); } diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index 96f9275072..1c322655e4 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -21,6 +21,7 @@ #include "access/hash.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -329,7 +330,7 @@ GetAllTablesPublicationRelations(void) { Relation classRel; ScanKeyData key[1]; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; List *result = NIL; @@ -340,7 +341,7 @@ GetAllTablesPublicationRelations(void) 
BTEqualStrategyNumber, F_CHAREQ, CharGetDatum(RELKIND_RELATION)); - scan = heap_beginscan_catalog(classRel, 1, key); + scan = table_beginscan_catalog(classRel, 1, key); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -351,7 +352,7 @@ GetAllTablesPublicationRelations(void) result = lappend_oid(result, relid); } - heap_endscan(scan); + table_endscan(scan); table_close(classRel, AccessShareLock); return result; diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index 935d7670e4..afee2838cc 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -19,6 +19,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/indexing.h" @@ -390,7 +391,7 @@ void RemoveSubscriptionRel(Oid subid, Oid relid) { Relation rel; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData skey[2]; HeapTuple tup; int nkeys = 0; @@ -416,12 +417,12 @@ RemoveSubscriptionRel(Oid subid, Oid relid) } /* Do the search and delete what we found. */ - scan = heap_beginscan_catalog(rel, nkeys, skey); + scan = table_beginscan_catalog(rel, nkeys, skey); while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) { CatalogTupleDelete(rel, &tup->t_self); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, RowExclusiveLock); } diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 4d6453d924..622c2305ed 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -22,6 +22,7 @@ #include "access/multixact.h" #include "access/relscan.h" #include "access/rewriteheap.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/tuptoaster.h" #include "access/xact.h" @@ -764,6 +765,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, Datum *values; bool *isnull; IndexScanDesc indexScan; + TableScanDesc tableScan; HeapScanDesc heapScan; bool use_wal; bool is_system_catalog; @@ -779,6 +781,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, BlockNumber num_pages; int elevel = verbose ? 
INFO : DEBUG2; PGRUsage ru0; + TupleTableSlot *slot; + BufferHeapTupleTableSlot *hslot; pg_rusage_init(&ru0); @@ -924,16 +928,22 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, */ if (OldIndex != NULL && !use_sort) { + tableScan = NULL; heapScan = NULL; indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0); index_rescan(indexScan, NULL, 0, NULL, 0); } else { - heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL); + tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL); + heapScan = (HeapScanDesc) tableScan; indexScan = NULL; } + slot = MakeSingleTupleTableSlot(RelationGetDescr(OldHeap), + &TTSOpsBufferHeapTuple); + hslot = (BufferHeapTupleTableSlot *) slot; + /* Log what we're doing */ if (indexScan != NULL) ereport(elevel, @@ -968,19 +978,19 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, if (indexScan != NULL) { - tuple = index_getnext(indexScan, ForwardScanDirection); - if (tuple == NULL) + if (!index_getnext_slot(indexScan, ForwardScanDirection, slot)) break; /* Since we used no scan keys, should never need to recheck */ if (indexScan->xs_recheck) elog(ERROR, "CLUSTER does not support lossy index conditions"); - buf = indexScan->xs_cbuf; + tuple = hslot->base.tuple; + buf = hslot->buffer; } else { - tuple = heap_getnext(heapScan, ForwardScanDirection); + tuple = heap_getnext(tableScan, ForwardScanDirection); if (tuple == NULL) break; @@ -1066,7 +1076,9 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, if (indexScan != NULL) index_endscan(indexScan); if (heapScan != NULL) - heap_endscan(heapScan); + table_endscan(tableScan); + if (slot) + ExecDropSingleTupleTableSlot(slot); /* * In scan-and-sort mode, complete the sort, then read out all live tuples @@ -1694,7 +1706,7 @@ static List * get_tables_to_cluster(MemoryContext cluster_context) { Relation indRelation; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData entry; HeapTuple indexTuple; Form_pg_index index; @@ -1713,7 +1725,7 @@ get_tables_to_cluster(MemoryContext cluster_context) Anum_pg_index_indisclustered, BTEqualStrategyNumber, F_BOOLEQ, BoolGetDatum(true)); - scan = heap_beginscan_catalog(indRelation, 1, &entry); + scan = table_beginscan_catalog(indRelation, 1, &entry); while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { index = (Form_pg_index) GETSTRUCT(indexTuple); @@ -1734,7 +1746,7 @@ get_tables_to_cluster(MemoryContext cluster_context) MemoryContextSwitchTo(old_context); } - heap_endscan(scan); + table_endscan(scan); relation_close(indRelation, AccessShareLock); diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index f9ada29af8..374c453cb3 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -15,6 +15,7 @@ #include "access/genam.h" #include "access/heapam.h" +#include "access/tableam.h" #include "catalog/index.h" #include "commands/trigger.h" #include "executor/executor.h" diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 5dd6fe02c6..598cafdda3 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -20,6 +20,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -2073,13 +2074,13 @@ CopyTo(CopyState cstate) { Datum *values; bool *nulls; - HeapScanDesc scandesc; HeapTuple tuple; + TableScanDesc scandesc; values = (Datum *) 
palloc(num_phys_attrs * sizeof(Datum)); nulls = (bool *) palloc(num_phys_attrs * sizeof(bool)); - scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); + scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); processed = 0; while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL) @@ -2094,10 +2095,10 @@ CopyTo(CopyState cstate) processed++; } - heap_endscan(scandesc); pfree(values); pfree(nulls); + table_endscan(scandesc); } else { diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index d207cd899f..35cad0b629 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -26,6 +26,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "access/xloginsert.h" #include "access/xlogutils.h" @@ -97,7 +98,7 @@ static int errdetail_busy_db(int notherbackends, int npreparedxacts); Oid createdb(ParseState *pstate, const CreatedbStmt *stmt) { - HeapScanDesc scan; + TableScanDesc scan; Relation rel; Oid src_dboid; Oid src_owner; @@ -589,7 +590,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) * each one to the new database. */ rel = table_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple); @@ -643,7 +644,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); } } - heap_endscan(scan); + table_endscan(scan); table_close(rel, AccessShareLock); /* @@ -1870,11 +1871,11 @@ static void remove_dbtablespaces(Oid db_id) { Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; rel = table_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple); @@ -1917,7 +1918,7 @@ remove_dbtablespaces(Oid db_id) pfree(dstpath); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, AccessShareLock); } @@ -1938,11 +1939,11 @@ check_db_file_conflict(Oid db_id) { bool result = false; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; rel = table_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple); @@ -1967,7 +1968,7 @@ check_db_file_conflict(Oid db_id) pfree(dstpath); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, AccessShareLock); return result; diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 5dcedc337a..7cf1837715 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -19,6 +19,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/reloptions.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -2336,7 +2337,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, { Oid objectOid; Relation relationRelation; - HeapScanDesc scan; + TableScanDesc scan; 
ScanKeyData scan_keys[1]; HeapTuple tuple; MemoryContext private_context; @@ -2410,7 +2411,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, * rels will be processed indirectly by reindex_relation). */ relationRelation = table_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(relationRelation, num_keys, scan_keys); + scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple); @@ -2469,7 +2470,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, MemoryContextSwitchTo(old); } - heap_endscan(scan); + table_endscan(scan); table_close(relationRelation, AccessShareLock); /* Now reindex each rel in a separate transaction */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 788544ec92..54f3ce889e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -4736,8 +4736,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) bool *isnull; TupleTableSlot *oldslot; TupleTableSlot *newslot; - HeapScanDesc scan; HeapTuple tuple; + TableScanDesc scan; MemoryContext oldCxt; List *dropped_attrs = NIL; ListCell *lc; @@ -4795,7 +4795,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) * checking all the constraints. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(oldrel, snapshot, 0, NULL); + scan = table_beginscan(oldrel, snapshot, 0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -4911,7 +4911,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) } MemoryContextSwitchTo(oldCxt); - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(oldslot); @@ -5306,7 +5306,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be { Relation classRel; ScanKeyData key[1]; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; List *result = NIL; @@ -5317,7 +5317,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(typeOid)); - scan = heap_beginscan_catalog(classRel, 1, key); + scan = table_beginscan_catalog(classRel, 1, key); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -5333,7 +5333,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be result = lappend_oid(result, classform->oid); } - heap_endscan(scan); + table_endscan(scan); table_close(classRel, AccessShareLock); return result; @@ -8819,8 +8819,8 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) Expr *origexpr; ExprState *exprstate; TupleDesc tupdesc; - HeapScanDesc scan; HeapTuple tuple; + TableScanDesc scan; ExprContext *econtext; MemoryContext oldcxt; TupleTableSlot *slot; @@ -8860,7 +8860,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) econtext->ecxt_scantuple = slot; snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = table_beginscan(rel, snapshot, 0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -8883,7 +8883,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) } MemoryContextSwitchTo(oldcxt); - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(slot); 
FreeExecutorState(estate); @@ -8902,8 +8902,8 @@ validateForeignKeyConstraint(char *conname, Oid pkindOid, Oid constraintOid) { - HeapScanDesc scan; HeapTuple tuple; + TableScanDesc scan; Trigger trig; Snapshot snapshot; @@ -8938,7 +8938,7 @@ validateForeignKeyConstraint(char *conname, * ereport(ERROR) and that's that. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = table_beginscan(rel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -8967,7 +8967,7 @@ validateForeignKeyConstraint(char *conname, RI_FKey_check_ins(fcinfo); } - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); } @@ -11593,7 +11593,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) ListCell *l; ScanKeyData key[1]; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; Oid orig_tablespaceoid; Oid new_tablespaceoid; @@ -11658,7 +11658,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) ObjectIdGetDatum(orig_tablespaceoid)); rel = table_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 1, key); + scan = table_beginscan_catalog(rel, 1, key); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple); @@ -11717,7 +11717,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) relations = lappend_oid(relations, relOid); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, AccessShareLock); if (relations == NIL) diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 4afd178e97..868f8ec51f 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -53,6 +53,7 @@ #include "access/heapam.h" #include "access/reloptions.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -405,7 +406,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) { #ifdef HAVE_SYMLINK char *tablespacename = stmt->tablespacename; - HeapScanDesc scandesc; + TableScanDesc scandesc; Relation rel; HeapTuple tuple; Form_pg_tablespace spcform; @@ -421,7 +422,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); + scandesc = table_beginscan_catalog(rel, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tuple)) @@ -439,7 +440,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) (errmsg("tablespace \"%s\" does not exist, skipping", tablespacename))); /* XXX I assume I need one or both of these next two calls */ - heap_endscan(scandesc); + table_endscan(scandesc); table_close(rel, NoLock); } return; @@ -467,7 +468,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) */ CatalogTupleDelete(rel, &tuple->t_self); - heap_endscan(scandesc); + table_endscan(scandesc); /* * Remove any comments or security labels on this tablespace. 
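
The tablespace.c hunks below follow the same mechanical conversion as the catalog scans above: table_beginscan_catalog() replaces heap_beginscan_catalog(), while heap_getnext() stays, which is safe because system catalogs always use the heap AM. For reference, the single-tuple lookup shape used in those hunks (rel and name assumed to be set up by the caller):

    ScanKeyData key;
    TableScanDesc scan;
    HeapTuple   tup;

    ScanKeyInit(&key, Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(name));

    scan = table_beginscan_catalog(rel, 1, &key);
    tup = heap_getnext(scan, ForwardScanDirection);

    if (HeapTupleIsValid(tup))
    {
        /* use (or delete) the tuple while the scan still holds its pin */
    }

    table_endscan(scan);
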
@@ -918,7 +919,7 @@ RenameTableSpace(const char *oldname, const char *newname) Oid tspId; Relation rel; ScanKeyData entry[1]; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; HeapTuple newtuple; Form_pg_tablespace newform; @@ -931,7 +932,7 @@ RenameTableSpace(const char *oldname, const char *newname) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(oldname)); - scan = heap_beginscan_catalog(rel, 1, entry); + scan = table_beginscan_catalog(rel, 1, entry); tup = heap_getnext(scan, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, @@ -943,7 +944,7 @@ RenameTableSpace(const char *oldname, const char *newname) newform = (Form_pg_tablespace) GETSTRUCT(newtuple); tspId = newform->oid; - heap_endscan(scan); + table_endscan(scan); /* Must be owner */ if (!pg_tablespace_ownercheck(tspId, GetUserId())) @@ -961,7 +962,7 @@ RenameTableSpace(const char *oldname, const char *newname) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(newname)); - scan = heap_beginscan_catalog(rel, 1, entry); + scan = table_beginscan_catalog(rel, 1, entry); tup = heap_getnext(scan, ForwardScanDirection); if (HeapTupleIsValid(tup)) ereport(ERROR, @@ -969,7 +970,7 @@ RenameTableSpace(const char *oldname, const char *newname) errmsg("tablespace \"%s\" already exists", newname))); - heap_endscan(scan); + table_endscan(scan); /* OK, update the entry */ namestrcpy(&(newform->spcname), newname); @@ -993,7 +994,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) { Relation rel; ScanKeyData entry[1]; - HeapScanDesc scandesc; + TableScanDesc scandesc; HeapTuple tup; Oid tablespaceoid; Datum datum; @@ -1011,7 +1012,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(stmt->tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); + scandesc = table_beginscan_catalog(rel, 1, entry); tup = heap_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, @@ -1053,7 +1054,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) heap_freetuple(newtuple); /* Conclude heap scan. 
*/ - heap_endscan(scandesc); + table_endscan(scandesc); table_close(rel, NoLock); return tablespaceoid; @@ -1387,7 +1388,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) { Oid result; Relation rel; - HeapScanDesc scandesc; + TableScanDesc scandesc; HeapTuple tuple; ScanKeyData entry[1]; @@ -1402,7 +1403,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); + scandesc = table_beginscan_catalog(rel, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ @@ -1411,7 +1412,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) else result = InvalidOid; - heap_endscan(scandesc); + table_endscan(scandesc); table_close(rel, AccessShareLock); if (!OidIsValid(result) && !missing_ok) @@ -1433,7 +1434,7 @@ get_tablespace_name(Oid spc_oid) { char *result; Relation rel; - HeapScanDesc scandesc; + TableScanDesc scandesc; HeapTuple tuple; ScanKeyData entry[1]; @@ -1448,7 +1449,7 @@ get_tablespace_name(Oid spc_oid) Anum_pg_tablespace_oid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(spc_oid)); - scandesc = heap_beginscan_catalog(rel, 1, entry); + scandesc = table_beginscan_catalog(rel, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ @@ -1457,7 +1458,7 @@ get_tablespace_name(Oid spc_oid) else result = NULL; - heap_endscan(scandesc); + table_endscan(scandesc); table_close(rel, AccessShareLock); return result; diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 448926db12..e0c17d10b9 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -34,6 +34,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/binary_upgrade.h" #include "catalog/catalog.h" @@ -2362,13 +2363,13 @@ AlterDomainNotNull(List *names, bool notNull) RelToCheck *rtc = (RelToCheck *) lfirst(rt); Relation testrel = rtc->rel; TupleDesc tupdesc = RelationGetDescr(testrel); - HeapScanDesc scan; HeapTuple tuple; + TableScanDesc scan; Snapshot snapshot; /* Scan all tuples in this relation */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(testrel, snapshot, 0, NULL); + scan = table_beginscan(testrel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { int i; @@ -2398,7 +2399,7 @@ AlterDomainNotNull(List *names, bool notNull) } } } - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); /* Close each rel after processing, but keep lock */ @@ -2776,13 +2777,13 @@ validateDomainConstraint(Oid domainoid, char *ccbin) RelToCheck *rtc = (RelToCheck *) lfirst(rt); Relation testrel = rtc->rel; TupleDesc tupdesc = RelationGetDescr(testrel); - HeapScanDesc scan; HeapTuple tuple; + TableScanDesc scan; Snapshot snapshot; /* Scan all tuples in this relation */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(testrel, snapshot, 0, NULL); + scan = table_beginscan(testrel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { int i; @@ -2826,7 +2827,7 @@ validateDomainConstraint(Oid domainoid, char *ccbin) ResetExprContext(econtext); } - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); /* 
Hold relation lock till commit (XXX bad for concurrency) */ diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index e91df2171e..3763a8c39e 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -28,6 +28,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/multixact.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/namespace.h" @@ -745,12 +746,12 @@ get_all_vacuum_rels(int options) { List *vacrels = NIL; Relation pgclass; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; pgclass = table_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(pgclass, 0, NULL); + scan = table_beginscan_catalog(pgclass, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -784,7 +785,7 @@ get_all_vacuum_rels(int options) MemoryContextSwitchTo(oldcontext); } - heap_endscan(scan); + table_endscan(scan); table_close(pgclass, AccessShareLock); return vacrels; @@ -1381,7 +1382,7 @@ vac_truncate_clog(TransactionId frozenXID, { TransactionId nextXID = ReadNewTransactionId(); Relation relation; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; Oid oldestxid_datoid; Oid minmulti_datoid; @@ -1412,7 +1413,7 @@ vac_truncate_clog(TransactionId frozenXID, */ relation = table_open(DatabaseRelationId, AccessShareLock); - scan = heap_beginscan_catalog(relation, 0, NULL); + scan = table_beginscan_catalog(relation, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -1451,7 +1452,7 @@ vac_truncate_clog(TransactionId frozenXID, } } - heap_endscan(scan); + table_endscan(scan); table_close(relation, AccessShareLock); diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index fe99096efc..fdb2c36246 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -204,7 +204,7 @@ execCurrentOf(CurrentOfExpr *cexpr, */ IndexScanDesc scan = ((IndexOnlyScanState *) scanstate)->ioss_ScanDesc; - *current_tid = scan->xs_ctup.t_self; + *current_tid = scan->xs_heaptid; } else { diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index fd0520105d..3d8ee38771 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -651,7 +651,6 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, Oid *index_collations = index->rd_indcollation; int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index); IndexScanDesc index_scan; - HeapTuple tup; ScanKeyData scankeys[INDEX_MAX_KEYS]; SnapshotData DirtySnapshot; int i; @@ -708,7 +707,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, * scantuple. */ existing_slot = MakeSingleTupleTableSlot(RelationGetDescr(heap), - &TTSOpsHeapTuple); + &TTSOpsBufferHeapTuple); econtext = GetPerTupleExprContext(estate); save_scantuple = econtext->ecxt_scantuple; @@ -724,11 +723,9 @@ retry: index_scan = index_beginscan(heap, index, &DirtySnapshot, indnkeyatts, 0); index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0); - while ((tup = index_getnext(index_scan, - ForwardScanDirection)) != NULL) + while (index_getnext_slot(index_scan, ForwardScanDirection, existing_slot)) { TransactionId xwait; - ItemPointerData ctid_wait; XLTW_Oper reason_wait; Datum existing_values[INDEX_MAX_KEYS]; bool existing_isnull[INDEX_MAX_KEYS]; @@ -739,7 +736,7 @@ retry: * Ignore the entry for the tuple we're trying to check. 
*/ if (ItemPointerIsValid(tupleid) && - ItemPointerEquals(tupleid, &tup->t_self)) + ItemPointerEquals(tupleid, &existing_slot->tts_tid)) { if (found_self) /* should not happen */ elog(ERROR, "found self tuple multiple times in index \"%s\"", @@ -752,7 +749,6 @@ retry: * Extract the index column values and isnull flags from the existing * tuple. */ - ExecStoreHeapTuple(tup, existing_slot, false); FormIndexDatum(indexInfo, existing_slot, estate, existing_values, existing_isnull); @@ -787,7 +783,6 @@ retry: DirtySnapshot.speculativeToken && TransactionIdPrecedes(GetCurrentTransactionId(), xwait)))) { - ctid_wait = tup->t_data->t_ctid; reason_wait = indexInfo->ii_ExclusionOps ? XLTW_RecheckExclusionConstr : XLTW_InsertIndex; index_endscan(index_scan); @@ -795,7 +790,8 @@ retry: SpeculativeInsertionWait(DirtySnapshot.xmin, DirtySnapshot.speculativeToken); else - XactLockTableWait(xwait, heap, &ctid_wait, reason_wait); + XactLockTableWait(xwait, heap, + &existing_slot->tts_tid, reason_wait); goto retry; } @@ -807,7 +803,7 @@ retry: { conflict = true; if (conflictTid) - *conflictTid = tup->t_self; + *conflictTid = existing_slot->tts_tid; break; } diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 5c5aa96e7f..12459b90be 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -17,6 +17,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "commands/trigger.h" @@ -118,7 +119,6 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, TupleTableSlot *searchslot, TupleTableSlot *outslot) { - HeapTuple scantuple; ScanKeyData skey[INDEX_MAX_KEYS]; IndexScanDesc scan; SnapshotData snap; @@ -144,10 +144,9 @@ retry: index_rescan(scan, skey, IndexRelationGetNumberOfKeyAttributes(idxrel), NULL, 0); /* Try to find the tuple */ - if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL) + if (index_getnext_slot(scan, ForwardScanDirection, outslot)) { found = true; - ExecStoreHeapTuple(scantuple, outslot, false); ExecMaterializeSlot(outslot); xwait = TransactionIdIsValid(snap.xmin) ? @@ -285,7 +284,7 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot) { HeapTuple scantuple; - HeapScanDesc scan; + TableScanDesc scan; SnapshotData snap; TransactionId xwait; bool found; @@ -295,21 +294,23 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, /* Start a heap scan. */ InitDirtySnapshot(snap); - scan = heap_beginscan(rel, &snap, 0, NULL); + scan = table_beginscan(rel, &snap, 0, NULL); retry: found = false; - heap_rescan(scan, NULL); + table_rescan(scan, NULL); /* Try to find the tuple */ while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { + HeapScanDesc hscan = (HeapScanDesc) scan; + if (!tuple_equals_slot(desc, scantuple, searchslot)) continue; found = true; - ExecStoreHeapTuple(scantuple, outslot, false); + ExecStoreBufferHeapTuple(scantuple, outslot, hscan->rs_cbuf); ExecMaterializeSlot(outslot); xwait = TransactionIdIsValid(snap.xmin) ? 
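/*
 * [Editor's aside -- not part of the patch] A minimal sketch of the caller
 * pattern the RelationFindReplTupleSeq() hunks above convert to: the scan is
 * begun, rescanned and ended through the table AM wrappers, while
 * heap-specific state (the current buffer) is reached by casting the generic
 * TableScanDesc down to HeapScanDesc.  Assumes access/tableam.h,
 * access/heapam.h and executor/tuptable.h; names are illustrative only.
 */
static bool
example_seqscan_fetch_first(Relation rel, Snapshot snap, TupleTableSlot *outslot)
{
	TableScanDesc scan = table_beginscan(rel, snap, 0, NULL);
	HeapScanDesc hscan = (HeapScanDesc) scan;	/* AM-specific downcast */
	HeapTuple	tuple;
	bool		found = false;

	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		/* store with the pinned buffer, as the patched code now does */
		ExecStoreBufferHeapTuple(tuple, outslot, hscan->rs_cbuf);
		ExecMaterializeSlot(outslot);
		found = true;
		break;
	}
	table_endscan(scan);
	return found;
}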
@@ -375,7 +376,7 @@ retry: } } - heap_endscan(scan); + table_endscan(scan); return found; } diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 5e74585d5e..138e79b204 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -39,6 +39,7 @@ #include "access/heapam.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/visibilitymap.h" #include "executor/execdebug.h" @@ -61,7 +62,7 @@ static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, TBMIterateResult *tbmres); static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node); static inline void BitmapPrefetch(BitmapHeapScanState *node, - HeapScanDesc scan); + TableScanDesc scan); static bool BitmapShouldInitializeSharedState( ParallelBitmapHeapState *pstate); @@ -76,7 +77,8 @@ static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node) { ExprContext *econtext; - HeapScanDesc scan; + TableScanDesc scan; + HeapScanDesc hscan; TIDBitmap *tbm; TBMIterator *tbmiterator = NULL; TBMSharedIterator *shared_tbmiterator = NULL; @@ -92,6 +94,7 @@ BitmapHeapNext(BitmapHeapScanState *node) econtext = node->ss.ps.ps_ExprContext; slot = node->ss.ss_ScanTupleSlot; scan = node->ss.ss_currentScanDesc; + hscan = (HeapScanDesc) scan; tbm = node->tbm; if (pstate == NULL) tbmiterator = node->tbmiterator; @@ -219,7 +222,7 @@ BitmapHeapNext(BitmapHeapScanState *node) * least AccessShareLock on the table before performing any of the * indexscans, but let's be safe.) */ - if (tbmres->blockno >= scan->rs_nblocks) + if (tbmres->blockno >= hscan->rs_nblocks) { node->tbmres = tbmres = NULL; continue; @@ -242,14 +245,14 @@ BitmapHeapNext(BitmapHeapScanState *node) * The number of tuples on this page is put into * scan->rs_ntuples; note we don't fill scan->rs_vistuples. */ - scan->rs_ntuples = tbmres->ntuples; + hscan->rs_ntuples = tbmres->ntuples; } else { /* * Fetch the current heap page and identify candidate tuples. */ - bitgetpage(scan, tbmres); + bitgetpage(hscan, tbmres); } if (tbmres->ntuples >= 0) @@ -260,7 +263,7 @@ BitmapHeapNext(BitmapHeapScanState *node) /* * Set rs_cindex to first slot to examine */ - scan->rs_cindex = 0; + hscan->rs_cindex = 0; /* Adjust the prefetch target */ BitmapAdjustPrefetchTarget(node); @@ -270,7 +273,7 @@ BitmapHeapNext(BitmapHeapScanState *node) /* * Continuing in previously obtained page; advance rs_cindex */ - scan->rs_cindex++; + hscan->rs_cindex++; #ifdef USE_PREFETCH @@ -297,7 +300,7 @@ BitmapHeapNext(BitmapHeapScanState *node) /* * Out of range? If so, nothing more to look at on this page */ - if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples) + if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples) { node->tbmres = tbmres = NULL; continue; @@ -324,15 +327,15 @@ BitmapHeapNext(BitmapHeapScanState *node) /* * Okay to fetch the tuple. 
*/ - targoffset = scan->rs_vistuples[scan->rs_cindex]; - dp = (Page) BufferGetPage(scan->rs_cbuf); + targoffset = hscan->rs_vistuples[hscan->rs_cindex]; + dp = (Page) BufferGetPage(hscan->rs_cbuf); lp = PageGetItemId(dp, targoffset); Assert(ItemIdIsNormal(lp)); - scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); - scan->rs_ctup.t_len = ItemIdGetLength(lp); - scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; - ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); + hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); + hscan->rs_ctup.t_len = ItemIdGetLength(lp); + hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; + ItemPointerSet(&hscan->rs_ctup.t_self, tbmres->blockno, targoffset); pgstat_count_heap_fetch(scan->rs_rd); @@ -340,9 +343,9 @@ BitmapHeapNext(BitmapHeapScanState *node) * Set up the result slot to point to this tuple. Note that the * slot acquires a pin on the buffer. */ - ExecStoreBufferHeapTuple(&scan->rs_ctup, + ExecStoreBufferHeapTuple(&hscan->rs_ctup, slot, - scan->rs_cbuf); + hscan->rs_cbuf); /* * If we are using lossy info, we have to recheck the qual @@ -392,17 +395,17 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) Assert(page < scan->rs_nblocks); scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf, - scan->rs_rd, + scan->rs_scan.rs_rd, page); buffer = scan->rs_cbuf; - snapshot = scan->rs_snapshot; + snapshot = scan->rs_scan.rs_snapshot; ntup = 0; /* * Prune and repair fragmentation for the whole page, if possible. */ - heap_page_prune_opt(scan->rs_rd, buffer); + heap_page_prune_opt(scan->rs_scan.rs_rd, buffer); /* * We must hold share lock on the buffer content while examining tuple @@ -430,7 +433,7 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) HeapTupleData heapTuple; ItemPointerSet(&tid, page, offnum); - if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, + if (heap_hot_search_buffer(&tid, scan->rs_scan.rs_rd, buffer, snapshot, &heapTuple, NULL, true)) scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid); } @@ -456,15 +459,15 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) continue; loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); loctup.t_len = ItemIdGetLength(lp); - loctup.t_tableOid = scan->rs_rd->rd_id; + loctup.t_tableOid = scan->rs_scan.rs_rd->rd_id; ItemPointerSet(&loctup.t_self, page, offnum); valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); if (valid) { scan->rs_vistuples[ntup++] = offnum; - PredicateLockTuple(scan->rs_rd, &loctup, snapshot); + PredicateLockTuple(scan->rs_scan.rs_rd, &loctup, snapshot); } - CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup, + CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, &loctup, buffer, snapshot); } } @@ -598,7 +601,7 @@ BitmapAdjustPrefetchTarget(BitmapHeapScanState *node) * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target */ static inline void -BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan) +BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) { #ifdef USE_PREFETCH ParallelBitmapHeapState *pstate = node->pstate; @@ -741,7 +744,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) PlanState *outerPlan = outerPlanState(node); /* rescan to release any page pin */ - heap_rescan(node->ss.ss_currentScanDesc, NULL); + table_rescan(node->ss.ss_currentScanDesc, NULL); /* release bitmaps and buffers if any */ if (node->tbmiterator) @@ -785,7 +788,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) void 
ExecEndBitmapHeapScan(BitmapHeapScanState *node) { - HeapScanDesc scanDesc; + TableScanDesc scanDesc; /* * extract information from the node @@ -830,7 +833,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node) /* * close heap scan */ - heap_endscan(scanDesc); + table_endscan(scanDesc); } /* ---------------------------------------------------------------- @@ -953,10 +956,10 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) * Even though we aren't going to do a conventional seqscan, it is useful * to create a HeapScanDesc --- most of the fields in it are usable. */ - scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation, - estate->es_snapshot, - 0, - NULL); + scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation, + estate->es_snapshot, + 0, + NULL); /* * all done. @@ -1104,5 +1107,5 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, node->pstate = pstate; snapshot = RestoreSnapshot(pstate->phs_snapshot_data); - heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot); + table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot); } diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 26758e7703..293d48b4ba 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -119,7 +119,7 @@ IndexOnlyNext(IndexOnlyScanState *node) */ while ((tid = index_getnext_tid(scandesc, direction)) != NULL) { - HeapTuple tuple = NULL; + bool tuple_from_heap = false; CHECK_FOR_INTERRUPTS(); @@ -165,17 +165,18 @@ IndexOnlyNext(IndexOnlyScanState *node) * Rats, we have to visit the heap to check visibility. */ InstrCountTuples2(node, 1); - tuple = index_fetch_heap(scandesc); - if (tuple == NULL) + if (!index_fetch_heap(scandesc, slot)) continue; /* no visible tuple, try next index entry */ + ExecClearTuple(slot); + /* * Only MVCC snapshots are supported here, so there should be no * need to keep following the HOT chain once a visible entry has * been found. If we did want to allow that, we'd need to keep * more state to remember not to call index_getnext_tid next time. */ - if (scandesc->xs_continue_hot) + if (scandesc->xs_heap_continue) elog(ERROR, "non-MVCC snapshots are not supported in index-only scans"); /* @@ -184,13 +185,15 @@ IndexOnlyNext(IndexOnlyScanState *node) * but it's not clear whether it's a win to do so. The next index * entry might require a visit to the same heap page. */ + + tuple_from_heap = true; } /* * Fill the scan tuple slot with data from the index. This might be - * provided in either HeapTuple or IndexTuple format. Conceivably an - * index AM might fill both fields, in which case we prefer the heap - * format, since it's probably a bit cheaper to fill a slot from. + * provided in either HeapTuple or IndexTuple format. Conceivably + * an index AM might fill both fields, in which case we prefer the + * heap format, since it's probably a bit cheaper to fill a slot from. */ if (scandesc->xs_hitup) { @@ -201,7 +204,7 @@ IndexOnlyNext(IndexOnlyScanState *node) */ Assert(slot->tts_tupleDescriptor->natts == scandesc->xs_hitupdesc->natts); - ExecStoreHeapTuple(scandesc->xs_hitup, slot, false); + ExecForceStoreHeapTuple(scandesc->xs_hitup, slot); } else if (scandesc->xs_itup) StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc); @@ -244,7 +247,7 @@ IndexOnlyNext(IndexOnlyScanState *node) * anyway, then we already have the tuple-level lock and can skip the * page lock. 
*/ - if (tuple == NULL) + if (!tuple_from_heap) PredicateLockPage(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), estate->es_snapshot); @@ -523,7 +526,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) * suitable data anyway.) */ tupDesc = ExecTypeFromTL(node->indextlist); - ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc, &TTSOpsHeapTuple); + ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc, + &TTSOpsBufferHeapTuple); /* * Initialize result type and projection info. The node's targetlist will diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 337b561c24..b52f0ac1f4 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -31,6 +31,7 @@ #include "access/nbtree.h" #include "access/relscan.h" +#include "access/tableam.h" #include "catalog/pg_am.h" #include "executor/execdebug.h" #include "executor/nodeIndexscan.h" @@ -83,7 +84,6 @@ IndexNext(IndexScanState *node) ExprContext *econtext; ScanDirection direction; IndexScanDesc scandesc; - HeapTuple tuple; TupleTableSlot *slot; /* @@ -130,20 +130,10 @@ IndexNext(IndexScanState *node) /* * ok, now that we have what we need, fetch the next tuple. */ - while ((tuple = index_getnext(scandesc, direction)) != NULL) + while (index_getnext_slot(scandesc, direction, slot)) { CHECK_FOR_INTERRUPTS(); - /* - * Store the scanned tuple in the scan tuple slot of the scan state. - * Note: we pass 'false' because tuples returned by amgetnext are - * pointers onto disk pages and must not be pfree()'d. - */ - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - scandesc->xs_cbuf); /* buffer containing - * tuple */ - /* * If the index was lossy, we have to recheck the index quals using * the fetched tuple. @@ -271,8 +261,7 @@ IndexNextWithReorder(IndexScanState *node) */ next_indextuple: slot = node->ss.ss_ScanTupleSlot; - tuple = index_getnext(scandesc, ForwardScanDirection); - if (!tuple) + if (!index_getnext_slot(scandesc, ForwardScanDirection, slot)) { /* * No more tuples from the index. But we still need to drain any @@ -282,14 +271,6 @@ next_indextuple: continue; } - /* - * Store the scanned tuple in the scan tuple slot of the scan state. - */ - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - scandesc->xs_cbuf); /* buffer containing - * tuple */ - /* * If the index was lossy, we have to recheck the index quals and * ORDER BY expressions using the fetched tuple. 
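/*
 * [Editor's aside -- not part of the patch] Sketch of the consumer loop after
 * the IndexNext()/IndexNextWithReorder() changes above: index_getnext_slot()
 * fetches the visible table tuple straight into the caller's slot, so the
 * explicit ExecStoreBufferHeapTuple() call that used to follow
 * index_getnext() disappears.  Names are illustrative only.
 */
static void
example_indexscan(Relation heapRel, Relation indexRel, Snapshot snapshot,
				  TupleTableSlot *slot)
{
	IndexScanDesc scan = index_beginscan(heapRel, indexRel, snapshot, 0, 0);

	index_rescan(scan, NULL, 0, NULL, 0);
	while (index_getnext_slot(scan, ForwardScanDirection, slot))
	{
		/* slot now holds the fetched tuple; evaluate quals etc. here */
	}
	index_endscan(scan);
}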
@@ -357,6 +338,8 @@ next_indextuple: topmost->orderbynulls, node) > 0)) { + HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + /* Put this tuple to the queue */ reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls); continue; diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index da4a65fd30..825fc2e257 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -17,6 +17,7 @@ #include "access/hash.h" #include "access/heapam.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/tsmapi.h" #include "executor/executor.h" #include "executor/nodeSamplescan.h" @@ -48,6 +49,7 @@ SampleNext(SampleScanState *node) { HeapTuple tuple; TupleTableSlot *slot; + HeapScanDesc hscan; /* * if this is first call within a scan, initialize @@ -61,11 +63,12 @@ SampleNext(SampleScanState *node) tuple = tablesample_getnext(node); slot = node->ss.ss_ScanTupleSlot; + hscan = (HeapScanDesc) node->ss.ss_currentScanDesc; if (tuple) ExecStoreBufferHeapTuple(tuple, /* tuple to store */ slot, /* slot to store in */ - node->ss.ss_currentScanDesc->rs_cbuf); /* tuple's buffer */ + hscan->rs_cbuf); /* tuple's buffer */ else ExecClearTuple(slot); @@ -219,7 +222,7 @@ ExecEndSampleScan(SampleScanState *node) * close heap scan */ if (node->ss.ss_currentScanDesc) - heap_endscan(node->ss.ss_currentScanDesc); + table_endscan(node->ss.ss_currentScanDesc); } /* ---------------------------------------------------------------- @@ -319,19 +322,19 @@ tablesample_init(SampleScanState *scanstate) if (scanstate->ss.ss_currentScanDesc == NULL) { scanstate->ss.ss_currentScanDesc = - heap_beginscan_sampling(scanstate->ss.ss_currentRelation, - scanstate->ss.ps.state->es_snapshot, - 0, NULL, - scanstate->use_bulkread, - allow_sync, - scanstate->use_pagemode); + table_beginscan_sampling(scanstate->ss.ss_currentRelation, + scanstate->ss.ps.state->es_snapshot, + 0, NULL, + scanstate->use_bulkread, + allow_sync, + scanstate->use_pagemode); } else { - heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL, - scanstate->use_bulkread, - allow_sync, - scanstate->use_pagemode); + table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL, + scanstate->use_bulkread, + allow_sync, + scanstate->use_pagemode); } pfree(params); @@ -350,8 +353,9 @@ static HeapTuple tablesample_getnext(SampleScanState *scanstate) { TsmRoutine *tsm = scanstate->tsmroutine; - HeapScanDesc scan = scanstate->ss.ss_currentScanDesc; - HeapTuple tuple = &(scan->rs_ctup); + TableScanDesc scan = scanstate->ss.ss_currentScanDesc; + HeapScanDesc hscan = (HeapScanDesc) scan; + HeapTuple tuple = &(hscan->rs_ctup); Snapshot snapshot = scan->rs_snapshot; bool pagemode = scan->rs_pageatatime; BlockNumber blockno; @@ -359,14 +363,14 @@ tablesample_getnext(SampleScanState *scanstate) bool all_visible; OffsetNumber maxoffset; - if (!scan->rs_inited) + if (!hscan->rs_inited) { /* * return null immediately if relation is empty */ - if (scan->rs_nblocks == 0) + if (hscan->rs_nblocks == 0) { - Assert(!BufferIsValid(scan->rs_cbuf)); + Assert(!BufferIsValid(hscan->rs_cbuf)); tuple->t_data = NULL; return NULL; } @@ -380,15 +384,15 @@ tablesample_getnext(SampleScanState *scanstate) } } else - blockno = scan->rs_startblock; - Assert(blockno < scan->rs_nblocks); + blockno = hscan->rs_startblock; + Assert(blockno < hscan->rs_nblocks); heapgetpage(scan, blockno); - scan->rs_inited = true; + hscan->rs_inited = true; } else { /* continue from previously returned page/tuple 
*/ - blockno = scan->rs_cblock; /* current page */ + blockno = hscan->rs_cblock; /* current page */ } /* @@ -396,9 +400,9 @@ tablesample_getnext(SampleScanState *scanstate) * visibility checks. */ if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - page = (Page) BufferGetPage(scan->rs_cbuf); + page = (Page) BufferGetPage(hscan->rs_cbuf); all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; maxoffset = PageGetMaxOffsetNumber(page); @@ -431,18 +435,18 @@ tablesample_getnext(SampleScanState *scanstate) if (all_visible) visible = true; else - visible = SampleTupleVisible(tuple, tupoffset, scan); + visible = SampleTupleVisible(tuple, tupoffset, hscan); /* in pagemode, heapgetpage did this for us */ if (!pagemode) CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, - scan->rs_cbuf, snapshot); + hscan->rs_cbuf, snapshot); if (visible) { /* Found visible tuple, return it. */ if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); break; } else @@ -457,7 +461,7 @@ tablesample_getnext(SampleScanState *scanstate) * it's time to move to the next. */ if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); if (tsm->NextSampleBlock) { @@ -469,7 +473,7 @@ tablesample_getnext(SampleScanState *scanstate) { /* Without NextSampleBlock, just do a plain forward seqscan. */ blockno++; - if (blockno >= scan->rs_nblocks) + if (blockno >= hscan->rs_nblocks) blockno = 0; /* @@ -485,7 +489,7 @@ tablesample_getnext(SampleScanState *scanstate) if (scan->rs_syncscan) ss_report_location(scan->rs_rd, blockno); - finished = (blockno == scan->rs_startblock); + finished = (blockno == hscan->rs_startblock); } /* @@ -493,23 +497,23 @@ tablesample_getnext(SampleScanState *scanstate) */ if (finished) { - if (BufferIsValid(scan->rs_cbuf)) - ReleaseBuffer(scan->rs_cbuf); - scan->rs_cbuf = InvalidBuffer; - scan->rs_cblock = InvalidBlockNumber; + if (BufferIsValid(hscan->rs_cbuf)) + ReleaseBuffer(hscan->rs_cbuf); + hscan->rs_cbuf = InvalidBuffer; + hscan->rs_cblock = InvalidBlockNumber; tuple->t_data = NULL; - scan->rs_inited = false; + hscan->rs_inited = false; return NULL; } - Assert(blockno < scan->rs_nblocks); + Assert(blockno < hscan->rs_nblocks); heapgetpage(scan, blockno); /* Re-establish state for new page */ if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - page = (Page) BufferGetPage(scan->rs_cbuf); + page = (Page) BufferGetPage(hscan->rs_cbuf); all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; maxoffset = PageGetMaxOffsetNumber(page); } @@ -517,7 +521,7 @@ tablesample_getnext(SampleScanState *scanstate) /* Count successfully-fetched tuples as heap fetches */ pgstat_count_heap_getnext(scan->rs_rd); - return &(scan->rs_ctup); + return &(hscan->rs_ctup); } /* @@ -526,7 +530,7 @@ tablesample_getnext(SampleScanState *scanstate) static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan) { - if (scan->rs_pageatatime) + if (scan->rs_scan.rs_pageatatime) { /* * In pageatatime mode, heapgetpage() already did visibility checks, @@ -559,7 +563,7 @@ SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan) { /* Otherwise, we have to check the tuple individually. 
*/ return HeapTupleSatisfiesVisibility(tuple, - scan->rs_snapshot, + scan->rs_scan.rs_snapshot, scan->rs_cbuf); } } diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index e5482859ef..5fd813d894 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -29,6 +29,7 @@ #include "access/heapam.h" #include "access/relscan.h" +#include "access/tableam.h" #include "executor/execdebug.h" #include "executor/nodeSeqscan.h" #include "utils/rel.h" @@ -50,7 +51,8 @@ static TupleTableSlot * SeqNext(SeqScanState *node) { HeapTuple tuple; - HeapScanDesc scandesc; + TableScanDesc scandesc; + HeapScanDesc hscandesc; EState *estate; ScanDirection direction; TupleTableSlot *slot; @@ -69,12 +71,14 @@ SeqNext(SeqScanState *node) * We reach here if the scan is not parallel, or if we're serially * executing a scan that was planned to be parallel. */ - scandesc = heap_beginscan(node->ss.ss_currentRelation, - estate->es_snapshot, - 0, NULL); + scandesc = table_beginscan(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL); node->ss.ss_currentScanDesc = scandesc; } + hscandesc = (HeapScanDesc) scandesc; + /* * get the next tuple from the table */ @@ -91,7 +95,7 @@ SeqNext(SeqScanState *node) if (tuple) ExecStoreBufferHeapTuple(tuple, /* tuple to store */ slot, /* slot to store in */ - scandesc->rs_cbuf); /* buffer associated + hscandesc->rs_cbuf); /* buffer associated * with this tuple */ else ExecClearTuple(slot); @@ -200,7 +204,7 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags) void ExecEndSeqScan(SeqScanState *node) { - HeapScanDesc scanDesc; + TableScanDesc scanDesc; /* * get information from node @@ -223,7 +227,7 @@ ExecEndSeqScan(SeqScanState *node) * close heap scan */ if (scanDesc != NULL) - heap_endscan(scanDesc); + table_endscan(scanDesc); } /* ---------------------------------------------------------------- @@ -240,13 +244,13 @@ ExecEndSeqScan(SeqScanState *node) void ExecReScanSeqScan(SeqScanState *node) { - HeapScanDesc scan; + TableScanDesc scan; scan = node->ss.ss_currentScanDesc; if (scan != NULL) - heap_rescan(scan, /* scan desc */ - NULL); /* new scan keys */ + table_rescan(scan, /* scan desc */ + NULL); /* new scan keys */ ExecScanReScan((ScanState *) node); } @@ -269,7 +273,8 @@ ExecSeqScanEstimate(SeqScanState *node, { EState *estate = node->ss.ps.state; - node->pscan_len = heap_parallelscan_estimate(estate->es_snapshot); + node->pscan_len = table_parallelscan_estimate(node->ss.ss_currentRelation, + estate->es_snapshot); shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len); shm_toc_estimate_keys(&pcxt->estimator, 1); } @@ -285,15 +290,15 @@ ExecSeqScanInitializeDSM(SeqScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; - ParallelHeapScanDesc pscan; + ParallelTableScanDesc pscan; pscan = shm_toc_allocate(pcxt->toc, node->pscan_len); - heap_parallelscan_initialize(pscan, - node->ss.ss_currentRelation, - estate->es_snapshot); + table_parallelscan_initialize(node->ss.ss_currentRelation, + pscan, + estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - heap_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan); } /* ---------------------------------------------------------------- @@ -306,9 +311,10 @@ void ExecSeqScanReInitializeDSM(SeqScanState *node, ParallelContext *pcxt) { - HeapScanDesc scan = node->ss.ss_currentScanDesc; + ParallelTableScanDesc 
pscan; - heap_parallelscan_reinitialize(scan->rs_parallel); + pscan = node->ss.ss_currentScanDesc->rs_parallel; + table_parallelscan_reinitialize(node->ss.ss_currentRelation, pscan); } /* ---------------------------------------------------------------- @@ -321,9 +327,9 @@ void ExecSeqScanInitializeWorker(SeqScanState *node, ParallelWorkerContext *pwcxt) { - ParallelHeapScanDesc pscan; + ParallelTableScanDesc pscan; pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - heap_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan); } diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index e71eb3793b..9a6e421bd1 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/heapam.h" +#include "access/tableam.h" #include "catalog/partition.h" #include "catalog/pg_inherits.h" #include "catalog/pg_type.h" @@ -1207,7 +1208,7 @@ check_default_partition_contents(Relation parent, Relation default_rel, Snapshot snapshot; TupleDesc tupdesc; ExprContext *econtext; - HeapScanDesc scan; + TableScanDesc scan; MemoryContext oldCxt; TupleTableSlot *tupslot; @@ -1266,8 +1267,8 @@ check_default_partition_contents(Relation parent, Relation default_rel, econtext = GetPerTupleExprContext(estate); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(part_rel, snapshot, 0, NULL); tupslot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple); + scan = table_beginscan(part_rel, snapshot, 0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -1291,7 +1292,7 @@ check_default_partition_contents(Relation parent, Relation default_rel, } MemoryContextSwitchTo(oldCxt); - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(tupslot); FreeExecutorState(estate); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 347f91e937..8ed306d5d9 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -69,6 +69,7 @@ #include "access/htup_details.h" #include "access/multixact.h" #include "access/reloptions.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/dependency.h" @@ -1865,7 +1866,7 @@ get_database_list(void) { List *dblist = NIL; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; MemoryContext resultcxt; @@ -1883,7 +1884,7 @@ get_database_list(void) (void) GetTransactionSnapshot(); rel = table_open(DatabaseRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) { @@ -1912,7 +1913,7 @@ get_database_list(void) MemoryContextSwitchTo(oldcxt); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, AccessShareLock); CommitTransactionCommand(); @@ -1931,7 +1932,7 @@ do_autovacuum(void) { Relation classRel; HeapTuple tuple; - HeapScanDesc relScan; + TableScanDesc relScan; Form_pg_database dbForm; List *table_oids = NIL; List *orphan_oids = NIL; @@ -2043,7 +2044,7 @@ do_autovacuum(void) * wide tables there might be proportionally much more activity in the * TOAST table than in its parent. 
*/ - relScan = heap_beginscan_catalog(classRel, 0, NULL); + relScan = table_beginscan_catalog(classRel, 0, NULL); /* * On the first pass, we collect main tables to vacuum, and also the main @@ -2132,7 +2133,7 @@ do_autovacuum(void) } } - heap_endscan(relScan); + table_endscan(relScan); /* second pass: check TOAST tables */ ScanKeyInit(&key, @@ -2140,7 +2141,7 @@ do_autovacuum(void) BTEqualStrategyNumber, F_CHAREQ, CharGetDatum(RELKIND_TOASTVALUE)); - relScan = heap_beginscan_catalog(classRel, 1, &key); + relScan = table_beginscan_catalog(classRel, 1, &key); while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) { Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); @@ -2187,7 +2188,7 @@ do_autovacuum(void) table_oids = lappend_oid(table_oids, relid); } - heap_endscan(relScan); + table_endscan(relScan); table_close(classRel, AccessShareLock); /* diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 81c6499251..b6ac6e1a53 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -36,6 +36,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/twophase_rmgr.h" #include "access/xact.h" @@ -1205,7 +1206,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid) HTAB *htab; HASHCTL hash_ctl; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; Snapshot snapshot; @@ -1220,7 +1221,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid) rel = table_open(catalogid, AccessShareLock); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = table_beginscan(rel, snapshot, 0, NULL); while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid thisoid; @@ -1233,7 +1234,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid) (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL); } - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); table_close(rel, AccessShareLock); diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 55b91b5e12..186057bd93 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -24,6 +24,7 @@ #include "access/heapam.h" #include "access/htup.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/pg_subscription.h" @@ -118,7 +119,7 @@ get_subscription_list(void) { List *res = NIL; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; MemoryContext resultcxt; @@ -136,7 +137,7 @@ get_subscription_list(void) (void) GetTransactionSnapshot(); rel = table_open(SubscriptionRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) { @@ -164,7 +165,7 @@ get_subscription_list(void) MemoryContextSwitchTo(oldcxt); } - heap_endscan(scan); + table_endscan(scan); table_close(rel, AccessShareLock); CommitTransactionCommand(); diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index a5e5007e81..e96fb2666b 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -701,7 +701,7 @@ apply_handle_update(StringInfo s) &TTSOpsHeapTuple); localslot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel->localrel), - 
&TTSOpsHeapTuple); + &TTSOpsBufferHeapTuple); EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1); PushActiveSnapshot(GetTransactionSnapshot()); @@ -821,7 +821,7 @@ apply_handle_delete(StringInfo s) &TTSOpsVirtual); localslot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel->localrel), - &TTSOpsHeapTuple); + &TTSOpsBufferHeapTuple); EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1); PushActiveSnapshot(GetTransactionSnapshot()); diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index 7ad470d34a..a83f4cd26c 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -17,6 +17,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/multixact.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -423,7 +424,7 @@ DefineQueryRewrite(const char *rulename, if (event_relation->rd_rel->relkind != RELKIND_VIEW && event_relation->rd_rel->relkind != RELKIND_MATVIEW) { - HeapScanDesc scanDesc; + TableScanDesc scanDesc; Snapshot snapshot; if (event_relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) @@ -439,13 +440,13 @@ DefineQueryRewrite(const char *rulename, RelationGetRelationName(event_relation)))); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scanDesc = heap_beginscan(event_relation, snapshot, 0, NULL); + scanDesc = table_beginscan(event_relation, snapshot, 0, NULL); if (heap_getnext(scanDesc, ForwardScanDirection) != NULL) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("could not convert table \"%s\" to a view because it is not empty", RelationGetRelationName(event_relation)))); - heap_endscan(scanDesc); + table_endscan(scanDesc); UnregisterSnapshot(snapshot); if (event_relation->rd_rel->relhastriggers) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index e6837869cf..2fe6634b96 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -104,6 +104,7 @@ #include "access/brin.h" #include "access/gin.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/table.h" #include "catalog/index.h" @@ -5099,7 +5100,6 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, bool typByVal; ScanKeyData scankeys[1]; IndexScanDesc index_scan; - HeapTuple tup; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; SnapshotData SnapshotNonVacuumable; @@ -5123,7 +5123,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, /* some other stuff */ slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel), - &TTSOpsHeapTuple); + &TTSOpsBufferHeapTuple); econtext->ecxt_scantuple = slot; get_typlenbyval(vardata->atttype, &typLen, &typByVal); InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin); @@ -5175,11 +5175,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, index_rescan(index_scan, scankeys, 1, NULL, 0); /* Fetch first tuple in sortop's direction */ - if ((tup = index_getnext(index_scan, - indexscandir)) != NULL) + if (index_getnext_slot(index_scan, indexscandir, slot)) { - /* Extract the index column values from the heap tuple */ - ExecStoreHeapTuple(tup, slot, false); + /* Extract the index column values from the slot */ FormIndexDatum(indexInfo, slot, estate, values, isnull); @@ -5208,11 +5206,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, index_rescan(index_scan, scankeys, 1, NULL, 0); 
/* Fetch first tuple in reverse direction */ - if ((tup = index_getnext(index_scan, - -indexscandir)) != NULL) + if (index_getnext_slot(index_scan, -indexscandir, slot)) { - /* Extract the index column values from the heap tuple */ - ExecStoreHeapTuple(tup, slot, false); + /* Extract the index column values from the slot */ FormIndexDatum(indexInfo, slot, estate, values, isnull); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index a5ee209f91..0b51a6f148 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -23,6 +23,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/session.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -1245,15 +1246,15 @@ static bool ThereIsAtLeastOneRole(void) { Relation pg_authid_rel; - HeapScanDesc scan; + TableScanDesc scan; bool result; pg_authid_rel = table_open(AuthIdRelationId, AccessShareLock); - scan = heap_beginscan_catalog(pg_authid_rel, 0, NULL); + scan = table_beginscan_catalog(pg_authid_rel, 0, NULL); result = (heap_getnext(scan, ForwardScanDirection) != NULL); - heap_endscan(scan); + table_endscan(scan); table_close(pg_authid_rel, AccessShareLock); return result; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index c4aba39496..1936195c53 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -159,8 +159,9 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel, ParallelIndexScanDesc pscan); extern ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction); -extern HeapTuple index_fetch_heap(IndexScanDesc scan); -extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction); +struct TupleTableSlot; +extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot); +extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, struct TupleTableSlot *slot); extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap); extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info, diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index ab0879138f..0cfdf6c009 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -15,6 +15,7 @@ #define HEAPAM_H #include "access/relation.h" /* for backward compatibility */ +#include "access/relscan.h" #include "access/sdir.h" #include "access/skey.h" #include "access/table.h" /* for backward compatibility */ @@ -52,6 +53,7 @@ typedef struct BulkInsertStateData *BulkInsertState; * the tuple); otherwise cmax is zero. (We make this restriction because * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other * transactions.) + * Descriptor for heap table scans. 
*/ typedef struct HeapUpdateFailureData { @@ -60,6 +62,47 @@ typedef struct HeapUpdateFailureData CommandId cmax; } HeapUpdateFailureData; + +typedef struct HeapScanDescData +{ + /* scan parameters */ + TableScanDescData rs_scan; /* embedded base-class scan state */ + + /* state set up at initscan time */ + BlockNumber rs_nblocks; /* total number of blocks in rel */ + BlockNumber rs_startblock; /* block # to start at */ + BlockNumber rs_numblocks; /* max number of blocks to scan */ + /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */ + + /* scan current state */ + bool rs_inited; /* false = scan not init'd yet */ + BlockNumber rs_cblock; /* current block # in scan, if any */ + Buffer rs_cbuf; /* current buffer in scan, if any */ + /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ + + BufferAccessStrategy rs_strategy; /* access strategy for reads */ + + HeapTupleData rs_ctup; /* current tuple in scan, if any */ + + /* these fields only used in page-at-a-time mode and for bitmap scans */ + int rs_cindex; /* current tuple's index in vistuples */ + int rs_ntuples; /* number of visible tuples on page */ + OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ +} HeapScanDescData; +typedef struct HeapScanDescData *HeapScanDesc; + +/* + * Descriptor for fetches from heap via an index. + */ +typedef struct IndexFetchHeapData +{ + IndexFetchTableData xs_base; + + Buffer xs_cbuf; /* current heap buffer in scan, if any */ + /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ +} IndexFetchHeapData; + /* Result codes for HeapTupleSatisfiesVacuum */ typedef enum { @@ -79,42 +122,33 @@ typedef enum */ -/* struct definitions appear in relscan.h */ -typedef struct HeapScanDescData *HeapScanDesc; -typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc; - /* * HeapScanIsValid * True iff the heap scan is valid. 
*/ #define HeapScanIsValid(scan) PointerIsValid(scan) -extern HeapScanDesc heap_beginscan(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key); -extern HeapScanDesc heap_beginscan_catalog(Relation relation, int nkeys, - ScanKey key); -extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync); -extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key); -extern HeapScanDesc heap_beginscan_sampling(Relation relation, - Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode); -extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, +extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap); +extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk, BlockNumber endBlk); -extern void heapgetpage(HeapScanDesc scan, BlockNumber page); -extern void heap_rescan(HeapScanDesc scan, ScanKey key); -extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key, +extern void heapgetpage(TableScanDesc scan, BlockNumber page); +extern void heap_rescan(TableScanDesc scan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode); +extern void heap_rescan_set_params(TableScanDesc scan, ScanKey key, bool allow_strat, bool allow_sync, bool allow_pagemode); -extern void heap_endscan(HeapScanDesc scan); -extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); - -extern Size heap_parallelscan_estimate(Snapshot snapshot); -extern void heap_parallelscan_initialize(ParallelHeapScanDesc target, - Relation relation, Snapshot snapshot); -extern void heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan); -extern HeapScanDesc heap_beginscan_parallel(Relation, ParallelHeapScanDesc); +extern void heap_endscan(TableScanDesc scan); +extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction); +extern struct TupleTableSlot *heap_getnextslot(TableScanDesc sscan, + ScanDirection direction, + struct TupleTableSlot *slot); extern bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf, @@ -164,7 +198,7 @@ extern void simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup); extern void heap_sync(Relation relation); -extern void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot); +extern void heap_update_snapshot(TableScanDesc scan, Snapshot snapshot); /* in heap/pruneheap.c */ extern void heap_page_prune_opt(Relation relation, Buffer buffer); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index b78ef2f47d..a0ec135623 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -21,29 +21,14 @@ #include "storage/spin.h" #include "utils/relcache.h" + +struct ParallelTableScanDescData; + /* - * Shared state for parallel heap scan. - * - * Each backend participating in a parallel heap scan has its own - * HeapScanDesc in backend-private memory, and those objects all contain - * a pointer to this structure. The information here must be sufficient - * to properly initialize each new HeapScanDesc as workers join the scan, - * and it must act as a font of block numbers for those workers. + * Generic descriptor for table scans. 
This is the base-class for table scans, + * which needs to be embedded in the scans of individual AMs. */ -typedef struct ParallelHeapScanDescData -{ - Oid phs_relid; /* OID of relation to scan */ - bool phs_syncscan; /* report location to syncscan logic? */ - BlockNumber phs_nblocks; /* # blocks in relation at start of scan */ - slock_t phs_mutex; /* mutual exclusion for setting startblock */ - BlockNumber phs_startblock; /* starting block number */ - pg_atomic_uint64 phs_nallocated; /* number of blocks allocated to - * workers so far. */ - bool phs_snapshot_any; /* SnapshotAny, not phs_snapshot_data? */ - char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; -} ParallelHeapScanDescData; - -typedef struct HeapScanDescData +typedef struct TableScanDescData { /* scan parameters */ Relation rs_rd; /* heap relation descriptor */ @@ -56,28 +41,55 @@ typedef struct HeapScanDescData bool rs_allow_strat; /* allow or disallow use of access strategy */ bool rs_allow_sync; /* allow or disallow use of syncscan */ bool rs_temp_snap; /* unregister snapshot at scan end? */ - - /* state set up at initscan time */ - BlockNumber rs_nblocks; /* total number of blocks in rel */ - BlockNumber rs_startblock; /* block # to start at */ - BlockNumber rs_numblocks; /* max number of blocks to scan */ - /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */ - BufferAccessStrategy rs_strategy; /* access strategy for reads */ bool rs_syncscan; /* report location to syncscan logic? */ - /* scan current state */ - bool rs_inited; /* false = scan not init'd yet */ - HeapTupleData rs_ctup; /* current tuple in scan, if any */ - BlockNumber rs_cblock; /* current block # in scan, if any */ - Buffer rs_cbuf; /* current buffer in scan, if any */ - /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ - struct ParallelHeapScanDescData *rs_parallel; /* parallel scan information */ + struct ParallelTableScanDescData *rs_parallel; /* parallel scan information */ - /* these fields only used in page-at-a-time mode and for bitmap scans */ - int rs_cindex; /* current tuple's index in vistuples */ - int rs_ntuples; /* number of visible tuples on page */ - OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ -} HeapScanDescData; +} TableScanDescData; +typedef struct TableScanDescData *TableScanDesc; + +/* + * Shared state for parallel table scan. + * + * Each backend participating in a parallel table scan has its own + * TableScanDesc in backend-private memory, and those objects all contain a + * pointer to this structure. The information here must be sufficient to + * properly initialize each new TableScanDesc as workers join the scan, and it + * must act as a font of block numbers for those workers. + */ +typedef struct ParallelTableScanDescData +{ + Oid phs_relid; /* OID of relation to scan */ + bool phs_syncscan; /* report location to syncscan logic? */ + bool phs_snapshot_any; /* SnapshotAny, not phs_snapshot_data? */ + Size phs_snapshot_off; /* data for snapshot */ +} ParallelTableScanDescData; +typedef struct ParallelTableScanDescData *ParallelTableScanDesc; + +/* + * Shared state for parallel table scans, for block oriented storage. 
+ */ +typedef struct ParallelBlockTableScanDescData +{ + ParallelTableScanDescData base; + + BlockNumber phs_nblocks; /* # blocks in relation at start of scan */ + slock_t phs_mutex; /* mutual exclusion for setting startblock */ + BlockNumber phs_startblock; /* starting block number */ + pg_atomic_uint64 phs_nallocated; /* number of blocks allocated to + * workers so far. */ +} ParallelBlockTableScanDescData; +typedef struct ParallelBlockTableScanDescData *ParallelBlockTableScanDesc; + +/* + * Base class for fetches from a table via an index. This is the base-class + * for such scans, which needs to be embedded in the respective struct for + * individual AMs. + */ +typedef struct IndexFetchTableData +{ + Relation rel; +} IndexFetchTableData; /* * We use the same IndexScanDescData structure for both amgettuple-based @@ -117,10 +129,10 @@ typedef struct IndexScanDescData HeapTuple xs_hitup; /* index data returned by AM, as HeapTuple */ struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */ - /* xs_ctup/xs_cbuf/xs_recheck are valid after a successful index_getnext */ - HeapTupleData xs_ctup; /* current heap tuple, if any */ - Buffer xs_cbuf; /* current heap buffer in scan, if any */ - /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ + ItemPointerData xs_heaptid; /* result */ + bool xs_heap_continue; /* T if must keep walking, potential further results */ + IndexFetchTableData *xs_heapfetch; + bool xs_recheck; /* T means scan keys must be rechecked */ /* @@ -134,9 +146,6 @@ typedef struct IndexScanDescData bool *xs_orderbynulls; bool xs_recheckorderby; - /* state data for traversing HOT chains in index_getnext */ - bool xs_continue_hot; /* T if must keep walking HOT chain */ - /* parallel index scan information, in shared memory */ struct ParallelIndexScanDescData *parallel_scan; } IndexScanDescData; @@ -150,14 +159,17 @@ typedef struct ParallelIndexScanDescData char ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; } ParallelIndexScanDescData; -/* Struct for heap-or-index scans of system tables */ +struct TupleTableSlot; + +/* Struct for storage-or-index scans of system tables */ typedef struct SysScanDescData { Relation heap_rel; /* catalog being scanned */ Relation irel; /* NULL if doing heap scan */ - struct HeapScanDescData *scan; /* only valid in heap-scan case */ + struct TableScanDescData *scan; /* only valid in storage-scan case */ struct IndexScanDescData *iscan; /* only valid in index-scan case */ struct SnapshotData *snapshot; /* snapshot to unregister at end of scan */ + struct TupleTableSlot *slot; } SysScanDescData; #endif /* RELSCAN_H */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index caeb5887d5..3e422f84b0 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -14,13 +14,17 @@ #ifndef TABLEAM_H #define TABLEAM_H +#include "access/relscan.h" +#include "catalog/index.h" #include "utils/guc.h" +#include "utils/rel.h" +#include "utils/snapshot.h" #define DEFAULT_TABLE_ACCESS_METHOD "heap" extern char *default_table_access_method; - +extern bool synchronize_seqscans; /* @@ -32,13 +36,294 @@ typedef struct TableAmRoutine { /* this must be set to T_TableAmRoutine */ NodeTag type; + + /* + * Return slot implementation suitable for storing a tuple of this AM. + */ + const TupleTableSlotOps *(*slot_callbacks) (Relation rel); + + + /* ------------------------------------------------------------------------ + * Table scan callbacks. 
+ * ------------------------------------------------------------------------ + */ + + TableScanDesc (*scan_begin) (Relation rel, + Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + ParallelTableScanDesc parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap); + void (*scan_end) (TableScanDesc scan); + void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode); + void (*scan_update_snapshot) (TableScanDesc scan, Snapshot snapshot); + + + /* ------------------------------------------------------------------------ + * Parallel table scan related functions. + * ------------------------------------------------------------------------ + */ + Size (*parallelscan_estimate) (Relation rel); + Size (*parallelscan_initialize) (Relation rel, ParallelTableScanDesc parallel_scan); + void (*parallelscan_reinitialize) (Relation rel, ParallelTableScanDesc parallel_scan); + + + /* ------------------------------------------------------------------------ + * Index Scan Callbacks + * ------------------------------------------------------------------------ + */ + + struct IndexFetchTableData *(*begin_index_fetch) (Relation rel); + void (*reset_index_fetch) (struct IndexFetchTableData *data); + void (*end_index_fetch) (struct IndexFetchTableData *data); + + + /* ------------------------------------------------------------------------ + * Non-modifying operations on individual tuples. + * ------------------------------------------------------------------------ + */ + + bool (*tuple_fetch_follow) (struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead); + bool (*tuple_satisfies_snapshot) (Relation rel, + TupleTableSlot *slot, + Snapshot snapshot); + } TableAmRoutine; +/* ---------------------------------------------------------------------------- + * Slot functions. + * ---------------------------------------------------------------------------- + */ + +extern const TupleTableSlotOps *table_slot_callbacks(Relation rel); +extern TupleTableSlot *table_gimmegimmeslot(Relation rel, List **reglist); + + +/* ---------------------------------------------------------------------------- + * Table scan functions. + * ---------------------------------------------------------------------------- + */ + +/* + * table_beginscan - begin relation scan + * + * table_beginscan is the "standard" case. + * + * table_beginscan_catalog differs in setting up its own temporary snapshot. + * + * table_beginscan_strat offers an extended API that lets the caller control + * whether a nondefault buffer access strategy can be used, and whether + * syncscan can be chosen (possibly resulting in the scan not starting from + * block zero). Both of these default to true with plain table_beginscan. + * + * table_beginscan_bm is an alternative entry point for setting up a + * TableScanDesc for a bitmap heap scan. Although that scan technology is + * really quite unlike a standard seqscan, there is just enough commonality + * to make it worth using the same data structure. + * + * table_beginscan_sampling is an alternative entry point for setting up a + * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth + * using the same data structure although the behavior is rather different. 
+ * In addition to the options offered by table_beginscan_strat, this call + * also allows control of whether page-mode visibility checking is used. + * ---------------- + */ +static inline TableScanDesc +table_beginscan(Relation rel, Snapshot snapshot, + int nkeys, struct ScanKeyData *key) +{ + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, + true, true, true, false, false, false); +} + +extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys, + struct ScanKeyData *key); + +static inline TableScanDesc +table_beginscan_strat(Relation rel, Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + bool allow_strat, bool allow_sync) +{ + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, + allow_strat, allow_sync, true, + false, false, false); +} + +static inline TableScanDesc +table_beginscan_bm(Relation rel, Snapshot snapshot, + int nkeys, struct ScanKeyData *key) +{ + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, + false, false, true, true, false, false); +} + +static inline TableScanDesc +table_beginscan_sampling(Relation rel, Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, + allow_strat, allow_sync, allow_pagemode, + false, true, false); +} + +static inline TableScanDesc +table_beginscan_analyze(Relation rel) +{ + return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, + true, false, true, + false, true, false); +} /* + * end relation scan + */ +static inline void +table_endscan(TableScanDesc scan) +{ + scan->rs_rd->rd_tableam->scan_end(scan); +} + + +/* + * Restart a relation scan. + */ +static inline void +table_rescan(TableScanDesc scan, + struct ScanKeyData *key) +{ + scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false); +} + +/* + * Restart a relation scan after changing params. + * + * This call allows changing the buffer strategy, syncscan, and pagemode + * options before starting a fresh scan. Note that although the actual use of + * syncscan might change (effectively, enabling or disabling reporting), the + * previously selected startblock will be kept. + */ +static inline void +table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + scan->rs_rd->rd_tableam->scan_rescan(scan, key, true, + allow_strat, allow_sync, + allow_pagemode); +} + +/* + * Update snapshot info in heap scan descriptor. + */ +static inline void +table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot) +{ + scan->rs_rd->rd_tableam->scan_update_snapshot(scan, snapshot); +} + + +/* ---------------------------------------------------------------------------- + * Parallel table scan related functions. + * ---------------------------------------------------------------------------- + */ + +extern TableScanDesc table_beginscan_parallel(Relation rel, ParallelTableScanDesc pscan); +extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot); +extern void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc parallel_scan, Snapshot snapshot); + +static inline void +table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc parallel_scan) +{ + return rel->rd_tableam->parallelscan_reinitialize(rel, parallel_scan); +} + + +/* ---------------------------------------------------------------------------- + * Index scan related functions. 
+ * ---------------------------------------------------------------------------- + */ + +static inline IndexFetchTableData * +table_begin_index_fetch_table(Relation rel) +{ + return rel->rd_tableam->begin_index_fetch(rel); +} + +static inline void +table_reset_index_fetch_table(struct IndexFetchTableData *scan) +{ + scan->rel->rd_tableam->reset_index_fetch(scan); +} + +static inline void +table_end_index_fetch_table(struct IndexFetchTableData *scan) +{ + scan->rel->rd_tableam->end_index_fetch(scan); +} + + +/* ---------------------------------------------------------------------------- + * Non-modifying operations on individual tuples. + * ---------------------------------------------------------------------------- + */ + +static inline bool +table_fetch_follow(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead) +{ + + return scan->rel->rd_tableam->tuple_fetch_follow(scan, tid, snapshot, + slot, call_again, + all_dead); +} + +/* + * Return true iff tuple in slot satisfies the snapshot. + * + * This assumes the slot's tuple is valid, and of the appropriate type for the + * AM. + * + * Some AMs might modify the data underlying the tuple as a side-effect. If so + * they ought to mark the relevant buffer dirty. + */ +static inline bool +table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot) +{ + return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot); +} + + +/* ---------------------------------------------------------------------------- + * Helper functions to implement parallel scans for block oriented storage. + * ---------------------------------------------------------------------------- + */ + +extern Size table_block_parallelscan_estimate(Relation rel); +extern Size table_block_parallelscan_initialize(Relation rel, + ParallelTableScanDesc pscan); +extern void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan); +extern BlockNumber table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanDesc pbscan); +extern void table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanDesc pbscan); + + +/* ---------------------------------------------------------------------------- * Functions in tableamapi.c + * ---------------------------------------------------------------------------- */ + extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler); extern const TableAmRoutine *GetTableAmRoutineByAmId(Oid amoid); extern const TableAmRoutine *GetHeapamTableAmRoutine(void); diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 330c481a8b..29f7ed6237 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -110,13 +110,14 @@ extern void index_build(Relation heapRelation, bool isreindex, bool parallel); +struct TableScanDescData; extern double IndexBuildHeapScan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, IndexBuildCallback callback, void *callback_state, - struct HeapScanDescData *scan); + struct TableScanDescData *scan); extern double IndexBuildHeapRangeScan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, @@ -126,7 +127,7 @@ extern double IndexBuildHeapRangeScan(Relation heapRelation, BlockNumber end_blockno, IndexBuildCallback callback, void *callback_state, - struct HeapScanDescData *scan); + struct TableScanDescData *scan); extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); diff --git 
a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 6a5411eba8..fb61a339ac 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1268,7 +1268,7 @@ typedef struct ScanState { PlanState ps; /* its first field is NodeTag */ Relation ss_currentRelation; - struct HeapScanDescData *ss_currentScanDesc; + struct TableScanDescData *ss_currentScanDesc; TupleTableSlot *ss_ScanTupleSlot; } ScanState; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 7a5d8c47e1..6bc4a1cdcc 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1018,6 +1018,8 @@ IndexBulkDeleteCallback IndexBulkDeleteResult IndexClauseSet IndexElem +IndexFetchHeapData +IndexFetchTableData IndexInfo IndexList IndexOnlyScan @@ -1602,6 +1604,8 @@ PagetableEntry Pairs ParallelAppendState ParallelBitmapHeapState +ParallelBlockTableScanDesc +ParallelBlockTableScanDescData ParallelCompletionPtr ParallelContext ParallelExecutorInfo @@ -1609,8 +1613,8 @@ ParallelHashGrowth ParallelHashJoinBatch ParallelHashJoinBatchAccessor ParallelHashJoinState -ParallelHeapScanDesc -ParallelHeapScanDescData +ParallelTableScanDesc +ParallelTableScanDescData ParallelIndexScanDesc ParallelSlot ParallelState @@ -2316,6 +2320,8 @@ TableFuncScanState TableInfo TableLikeClause TableSampleClause +TableScanDesc +TableScanDescData TableSpaceCacheEntry TableSpaceOpts TablespaceList
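
The sketches below are editorial illustrations of the API introduced by this patch; they are not part of the diff itself.

First, a sketch of the TableAmRoutine a hypothetical block-oriented AM could hand back from its handler function (handler lookup goes through GetTableAmRoutine(), declared above). All myam_* names are invented and only declared here, not defined; pointing the parallel-scan callbacks at the generic table_block_parallelscan_* helpers is an assumption about how a block-oriented AM would typically be wired up.

#include "postgres.h"

#include "access/tableam.h"
#include "executor/tuptable.h"
#include "fmgr.h"
#include "storage/itemptr.h"

/* Hypothetical callbacks; a real AM would define these elsewhere. */
extern const TupleTableSlotOps *myam_slot_callbacks(Relation rel);
extern TableScanDesc myam_scan_begin(Relation rel, Snapshot snapshot,
			int nkeys, struct ScanKeyData *key,
			ParallelTableScanDesc parallel_scan,
			bool allow_strat, bool allow_sync, bool allow_pagemode,
			bool is_bitmapscan, bool is_samplescan, bool temp_snap);
extern void myam_scan_end(TableScanDesc scan);
extern void myam_scan_rescan(TableScanDesc scan, struct ScanKeyData *key,
			bool set_params, bool allow_strat, bool allow_sync,
			bool allow_pagemode);
extern void myam_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);
extern struct IndexFetchTableData *myam_begin_index_fetch(Relation rel);
extern void myam_reset_index_fetch(struct IndexFetchTableData *data);
extern void myam_end_index_fetch(struct IndexFetchTableData *data);
extern bool myam_tuple_fetch_follow(struct IndexFetchTableData *scan,
			ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot,
			bool *call_again, bool *all_dead);
extern bool myam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
			Snapshot snapshot);

static const TableAmRoutine myam_methods = {
	.type = T_TableAmRoutine,

	.slot_callbacks = myam_slot_callbacks,

	.scan_begin = myam_scan_begin,
	.scan_end = myam_scan_end,
	.scan_rescan = myam_scan_rescan,
	.scan_update_snapshot = myam_scan_update_snapshot,

	/* a block-oriented AM can reuse the generic parallel-scan helpers */
	.parallelscan_estimate = table_block_parallelscan_estimate,
	.parallelscan_initialize = table_block_parallelscan_initialize,
	.parallelscan_reinitialize = table_block_parallelscan_reinitialize,

	.begin_index_fetch = myam_begin_index_fetch,
	.reset_index_fetch = myam_reset_index_fetch,
	.end_index_fetch = myam_end_index_fetch,

	.tuple_fetch_follow = myam_tuple_fetch_follow,
	.tuple_satisfies_snapshot = myam_tuple_satisfies_snapshot,
};

PG_FUNCTION_INFO_V1(myam_handler);

Datum
myam_handler(PG_FUNCTION_ARGS)
{
	PG_RETURN_POINTER(&myam_methods);
}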
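
Next, a sketch of a caller of the new index-fetch path (table_begin_index_fetch_table() / table_fetch_follow() / table_end_index_fetch_table()). The function name tid_is_visible is invented; passing NULL as table_gimmegimmeslot()'s reglist argument and releasing the slot with ExecDropSingleTupleTableSlot() are assumptions, not something this patch spells out.

#include "postgres.h"

#include "access/tableam.h"
#include "executor/tuptable.h"
#include "storage/itemptr.h"

/*
 * Return true if any tuple version reachable from "tid" is visible under
 * "snapshot".  Sketch only.
 */
static bool
tid_is_visible(Relation rel, ItemPointer tid, Snapshot snapshot)
{
	IndexFetchTableData *fetch = table_begin_index_fetch_table(rel);
	TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL);	/* assumed OK */
	bool		call_again = false;
	bool		all_dead = false;
	bool		found = false;

	/*
	 * tuple_fetch_follow may have to be called again for the same TID, for
	 * example while the heap AM walks a HOT chain; call_again reports
	 * whether further versions might follow.
	 */
	do
	{
		if (table_fetch_follow(fetch, tid, snapshot, slot,
							   &call_again, &all_dead))
		{
			found = true;
			break;
		}
	} while (call_again);

	ExecDropSingleTupleTableSlot(slot);
	table_end_index_fetch_table(fetch);

	return found;
}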
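
Finally, a sketch of the block-oriented parallel scan helpers. It assumes that table_block_parallelscan_nextpage() returns InvalidBlockNumber once all blocks have been handed out; count_my_blocks is again an invented name.

#include "postgres.h"

#include "access/tableam.h"
#include "storage/block.h"

/*
 * Count how many blocks the shared parallel scan state hands to this
 * participant.  Sketch only.
 */
static BlockNumber
count_my_blocks(Relation rel, ParallelBlockTableScanDesc pbscan)
{
	BlockNumber blkno;
	BlockNumber nblocks = 0;

	/*
	 * Pick the starting block; phs_mutex ensures that only one participant
	 * actually sets phs_startblock, the others simply reuse it.
	 */
	table_block_parallelscan_startblock_init(rel, pbscan);

	while ((blkno = table_block_parallelscan_nextpage(rel, pbscan)) !=
		   InvalidBlockNumber)
		nblocks++;			/* a real scan would read and process blkno */

	return nblocks;
}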