
Commit c7b0013

antamel authored and Commitfest Bot committed
Extract BTScanState from BTScanOpaqueData
Currently BTScanOpaqueData holds both the scan-key information and the state of the tree scan. That is fine as long as we scan the btree in only one direction. The upcoming knn-btree patch scans the btree in two directions simultaneously. In preparation for that, this commit extracts the data structure representing the tree-scan state in a single direction into a separate BTScanState struct.

Discussion: https://postgr.es/m/ce35e97b-cf34-3f5d-6b99-2c25bae49999%40postgrespro.ru
Author: Nikita Glukhov
Reviewed-by: Robert Haas, Tom Lane, Anastasia Lubennikova, Alexander Korotkov
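Note: the BTScanState struct itself is not defined in the diff shown below; it presumably lives in src/include/access/nbtree.h (where BTScanOpaqueData is declared), one of the other changed files. Judging only from the fields this diff rewrites from so->field to so->state.field or state->field, the extracted struct would look roughly like the sketch below; field order, comments, and any extra members are guesses, not the committed definition.

    /*
     * Sketch only: reconstructed from the fields this diff moves under
     * so->state; the committed definition may differ.
     */
    typedef struct BTScanStateData
    {
        BTScanPosData currPos;      /* current position in the scan direction */
        BTScanPosData markPos;      /* marked position, if any */
        int           markItemIndex;    /* itemIndex, or -1 if not valid */

        /* deferred "killed item" bookkeeping for the current page */
        int          *killedItems;  /* currPos.items indexes of killed items */
        int           numKilled;    /* number of currently stored items */

        /* tuple workspaces for index-only scans */
        char         *currTuples;   /* tuple storage for currPos */
        char         *markTuples;   /* tuple storage for markPos */
    } BTScanStateData;

    typedef BTScanStateData *BTScanState;

BTScanOpaqueData then keeps a state member of this type (the diff consistently accesses so->state), while the key-related fields (keyData, arrayKeys, orderProcs, arrayContext, numArrayKeys, needPrimScan, scanBehind) stay in BTScanOpaqueData itself, which is presumably what lets the knn-btree patch later hold one BTScanState per scan direction.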
1 parent dec6278 commit c7b0013

File tree: 4 files changed, +378 -320 lines


src/backend/access/nbtree/nbtree.c

Lines changed: 100 additions & 81 deletions
@@ -207,6 +207,7 @@ bool
 btgettuple(IndexScanDesc scan, ScanDirection dir)
 {
     BTScanOpaque so = (BTScanOpaque) scan->opaque;
+    BTScanState state = &so->state;
     bool        res;
 
     /* btree indexes are never lossy */
@@ -220,7 +221,7 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
      * the appropriate direction.  If we haven't done so yet, we call
      * _bt_first() to get the first item in the scan.
      */
-    if (!BTScanPosIsValid(so->currPos))
+    if (!BTScanPosIsValid(state->currPos))
         res = _bt_first(scan, dir);
     else
     {
@@ -238,11 +239,11 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
              * trying to optimize that, so we don't detect it, but instead
              * just forget any excess entries.
              */
-            if (so->killedItems == NULL)
-                so->killedItems = (int *)
+            if (state->killedItems == NULL)
+                state->killedItems = (int *)
                     palloc(MaxTIDsPerBTreePage * sizeof(int));
-            if (so->numKilled < MaxTIDsPerBTreePage)
-                so->killedItems[so->numKilled++] = so->currPos.itemIndex;
+            if (state->numKilled < MaxTIDsPerBTreePage)
+                state->killedItems[state->numKilled++] = state->currPos.itemIndex;
         }
 
         /*
@@ -267,6 +268,7 @@ int64
 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 {
     BTScanOpaque so = (BTScanOpaque) scan->opaque;
+    BTScanPos   currPos = &so->state.currPos;
     int64       ntids = 0;
     ItemPointer heapTid;
 
@@ -287,15 +289,15 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
              * Advance to next tuple within page.  This is the same as the
              * easy case in _bt_next().
              */
-            if (++so->currPos.itemIndex > so->currPos.lastItem)
+            if (++currPos->itemIndex > currPos->lastItem)
             {
                 /* let _bt_next do the heavy lifting */
                 if (!_bt_next(scan, ForwardScanDirection))
                     break;
             }
 
             /* Save tuple ID, and continue scanning */
-            heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
+            heapTid = &currPos->items[currPos->itemIndex].heapTid;
             tbm_add_tuples(tbm, heapTid, 1, false);
             ntids++;
         }
@@ -323,8 +325,8 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
 
     /* allocate private workspace */
     so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
-    BTScanPosInvalidate(so->currPos);
-    BTScanPosInvalidate(so->markPos);
+    BTScanPosInvalidate(so->state.currPos);
+    BTScanPosInvalidate(so->state.markPos);
     if (scan->numberOfKeys > 0)
         so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
     else
@@ -336,15 +338,15 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
     so->orderProcs = NULL;
     so->arrayContext = NULL;
 
-    so->killedItems = NULL;     /* until needed */
-    so->numKilled = 0;
+    so->state.killedItems = NULL;   /* until needed */
+    so->state.numKilled = 0;
 
     /*
      * We don't know yet whether the scan will be index-only, so we do not
      * allocate the tuple workspace arrays until btrescan.  However, we set up
      * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
      */
-    so->currTuples = so->markTuples = NULL;
+    so->state.currTuples = so->state.markTuples = NULL;
 
     scan->xs_itupdesc = RelationGetDescr(rel);
 
@@ -353,6 +355,45 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
     return scan;
 }
 
+static void
+_bt_release_current_position(BTScanState state, Relation indexRelation,
+                             bool invalidate)
+{
+    /* we aren't holding any read locks, but gotta drop the pins */
+    if (BTScanPosIsValid(state->currPos))
+    {
+        /* Before leaving current page, deal with any killed items */
+        if (state->numKilled > 0)
+            _bt_killitems(state, indexRelation);
+
+        BTScanPosUnpinIfPinned(state->currPos);
+
+        if (invalidate)
+            BTScanPosInvalidate(state->currPos);
+    }
+}
+
+static void
+_bt_release_scan_state(IndexScanDesc scan, BTScanState state, bool free)
+{
+    /* No need to invalidate positions, if the RAM is about to be freed. */
+    _bt_release_current_position(state, scan->indexRelation, !free);
+
+    state->markItemIndex = -1;
+    BTScanPosUnpinIfPinned(state->markPos);
+
+    if (free)
+    {
+        if (state->killedItems != NULL)
+            pfree(state->killedItems);
+        if (state->currTuples != NULL)
+            pfree(state->currTuples);
+        /* markTuples should not be pfree'd (_bt_allocate_tuple_workspaces) */
+    }
+    else
+        BTScanPosInvalidate(state->markPos);
+}
+
 /*
  * btrescan() -- rescan an index relation
  */
@@ -361,22 +402,12 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
          ScanKey orderbys, int norderbys)
 {
     BTScanOpaque so = (BTScanOpaque) scan->opaque;
+    BTScanState state = &so->state;
 
-    /* we aren't holding any read locks, but gotta drop the pins */
-    if (BTScanPosIsValid(so->currPos))
-    {
-        /* Before leaving current page, deal with any killed items */
-        if (so->numKilled > 0)
-            _bt_killitems(scan);
-        BTScanPosUnpinIfPinned(so->currPos);
-        BTScanPosInvalidate(so->currPos);
-    }
+    _bt_release_scan_state(scan, state, false);
 
-    so->markItemIndex = -1;
     so->needPrimScan = false;
     so->scanBehind = false;
-    BTScanPosUnpinIfPinned(so->markPos);
-    BTScanPosInvalidate(so->markPos);
 
     /*
      * Allocate tuple workspace arrays, if needed for an index-only scan and
@@ -394,11 +425,8 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
      * a SIGSEGV is not possible.  Yeah, this is ugly as sin, but it beats
      * adding special-case treatment for name_ops elsewhere.
      */
-    if (scan->xs_want_itup && so->currTuples == NULL)
-    {
-        so->currTuples = (char *) palloc(BLCKSZ * 2);
-        so->markTuples = so->currTuples + BLCKSZ;
-    }
+    if (scan->xs_want_itup && state->currTuples == NULL)
+        _bt_allocate_tuple_workspaces(state);
 
     /*
      * Reset the scan keys
@@ -419,69 +447,56 @@ btendscan(IndexScanDesc scan)
 {
     BTScanOpaque so = (BTScanOpaque) scan->opaque;
 
-    /* we aren't holding any read locks, but gotta drop the pins */
-    if (BTScanPosIsValid(so->currPos))
-    {
-        /* Before leaving current page, deal with any killed items */
-        if (so->numKilled > 0)
-            _bt_killitems(scan);
-        BTScanPosUnpinIfPinned(so->currPos);
-    }
-
-    so->markItemIndex = -1;
-    BTScanPosUnpinIfPinned(so->markPos);
-
-    /* No need to invalidate positions, the RAM is about to be freed. */
+    _bt_release_scan_state(scan, &so->state, true);
 
     /* Release storage */
     if (so->keyData != NULL)
         pfree(so->keyData);
     /* so->arrayKeys and so->orderProcs are in arrayContext */
     if (so->arrayContext != NULL)
         MemoryContextDelete(so->arrayContext);
-    if (so->killedItems != NULL)
-        pfree(so->killedItems);
-    if (so->currTuples != NULL)
-        pfree(so->currTuples);
-    /* so->markTuples should not be pfree'd, see btrescan */
+
     pfree(so);
 }
 
-/*
- * btmarkpos() -- save current scan position
- */
-void
-btmarkpos(IndexScanDesc scan)
+static void
+_bt_mark_current_position(BTScanState state)
 {
-    BTScanOpaque so = (BTScanOpaque) scan->opaque;
-
     /* There may be an old mark with a pin (but no lock). */
-    BTScanPosUnpinIfPinned(so->markPos);
+    BTScanPosUnpinIfPinned(state->markPos);
 
     /*
      * Just record the current itemIndex.  If we later step to next page
      * before releasing the marked position, _bt_steppage makes a full copy of
      * the currPos struct in markPos.  If (as often happens) the mark is moved
      * before we leave the page, we don't have to do that work.
      */
-    if (BTScanPosIsValid(so->currPos))
-        so->markItemIndex = so->currPos.itemIndex;
+    if (BTScanPosIsValid(state->currPos))
+        state->markItemIndex = state->currPos.itemIndex;
     else
     {
-        BTScanPosInvalidate(so->markPos);
-        so->markItemIndex = -1;
+        BTScanPosInvalidate(state->markPos);
+        state->markItemIndex = -1;
     }
 }
 
 /*
- * btrestrpos() -- restore scan to last saved position
+ * btmarkpos() -- save current scan position
  */
 void
-btrestrpos(IndexScanDesc scan)
+btmarkpos(IndexScanDesc scan)
 {
     BTScanOpaque so = (BTScanOpaque) scan->opaque;
 
-    if (so->markItemIndex >= 0)
+    _bt_mark_current_position(&so->state);
+}
+
+static void
+_bt_restore_marked_position(IndexScanDesc scan, BTScanState state)
+{
+    BTScanOpaque so = (BTScanOpaque) scan->opaque;
+
+    if (state->markItemIndex >= 0)
     {
         /*
          * The scan has never moved to a new page since the last mark.  Just
@@ -490,7 +505,7 @@ btrestrpos(IndexScanDesc scan)
          * NB: In this case we can't count on anything in so->markPos to be
          * accurate.
          */
-        so->currPos.itemIndex = so->markItemIndex;
+        state->currPos.itemIndex = state->markItemIndex;
     }
     else
     {
@@ -500,34 +515,27 @@ btrestrpos(IndexScanDesc scan)
          * locks, but if we're still holding the pin for the current position,
          * we must drop it.
          */
-        if (BTScanPosIsValid(so->currPos))
-        {
-            /* Before leaving current page, deal with any killed items */
-            if (so->numKilled > 0)
-                _bt_killitems(scan);
-            BTScanPosUnpinIfPinned(so->currPos);
-        }
+        _bt_release_current_position(state, scan->indexRelation,
+                                     !BTScanPosIsValid(state->markPos));
 
-        if (BTScanPosIsValid(so->markPos))
+        if (BTScanPosIsValid(state->markPos))
         {
             /* bump pin on mark buffer for assignment to current buffer */
-            if (BTScanPosIsPinned(so->markPos))
-                IncrBufferRefCount(so->markPos.buf);
-            memcpy(&so->currPos, &so->markPos,
+            if (BTScanPosIsPinned(state->markPos))
+                IncrBufferRefCount(state->markPos.buf);
+            memcpy(&state->currPos, &state->markPos,
                    offsetof(BTScanPosData, items[1]) +
-                   so->markPos.lastItem * sizeof(BTScanPosItem));
-            if (so->currTuples)
-                memcpy(so->currTuples, so->markTuples,
-                       so->markPos.nextTupleOffset);
+                   state->markPos.lastItem * sizeof(BTScanPosItem));
+            if (state->currTuples)
+                memcpy(state->currTuples, state->markTuples,
+                       state->markPos.nextTupleOffset);
             /* Reset the scan's array keys (see _bt_steppage for why) */
             if (so->numArrayKeys)
            {
-                _bt_start_array_keys(scan, so->currPos.dir);
+                _bt_start_array_keys(scan, state->currPos.dir);
                 so->needPrimScan = false;
             }
         }
-        else
-            BTScanPosInvalidate(so->currPos);
     }
 }
 
@@ -798,6 +806,17 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber prev_scan_page)
     SpinLockRelease(&btscan->btps_mutex);
 }
 
+/*
+ * btrestrpos() -- restore scan to last saved position
+ */
+void
+btrestrpos(IndexScanDesc scan)
+{
+    BTScanOpaque so = (BTScanOpaque) scan->opaque;
+
+    _bt_restore_marked_position(scan, &so->state);
+}
+
 /*
  * Bulk deletion of all index entries pointing to a set of heap tuples.
  * The set of target tuples is specified via a callback routine that tells
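One note on the btrescan hunk above: the removed inline allocation (a single palloc(BLCKSZ * 2) split between currTuples and markTuples) is replaced by a call to _bt_allocate_tuple_workspaces(state), which is not defined in this file's diff and presumably comes from one of the other changed files. Based purely on the code it replaces, it would amount to something like the sketch below.

    /*
     * Sketch only, reconstructed from the btrescan code this commit removes;
     * the real helper is defined elsewhere in the commit and may differ
     * (for example, it could be a static inline in nbtree.h).
     */
    static void
    _bt_allocate_tuple_workspaces(BTScanState state)
    {
        /* one allocation serves both workspaces, each one block in size... */
        state->currTuples = (char *) palloc(2 * BLCKSZ);
        /* ...which is why markTuples must never be pfree'd separately */
        state->markTuples = state->currTuples + BLCKSZ;
    }

That single-allocation layout is also why _bt_release_scan_state above pfrees only currTuples and carries the comment that markTuples should not be pfree'd.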
