From: Alexey Namakonov
Date: Fri, 29 Dec 2023 10:21:34 +0000 (+0300)
Subject: Added TAP tests and parsing specific contents of GIN data pages.
X-Git-Tag: REL_17_0~14
X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=33ab28f112e55a2028ca0ac3c36945fabb802b09;p=pg_filedump.git

Added TAP tests and parsing specific contents of GIN data pages.

Close #28.
---

diff --git a/Makefile b/Makefile
index bf86b73..fac7c37 100644
--- a/Makefile
+++ b/Makefile
@@ -6,6 +6,7 @@ FD_VERSION=16.0
 PROGRAM = pg_filedump
 OBJS = pg_filedump.o decode.o stringinfo.o
 REGRESS = datatypes float numeric xml
+TAP_TESTS = 1
 EXTRA_CLEAN = *.heap
 
 PG_CONFIG = pg_config
diff --git a/pg_filedump.c b/pg_filedump.c
index e902588..ed2f35a 100644
--- a/pg_filedump.c
+++ b/pg_filedump.c
@@ -809,6 +809,73 @@ IsBtreeMetaPage(Page page)
 	return false;
 }
 
+/*
+ * Check whether page is a gin meta page.
+ *
+ * The answer is only true when the special section has exactly the size of
+ * GinPageOpaqueData, bytesToFormat equals blockSize, and GIN_META is set in
+ * the opaque flags.
+ */
+static bool
+IsGinMetaPage(Page page)
+{
+	if ((PageGetSpecialSize(page) == (MAXALIGN(sizeof(GinPageOpaqueData))))
+		&& (bytesToFormat == blockSize))
+	{
+		GinPageOpaque gpo = GinPageGetOpaque(page);
+
+		if (gpo->flags & GIN_META)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Check whether page is a gin leaf page.
+ *
+ * The answer is only true when the special section has exactly the size of
+ * GinPageOpaqueData, bytesToFormat equals blockSize, and GIN_LEAF is set in
+ * the opaque flags.  GIN_DATA is not examined here.
+ */
+static bool
+IsGinLeafPage(Page page)
+{
+	if ((PageGetSpecialSize(page) == (MAXALIGN(sizeof(GinPageOpaqueData))))
+		&& (bytesToFormat == blockSize))
+	{
+		GinPageOpaque gpo = GinPageGetOpaque(page);
+
+		if (gpo->flags & GIN_LEAF)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Check whether page is a SpGist meta page.
+ *
+ * The answer is only true when the special section has exactly the size of
+ * SpGistPageOpaqueData, bytesToFormat equals blockSize, the opaque data
+ * carries SPGIST_PAGE_ID, and SPGIST_META is set in the opaque flags.
+ */
+static bool
+IsSpGistMetaPage(Page page)
+{
+	if ((PageGetSpecialSize(page) == (MAXALIGN(sizeof(SpGistPageOpaqueData))))
+		&& (bytesToFormat == blockSize))
+	{
+		SpGistPageOpaque spgpo = SpGistPageGetOpaque(page);
+
+		if ((spgpo->spgist_page_id == SPGIST_PAGE_ID) &&
+			(spgpo->flags & SPGIST_META))
+			return true;
+	}
+
+	return false;
+}
+
 /* Display a header for the dump so we know the file name, the options
  * used and the time the dump was taken */
 static void
@@ -980,6 +1047,273 @@ FormatHeader(char *buffer, Page page, BlockNumber blkno, bool isToast)
 	return (rc);
 }
 
+/* Copied from ginpostinglist.c */
+#define MaxHeapTuplesPerPageBits		11
+
+/*
+ * Pack an item pointer into a 64-bit integer: the block number goes to the
+ * high bits, the offset number to the low MaxHeapTuplesPerPageBits bits.
+ */
+static uint64
+itemptr_to_uint64(const ItemPointer iptr)
+{
+	uint64		val;
+
+	val = GinItemPointerGetBlockNumber(iptr);
+	val <<= MaxHeapTuplesPerPageBits;
+	val |= GinItemPointerGetOffsetNumber(iptr);
+
+	return val;
+}
+
+/*
+ * Inverse of itemptr_to_uint64(): unpack val into *iptr.
+ */
+static void
+uint64_to_itemptr(uint64 val, ItemPointer iptr)
+{
+	GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
+	val = val >> MaxHeapTuplesPerPageBits;
+	GinItemPointerSetBlockNumber(iptr, val);
+}
+
+/*
+ * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer.
+ *
+ * Each byte contributes seven bits, least significant group first; a set
+ * high bit means that another byte follows.  Up to seven bytes are read, so
+ * the caller must make sure that many bytes are readable at *ptr.
+ */
+static uint64
+decode_varbyte(unsigned char **ptr)
+{
+	uint64		val;
+	unsigned char *p = *ptr;
+	uint64		c;
+
+	/* 1st byte */
+	c = *(p++);
+	val = c & 0x7F;
+	if (c & 0x80)
+	{
+		/* 2nd byte */
+		c = *(p++);
+		val |= (c & 0x7F) << 7;
+		if (c & 0x80)
+		{
+			/* 3rd byte */
+			c = *(p++);
+			val |= (c & 0x7F) << 14;
+			if (c & 0x80)
+			{
+				/* 4th byte */
+				c = *(p++);
+				val |= (c & 0x7F) << 21;
+				if (c & 0x80)
+				{
+					/* 5th byte */
+					c = *(p++);
+					val |= (c & 0x7F) << 28;
+					if (c & 0x80)
+					{
+						/* 6th byte */
+						c = *(p++);
+						val |= (c & 0x7F) << 35;
+						if (c & 0x80)
+						{
+							/* 7th byte, should not have continuation bit */
+							c = *(p++);
+							val |= c << 42;
+							Assert((c & 0x80) == 0);
+						}
+					}
+				}
+			}
+		}
+	}
+
+	*ptr = p;
+
+	return val;
+}
+
+/*
+ * Number of bytes between ptr and the place where the GIN opaque data of a
+ * full block would start, or 0 if ptr lies beyond that place.  ptr must not
+ * precede buffer.  Used to keep reads of untrusted page contents inside the
+ * block.
+ */
+static Size
+GinBytesAvailable(char *buffer, char *ptr)
+{
+	Size		offset = (Size) (ptr - buffer);
+	Size		reserved = MAXALIGN(sizeof(GinPageOpaqueData));
+
+	if (blockSize < reserved || offset > blockSize - reserved)
+		return 0;
+
+	return blockSize - reserved - offset;
+}
+
+/*
+ * Dump out gin-specific content of block
+ *
+ * The block is interpreted as a posting tree page.  For a leaf page the item
+ * pointers are printed, decoded from the posting lists when the page is
+ * compressed and read from a plain array otherwise; for any other page the
+ * array of posting items is printed.  Lengths and counts stored on the page
+ * are checked against the block size before use, because the page may be
+ * corrupted, and the page image itself is never modified.
+ *
+ * Nothing is printed for a toast relation unless verbose is set.  The toast
+ * parameters other than isToast are not used.
+ */
+static void
+FormatGinBlock(char *buffer,
+			   bool isToast,
+			   Oid toastOid,
+			   unsigned int toastExternalSize,
+			   char *toastValue,
+			   unsigned int *toastRead)
+{
+	Page		page = (Page) buffer;
+	char	   *indent = isToast ? "\t" : "";
+
+	if (isToast && !verbose)
+		return;
+
+	printf("%s -----\n", indent);
+
+	if (IsGinLeafPage(page))
+	{
+		if (GinPageIsCompressed(page))
+		{
+			GinPostingList *seg = GinDataLeafPageGetPostingList(page);
+			int			plist_idx = 1;
+			Size		len = GinDataLeafPageGetPostingListSize(page);
+			Size		maxlen = GinBytesAvailable(buffer, (char *) seg);
+			char	   *endptr;
+
+			/* len is derived from pd_lower, which may be damaged */
+			if (len > maxlen)
+			{
+				printf("%s Error: posting lists extend beyond the data area of the block.\n",
+					   indent);
+				len = maxlen;
+			}
+			endptr = ((char *) seg) + len;
+
+			while ((char *) seg < endptr)
+			{
+				Size		avail = (Size) (endptr - (char *) seg);
+				int			item_idx = 1;
+				uint64		val;
+				unsigned char *ptr;
+				unsigned char *endseg;
+				ItemPointerData cur;
+
+				/* the segment header and its bytes must fit in what is left */
+				if (avail < offsetof(GinPostingList, bytes) ||
+					avail - offsetof(GinPostingList, bytes) < seg->nbytes)
+				{
+					printf("%s Error: posting list %d extends beyond the posting list area.\n",
+						   indent, plist_idx);
+					break;
+				}
+
+				ptr = seg->bytes;
+				endseg = seg->bytes + seg->nbytes;
+
+				/* decode into a local copy so that the page stays untouched */
+				cur = seg->first;
+				printf("\n%s Posting List %3d -- Length: %4u\n",
+					   indent, plist_idx, seg->nbytes);
+				printf("%s ItemPointer %3d -- Block Id: %4u linp Index: %4u\n",
+					   indent, item_idx,
+					   GinItemPointerGetBlockNumber(&cur),
+					   GinItemPointerGetOffsetNumber(&cur));
+
+				val = itemptr_to_uint64(&cur);
+				while (ptr < endseg)
+				{
+					val += decode_varbyte(&ptr);
+					if (ptr > endseg)
+					{
+						printf("%s Error: posting list %d ends inside an encoded item pointer.\n",
+							   indent, plist_idx);
+						break;
+					}
+					item_idx++;
+
+					uint64_to_itemptr(val, &cur);
+					printf("%s ItemPointer %3d -- Block Id: %4u linp Index: %4u\n",
+						   indent, item_idx,
+						   GinItemPointerGetBlockNumber(&cur),
+						   GinItemPointerGetOffsetNumber(&cur));
+				}
+
+				plist_idx++;
+
+				seg = GinNextPostingListSegment(seg);
+			}
+		}
+		else
+		{
+			int			i,
+						nitems = GinPageGetOpaque(page)->maxoff;
+			ItemPointer items = (ItemPointer) GinDataPageGetData(page);
+			Size		maxitems = GinBytesAvailable(buffer, (char *) items) /
+				sizeof(ItemPointerData);
+
+			/* maxoff comes from the page and may be damaged */
+			if ((Size) nitems > maxitems)
+			{
+				printf("%s Error: item pointer array extends beyond the data area of the block.\n",
+					   indent);
+				nitems = (int) maxitems;
+			}
+
+			for (i = 0; i < nitems; i++)
+			{
+				printf("%s ItemPointer %d -- Block Id: %u linp Index: %u\n",
+					   indent, i + 1,
+					   GinItemPointerGetBlockNumber(&items[i]),
+					   GinItemPointerGetOffsetNumber(&items[i]));
+			}
+		}
+	}
+	else
+	{
+		int			cur,
+					high = GinPageGetOpaque(page)->maxoff;
+		PostingItem *pitem;
+		Size		maxitems = GinBytesAvailable(buffer,
+												 (char *) GinDataPageGetData(page)) /
+			sizeof(PostingItem);
+
+		/* maxoff comes from the page and may be damaged */
+		if ((Size) high > maxitems)
+		{
+			printf("%s Error: posting item array extends beyond the data area of the block.\n",
+				   indent);
+			high = (int) maxitems;
+		}
+
+		/* an int counter cannot wrap around the way an OffsetNumber could */
+		for (cur = FirstOffsetNumber; cur <= high; cur++)
+		{
+			pitem = GinDataPageGetPostingItem(page, cur);
+			printf("%s PostingItem %d -- child Block Id: (%u) Block Id: %u linp Index: %u\n",
+				   indent, cur,
+				   PostingItemGetBlockNumber(pitem),
+				   GinItemPointerGetBlockNumber(&pitem->key),
+				   GinItemPointerGetOffsetNumber(&pitem->key));
+		}
+	}
+
+	printf("\n");
+}
+
 /* Dump out formatted items that reside on this block */
 static void
 FormatItemBlock(char *buffer,
@@ -1003,6 +1337,27 @@ FormatItemBlock(char *buffer,
 	if (IsBtreeMetaPage(page))
 		return;
 
+	/* Same as above */
+	if (IsSpGistMetaPage(page))
+		return;
+
+	/* Same as above */
+	if (IsGinMetaPage(page))
+		return;
+
+	/*
+	 * Posting tree (data) pages of GIN index contain posting lists or posting
+	 * items instead of item array.  Other GIN pages are left to the generic
+	 * item formatting below.
+	 */
+	if (specialType == SPEC_SECT_INDEX_GIN && GinPageIsData(page))
+	{
+		FormatGinBlock(buffer, isToast, toastOid,
+					   toastExternalSize, toastValue,
+					   toastRead);
+		return;
+	}
+
 	if (!isToast || verbose)
 		printf("%s -----\n", indent);
 
diff --git a/t/001_basic.pl b/t/001_basic.pl
new file mode 100644
index 0000000..9571119
--- /dev/null
+++ b/t/001_basic.pl
@@ -0,0 +1,179 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Config;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Spec;
+use IPC::Run qw( run timeout );
+
+
+note "setting up PostgreSQL instance";
+
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init(extra => ["--data-checksums"]);
+$node->append_conf('postgresql.conf', 'fsync = True');
+$node->start;
+
+my $query = qq(
+	create table t1(a int, b text, c bigint, filler char(400));
+	insert into t1 values (1, 'asdasd1', 29347293874234444);
+	insert into t1 values (2, 'asdasd2', 29347293874234445);
+	insert into t1 values (3, 'asdasd', 29347293874234446);
+	insert into t1 values (4, 'asdasd', 29347293874234447);
+	checkpoint;
+);
+$node->safe_psql('postgres', $query);
+
+note "running tests";
+
+test_basic_output();
+test_btree_output();
+test_btree_dedup_output();
+test_spgist_output();
+test_gin_output();
+
+$node->stop;
+done_testing();
+
+# Return the path of the file that stores relation $rel.
+sub get_table_location
+{
+	my ($rel) = @_;
+
+	return File::Spec->catfile(
+		$node->data_dir,
+		$node->safe_psql('postgres', qq(SELECT pg_relation_filepath('$rel');))
+	);
+}
+
+# Run pg_filedump with @options on relation $rel and return its standard
+# output.  Dies if pg_filedump fails; also checks that it printed no "Error".
+sub run_pg_filedump
+{
+	my ($rel, @options) = @_;
+	my ($stdout, $stderr);
+
+	my $loc = get_table_location($rel);
+	my $cmd = [ 'pg_filedump', @options, $loc ];
+	run $cmd, '>', \$stdout, '2>', \$stderr
+	  or die "Error: could not execute pg_filedump: $stderr";
+
+	ok($stdout !~ qr/Error/, "error not found");
+
+	return $stdout;
+}
+
+sub test_basic_output
+{
+	my $out_ = run_pg_filedump('t1', ("-D", "int,text,bigint"));
+
+	ok($out_ =~ qr/Header/, "Header found");
+	ok($out_ =~ qr/COPY: 1/, "first COPY found");
+	ok($out_ =~ qr/COPY: 2/, "second COPY found");
+	ok($out_ =~ qr/COPY: 3/, "third COPY found");
+	ok($out_ =~ qr/COPY: 4/, "fourth COPY found");
+	ok($out_ =~ qr/29347293874234447/, "number found");
+	ok($out_ =~ qr/asdasd/, "string found");
+}
+
+sub test_btree_output
+{
+	my $query = qq(
+		insert into t1 select * FROM generate_series(1, 10000);
+		create index i1 on t1(b);
+		checkpoint;
+	);
+	$node->safe_psql('postgres', $query);
+
+	my $out_ = run_pg_filedump('i1', ('-i'));
+
+	ok($out_ =~ qr/Header/, "Header found");
+	ok($out_ =~ qr/BTree Index Section/, "BTree Index Section found");
+	ok($out_ =~ qr/BTree Meta Data/, "BTree Meta Data found");
+	ok($out_ =~ qr/Item 3/, "Item found");
+	ok($out_ =~ qr/Previous/, "Previous item found");
+	ok($out_ =~ qr/Next/, "Next item found");
+	ok($out_ =~ qr/Level/, "Level found");
+	ok($out_ !~ qr/Next XID/, "Next XID not found");
+
+	# make leaf with BTP_DELETED flag
+	$node->safe_psql('postgres', "delete from t1 where a >= 2000 and a < 4000;");
+	$node->safe_psql('postgres', "vacuum t1; checkpoint;");
+
+	$out_ = run_pg_filedump('i1', ('-i'));
+
+	ok($out_ =~ qr/Next XID/, "Next XID found");
+}
+
+#
+# The default is deduplicate_items=ON starting from PostgreSQL 13 (EE12),
+# but let us test it explicitly and with large number of
+# duplicates.
+#
+# Skipped on all versions without deduplicate_items
+#
+sub test_btree_dedup_output
+{
+	SKIP:
+	{
+		skip "btree does not have deduplicate_items in this Postgres version", 6
+		  if $node->safe_psql('postgres', 'show server_version_num') < 130000;
+
+		my $query = qq(
+			create table t1_dedup(a int);
+			create index i1_dedup on t1_dedup(a) with (deduplicate_items=ON);
+			insert into t1_dedup select s FROM generate_series(1, 100000) s;
+			insert into t1_dedup select 2 FROM generate_series(1, 100000) s;
+			insert into t1_dedup select s / 50 FROM generate_series(1, 100000) s;
+			checkpoint;
+		);
+		$node->safe_psql('postgres', $query);
+
+		my $out_ = run_pg_filedump('i1_dedup', ('-i'));
+
+		ok($out_ =~ qr/Header/, "Header found");
+		ok($out_ =~ qr/BTree Index Section/, "BTree Index Section found");
+		ok($out_ =~ qr/BTree Meta Data/, "BTree Meta Data found");
+		ok($out_ =~ qr/Item 3/, "Item found");
+		ok($out_ =~ qr/Block 511/, "Block found");
+	}
+}
+
+sub test_spgist_output
+{
+	$node->safe_psql('postgres', "create index i2 on t1 using spgist(b); checkpoint;");
+
+	my $out_ = run_pg_filedump('i2');
+
+	ok($out_ =~ qr/Header/, "Header found");
+	ok($out_ =~ qr/SPGIST Index Section/, "SPGIST Index Section found");
+	ok($out_ =~ qr/Item 4/, "Item found");
+}
+
+sub test_gin_output
+{
+	SKIP:
+	{
+		my $available = $node->safe_psql('postgres',
+			"select count(*) from pg_available_extensions where name = 'btree_gin'");
+
+		skip "failed to create btree_gin extension: install btree_gin for gin tests", 4
+		  unless $available;
+
+		my $query = qq(
+			create extension btree_gin;
+			create index i3 on t1 using gin(b);
+			checkpoint;
+		);
+		$node->safe_psql('postgres', $query);
+
+		my $out_ = run_pg_filedump('i3');
+
+		ok($out_ =~ qr/Header/, "Header found");
+		ok($out_ =~ qr/GIN Index Section/, "GIN Index Section found");
+		ok($out_ =~ qr/ItemPointer 3/, "Item found");
+	}
+}