Diffstat (limited to 'src')
-rw-r--r--  src/backend/access/heap/README.tuplock  32
-rw-r--r--  src/backend/access/heap/heapam.c  51
-rw-r--r--  src/backend/access/heap/heapam_xlog.c  60
-rw-r--r--  src/backend/access/index/genam.c  9
-rw-r--r--  src/backend/access/nbtree/nbtsearch.c  3
-rw-r--r--  src/backend/access/transam/multixact.c  5
-rw-r--r--  src/backend/access/transam/xlog.c  6
-rw-r--r--  src/backend/access/transam/xlogrecovery.c  10
-rw-r--r--  src/backend/replication/logical/decode.c  15
-rw-r--r--  src/backend/storage/ipc/dsm_registry.c  8
-rw-r--r--  src/backend/utils/adt/bytea.c  238
-rw-r--r--  src/backend/utils/adt/like_support.c  169
-rw-r--r--  src/backend/utils/adt/pg_locale.c  15
-rw-r--r--  src/backend/utils/adt/pg_locale_builtin.c  8
-rw-r--r--  src/backend/utils/adt/pg_locale_icu.c  8
-rw-r--r--  src/backend/utils/adt/pg_locale_libc.c  14
-rw-r--r--  src/backend/utils/adt/varlena.c  43
-rw-r--r--  src/backend/utils/cache/inval.c  10
-rw-r--r--  src/bin/pg_upgrade/multixact_read_v18.c  44
-rw-r--r--  src/bin/pg_upgrade/t/007_multixact_conversion.pl  25
-rw-r--r--  src/include/mb/pg_wchar.h  2
-rw-r--r--  src/include/storage/dsm_registry.h  4
-rw-r--r--  src/include/utils/inval.h  3
-rw-r--r--  src/include/utils/pg_locale.h  4
-rw-r--r--  src/interfaces/libpq-oauth/oauth-curl.c  120
-rw-r--r--  src/interfaces/libpq/fe-auth-oauth.c  31
-rw-r--r--  src/interfaces/libpq/nls.mk  3
-rw-r--r--  src/test/modules/injection_points/injection_points.c  6
-rw-r--r--  src/test/modules/test_dsa/test_dsa.c  6
-rw-r--r--  src/test/modules/test_dsm_registry/test_dsm_registry.c  7
-rw-r--r--  src/test/recovery/meson.build  1
-rw-r--r--  src/test/recovery/t/050_redo_segment_missing.pl  117
-rw-r--r--  src/tools/pgindent/typedefs.list  1
33 files changed, 754 insertions, 324 deletions
diff --git a/src/backend/access/heap/README.tuplock b/src/backend/access/heap/README.tuplock
index 843c2e58f92..16f7d78b7d2 100644
--- a/src/backend/access/heap/README.tuplock
+++ b/src/backend/access/heap/README.tuplock
@@ -199,3 +199,35 @@ under a reader holding a pin. A reader of a heap_fetch() result tuple may
witness a torn read. Current inplace-updated fields are aligned and are no
wider than four bytes, and current readers don't need consistency across
fields. Hence, they get by with just fetching each field once.
+
+During logical decoding, caches reflect an inplace update no later than the
+next XLOG_XACT_INVALIDATIONS. That record witnesses the end of a command.
+Tuples of its cmin are then visible to decoding, as are inplace updates of any
+lower LSN. Inplace updates of a higher LSN may also be visible, even if those
+updates would have been invisible to a non-historic snapshot matching
+decoding's historic snapshot. (In other words, decoding may see inplace
+updates that were not visible to a similar snapshot taken during original
+transaction processing.) That's a consequence of inplace update violating
+MVCC: there are no snapshot-specific versions of inplace-updated values. This
+all makes it hard to reason about inplace-updated column reads during logical
+decoding, but the behavior does suffice for relhasindex. A relhasindex=t in
+CREATE INDEX becomes visible no later than the new pg_index row. While it may
+be visible earlier, that's harmless. Finding zero indexes despite
+relhasindex=t is normal in more cases than this, e.g. after DROP INDEX.
+Example of a case that meaningfully reacts to the inplace inval:
+
+CREATE TABLE cat (c int) WITH (user_catalog_table = true);
+CREATE TABLE normal (d int);
+...
+CREATE INDEX ON cat (c)\; INSERT INTO normal VALUES (1);
+
+If the output plugin reads "cat" during decoding of the INSERT, it's fair to
+want that read to see relhasindex=t and use the new index.
+
+An alternative would be to have decoding of XLOG_HEAP_INPLACE immediately
+execute its invals. That would behave more like invals during original
+transaction processing. It would remove the decoding-specific delay in e.g. a
+decoding plugin witnessing a relfrozenxid change. However, a good use case
+for that is unlikely, since the plugin would still witness relfrozenxid
+changes prematurely. Hence, inplace update takes the trivial approach of
+delegating to XLOG_XACT_INVALIDATIONS.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 225f9829f22..6daf4a87dec 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6116,7 +6116,7 @@ heap_finish_speculative(Relation relation, const ItemPointerData *tid)
Buffer buffer;
Page page;
OffsetNumber offnum;
- ItemId lp = NULL;
+ ItemId lp;
HeapTupleHeader htup;
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
@@ -6124,10 +6124,10 @@ heap_finish_speculative(Relation relation, const ItemPointerData *tid)
page = BufferGetPage(buffer);
offnum = ItemPointerGetOffsetNumber(tid);
- if (PageGetMaxOffsetNumber(page) >= offnum)
- lp = PageGetItemId(page, offnum);
-
- if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
+ elog(ERROR, "offnum out of range");
+ lp = PageGetItemId(page, offnum);
+ if (!ItemIdIsNormal(lp))
elog(ERROR, "invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -6349,10 +6349,13 @@ heap_abort_speculative(Relation relation, const ItemPointerData *tid)
* Since this is intended for system catalogs and SERIALIZABLE doesn't cover
* DDL, this doesn't guarantee any particular predicate locking.
*
- * One could modify this to return true for tuples with delete in progress,
- * All inplace updaters take a lock that conflicts with DROP. If explicit
- * "DELETE FROM pg_class" is in progress, we'll wait for it like we would an
- * update.
+ * heap_delete() is a rarer source of blocking transactions (xwait). We'll
+ * wait for such a transaction just like for the normal heap_update() case.
+ * Normal concurrent DROP commands won't cause that, because all inplace
+ * updaters take some lock that conflicts with DROP. An explicit SQL "DELETE
+ * FROM pg_class" can cause it. By waiting, if the concurrent transaction
+ * executed both "DELETE FROM pg_class" and "INSERT INTO pg_class", our caller
+ * can find the successor tuple.
*
* Readers of inplace-updated fields expect changes to those fields are
* durable. For example, vac_truncate_clog() reads datfrozenxid from
@@ -6393,15 +6396,17 @@ heap_inplace_lock(Relation relation,
Assert(BufferIsValid(buffer));
/*
- * Construct shared cache inval if necessary. Because we pass a tuple
- * version without our own inplace changes or inplace changes other
- * sessions complete while we wait for locks, inplace update mustn't
- * change catcache lookup keys. But we aren't bothering with index
- * updates either, so that's true a fortiori. After LockBuffer(), it
- * would be too late, because this might reach a
- * CatalogCacheInitializeCache() that locks "buffer".
+ * Register shared cache invals if necessary. Other sessions may finish
+ * inplace updates of this tuple between this step and LockTuple(). Since
+ * inplace updates don't change cache keys, that's harmless.
+ *
+ * While it's tempting to register invals only after confirming we can
+ * return true, the following obstacle precludes reordering steps that
+ * way. Registering invals might reach a CatalogCacheInitializeCache()
+ * that locks "buffer". That would hang indefinitely if running after our
+ * own LockBuffer(). Hence, we must register invals before LockBuffer().
*/
- CacheInvalidateHeapTupleInplace(relation, oldtup_ptr, NULL);
+ CacheInvalidateHeapTupleInplace(relation, oldtup_ptr);
LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -6639,10 +6644,6 @@ heap_inplace_update_and_unlock(Relation relation,
/*
* Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
* do this before UnlockTuple().
- *
- * If we're mutating a tuple visible only to this transaction, there's an
- * equivalent transactional inval from the action that created the tuple,
- * and this inval is superfluous.
*/
AtInplace_Inval();
@@ -6653,10 +6654,10 @@ heap_inplace_update_and_unlock(Relation relation,
AcceptInvalidationMessages(); /* local processing of just-sent inval */
/*
- * Queue a transactional inval. The immediate invalidation we just sent
- * is the only one known to be necessary. To reduce risk from the
- * transition to immediate invalidation, continue sending a transactional
- * invalidation like we've long done. Third-party code might rely on it.
+ * Queue a transactional inval, for logical decoding and for third-party
+ * code that might have been relying on it since long before inplace
+ * update adopted immediate invalidation. See README.tuplock section
+ * "Reading inplace-updated columns" for logical decoding details.
*/
if (!IsBootstrapProcessingMode())
CacheInvalidateHeapTuple(relation, tuple, NULL);
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index a09fb4b803a..1823feff298 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -422,7 +422,7 @@ heap_xlog_delete(XLogReaderState *record)
xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
Buffer buffer;
Page page;
- ItemId lp = NULL;
+ ItemId lp;
HeapTupleHeader htup;
BlockNumber blkno;
RelFileLocator target_locator;
@@ -451,10 +451,10 @@ heap_xlog_delete(XLogReaderState *record)
{
page = BufferGetPage(buffer);
- if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
- lp = PageGetItemId(page, xlrec->offnum);
-
- if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
+ if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
+ elog(PANIC, "offnum out of range");
+ lp = PageGetItemId(page, xlrec->offnum);
+ if (!ItemIdIsNormal(lp))
elog(PANIC, "invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -817,7 +817,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
nbuffer;
Page page;
OffsetNumber offnum;
- ItemId lp = NULL;
+ ItemId lp;
HeapTupleData oldtup;
HeapTupleHeader htup;
uint16 prefixlen = 0,
@@ -881,10 +881,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
{
page = BufferGetPage(obuffer);
offnum = xlrec->old_offnum;
- if (PageGetMaxOffsetNumber(page) >= offnum)
- lp = PageGetItemId(page, offnum);
-
- if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
+ elog(PANIC, "offnum out of range");
+ lp = PageGetItemId(page, offnum);
+ if (!ItemIdIsNormal(lp))
elog(PANIC, "invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -1087,7 +1087,7 @@ heap_xlog_confirm(XLogReaderState *record)
Buffer buffer;
Page page;
OffsetNumber offnum;
- ItemId lp = NULL;
+ ItemId lp;
HeapTupleHeader htup;
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
@@ -1095,10 +1095,10 @@ heap_xlog_confirm(XLogReaderState *record)
page = BufferGetPage(buffer);
offnum = xlrec->offnum;
- if (PageGetMaxOffsetNumber(page) >= offnum)
- lp = PageGetItemId(page, offnum);
-
- if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
+ elog(PANIC, "offnum out of range");
+ lp = PageGetItemId(page, offnum);
+ if (!ItemIdIsNormal(lp))
elog(PANIC, "invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -1126,7 +1126,7 @@ heap_xlog_lock(XLogReaderState *record)
Buffer buffer;
Page page;
OffsetNumber offnum;
- ItemId lp = NULL;
+ ItemId lp;
HeapTupleHeader htup;
/*
@@ -1155,10 +1155,10 @@ heap_xlog_lock(XLogReaderState *record)
page = BufferGetPage(buffer);
offnum = xlrec->offnum;
- if (PageGetMaxOffsetNumber(page) >= offnum)
- lp = PageGetItemId(page, offnum);
-
- if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
+ elog(PANIC, "offnum out of range");
+ lp = PageGetItemId(page, offnum);
+ if (!ItemIdIsNormal(lp))
elog(PANIC, "invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -1200,7 +1200,7 @@ heap_xlog_lock_updated(XLogReaderState *record)
Buffer buffer;
Page page;
OffsetNumber offnum;
- ItemId lp = NULL;
+ ItemId lp;
HeapTupleHeader htup;
xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
@@ -1231,10 +1231,10 @@ heap_xlog_lock_updated(XLogReaderState *record)
page = BufferGetPage(buffer);
offnum = xlrec->offnum;
- if (PageGetMaxOffsetNumber(page) >= offnum)
- lp = PageGetItemId(page, offnum);
-
- if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
+ elog(PANIC, "offnum out of range");
+ lp = PageGetItemId(page, offnum);
+ if (!ItemIdIsNormal(lp))
elog(PANIC, "invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -1263,7 +1263,7 @@ heap_xlog_inplace(XLogReaderState *record)
Buffer buffer;
Page page;
OffsetNumber offnum;
- ItemId lp = NULL;
+ ItemId lp;
HeapTupleHeader htup;
uint32 oldlen;
Size newlen;
@@ -1275,10 +1275,10 @@ heap_xlog_inplace(XLogReaderState *record)
page = BufferGetPage(buffer);
offnum = xlrec->offnum;
- if (PageGetMaxOffsetNumber(page) >= offnum)
- lp = PageGetItemId(page, offnum);
-
- if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
+ elog(PANIC, "offnum out of range");
+ lp = PageGetItemId(page, offnum);
+ if (!ItemIdIsNormal(lp))
elog(PANIC, "invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 707c25289cd..b7f10a1aed0 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -781,10 +781,11 @@ systable_endscan_ordered(SysScanDesc sysscan)
* systable_inplace_update_begin --- update a row "in place" (overwrite it)
*
* Overwriting violates both MVCC and transactional safety, so the uses of
- * this function in Postgres are extremely limited. Nonetheless we find some
- * places to use it. See README.tuplock section "Locking to write
- * inplace-updated tables" and later sections for expectations of readers and
- * writers of a table that gets inplace updates. Standard flow:
+ * this function in Postgres are extremely limited. This makes no effort to
+ * support updating cache key columns or other indexed columns. Nonetheless
+ * we find some places to use it. See README.tuplock section "Locking to
+ * write inplace-updated tables" and later sections for expectations of
+ * readers and writers of a table that gets inplace updates. Standard flow:
*
* ... [any slow preparation not requiring oldtup] ...
* systable_inplace_update_begin([...], &tup, &inplace_state);
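For context, a hedged sketch of how a caller typically drives that flow, modeled on existing callers such as vac_update_relstats(); the relid/hasindex variables, the relhasindex mutation, and the error text are illustrative only, not part of this patch:

    Relation	relrel;
    ScanKeyData	key;
    HeapTuple	tup;
    void	   *inplace_state;

    relrel = table_open(RelationRelationId, RowExclusiveLock);
    ScanKeyInit(&key, Anum_pg_class_oid, BTEqualStrategyNumber,
                F_OIDEQ, ObjectIdGetDatum(relid));

    /* ... [any slow preparation not requiring oldtup] ... */
    systable_inplace_update_begin(relrel, ClassOidIndexId, true,
                                  NULL, 1, &key, &tup, &inplace_state);
    if (!HeapTupleIsValid(tup))
        elog(ERROR, "could not find tuple for relation %u", relid);

    /* buffer is exclusive-locked here; mutate the copy in "tup" */
    if (((Form_pg_class) GETSTRUCT(tup))->relhasindex != hasindex)
    {
        ((Form_pg_class) GETSTRUCT(tup))->relhasindex = hasindex;
        systable_inplace_update_finish(inplace_state, tup); /* WAL + invals */
    }
    else
        systable_inplace_update_cancel(inplace_state);      /* no change */

    heap_freetuple(tup);
    table_close(relrel, RowExclusiveLock);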
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index aec71093661..7a416d60cea 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -2148,6 +2148,9 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
else
offnum = P_FIRSTDATAKEY(opaque);
+ if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
+ elog(PANIC, "offnum out of range");
+
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
blkno = BTreeTupleGetDownLink(itup);
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index f4fab7edfee..34956a5a663 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1262,6 +1262,11 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("MultiXact %u has invalid next offset", multi)));
+ if (nextMXOffset == offset)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
+ multi, offset)));
if (nextMXOffset < offset)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6a5640df51a..430a38b1a21 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7001,6 +7001,10 @@ CreateCheckPoint(int flags)
*/
SyncPreCheckpoint();
+ /* Run these points outside the critical section. */
+ INJECTION_POINT("create-checkpoint-initial", NULL);
+ INJECTION_POINT_LOAD("create-checkpoint-run");
+
/*
* Use a critical section to force system panic if we have trouble.
*/
@@ -7151,6 +7155,8 @@ CreateCheckPoint(int flags)
if (log_checkpoints)
LogCheckpointStart(flags, false);
+ INJECTION_POINT_CACHED("create-checkpoint-run", NULL);
+
/* Update the process title */
update_checkpoint_display(flags, false, false);
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index ae2398d6975..38b594d2170 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -805,6 +805,16 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
}
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
+
+ /* Make sure that REDO location exists. */
+ if (checkPoint.redo < CheckPointLoc)
+ {
+ XLogPrefetcherBeginRead(xlogprefetcher, checkPoint.redo);
+ if (!ReadRecord(xlogprefetcher, LOG, false, checkPoint.ThisTimeLineID))
+ ereport(FATAL,
+ errmsg("could not find redo location %X/%08X referenced by checkpoint record at %X/%08X",
+ LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)));
+ }
}
if (ArchiveRecoveryRequested)
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index cc03f0706e9..5e15cb1825e 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -521,18 +521,9 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
/*
* Inplace updates are only ever performed on catalog tuples and
- * can, per definition, not change tuple visibility. Inplace
- * updates don't affect storage or interpretation of table rows,
- * so they don't affect logicalrep_write_tuple() outcomes. Hence,
- * we don't process invalidations from the original operation. If
- * inplace updates did affect those things, invalidations wouldn't
- * make it work, since there are no snapshot-specific versions of
- * inplace-updated values. Since we also don't decode catalog
- * tuples, we're not interested in the record's contents.
- *
- * WAL contains likely-unnecessary commit-time invals from the
- * CacheInvalidateHeapTuple() call in
- * heap_inplace_update_and_unlock(). Excess invalidation is safe.
+ * can, per definition, not change tuple visibility. Since we
+ * also don't decode catalog tuples, we're not interested in the
+ * record's contents.
*/
break;
diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c
index 66240318e83..072f9399969 100644
--- a/src/backend/storage/ipc/dsm_registry.c
+++ b/src/backend/storage/ipc/dsm_registry.c
@@ -180,11 +180,13 @@ init_dsm_registry(void)
* Initialize or attach a named DSM segment.
*
* This routine returns the address of the segment. init_callback is called to
- * initialize the segment when it is first created.
+ * initialize the segment when it is first created. 'arg' is passed through to
+ * the initialization callback function.
*/
void *
GetNamedDSMSegment(const char *name, size_t size,
- void (*init_callback) (void *ptr), bool *found)
+ void (*init_callback) (void *ptr, void *arg),
+ bool *found, void *arg)
{
DSMRegistryEntry *entry;
MemoryContext oldcontext;
@@ -235,7 +237,7 @@ GetNamedDSMSegment(const char *name, size_t size,
seg = dsm_create(size, 0);
if (init_callback)
- (*init_callback) (dsm_segment_address(seg));
+ (*init_callback) (dsm_segment_address(seg), arg);
dsm_pin_segment(seg);
dsm_pin_mapping(seg);
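A hedged sketch of a caller adapting to the widened callback signature; the MyModuleState struct, the my_module_* names, and the segment name are hypothetical, and 'arg' is consulted only when the segment is first created:

    /* Hypothetical module state; names are illustrative only. */
    typedef struct MyModuleState
    {
        int			counter;
    } MyModuleState;

    static void
    my_module_init(void *ptr, void *arg)
    {
        MyModuleState *state = (MyModuleState *) ptr;

        /* runs only at first creation; seed the segment from 'arg' */
        state->counter = *(int *) arg;
    }

    static MyModuleState *
    my_module_attach(int start)
    {
        bool		found;	/* reports whether the segment already existed */

        return (MyModuleState *) GetNamedDSMSegment("my_module",
                                                    sizeof(MyModuleState),
                                                    my_module_init,
                                                    &found, &start);
    }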
diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c
index 6e7b914c563..f8524548e46 100644
--- a/src/backend/utils/adt/bytea.c
+++ b/src/backend/utils/adt/bytea.c
@@ -15,18 +15,19 @@
#include "postgres.h"
#include "access/detoast.h"
-#include "catalog/pg_collation_d.h"
-#include "catalog/pg_type_d.h"
+#include "common/hashfn.h"
#include "common/int.h"
#include "fmgr.h"
+#include "lib/hyperloglog.h"
#include "libpq/pqformat.h"
#include "port/pg_bitutils.h"
+#include "port/pg_bswap.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/fmgrprotos.h"
+#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/sortsupport.h"
-#include "utils/varlena.h"
#include "varatt.h"
/* GUC variable */
@@ -37,6 +38,19 @@ static bytea *bytea_substring(Datum str, int S, int L,
bool length_not_specified);
static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
+typedef struct
+{
+ bool abbreviate; /* Should we abbreviate keys? */
+ hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
+ hyperLogLogState full_card; /* Full key cardinality state */
+ double prop_card; /* Required cardinality proportion */
+} ByteaSortSupport;
+
+/* Static function declarations for sort support */
+static int byteafastcmp(Datum x, Datum y, SortSupport ssup);
+static Datum bytea_abbrev_convert(Datum original, SortSupport ssup);
+static bool bytea_abbrev_abort(int memtupcount, SortSupport ssup);
+
/*
* bytea_catenate
* Guts of byteacat(), broken out so it can be used by other functions
@@ -1001,6 +1015,201 @@ bytea_smaller(PG_FUNCTION_ARGS)
PG_RETURN_BYTEA_P(result);
}
+/*
+ * sortsupport comparison func
+ */
+static int
+byteafastcmp(Datum x, Datum y, SortSupport ssup)
+{
+ bytea *arg1 = DatumGetByteaPP(x);
+ bytea *arg2 = DatumGetByteaPP(y);
+ char *a1p,
+ *a2p;
+ int len1,
+ len2,
+ result;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ result = memcmp(a1p, a2p, Min(len1, len2));
+ if ((result == 0) && (len1 != len2))
+ result = (len1 < len2) ? -1 : 1;
+
+ /* We can't afford to leak memory here. */
+ if (PointerGetDatum(arg1) != x)
+ pfree(arg1);
+ if (PointerGetDatum(arg2) != y)
+ pfree(arg2);
+
+ return result;
+}
+
+/*
+ * Conversion routine for sortsupport. Converts original to abbreviated key
+ * representation. Our encoding strategy is simple -- pack the first 8 bytes
+ * of the bytea data into a Datum (on little-endian machines, the bytes are
+ * stored in reverse order), and treat it as an unsigned integer.
+ */
+static Datum
+bytea_abbrev_convert(Datum original, SortSupport ssup)
+{
+ const size_t max_prefix_bytes = sizeof(Datum);
+ ByteaSortSupport *bss = (ByteaSortSupport *) ssup->ssup_extra;
+ bytea *authoritative = DatumGetByteaPP(original);
+ char *authoritative_data = VARDATA_ANY(authoritative);
+ Datum res;
+ char *pres;
+ int len;
+ uint32 hash;
+
+ pres = (char *) &res;
+
+ /* memset(), so any non-overwritten bytes are NUL */
+ memset(pres, 0, max_prefix_bytes);
+ len = VARSIZE_ANY_EXHDR(authoritative);
+
+ /*
+ * Short byteas will have terminating NUL bytes in the abbreviated datum.
+ * Abbreviated comparison need not make a distinction between these NUL
+ * bytes, and NUL bytes representing actual NULs in the authoritative
+ * representation.
+ *
+ * Hopefully a comparison at or past one abbreviated key's terminating NUL
+ * byte will resolve the comparison without consulting the authoritative
+ * representation; specifically, some later non-NUL byte in the longer
+ * bytea can resolve the comparison against a subsequent terminating NUL
+ * in the shorter bytea. There will usually be what is effectively a
+ * "length-wise" resolution there and then.
+ *
+ * If that doesn't work out -- if all bytes in the longer bytea positioned
+ * at or past the offset of the smaller bytea (first) terminating NUL are
+ * actually representative of NUL bytes in the authoritative binary bytea
+ * (perhaps with some *terminating* NUL bytes towards the end of the
+ * longer bytea iff it happens to still be small) -- then an authoritative
+ * tie-breaker will happen, and do the right thing: explicitly consider
+ * bytea length.
+ */
+ memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
+
+ /*
+ * Maintain approximate cardinality of both abbreviated keys and original,
+ * authoritative keys using HyperLogLog. Used as cheap insurance against
+ * the worst case, where we do many string abbreviations for no saving in
+ * full memcmp()-based comparisons. These statistics are used by
+ * bytea_abbrev_abort().
+ *
+ * First, Hash key proper, or a significant fraction of it. Mix in length
+ * in order to compensate for cases where differences are past
+ * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
+ */
+ hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
+ Min(len, PG_CACHE_LINE_SIZE)));
+
+ if (len > PG_CACHE_LINE_SIZE)
+ hash ^= DatumGetUInt32(hash_uint32((uint32) len));
+
+ addHyperLogLog(&bss->full_card, hash);
+
+ /* Hash abbreviated key */
+ {
+ uint32 tmp;
+
+ tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
+ hash = DatumGetUInt32(hash_uint32(tmp));
+ }
+
+ addHyperLogLog(&bss->abbr_card, hash);
+
+ /*
+ * Byteswap on little-endian machines.
+ *
+ * This is needed so that ssup_datum_unsigned_cmp() works correctly on all
+ * platforms.
+ */
+ res = DatumBigEndianToNative(res);
+
+ /* Don't leak memory here */
+ if (PointerGetDatum(authoritative) != original)
+ pfree(authoritative);
+
+ return res;
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization, using
+ * heuristic rules. Returns value indicating if the abbreviation optimization
+ * should be aborted, based on its projected effectiveness.
+ *
+ * This is based on varstr_abbrev_abort(), but some comments have been elided
+ * for brevity. See there for more details.
+ */
+static bool
+bytea_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+ ByteaSortSupport *bss = (ByteaSortSupport *) ssup->ssup_extra;
+ double abbrev_distinct,
+ key_distinct;
+
+ Assert(ssup->abbreviate);
+
+ /* Have a little patience */
+ if (memtupcount < 100)
+ return false;
+
+ abbrev_distinct = estimateHyperLogLog(&bss->abbr_card);
+ key_distinct = estimateHyperLogLog(&bss->full_card);
+
+ /*
+ * Clamp cardinality estimates to at least one distinct value. While
+ * NULLs are generally disregarded, if only NULL values were seen so far,
+ * that might misrepresent costs if we failed to clamp.
+ */
+ if (abbrev_distinct < 1.0)
+ abbrev_distinct = 1.0;
+
+ if (key_distinct < 1.0)
+ key_distinct = 1.0;
+
+ if (trace_sort)
+ {
+ double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
+
+ elog(LOG, "bytea_abbrev: abbrev_distinct after %d: %f "
+ "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
+ memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
+ bss->prop_card);
+ }
+
+ /*
+ * If the number of distinct abbreviated keys approximately matches the
+ * number of distinct original keys, continue with abbreviation.
+ */
+ if (abbrev_distinct > key_distinct * bss->prop_card)
+ {
+ /*
+ * Decay required cardinality aggressively after 10,000 tuples.
+ */
+ if (memtupcount > 10000)
+ bss->prop_card *= 0.65;
+
+ return false;
+ }
+
+ /*
+ * Abort abbreviation strategy.
+ */
+ if (trace_sort)
+ elog(LOG, "bytea_abbrev: aborted abbreviation at %d "
+ "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
+ memtupcount, abbrev_distinct, key_distinct, bss->prop_card);
+
+ return true;
+}
+
Datum
bytea_sortsupport(PG_FUNCTION_ARGS)
{
@@ -1009,8 +1218,27 @@ bytea_sortsupport(PG_FUNCTION_ARGS)
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
- /* Use generic string SortSupport, forcing "C" collation */
- varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
+ ssup->comparator = byteafastcmp;
+
+ /*
+ * Set up abbreviation support if requested.
+ */
+ if (ssup->abbreviate)
+ {
+ ByteaSortSupport *bss;
+
+ bss = palloc_object(ByteaSortSupport);
+ bss->abbreviate = true;
+ bss->prop_card = 0.20;
+ initHyperLogLog(&bss->abbr_card, 10);
+ initHyperLogLog(&bss->full_card, 10);
+
+ ssup->ssup_extra = bss;
+ ssup->abbrev_full_comparator = ssup->comparator;
+ ssup->comparator = ssup_datum_unsigned_cmp;
+ ssup->abbrev_converter = bytea_abbrev_convert;
+ ssup->abbrev_abort = bytea_abbrev_abort;
+ }
MemoryContextSwitchTo(oldcontext);
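Why the DatumBigEndianToNative() step in bytea_abbrev_convert() matters can be seen in a standalone sketch (not PostgreSQL code; assumes a little-endian GCC/Clang build, with __builtin_bswap64() standing in for DatumBigEndianToNative()):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        /* memcmp() orders these as a > b (first byte 0x02 > 0x01) */
        const unsigned char a[] = {0x02, 0x01};
        const unsigned char b[] = {0x01, 0x03};
        uint64_t	ka = 0,
                    kb = 0;

        /* pack prefixes; unused trailing bytes stay NUL, as in the patch */
        memcpy(&ka, a, sizeof(a));
        memcpy(&kb, b, sizeof(b));

        /*
         * On little-endian hardware the raw integers compare by the wrong
         * byte significance (ka == 0x0102 < kb == 0x0301 here).  Swapping to
         * big-endian restores memcmp() order; DatumBigEndianToNative() does
         * the same and is a no-op on big-endian builds.
         */
        ka = __builtin_bswap64(ka);
        kb = __builtin_bswap64(kb);

        printf("%s\n", ka > kb ? "a sorts after b, matching memcmp()" : "mismatch");
        return 0;
    }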
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index dca1d9be035..b1b0192aa46 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -99,8 +99,6 @@ static Selectivity like_selectivity(const char *patt, int pattlen,
static Selectivity regex_selectivity(const char *patt, int pattlen,
bool case_insensitive,
int fixed_prefix_len);
-static int pattern_char_isalpha(char c, bool is_multibyte,
- pg_locale_t locale);
static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
Oid collation);
static Datum string_to_datum(const char *str, Oid datatype);
@@ -986,8 +984,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
*/
static Pattern_Prefix_Status
-like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
- Const **prefix_const, Selectivity *rest_selec)
+like_fixed_prefix(Const *patt_const, Const **prefix_const,
+ Selectivity *rest_selec)
{
char *match;
char *patt;
@@ -995,34 +993,10 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Oid typeid = patt_const->consttype;
int pos,
match_pos;
- bool is_multibyte = (pg_database_encoding_max_length() > 1);
- pg_locale_t locale = 0;
/* the right-hand const is type text or bytea */
Assert(typeid == BYTEAOID || typeid == TEXTOID);
- if (case_insensitive)
- {
- if (typeid == BYTEAOID)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("case insensitive matching not supported on type bytea")));
-
- if (!OidIsValid(collation))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for ILIKE"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
-
- locale = pg_newlocale_from_collation(collation);
- }
-
if (typeid != BYTEAOID)
{
patt = TextDatumGetCString(patt_const->constvalue);
@@ -1055,11 +1029,6 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
break;
}
- /* Stop if case-varying character (it's sort of a wildcard) */
- if (case_insensitive &&
- pattern_char_isalpha(patt[pos], is_multibyte, locale))
- break;
-
match[match_pos++] = patt[pos];
}
@@ -1071,8 +1040,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
*prefix_const = string_to_bytea_const(match, match_pos);
if (rest_selec != NULL)
- *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
- case_insensitive);
+ *rest_selec = like_selectivity(&patt[pos], pattlen - pos, false);
pfree(patt);
pfree(match);
@@ -1087,6 +1055,112 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
return Pattern_Prefix_None;
}
+/*
+ * Case-insensitive variant of like_fixed_prefix(). Multibyte and
+ * locale-aware for detecting cased characters.
+ */
+static Pattern_Prefix_Status
+like_fixed_prefix_ci(Const *patt_const, Oid collation, Const **prefix_const,
+ Selectivity *rest_selec)
+{
+ text *val = DatumGetTextPP(patt_const->constvalue);
+ Oid typeid = patt_const->consttype;
+ int nbytes = VARSIZE_ANY_EXHDR(val);
+ int wpos;
+ pg_wchar *wpatt;
+ int wpattlen;
+ pg_wchar *wmatch;
+ int wmatch_pos = 0;
+ char *match;
+ int match_mblen;
+ pg_locale_t locale = 0;
+
+ /* the right-hand const is type text or bytea */
+ Assert(typeid == BYTEAOID || typeid == TEXTOID);
+
+ if (typeid == BYTEAOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("case insensitive matching not supported on type bytea")));
+
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for ILIKE"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+
+ locale = pg_newlocale_from_collation(collation);
+
+ wpatt = palloc((nbytes + 1) * sizeof(pg_wchar));
+ wpattlen = pg_mb2wchar_with_len(VARDATA_ANY(val), wpatt, nbytes);
+
+ wmatch = palloc((nbytes + 1) * sizeof(pg_wchar));
+ for (wpos = 0; wpos < wpattlen; wpos++)
+ {
+ /* % and _ are wildcard characters in LIKE */
+ if (wpatt[wpos] == '%' ||
+ wpatt[wpos] == '_')
+ break;
+
+ /* Backslash escapes the next character */
+ if (wpatt[wpos] == '\\')
+ {
+ wpos++;
+ if (wpos >= wpattlen)
+ break;
+ }
+
+ /*
+ * For ILIKE, stop if it's a case-varying character (it's sort of a
+ * wildcard).
+ */
+ if (pg_iswcased(wpatt[wpos], locale))
+ break;
+
+ wmatch[wmatch_pos++] = wpatt[wpos];
+ }
+
+ wmatch[wmatch_pos] = '\0';
+
+ match = palloc(pg_database_encoding_max_length() * wmatch_pos + 1);
+ match_mblen = pg_wchar2mb_with_len(wmatch, match, wmatch_pos);
+ match[match_mblen] = '\0';
+ pfree(wmatch);
+
+ *prefix_const = string_to_const(match, TEXTOID);
+ pfree(match);
+
+ if (rest_selec != NULL)
+ {
+ int wrestlen = wpattlen - wmatch_pos;
+ char *rest;
+ int rest_mblen;
+
+ rest = palloc(pg_database_encoding_max_length() * wrestlen + 1);
+ rest_mblen = pg_wchar2mb_with_len(&wpatt[wmatch_pos], rest, wrestlen);
+
+ *rest_selec = like_selectivity(rest, rest_mblen, true);
+ pfree(rest);
+ }
+
+ pfree(wpatt);
+
+ /* in LIKE, an empty pattern is an exact match! */
+ if (wpos == wpattlen)
+ return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
+
+ if (wmatch_pos > 0)
+ return Pattern_Prefix_Partial;
+
+ return Pattern_Prefix_None;
+}
+
static Pattern_Prefix_Status
regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Const **prefix_const, Selectivity *rest_selec)
@@ -1164,12 +1238,11 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
switch (ptype)
{
case Pattern_Type_Like:
- result = like_fixed_prefix(patt, false, collation,
- prefix, rest_selec);
+ result = like_fixed_prefix(patt, prefix, rest_selec);
break;
case Pattern_Type_Like_IC:
- result = like_fixed_prefix(patt, true, collation,
- prefix, rest_selec);
+ result = like_fixed_prefix_ci(patt, collation, prefix,
+ rest_selec);
break;
case Pattern_Type_Regex:
result = regex_fixed_prefix(patt, false, collation,
@@ -1481,24 +1554,6 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
return sel;
}
-/*
- * Check whether char is a letter (and, hence, subject to case-folding)
- *
- * In multibyte character sets or with ICU, we can't use isalpha, and it does
- * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
- * Instead, just assume any non-ASCII char is potentially case-varying, and
- * hard-wire knowledge of which ASCII chars are letters.
- */
-static int
-pattern_char_isalpha(char c, bool is_multibyte,
- pg_locale_t locale)
-{
- if (locale->ctype_is_c)
- return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
- else
- return char_is_cased(c, locale);
-}
-
/*
* For bytea, the increment function need only increment the current byte
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 70933ee3843..8a3796aa5d0 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1626,21 +1626,6 @@ pg_towlower(pg_wchar wc, pg_locale_t locale)
}
/*
- * char_is_cased()
- *
- * Fuzzy test of whether the given char is case-varying or not. The argument
- * is a single byte, so in a multibyte encoding, just assume any non-ASCII
- * char is case-varying.
- */
-bool
-char_is_cased(char ch, pg_locale_t locale)
-{
- if (locale->ctype == NULL)
- return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
- return locale->ctype->char_is_cased(ch, locale);
-}
-
-/*
* Return required encoding ID for the given locale, or -1 if any encoding is
* valid for the locale.
*/
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 0d4c754a267..0c2920112bb 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -191,13 +191,6 @@ wc_iscased_builtin(pg_wchar wc, pg_locale_t locale)
return pg_u_prop_cased(to_char32(wc));
}
-static bool
-char_is_cased_builtin(char ch, pg_locale_t locale)
-{
- return IS_HIGHBIT_SET(ch) ||
- (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
-}
-
static pg_wchar
wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
{
@@ -225,7 +218,6 @@ static const struct ctype_methods ctype_methods_builtin = {
.wc_ispunct = wc_ispunct_builtin,
.wc_isspace = wc_isspace_builtin,
.wc_isxdigit = wc_isxdigit_builtin,
- .char_is_cased = char_is_cased_builtin,
.wc_iscased = wc_iscased_builtin,
.wc_tolower = wc_tolower_builtin,
.wc_toupper = wc_toupper_builtin,
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index e8820666b2d..18d026deda8 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -121,13 +121,6 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
const char *locale,
UErrorCode *pErrorCode);
-static bool
-char_is_cased_icu(char ch, pg_locale_t locale)
-{
- return IS_HIGHBIT_SET(ch) ||
- (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
-}
-
/*
* XXX: many of the functions below rely on casts directly from pg_wchar to
* UChar32, which is correct for the UTF-8 encoding, but not in general.
@@ -244,7 +237,6 @@ static const struct ctype_methods ctype_methods_icu = {
.wc_ispunct = wc_ispunct_icu,
.wc_isspace = wc_isspace_icu,
.wc_isxdigit = wc_isxdigit_icu,
- .char_is_cased = char_is_cased_icu,
.wc_iscased = wc_iscased_icu,
.wc_toupper = toupper_icu,
.wc_tolower = tolower_icu,
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 3d841f818a5..3baa5816b5f 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -262,17 +262,6 @@ wc_iscased_libc_mb(pg_wchar wc, pg_locale_t locale)
iswlower_l((wint_t) wc, locale->lt);
}
-static bool
-char_is_cased_libc(char ch, pg_locale_t locale)
-{
- bool is_multibyte = pg_database_encoding_max_length() > 1;
-
- if (is_multibyte && IS_HIGHBIT_SET(ch))
- return true;
- else
- return isalpha_l((unsigned char) ch, locale->lt);
-}
-
static pg_wchar
toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
{
@@ -345,7 +334,6 @@ static const struct ctype_methods ctype_methods_libc_sb = {
.wc_ispunct = wc_ispunct_libc_sb,
.wc_isspace = wc_isspace_libc_sb,
.wc_isxdigit = wc_isxdigit_libc_sb,
- .char_is_cased = char_is_cased_libc,
.wc_iscased = wc_iscased_libc_sb,
.wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb,
@@ -371,7 +359,6 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
.wc_ispunct = wc_ispunct_libc_sb,
.wc_isspace = wc_isspace_libc_sb,
.wc_isxdigit = wc_isxdigit_libc_sb,
- .char_is_cased = char_is_cased_libc,
.wc_iscased = wc_iscased_libc_sb,
.wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb,
@@ -393,7 +380,6 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
.wc_ispunct = wc_ispunct_libc_mb,
.wc_isspace = wc_isspace_libc_mb,
.wc_isxdigit = wc_isxdigit_libc_mb,
- .char_is_cased = char_is_cased_libc,
.wc_iscased = wc_iscased_libc_mb,
.wc_toupper = toupper_libc_mb,
.wc_tolower = tolower_libc_mb,
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index baa5b44ea8d..8adeb8dadc6 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -92,7 +92,7 @@ typedef struct
int last_returned; /* Last comparison result (cache) */
bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
bool collate_c;
- Oid typid; /* Actual datatype (text/bpchar/bytea/name) */
+ Oid typid; /* Actual datatype (text/bpchar/name) */
hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
hyperLogLogState full_card; /* Full key cardinality state */
double prop_card; /* Required cardinality proportion */
@@ -1617,10 +1617,8 @@ bttextsortsupport(PG_FUNCTION_ARGS)
* Includes locale support, and support for BpChar semantics (i.e. removing
* trailing spaces before comparison).
*
- * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
- * same representation. Callers that always use the C collation (e.g.
- * non-collatable type callers like bytea) may have NUL bytes in their strings;
- * this will not work with any other collation, though.
+ * Relies on the assumption that text, VarChar, and BpChar all have the
+ * same representation.
*/
void
varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
@@ -1983,7 +1981,7 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
* representation. Our encoding strategy is simple -- pack the first 8 bytes
* of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
* stored in reverse order), and treat it as an unsigned integer. When the "C"
- * locale is used, or in case of bytea, just memcpy() from original instead.
+ * locale is used, just memcpy() from original instead.
*/
static Datum
varstr_abbrev_convert(Datum original, SortSupport ssup)
@@ -2010,30 +2008,8 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
/*
* If we're using the C collation, use memcpy(), rather than strxfrm(), to
- * abbreviate keys. The full comparator for the C locale is always
- * memcmp(). It would be incorrect to allow bytea callers (callers that
- * always force the C collation -- bytea isn't a collatable type, but this
- * approach is convenient) to use strxfrm(). This is because bytea
- * strings may contain NUL bytes. Besides, this should be faster, too.
- *
- * More generally, it's okay that bytea callers can have NUL bytes in
- * strings because abbreviated cmp need not make a distinction between
- * terminating NUL bytes, and NUL bytes representing actual NULs in the
- * authoritative representation. Hopefully a comparison at or past one
- * abbreviated key's terminating NUL byte will resolve the comparison
- * without consulting the authoritative representation; specifically, some
- * later non-NUL byte in the longer string can resolve the comparison
- * against a subsequent terminating NUL in the shorter string. There will
- * usually be what is effectively a "length-wise" resolution there and
- * then.
- *
- * If that doesn't work out -- if all bytes in the longer string
- * positioned at or past the offset of the smaller string's (first)
- * terminating NUL are actually representative of NUL bytes in the
- * authoritative binary string (perhaps with some *terminating* NUL bytes
- * towards the end of the longer string iff it happens to still be small)
- * -- then an authoritative tie-breaker will happen, and do the right
- * thing: explicitly consider string length.
+ * abbreviate keys. The full comparator for the C locale is also
+ * memcmp(). This should be faster than strxfrm().
*/
if (sss->collate_c)
memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
@@ -2115,9 +2091,6 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
* strxfrm() blob is itself NUL terminated, leaving no danger of
* misinterpreting any NUL bytes not intended to be interpreted as
* logically representing termination.
- *
- * (Actually, even if there were NUL bytes in the blob it would be
- * okay. See remarks on bytea case above.)
*/
memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize));
}
@@ -2198,10 +2171,10 @@ varstr_abbrev_abort(int memtupcount, SortSupport ssup)
* NULLs are generally disregarded, if only NULL values were seen so far,
* that might misrepresent costs if we failed to clamp.
*/
- if (abbrev_distinct <= 1.0)
+ if (abbrev_distinct < 1.0)
abbrev_distinct = 1.0;
- if (key_distinct <= 1.0)
+ if (key_distinct < 1.0)
key_distinct = 1.0;
/*
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 9d16ca10ae1..8f7a56d0f2c 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -1583,13 +1583,17 @@ CacheInvalidateHeapTuple(Relation relation,
* implied.
*
* Like CacheInvalidateHeapTuple(), but for inplace updates.
+ *
+ * Just before and just after the inplace update, the tuple's cache keys must
+ * match those in key_equivalent_tuple. Cache keys consist of catcache lookup
+ * key columns and columns referencing pg_class.oid values,
+ * e.g. pg_constraint.conrelid, which would trigger relcache inval.
*/
void
CacheInvalidateHeapTupleInplace(Relation relation,
- HeapTuple tuple,
- HeapTuple newtuple)
+ HeapTuple key_equivalent_tuple)
{
- CacheInvalidateHeapTupleCommon(relation, tuple, newtuple,
+ CacheInvalidateHeapTupleCommon(relation, key_equivalent_tuple, NULL,
PrepareInplaceInvalidationState);
}
diff --git a/src/bin/pg_upgrade/multixact_read_v18.c b/src/bin/pg_upgrade/multixact_read_v18.c
index f7ed97f702b..dc4ed1b3ae6 100644
--- a/src/bin/pg_upgrade/multixact_read_v18.c
+++ b/src/bin/pg_upgrade/multixact_read_v18.c
@@ -146,11 +146,14 @@ AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
* - Because there's no concurrent activity, we don't need to worry about
* locking and some corner cases.
*
- * - Don't bail out on invalid entries. If the server crashes, it can leave
- * invalid or half-written entries on disk. Such multixids won't appear
- * anywhere else on disk, so the server will never try to read them. During
- * upgrade, however, we scan through all multixids in order, and will
- * encounter such invalid but unreferenced multixids too.
+ * - Don't bail out on invalid entries that could've been left behind after a
+ * server crash. Such multixids won't appear anywhere else on disk, so the
+ * server will never try to read them. During upgrade, however, we scan
+ * through all multixids in order, and will encounter such invalid but
+ * unreferenced multixids too. We try to distinguish between entries that
+ * are invalid because of missed disk writes, like entries with zeros in
+ * offsets or members, and entries that look corrupt in other ways that
+ * should not happen even on a server crash.
*
* Returns true on success, false if the multixact was invalid.
*/
@@ -211,7 +214,7 @@ GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
if (offset == 0)
{
- /* Invalid entry */
+ /* Invalid entry. These can be left behind on a server crash. */
return false;
}
@@ -247,11 +250,29 @@ GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
if (nextMXOffset == 0)
{
- /* Invalid entry */
+ /* Invalid entry. These can be left behind on a server crash. */
return false;
}
length = nextMXOffset - offset;
+ if (length < 0)
+ {
+ /*
+ * This entry is corrupt. We should not see these even after a server
+ * crash.
+ */
+ pg_fatal("multixact %u has an invalid length (%d)", multi, length);
+ }
+ if (length == 0)
+ {
+ /*
+ * Invalid entry. The server never writes multixids with zero
+ * members, but it's not clear if a server crash or using pg_resetwal
+ * could leave them behind. Seems best to accept them.
+ */
+ return false;
+ }
+
/* read the members */
prev_pageno = -1;
for (int i = 0; i < length; i++, offset++)
@@ -284,10 +305,11 @@ GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
/*
* Otherwise this is an invalid entry that should not be
- * referenced from anywhere in the heap. We could return 'false'
- * here, but we prefer to continue reading the members and
- * converting them the best we can, to preserve evidence in case
- * this is corruption that should not happen.
+ * referenced from anywhere in the heap. These can be left behind
+ * on a server crash. We could return 'false' here, but we prefer
+ * to continue reading the members and converting them the best we
+ * can, to preserve evidence in case this is corruption that
+ * should not have happened.
*/
}
diff --git a/src/bin/pg_upgrade/t/007_multixact_conversion.pl b/src/bin/pg_upgrade/t/007_multixact_conversion.pl
index 443b93c7545..14db40e45ed 100644
--- a/src/bin/pg_upgrade/t/007_multixact_conversion.pl
+++ b/src/bin/pg_upgrade/t/007_multixact_conversion.pl
@@ -26,8 +26,9 @@ my $tempdir = PostgreSQL::Test::Utils::tempdir;
# upgrading them. The workload is a mix of KEY SHARE locking queries
# and UPDATEs, and commits and aborts, to generate a mix of multixids
# with different statuses. It consumes around 3000 multixids with
-# 30000 members. That's enough to span more than one multixids
-# 'offsets' page, and more than one 'members' segment.
+# 60000 members in total. That's enough to span more than one
+# multixids 'offsets' page, and more than one 'members' segment with
+# the default block size.
#
# The workload leaves behind a table called 'mxofftest' containing a
# small number of rows referencing some of the generated multixids.
@@ -68,6 +69,12 @@ sub mxact_workload
# verbose by setting this.
my $verbose = 0;
+ # Bump the timeout on the connections to avoid false negatives on
+ # slow test systems. The timeout covers the whole duration that
+ # the connections are open rather than the individual queries.
+ my $connection_timeout_secs =
+ 4 * $PostgreSQL::Test::Utils::timeout_default;
+
# Open multiple connections to the database. Start a transaction
# in each connection.
for (0 .. $nclients)
@@ -75,8 +82,10 @@ sub mxact_workload
# Use the psql binary from the new installation. The
# BackgroundPsql functionality doesn't work with older psql
# versions.
- my $conn = $binnode->background_psql('',
- connstr => $node->connstr('postgres'));
+ my $conn = $binnode->background_psql(
+ '',
+ connstr => $node->connstr('postgres'),
+ timeout => $connection_timeout_secs);
$conn->query_safe("SET log_statement=none", verbose => $verbose)
unless $verbose;
@@ -88,12 +97,14 @@ sub mxact_workload
# Run queries using cycling through the connections in a
# round-robin fashion. We keep a transaction open in each
- # connection at all times, and lock/update the rows. With 10
+ # connection at all times, and lock/update the rows. With 20
# connections, each SELECT FOR KEY SHARE query generates a new
- # multixid, containing the 10 XIDs of all the transactions running
- # at the time.
+ # multixid, containing the XIDs of all the transactions running at
+ # the time, ie. around 20 XIDs.
for (my $i = 0; $i < 3000; $i++)
{
+ note "generating multixids $i / 3000\n" if ($i % 100 == 0);
+
my $conn = $connections[ $i % $nclients ];
my $sql = ($i % $abort_every == 0) ? "ABORT" : "COMMIT";
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 4d84bdc81e4..5df67ceea87 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -676,8 +676,6 @@ extern int pg_valid_server_encoding(const char *name);
extern bool is_encoding_supported_by_icu(int encoding);
extern const char *get_encoding_name_for_icu(int encoding);
-extern unsigned char *unicode_to_utf8(char32_t c, unsigned char *utf8string);
-extern char32_t utf8_to_unicode(const unsigned char *c);
extern bool pg_utf8_islegal(const unsigned char *source, int length);
extern int pg_utf_mblen(const unsigned char *s);
extern int pg_mule_mblen(const unsigned char *s);
diff --git a/src/include/storage/dsm_registry.h b/src/include/storage/dsm_registry.h
index 4871ed509eb..c7c6827afec 100644
--- a/src/include/storage/dsm_registry.h
+++ b/src/include/storage/dsm_registry.h
@@ -16,8 +16,8 @@
#include "lib/dshash.h"
extern void *GetNamedDSMSegment(const char *name, size_t size,
- void (*init_callback) (void *ptr),
- bool *found);
+ void (*init_callback) (void *ptr, void *arg),
+ bool *found, void *arg);
extern dsa_area *GetNamedDSA(const char *name, bool *found);
extern dshash_table *GetNamedDSHash(const char *name,
const dshash_parameters *params,
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index af466252578..345733dd038 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -61,8 +61,7 @@ extern void CacheInvalidateHeapTuple(Relation relation,
HeapTuple tuple,
HeapTuple newtuple);
extern void CacheInvalidateHeapTupleInplace(Relation relation,
- HeapTuple tuple,
- HeapTuple newtuple);
+ HeapTuple key_equivalent_tuple);
extern void CacheInvalidateCatalog(Oid catalogId);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 832007385d8..a533463d5b7 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -125,9 +125,6 @@ struct ctype_methods
bool (*wc_iscased) (pg_wchar wc, pg_locale_t locale);
pg_wchar (*wc_toupper) (pg_wchar wc, pg_locale_t locale);
pg_wchar (*wc_tolower) (pg_wchar wc, pg_locale_t locale);
-
- /* required */
- bool (*char_is_cased) (char ch, pg_locale_t locale);
};
/*
@@ -178,7 +175,6 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
-extern bool char_is_cased(char ch, pg_locale_t locale);
extern size_t pg_strlower(char *dst, size_t dstsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
diff --git a/src/interfaces/libpq-oauth/oauth-curl.c b/src/interfaces/libpq-oauth/oauth-curl.c
index bd0a656a166..92a7f1cd383 100644
--- a/src/interfaces/libpq-oauth/oauth-curl.c
+++ b/src/interfaces/libpq-oauth/oauth-curl.c
@@ -247,7 +247,8 @@ struct async_ctx
* our entry point, errors have three parts:
*
* - errctx: an optional static string, describing the global operation
- * currently in progress. It'll be translated for you.
+ * currently in progress. Should be translated with
+ * libpq_gettext().
*
* - errbuf: contains the actual error message. Generally speaking, use
* actx_error[_str] to manipulate this. This must be filled
@@ -367,13 +368,16 @@ pg_fe_cleanup_oauth_flow(PGconn *conn)
/*
* Macros for manipulating actx->errbuf. actx_error() translates and formats a
- * string for you; actx_error_str() appends a string directly without
- * translation.
+ * string for you, actx_error_internal() is the untranslated equivalent, and
+ * actx_error_str() appends a string directly (also without translation).
*/
#define actx_error(ACTX, FMT, ...) \
appendPQExpBuffer(&(ACTX)->errbuf, libpq_gettext(FMT), ##__VA_ARGS__)
+#define actx_error_internal(ACTX, FMT, ...) \
+ appendPQExpBuffer(&(ACTX)->errbuf, FMT, ##__VA_ARGS__)
+
#define actx_error_str(ACTX, S) \
appendPQExpBufferStr(&(ACTX)->errbuf, S)
@@ -461,6 +465,9 @@ struct oauth_parse
#define oauth_parse_set_error(ctx, fmt, ...) \
appendPQExpBuffer((ctx)->errbuf, libpq_gettext(fmt), ##__VA_ARGS__)
+#define oauth_parse_set_error_internal(ctx, fmt, ...) \
+ appendPQExpBuffer((ctx)->errbuf, fmt, ##__VA_ARGS__)
+
static void
report_type_mismatch(struct oauth_parse *ctx)
{
@@ -475,20 +482,20 @@ report_type_mismatch(struct oauth_parse *ctx)
switch (ctx->active->type)
{
case JSON_TOKEN_STRING:
- msgfmt = "field \"%s\" must be a string";
+ msgfmt = gettext_noop("field \"%s\" must be a string");
break;
case JSON_TOKEN_NUMBER:
- msgfmt = "field \"%s\" must be a number";
+ msgfmt = gettext_noop("field \"%s\" must be a number");
break;
case JSON_TOKEN_ARRAY_START:
- msgfmt = "field \"%s\" must be an array of strings";
+ msgfmt = gettext_noop("field \"%s\" must be an array of strings");
break;
default:
Assert(false);
- msgfmt = "field \"%s\" has unexpected type";
+ msgfmt = gettext_noop("field \"%s\" has unexpected type");
}
oauth_parse_set_error(ctx, msgfmt, ctx->active->name);
@@ -536,9 +543,9 @@ oauth_json_object_field_start(void *state, char *name, bool isnull)
if (ctx->active)
{
Assert(false);
- oauth_parse_set_error(ctx,
- "internal error: started field '%s' before field '%s' was finished",
- name, ctx->active->name);
+ oauth_parse_set_error_internal(ctx,
+ "internal error: started field \"%s\" before field \"%s\" was finished",
+ name, ctx->active->name);
return JSON_SEM_ACTION_FAILED;
}
@@ -588,9 +595,9 @@ oauth_json_object_end(void *state)
if (!ctx->nested && ctx->active)
{
Assert(false);
- oauth_parse_set_error(ctx,
- "internal error: field '%s' still active at end of object",
- ctx->active->name);
+ oauth_parse_set_error_internal(ctx,
+ "internal error: field \"%s\" still active at end of object",
+ ctx->active->name);
return JSON_SEM_ACTION_FAILED;
}
@@ -644,9 +651,9 @@ oauth_json_array_end(void *state)
if (ctx->nested != 2 || ctx->active->type != JSON_TOKEN_ARRAY_START)
{
Assert(false);
- oauth_parse_set_error(ctx,
- "internal error: found unexpected array end while parsing field '%s'",
- ctx->active->name);
+ oauth_parse_set_error_internal(ctx,
+ "internal error: found unexpected array end while parsing field \"%s\"",
+ ctx->active->name);
return JSON_SEM_ACTION_FAILED;
}
@@ -699,9 +706,9 @@ oauth_json_scalar(void *state, char *token, JsonTokenType type)
if (ctx->nested != 1)
{
Assert(false);
- oauth_parse_set_error(ctx,
- "internal error: scalar target found at nesting level %d",
- ctx->nested);
+ oauth_parse_set_error_internal(ctx,
+ "internal error: scalar target found at nesting level %d",
+ ctx->nested);
return JSON_SEM_ACTION_FAILED;
}
@@ -709,9 +716,9 @@ oauth_json_scalar(void *state, char *token, JsonTokenType type)
if (*field->scalar)
{
Assert(false);
- oauth_parse_set_error(ctx,
- "internal error: scalar field '%s' would be assigned twice",
- ctx->active->name);
+ oauth_parse_set_error_internal(ctx,
+ "internal error: scalar field \"%s\" would be assigned twice",
+ ctx->active->name);
return JSON_SEM_ACTION_FAILED;
}
@@ -731,9 +738,9 @@ oauth_json_scalar(void *state, char *token, JsonTokenType type)
if (ctx->nested != 2)
{
Assert(false);
- oauth_parse_set_error(ctx,
- "internal error: array member found at nesting level %d",
- ctx->nested);
+ oauth_parse_set_error_internal(ctx,
+ "internal error: array member found at nesting level %d",
+ ctx->nested);
return JSON_SEM_ACTION_FAILED;
}
@@ -1087,7 +1094,7 @@ parse_token_error(struct async_ctx *actx, struct token_error *err)
* override the errctx if parsing explicitly fails.
*/
if (!result)
- actx->errctx = "failed to parse token error response";
+ actx->errctx = libpq_gettext("failed to parse token error response");
return result;
}
@@ -1115,8 +1122,8 @@ record_token_error(struct async_ctx *actx, const struct token_error *err)
if (response_code == 401)
{
actx_error(actx, actx->used_basic_auth
- ? "provider rejected the oauth_client_secret"
- : "provider requires client authentication, and no oauth_client_secret is set");
+ ? gettext_noop("provider rejected the oauth_client_secret")
+ : gettext_noop("provider requires client authentication, and no oauth_client_secret is set"));
actx_error_str(actx, " ");
}
}
@@ -1179,20 +1186,20 @@ setup_multiplexer(struct async_ctx *actx)
actx->mux = epoll_create1(EPOLL_CLOEXEC);
if (actx->mux < 0)
{
- actx_error(actx, "failed to create epoll set: %m");
+ actx_error_internal(actx, "failed to create epoll set: %m");
return false;
}
actx->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
if (actx->timerfd < 0)
{
- actx_error(actx, "failed to create timerfd: %m");
+ actx_error_internal(actx, "failed to create timerfd: %m");
return false;
}
if (epoll_ctl(actx->mux, EPOLL_CTL_ADD, actx->timerfd, &ev) < 0)
{
- actx_error(actx, "failed to add timerfd to epoll set: %m");
+ actx_error_internal(actx, "failed to add timerfd to epoll set: %m");
return false;
}
@@ -1201,8 +1208,7 @@ setup_multiplexer(struct async_ctx *actx)
actx->mux = kqueue();
if (actx->mux < 0)
{
- /*- translator: the term "kqueue" (kernel queue) should not be translated */
- actx_error(actx, "failed to create kqueue: %m");
+ actx_error_internal(actx, "failed to create kqueue: %m");
return false;
}
@@ -1215,7 +1221,7 @@ setup_multiplexer(struct async_ctx *actx)
actx->timerfd = kqueue();
if (actx->timerfd < 0)
{
- actx_error(actx, "failed to create timer kqueue: %m");
+ actx_error_internal(actx, "failed to create timer kqueue: %m");
return false;
}
@@ -1259,7 +1265,7 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx,
break;
default:
- actx_error(actx, "unknown libcurl socket operation: %d", what);
+ actx_error_internal(actx, "unknown libcurl socket operation: %d", what);
return -1;
}
@@ -1276,15 +1282,15 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx,
switch (op)
{
case EPOLL_CTL_ADD:
- actx_error(actx, "could not add to epoll set: %m");
+ actx_error_internal(actx, "could not add to epoll set: %m");
break;
case EPOLL_CTL_DEL:
- actx_error(actx, "could not delete from epoll set: %m");
+ actx_error_internal(actx, "could not delete from epoll set: %m");
break;
default:
- actx_error(actx, "could not update epoll set: %m");
+ actx_error_internal(actx, "could not update epoll set: %m");
}
return -1;
@@ -1334,7 +1340,7 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx,
break;
default:
- actx_error(actx, "unknown libcurl socket operation: %d", what);
+ actx_error_internal(actx, "unknown libcurl socket operation: %d", what);
return -1;
}
@@ -1344,7 +1350,7 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx,
res = kevent(actx->mux, ev, nev, ev_out, nev, &timeout);
if (res < 0)
{
- actx_error(actx, "could not modify kqueue: %m");
+ actx_error_internal(actx, "could not modify kqueue: %m");
return -1;
}
@@ -1368,10 +1374,10 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx,
switch (what)
{
case CURL_POLL_REMOVE:
- actx_error(actx, "could not delete from kqueue: %m");
+ actx_error_internal(actx, "could not delete from kqueue: %m");
break;
default:
- actx_error(actx, "could not add to kqueue: %m");
+ actx_error_internal(actx, "could not add to kqueue: %m");
}
return -1;
}
@@ -1420,7 +1426,7 @@ comb_multiplexer(struct async_ctx *actx)
*/
if (kevent(actx->mux, NULL, 0, &ev, 1, &timeout) < 0)
{
- actx_error(actx, "could not comb kqueue: %m");
+ actx_error_internal(actx, "could not comb kqueue: %m");
return false;
}
@@ -1470,7 +1476,7 @@ set_timer(struct async_ctx *actx, long timeout)
if (timerfd_settime(actx->timerfd, 0 /* no flags */ , &spec, NULL) < 0)
{
- actx_error(actx, "setting timerfd to %ld: %m", timeout);
+ actx_error_internal(actx, "setting timerfd to %ld: %m", timeout);
return false;
}
@@ -1500,14 +1506,14 @@ set_timer(struct async_ctx *actx, long timeout)
EV_SET(&ev, 1, EVFILT_TIMER, EV_DELETE, 0, 0, 0);
if (kevent(actx->timerfd, &ev, 1, NULL, 0, NULL) < 0 && errno != ENOENT)
{
- actx_error(actx, "deleting kqueue timer: %m");
+ actx_error_internal(actx, "deleting kqueue timer: %m");
return false;
}
EV_SET(&ev, actx->timerfd, EVFILT_READ, EV_DELETE, 0, 0, 0);
if (kevent(actx->mux, &ev, 1, NULL, 0, NULL) < 0 && errno != ENOENT)
{
- actx_error(actx, "removing kqueue timer from multiplexer: %m");
+ actx_error_internal(actx, "removing kqueue timer from multiplexer: %m");
return false;
}
@@ -1518,14 +1524,14 @@ set_timer(struct async_ctx *actx, long timeout)
EV_SET(&ev, 1, EVFILT_TIMER, (EV_ADD | EV_ONESHOT), 0, timeout, 0);
if (kevent(actx->timerfd, &ev, 1, NULL, 0, NULL) < 0)
{
- actx_error(actx, "setting kqueue timer to %ld: %m", timeout);
+ actx_error_internal(actx, "setting kqueue timer to %ld: %m", timeout);
return false;
}
EV_SET(&ev, actx->timerfd, EVFILT_READ, EV_ADD, 0, 0, 0);
if (kevent(actx->mux, &ev, 1, NULL, 0, NULL) < 0)
{
- actx_error(actx, "adding kqueue timer to multiplexer: %m");
+ actx_error_internal(actx, "adding kqueue timer to multiplexer: %m");
return false;
}
@@ -2153,7 +2159,7 @@ finish_discovery(struct async_ctx *actx)
/*
* Pull the fields we care about from the document.
*/
- actx->errctx = "failed to parse OpenID discovery document";
+ actx->errctx = libpq_gettext("failed to parse OpenID discovery document");
if (!parse_provider(actx, &actx->provider))
return false; /* error message already set */
@@ -2421,7 +2427,7 @@ finish_device_authz(struct async_ctx *actx)
*/
if (response_code == 200)
{
- actx->errctx = "failed to parse device authorization";
+ actx->errctx = libpq_gettext("failed to parse device authorization");
if (!parse_device_authz(actx, &actx->authz))
return false; /* error message already set */
@@ -2509,7 +2515,7 @@ finish_token_request(struct async_ctx *actx, struct token *tok)
*/
if (response_code == 200)
{
- actx->errctx = "failed to parse access token response";
+ actx->errctx = libpq_gettext("failed to parse access token response");
if (!parse_access_token(actx, tok))
return false; /* error message already set */
@@ -2888,7 +2894,7 @@ pg_fe_run_oauth_flow_impl(PGconn *conn)
switch (actx->step)
{
case OAUTH_STEP_INIT:
- actx->errctx = "failed to fetch OpenID discovery document";
+ actx->errctx = libpq_gettext("failed to fetch OpenID discovery document");
if (!start_discovery(actx, conn_oauth_discovery_uri(conn)))
goto error_return;
@@ -2902,11 +2908,11 @@ pg_fe_run_oauth_flow_impl(PGconn *conn)
if (!check_issuer(actx, conn))
goto error_return;
- actx->errctx = "cannot run OAuth device authorization";
+ actx->errctx = libpq_gettext("cannot run OAuth device authorization");
if (!check_for_device_flow(actx))
goto error_return;
- actx->errctx = "failed to obtain device authorization";
+ actx->errctx = libpq_gettext("failed to obtain device authorization");
if (!start_device_authz(actx, conn))
goto error_return;
@@ -2917,7 +2923,7 @@ pg_fe_run_oauth_flow_impl(PGconn *conn)
if (!finish_device_authz(actx))
goto error_return;
- actx->errctx = "failed to obtain access token";
+ actx->errctx = libpq_gettext("failed to obtain access token");
if (!start_token_request(actx, conn))
goto error_return;
@@ -2968,7 +2974,7 @@ pg_fe_run_oauth_flow_impl(PGconn *conn)
break;
case OAUTH_STEP_WAIT_INTERVAL:
- actx->errctx = "failed to obtain access token";
+ actx->errctx = libpq_gettext("failed to obtain access token");
if (!start_token_request(actx, conn))
goto error_return;
@@ -2994,7 +3000,7 @@ error_return:
* also the documentation for struct async_ctx.
*/
if (actx->errctx)
- appendPQExpBuffer(errbuf, "%s: ", libpq_gettext(actx->errctx));
+ appendPQExpBuffer(errbuf, "%s: ", actx->errctx);
if (PQExpBufferDataBroken(actx->errbuf))
appendPQExpBufferStr(errbuf, libpq_gettext("out of memory"));
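
To make the new translation conventions concrete, here is a minimal, self-contained C sketch of the pattern rather than the actual libpq-oauth code; the demo_* names are hypothetical stand-ins for struct async_ctx, actx_error(), actx_error_internal(), libpq_gettext(), and gettext_noop(). User-facing messages go through the catalog, table-selected strings are marked with a no-op and translated at the call site (as in report_type_mismatch()), and internal "can't happen" or OS-level failures stay untranslated.

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

/*
 * Stand-ins for libpq_gettext() and gettext_noop(); a real libpq build
 * would look the msgid up in the libpq message catalog instead.
 */
static const char *
demo_gettext(const char *msgid)
{
    return msgid;
}

#define demo_gettext_noop(x) (x)

/* Simplified stand-in for struct async_ctx and its errbuf. */
struct demo_ctx
{
    char        errbuf[256];
};

static void
demo_append(struct demo_ctx *ctx, const char *fmt, ...)
{
    size_t      used = strlen(ctx->errbuf);
    va_list     args;

    va_start(args, fmt);
    vsnprintf(ctx->errbuf + used, sizeof(ctx->errbuf) - used, fmt, args);
    va_end(args);
}

/* Translated at the call site, like actx_error(). */
#define demo_error(CTX, FMT, ...) \
    demo_append((CTX), demo_gettext(FMT), ##__VA_ARGS__)

/* Untranslated, like actx_error_internal(): "can't happen" internal
 * errors and OS-level failures are appended as-is. */
#define demo_error_internal(CTX, FMT, ...) \
    demo_append((CTX), FMT, ##__VA_ARGS__)

int
main(void)
{
    struct demo_ctx ctx = {{0}};

    /* Marked for extraction now, translated when used, like the msgfmt
     * strings in report_type_mismatch(). */
    const char *msgfmt = demo_gettext_noop("field \"%s\" must be a string");

    demo_error(&ctx, msgfmt, "issuer");
    demo_error_internal(&ctx, " (nesting level %d)", 2);
    puts(ctx.errbuf);
    return 0;
}

The errctx change above follows the same split: errctx is now stored already translated, so the final appendPQExpBuffer() in error_return no longer wraps it in libpq_gettext().
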
diff --git a/src/interfaces/libpq/fe-auth-oauth.c b/src/interfaces/libpq/fe-auth-oauth.c
index d146c5f567c..fb63a3249d6 100644
--- a/src/interfaces/libpq/fe-auth-oauth.c
+++ b/src/interfaces/libpq/fe-auth-oauth.c
@@ -183,7 +183,14 @@ struct json_ctx
#define oauth_json_has_error(ctx) \
(PQExpBufferDataBroken((ctx)->errbuf) || (ctx)->errmsg)
-#define oauth_json_set_error(ctx, ...) \
+#define oauth_json_set_error(ctx, fmt, ...) \
+ do { \
+ appendPQExpBuffer(&(ctx)->errbuf, libpq_gettext(fmt), ##__VA_ARGS__); \
+ (ctx)->errmsg = (ctx)->errbuf.data; \
+ } while (0)
+
+/* An untranslated version of oauth_json_set_error(). */
+#define oauth_json_set_error_internal(ctx, ...) \
do { \
appendPQExpBuffer(&(ctx)->errbuf, __VA_ARGS__); \
(ctx)->errmsg = (ctx)->errbuf.data; \
@@ -199,13 +206,13 @@ oauth_json_object_start(void *state)
Assert(ctx->nested == 1);
oauth_json_set_error(ctx,
- libpq_gettext("field \"%s\" must be a string"),
+ "field \"%s\" must be a string",
ctx->target_field_name);
}
++ctx->nested;
if (ctx->nested > MAX_SASL_NESTING_LEVEL)
- oauth_json_set_error(ctx, libpq_gettext("JSON is too deeply nested"));
+ oauth_json_set_error(ctx, "JSON is too deeply nested");
return oauth_json_has_error(ctx) ? JSON_SEM_ACTION_FAILED : JSON_SUCCESS;
}
@@ -254,20 +261,20 @@ oauth_json_array_start(void *state)
if (!ctx->nested)
{
- ctx->errmsg = libpq_gettext("top-level element must be an object");
+ oauth_json_set_error(ctx, "top-level element must be an object");
}
else if (ctx->target_field)
{
Assert(ctx->nested == 1);
oauth_json_set_error(ctx,
- libpq_gettext("field \"%s\" must be a string"),
+ "field \"%s\" must be a string",
ctx->target_field_name);
}
++ctx->nested;
if (ctx->nested > MAX_SASL_NESTING_LEVEL)
- oauth_json_set_error(ctx, libpq_gettext("JSON is too deeply nested"));
+ oauth_json_set_error(ctx, "JSON is too deeply nested");
return oauth_json_has_error(ctx) ? JSON_SEM_ACTION_FAILED : JSON_SUCCESS;
}
@@ -288,7 +295,7 @@ oauth_json_scalar(void *state, char *token, JsonTokenType type)
if (!ctx->nested)
{
- ctx->errmsg = libpq_gettext("top-level element must be an object");
+ oauth_json_set_error(ctx, "top-level element must be an object");
return JSON_SEM_ACTION_FAILED;
}
@@ -301,9 +308,9 @@ oauth_json_scalar(void *state, char *token, JsonTokenType type)
* Assert and don't continue any further for production builds.
*/
Assert(false);
- oauth_json_set_error(ctx,
- "internal error: target scalar found at nesting level %d during OAUTHBEARER parsing",
- ctx->nested);
+ oauth_json_set_error_internal(ctx,
+ "internal error: target scalar found at nesting level %d during OAUTHBEARER parsing",
+ ctx->nested);
return JSON_SEM_ACTION_FAILED;
}
@@ -314,7 +321,7 @@ oauth_json_scalar(void *state, char *token, JsonTokenType type)
if (*ctx->target_field)
{
oauth_json_set_error(ctx,
- libpq_gettext("field \"%s\" is duplicated"),
+ "field \"%s\" is duplicated",
ctx->target_field_name);
return JSON_SEM_ACTION_FAILED;
}
@@ -323,7 +330,7 @@ oauth_json_scalar(void *state, char *token, JsonTokenType type)
if (type != JSON_TOKEN_STRING)
{
oauth_json_set_error(ctx,
- libpq_gettext("field \"%s\" must be a string"),
+ "field \"%s\" must be a string",
ctx->target_field_name);
return JSON_SEM_ACTION_FAILED;
}
diff --git a/src/interfaces/libpq/nls.mk b/src/interfaces/libpq/nls.mk
index b87df277d93..3fa87a0aaac 100644
--- a/src/interfaces/libpq/nls.mk
+++ b/src/interfaces/libpq/nls.mk
@@ -1,6 +1,7 @@
# src/interfaces/libpq/nls.mk
CATALOG_NAME = libpq
GETTEXT_FILES = fe-auth.c \
+ fe-auth-oauth.c \
fe-auth-scram.c \
fe-cancel.c \
fe-connect.c \
@@ -21,6 +22,7 @@ GETTEXT_TRIGGERS = actx_error:2 \
libpq_append_error:2 \
libpq_gettext \
libpq_ngettext:1,2 \
+ oauth_json_set_error:2 \
oauth_parse_set_error:2 \
pqInternalNotice:2
GETTEXT_FLAGS = actx_error:2:c-format \
@@ -29,5 +31,6 @@ GETTEXT_FLAGS = actx_error:2:c-format \
libpq_gettext:1:pass-c-format \
libpq_ngettext:1:pass-c-format \
libpq_ngettext:2:pass-c-format \
+ oauth_json_set_error:2:c-format \
oauth_parse_set_error:2:c-format \
pqInternalNotice:2:c-format
diff --git a/src/test/modules/injection_points/injection_points.c b/src/test/modules/injection_points/injection_points.c
index 417b61f31c5..25340e8d81b 100644
--- a/src/test/modules/injection_points/injection_points.c
+++ b/src/test/modules/injection_points/injection_points.c
@@ -115,7 +115,7 @@ static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
* when initializing dynamically with a DSM or when loading the module.
*/
static void
-injection_point_init_state(void *ptr)
+injection_point_init_state(void *ptr, void *arg)
{
InjectionPointSharedState *state = (InjectionPointSharedState *) ptr;
@@ -159,7 +159,7 @@ injection_shmem_startup(void)
* First time through, so initialize. This is shared with the dynamic
* initialization using a DSM.
*/
- injection_point_init_state(inj_state);
+ injection_point_init_state(inj_state, NULL);
}
LWLockRelease(AddinShmemInitLock);
@@ -179,7 +179,7 @@ injection_init_shmem(void)
inj_state = GetNamedDSMSegment("injection_points",
sizeof(InjectionPointSharedState),
injection_point_init_state,
- &found);
+ &found, NULL);
}
/*
diff --git a/src/test/modules/test_dsa/test_dsa.c b/src/test/modules/test_dsa/test_dsa.c
index 21e4ce6a745..b9c45c0d41c 100644
--- a/src/test/modules/test_dsa/test_dsa.c
+++ b/src/test/modules/test_dsa/test_dsa.c
@@ -21,7 +21,7 @@
PG_MODULE_MAGIC;
static void
-init_tranche(void *ptr)
+init_tranche(void *ptr, void *arg)
{
int *tranche_id = (int *) ptr;
@@ -39,7 +39,7 @@ test_dsa_basic(PG_FUNCTION_ARGS)
dsa_pointer p[100];
tranche_id = GetNamedDSMSegment("test_dsa", sizeof(int),
- init_tranche, &found);
+ init_tranche, &found, NULL);
a = dsa_create(*tranche_id);
for (int i = 0; i < 100; i++)
@@ -80,7 +80,7 @@ test_dsa_resowners(PG_FUNCTION_ARGS)
ResourceOwner childowner;
tranche_id = GetNamedDSMSegment("test_dsa", sizeof(int),
- init_tranche, &found);
+ init_tranche, &found, NULL);
/* Create DSA in parent resource owner */
a = dsa_create(*tranche_id);
diff --git a/src/test/modules/test_dsm_registry/test_dsm_registry.c b/src/test/modules/test_dsm_registry/test_dsm_registry.c
index 4cc2ccdac3f..3e5f4f3cda1 100644
--- a/src/test/modules/test_dsm_registry/test_dsm_registry.c
+++ b/src/test/modules/test_dsm_registry/test_dsm_registry.c
@@ -44,10 +44,13 @@ static const dshash_parameters dsh_params = {
};
static void
-init_tdr_dsm(void *ptr)
+init_tdr_dsm(void *ptr, void *arg)
{
TestDSMRegistryStruct *dsm = (TestDSMRegistryStruct *) ptr;
+ if ((int) (intptr_t) arg != 5432)
+ elog(ERROR, "unexpected arg value %d", (int) (intptr_t) arg);
+
LWLockInitialize(&dsm->lck, LWLockNewTrancheId("test_dsm_registry"));
dsm->val = 0;
}
@@ -60,7 +63,7 @@ tdr_attach_shmem(void)
tdr_dsm = GetNamedDSMSegment("test_dsm_registry_dsm",
sizeof(TestDSMRegistryStruct),
init_tdr_dsm,
- &found);
+ &found, (void *) (intptr_t) 5432);
if (tdr_dsa == NULL)
tdr_dsa = GetNamedDSA("test_dsm_registry_dsa", &found);
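
The three test-module hunks above all track one DSM registry change: the init callback gains a second void *arg parameter, and GetNamedDSMSegment() forwards a caller-supplied pointer to it. Below is a minimal sketch of how an extension might thread a value through, assuming only the five-argument signature implied by these call sites; my_state, my_shmem_init, my_attach, and "my_module_state" are hypothetical names.

#include "postgres.h"

#include "storage/dsm_registry.h"

/* Hypothetical extension state kept in a named DSM segment. */
typedef struct my_state
{
    int         counter;
} my_state;

/*
 * Init callback: runs only when the segment is first created.  It now
 * receives the arg that the caller handed to GetNamedDSMSegment().
 */
static void
my_shmem_init(void *ptr, void *arg)
{
    my_state   *state = (my_state *) ptr;

    state->counter = (int) (intptr_t) arg;
}

static my_state *
my_attach(void)
{
    bool        found;

    /* The last argument is forwarded to my_shmem_init() on first use. */
    return (my_state *) GetNamedDSMSegment("my_module_state",
                                           sizeof(my_state),
                                           my_shmem_init,
                                           &found,
                                           (void *) (intptr_t) 42);
}

Passing a small scalar via (void *) (intptr_t), as test_dsm_registry does with 5432, avoids a separate allocation just to carry the argument.
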
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 523a5cd5b52..e93248bd66e 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -58,6 +58,7 @@ tests += {
't/047_checkpoint_physical_slot.pl',
't/048_vacuum_horizon_floor.pl',
't/049_wait_for_lsn.pl',
+ 't/050_redo_segment_missing.pl',
],
},
}
diff --git a/src/test/recovery/t/050_redo_segment_missing.pl b/src/test/recovery/t/050_redo_segment_missing.pl
new file mode 100644
index 00000000000..f5eb6c30fe6
--- /dev/null
+++ b/src/test/recovery/t/050_redo_segment_missing.pl
@@ -0,0 +1,117 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+#
+# Evaluates PostgreSQL's recovery behavior when a WAL segment containing the
+# redo record is missing, with a checkpoint record located in a different
+# segment.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+ plan skip_all => 'Injection points not supported by this build';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('testnode');
+$node->init;
+$node->append_conf('postgresql.conf', 'log_checkpoints = on');
+$node->start;
+
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$node->check_extension('injection_points'))
+{
+ plan skip_all => 'Extension injection_points not installed';
+}
+$node->safe_psql('postgres', q(CREATE EXTENSION injection_points));
+
+# Note that this uses two injection points based on waits, not one. This
+# may look strange, but it works as a workaround to force all memory
+# allocations to happen outside the checkpoint's critical section, as
+# required by this test.
+# First, "create-checkpoint-initial" is run outside the critical section
+# and is used as a way to initialize the shared memory required for the
+# wait machinery with its DSM registry.
+# Then, "create-checkpoint-run" is loaded outside the critical section of
+# a checkpoint to allocate any memory required by the library load, and
+# its callback is run inside the critical section.
+$node->safe_psql('postgres',
+ q{select injection_points_attach('create-checkpoint-initial', 'wait')});
+$node->safe_psql('postgres',
+ q{select injection_points_attach('create-checkpoint-run', 'wait')});
+
+# Start a psql session to run the checkpoint in the background and make
+# the test wait on the injection point so the checkpoint stops just after
+# it starts.
+my $checkpoint = $node->background_psql('postgres');
+$checkpoint->query_until(
+ qr/starting_checkpoint/,
+ q(\echo starting_checkpoint
+checkpoint;
+));
+
+# Wait for the initial point to finish; the checkpointer is still
+# outside its critical section. Then release it to reach the second
+# point.
+$node->wait_for_event('checkpointer', 'create-checkpoint-initial');
+$node->safe_psql('postgres',
+ q{select injection_points_wakeup('create-checkpoint-initial')});
+
+# Wait until the checkpoint has reached the second injection point.
+# We are now in the middle of a running checkpoint, after the redo
+# record has been logged.
+$node->wait_for_event('checkpointer', 'create-checkpoint-run');
+
+# Switch the WAL segment, ensuring that the redo record will be included
+# in a different segment than the checkpoint record.
+$node->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Continue the checkpoint and wait for its completion.
+my $log_offset = -s $node->logfile;
+$node->safe_psql('postgres',
+ q{select injection_points_wakeup('create-checkpoint-run')});
+$node->wait_for_log(qr/checkpoint complete/, $log_offset);
+
+$checkpoint->quit;
+
+# Retrieve the WAL file names for the redo record and checkpoint record.
+my $redo_lsn = $node->safe_psql('postgres',
+ "SELECT redo_lsn FROM pg_control_checkpoint()");
+my $redo_walfile_name =
+ $node->safe_psql('postgres', "SELECT pg_walfile_name('$redo_lsn')");
+my $checkpoint_lsn = $node->safe_psql('postgres',
+ "SELECT checkpoint_lsn FROM pg_control_checkpoint()");
+my $checkpoint_walfile_name =
+ $node->safe_psql('postgres', "SELECT pg_walfile_name('$checkpoint_lsn')");
+
+# Redo record and checkpoint record should be on different segments.
+isnt($redo_walfile_name, $checkpoint_walfile_name,
+ 'redo and checkpoint records on different segments');
+
+# Remove the WAL segment containing the redo record.
+unlink $node->data_dir . "/pg_wal/$redo_walfile_name"
+ or die "could not remove WAL file: $!";
+
+$node->stop('immediate');
+
+# Use run_log instead of node->start because this test expects the
+# server to fail with an error during recovery.
+run_log(
+ [
+ 'pg_ctl',
+ '--pgdata' => $node->data_dir,
+ '--log' => $node->logfile,
+ 'start',
+ ]);
+
+# Confirm that recovery has failed, as expected.
+my $logfile = slurp_file($node->logfile());
+ok( $logfile =~
+ qr/FATAL: .* could not find redo location .* referenced by checkpoint record at .*/,
+ "ends with FATAL because it could not find redo location");
+
+done_testing();
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 3451538565e..04845d5e680 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -366,6 +366,7 @@ BulkWriteBuffer
BulkWriteState
BumpBlock
BumpContext
+ByteaSortSupport
CACHESIGN
CAC_state
CCFastEqualFN