diff options
Diffstat (limited to 'src/backend')
| -rw-r--r-- | src/backend/access/heap/vacuumlazy.c | 3 | ||||
| -rw-r--r-- | src/backend/parser/scansup.c | 36 | ||||
| -rw-r--r-- | src/backend/utils/activity/pgstat_relation.c | 11 | ||||
| -rw-r--r-- | src/backend/utils/adt/pg_locale.c | 20 | ||||
| -rw-r--r-- | src/backend/utils/adt/pg_locale_builtin.c | 2 | ||||
| -rw-r--r-- | src/backend/utils/adt/pg_locale_icu.c | 75 | ||||
| -rw-r--r-- | src/backend/utils/adt/pg_locale_libc.c | 33 | ||||
| -rw-r--r-- | src/backend/utils/misc/injection_point.c | 3 |
8 files changed, 147 insertions, 36 deletions
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 62035b7f9c3..30778a15639 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -961,8 +961,7 @@ heap_vacuum_rel(Relation rel, const VacuumParams params, * soon in cases where the failsafe prevented significant amounts of heap * vacuuming. */ - pgstat_report_vacuum(RelationGetRelid(rel), - rel->rd_rel->relisshared, + pgstat_report_vacuum(rel, Max(vacrel->new_live_tuples, 0), vacrel->recently_dead_tuples + vacrel->missed_dead_tuples, diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c index 2feb2b6cf5a..d63cb865260 100644 --- a/src/backend/parser/scansup.c +++ b/src/backend/parser/scansup.c @@ -18,6 +18,7 @@ #include "mb/pg_wchar.h" #include "parser/scansup.h" +#include "utils/pg_locale.h" /* @@ -46,35 +47,22 @@ char * downcase_identifier(const char *ident, int len, bool warn, bool truncate) { char *result; - int i; - bool enc_is_single_byte; - - result = palloc(len + 1); - enc_is_single_byte = pg_database_encoding_max_length() == 1; + size_t needed pg_attribute_unused(); /* - * SQL99 specifies Unicode-aware case normalization, which we don't yet - * have the infrastructure for. Instead we use tolower() to provide a - * locale-aware translation. However, there are some locales where this - * is not right either (eg, Turkish may do strange things with 'i' and - * 'I'). Our current compromise is to use tolower() for characters with - * the high bit set, as long as they aren't part of a multi-byte - * character, and use an ASCII-only downcasing for 7-bit characters. + * Preserves string length. + * + * NB: if we decide to support Unicode-aware identifier case folding, then + * we need to account for a change in string length. 
*/ - for (i = 0; i < len; i++) - { - unsigned char ch = (unsigned char) ident[i]; + result = palloc(len + 1); - if (ch >= 'A' && ch <= 'Z') - ch += 'a' - 'A'; - else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch)) - ch = tolower(ch); - result[i] = (char) ch; - } - result[i] = '\0'; + needed = pg_downcase_ident(result, len + 1, ident, len); + Assert(needed == len); + Assert(result[len] == '\0'); - if (i >= NAMEDATALEN && truncate) - truncate_identifier(result, i, warn); + if (len >= NAMEDATALEN && truncate) + truncate_identifier(result, len, warn); return result; } diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index b90754f8578..55a10c299db 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -207,14 +207,13 @@ pgstat_drop_relation(Relation rel) * Report that the table was just vacuumed and flush IO statistics. */ void -pgstat_report_vacuum(Oid tableoid, bool shared, - PgStat_Counter livetuples, PgStat_Counter deadtuples, - TimestampTz starttime) +pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, + PgStat_Counter deadtuples, TimestampTz starttime) { PgStat_EntryRef *entry_ref; PgStatShared_Relation *shtabentry; PgStat_StatTabEntry *tabentry; - Oid dboid = (shared ? InvalidOid : MyDatabaseId); + Oid dboid = (rel->rd_rel->relisshared ? 
InvalidOid : MyDatabaseId); TimestampTz ts; PgStat_Counter elapsedtime; @@ -226,8 +225,8 @@ pgstat_report_vacuum(Oid tableoid, bool shared, elapsedtime = TimestampDifferenceMilliseconds(starttime, ts); /* block acquiring lock for the same reason as pgstat_report_autovac() */ - entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, - dboid, tableoid, false); + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, dboid, + RelationGetRelid(rel), false); shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats; tabentry = &shtabentry->stats; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 8a3796aa5d0..ee08ac045b7 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1353,6 +1353,26 @@ pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, } /* + * Lowercase an identifier using the database default locale. + * + * For historical reasons, does not use ordinary locale behavior. Should only + * be used for identifiers. XXX: can we make this equivalent to + * pg_strfold(..., default_locale)? + */ +size_t +pg_downcase_ident(char *dst, size_t dstsize, const char *src, ssize_t srclen) +{ + pg_locale_t locale = default_locale; + + if (locale == NULL || locale->ctype == NULL || + locale->ctype->downcase_ident == NULL) + return strlower_c(dst, dstsize, src, srclen); + else + return locale->ctype->downcase_ident(dst, dstsize, src, srclen, + locale); +} + +/* * pg_strcoll * * Like pg_strncoll for NUL-terminated input strings. 
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index 0c2920112bb..145b4641b1b 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -208,6 +208,8 @@ static const struct ctype_methods ctype_methods_builtin = { .strtitle = strtitle_builtin, .strupper = strupper_builtin, .strfold = strfold_builtin, + /* uses plain ASCII semantics for historical reasons */ + .downcase_ident = NULL, .wc_isdigit = wc_isdigit_builtin, .wc_isalpha = wc_isalpha_builtin, .wc_isalnum = wc_isalnum_builtin, diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index 18d026deda8..43d44fe43bd 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -61,6 +61,8 @@ static size_t strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); static size_t strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); +static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale); static int strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); @@ -123,7 +125,7 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity, /* * XXX: many of the functions below rely on casts directly from pg_wchar to - * UChar32, which is correct for the UTF-8 encoding, but not in general. + * UChar32, which is correct for UTF-8 and LATIN1, but not in general. 
*/ static pg_wchar @@ -227,6 +229,7 @@ static const struct ctype_methods ctype_methods_icu = { .strtitle = strtitle_icu, .strupper = strupper_icu, .strfold = strfold_icu, + .downcase_ident = downcase_ident_icu, .wc_isdigit = wc_isdigit_icu, .wc_isalpha = wc_isalpha_icu, .wc_isalnum = wc_isalnum_icu, @@ -241,6 +244,29 @@ static const struct ctype_methods ctype_methods_icu = { .wc_toupper = toupper_icu, .wc_tolower = tolower_icu, }; + +/* + * ICU still depends on libc for compatibility with certain historical + * behavior for single-byte encodings. See downcase_ident_icu(). + * + * XXX: consider fixing by decoding the single byte into a code point, and + * using u_tolower(). + */ +static locale_t +make_libc_ctype_locale(const char *ctype) +{ + locale_t loc; + +#ifndef WIN32 + loc = newlocale(LC_CTYPE_MASK, ctype, NULL); +#else + loc = _create_locale(LC_ALL, ctype); +#endif + if (!loc) + report_newlocale_failure(ctype); + + return loc; +} #endif pg_locale_t @@ -251,6 +277,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) const char *iculocstr; const char *icurules = NULL; UCollator *collator; + locale_t loc = (locale_t) 0; pg_locale_t result; if (collid == DEFAULT_COLLATION_OID) @@ -273,6 +300,18 @@ create_pg_locale_icu(Oid collid, MemoryContext context) if (!isnull) icurules = TextDatumGetCString(datum); + /* libc only needed for default locale and single-byte encoding */ + if (pg_database_encoding_max_length() == 1) + { + const char *ctype; + + datum = SysCacheGetAttrNotNull(DATABASEOID, tp, + Anum_pg_database_datctype); + ctype = TextDatumGetCString(datum); + + loc = make_libc_ctype_locale(ctype); + } + ReleaseSysCache(tp); } else @@ -303,6 +342,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->icu.locale = MemoryContextStrdup(context, iculocstr); result->icu.ucol = collator; + result->icu.lt = loc; result->deterministic = deterministic; result->collate_is_c = 
false; result->ctype_is_c = false; @@ -565,6 +605,39 @@ strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, } /* + * For historical compatibility, behavior is not multibyte-aware. + * + * NB: uses libc tolower_l() for single-byte encodings (also for historical + * compatibility), via the locale_t in locale->icu.lt, not global LC_CTYPE. + */ +static size_t +downcase_ident_icu(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale) +{ + int i; + bool libc_lower; + locale_t lt = locale->icu.lt; + + libc_lower = lt && (pg_database_encoding_max_length() == 1); + + for (i = 0; i < srclen && i < dstsize; i++) + { + unsigned char ch = (unsigned char) src[i]; + + if (ch >= 'A' && ch <= 'Z') + ch = pg_ascii_tolower(ch); + else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt)) + ch = tolower_l(ch, lt); + dst[i] = (char) ch; + } + + if (i < dstsize) + dst[i] = '\0'; + + return srclen; +} + +/* * strncoll_icu_utf8 * * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 3baa5816b5f..ab6117aaace 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -318,12 +318,41 @@ tolower_libc_mb(pg_wchar wc, pg_locale_t locale) return wc; } +/* + * Characters A..Z always downcase to a..z, even in the Turkish + * locale. Characters beyond 127 use tolower(). 
+ */ +static size_t +downcase_ident_libc_sb(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale) +{ + locale_t loc = locale->lt; + int i; + + for (i = 0; i < srclen && i < dstsize; i++) + { + unsigned char ch = (unsigned char) src[i]; + + if (ch >= 'A' && ch <= 'Z') + ch = pg_ascii_tolower(ch); + else if (IS_HIGHBIT_SET(ch) && isupper_l(ch, loc)) + ch = tolower_l(ch, loc); + dst[i] = (char) ch; + } + + if (i < dstsize) + dst[i] = '\0'; + + return srclen; +} + static const struct ctype_methods ctype_methods_libc_sb = { .strlower = strlower_libc_sb, .strtitle = strtitle_libc_sb, .strupper = strupper_libc_sb, /* in libc, casefolding is the same as lowercasing */ .strfold = strlower_libc_sb, + .downcase_ident = downcase_ident_libc_sb, .wc_isdigit = wc_isdigit_libc_sb, .wc_isalpha = wc_isalpha_libc_sb, .wc_isalnum = wc_isalnum_libc_sb, @@ -349,6 +378,8 @@ static const struct ctype_methods ctype_methods_libc_other_mb = { .strupper = strupper_libc_mb, /* in libc, casefolding is the same as lowercasing */ .strfold = strlower_libc_mb, + /* uses plain ASCII semantics for historical reasons */ + .downcase_ident = NULL, .wc_isdigit = wc_isdigit_libc_sb, .wc_isalpha = wc_isalpha_libc_sb, .wc_isalnum = wc_isalnum_libc_sb, @@ -370,6 +401,8 @@ static const struct ctype_methods ctype_methods_libc_utf8 = { .strupper = strupper_libc_mb, /* in libc, casefolding is the same as lowercasing */ .strfold = strlower_libc_mb, + /* uses plain ASCII semantics for historical reasons */ + .downcase_ident = NULL, .wc_isdigit = wc_isdigit_libc_mb, .wc_isalpha = wc_isalpha_libc_mb, .wc_isalnum = wc_isalnum_libc_mb, diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c index 54a9fe8e163..4945da458b1 100644 --- a/src/backend/utils/misc/injection_point.c +++ b/src/backend/utils/misc/injection_point.c @@ -331,11 +331,8 @@ InjectionPointAttach(const char *name, /* Save the entry */ strlcpy(entry->name, name, sizeof(entry->name)); - 
entry->name[INJ_NAME_MAXLEN - 1] = '\0'; strlcpy(entry->library, library, sizeof(entry->library)); - entry->library[INJ_LIB_MAXLEN - 1] = '\0'; strlcpy(entry->function, function, sizeof(entry->function)); - entry->function[INJ_FUNC_MAXLEN - 1] = '\0'; if (private_data != NULL) memcpy(entry->private_data, private_data, private_data_size); |
