summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- contrib/ltree/crc32.c 47
-rw-r--r-- contrib/ltree/lquery_op.c 39
-rw-r--r-- src/include/utils/pg_locale.h 14
3 files changed, 89 insertions, 11 deletions
diff --git a/contrib/ltree/crc32.c b/contrib/ltree/crc32.c
index 134f46a805e..ce1b0f28e21 100644
--- a/contrib/ltree/crc32.c
+++ b/contrib/ltree/crc32.c
@@ -10,31 +10,62 @@
#include "postgres.h"
#include "ltree.h"
+#include "crc32.h"
+#include "utils/pg_crc.h"
#ifdef LOWER_NODE
-#include <ctype.h>
-#define TOLOWER(x) tolower((unsigned char) (x))
-#else
-#define TOLOWER(x) (x)
+#include "catalog/pg_collation.h"
+#include "utils/pg_locale.h"
#endif
-#include "crc32.h"
-#include "utils/pg_crc.h"
+#ifdef LOWER_NODE
+/*
+ * ltree_crc32_sz (LOWER_NODE build): traditional CRC-32 of the case-folded
+ * form of the "size" bytes starting at "buf".
+ *
+ * The input is folded one multibyte character at a time with pg_strfold()
+ * using the locale of DEFAULT_COLLATION_OID, and the folded bytes (not the
+ * original ones) are fed to the CRC. Returns the finished CRC as an
+ * unsigned int. A non-positive "size" yields the CRC of empty input.
+ */
unsigned int
ltree_crc32_sz(const char *buf, int size)
{
pg_crc32 crc;
const char *p = buf;
+ static pg_locale_t locale = NULL;
+
+ /* look the locale up on first use only; later calls reuse the cached value */
+ if (!locale)
+ locale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
INIT_TRADITIONAL_CRC32(crc);
while (size > 0)
{
- char c = (char) TOLOWER(*p);
+ char foldstr[UNICODE_CASEMAP_BUFSZ];
+ int srclen = pg_mblen(p);
+ size_t foldlen;
+
+ /*
+ * pg_mblen() is handed only the pointer, so the length it reports is
+ * not bounded by the bytes left in buf. Clamp it so that a truncated
+ * trailing sequence cannot make pg_strfold() read past the end of the
+ * input or drive "size" negative.
+ */
+ if (srclen > size)
+ srclen = size;
+
+ /* fold one codepoint at a time */
+ foldlen = pg_strfold(foldstr, UNICODE_CASEMAP_BUFSZ, p, srclen,
+ locale);
+
+ /*
+ * pg_strfold() can report a length larger than the buffer it was given
+ * (callers elsewhere grow and retry); that must never happen for a
+ * single character, or the CRC below would read past foldstr.
+ */
+ Assert(foldlen <= UNICODE_CASEMAP_BUFSZ);
+
+ COMP_TRADITIONAL_CRC32(crc, foldstr, foldlen);
+
+ size -= srclen;
+ p += srclen;
+ }
+ FIN_TRADITIONAL_CRC32(crc);
+ return (unsigned int) crc;
+}
+
+#else
- COMP_TRADITIONAL_CRC32(crc, &c, 1);
+unsigned int /* traditional CRC-32 of the size bytes at buf; this is the !LOWER_NODE variant */
+ltree_crc32_sz(const char *buf, int size)
+{
+ pg_crc32 crc;
+ const char *p = buf; /* read cursor into buf */
+
+ INIT_TRADITIONAL_CRC32(crc);
+ while (size > 0) /* a non-positive size skips the loop entirely */
+ {
+ COMP_TRADITIONAL_CRC32(crc, p, 1); /* feed one raw byte; no case folding in this variant */
size--;
p++;
}
FIN_TRADITIONAL_CRC32(crc);
return (unsigned int) crc;
}
+
+#endif /* !LOWER_NODE */
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 0b39d64a839..9b1de101213 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -93,11 +93,44 @@ ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz)
bool
ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz)
{
- char *al = str_tolower(a, a_sz, DEFAULT_COLLATION_OID);
- char *bl = str_tolower(b, b_sz, DEFAULT_COLLATION_OID);
+ static pg_locale_t locale = NULL;
+ size_t al_sz = a_sz + 1;
+ size_t al_len;
+ char *al = palloc(al_sz);
+ size_t bl_sz = b_sz + 1;
+ size_t bl_len;
+ char *bl = palloc(bl_sz);
bool res;
- res = (strncmp(al, bl, a_sz) == 0);
+ if (!locale)
+ locale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
+
+ /* casefold both a and b */
+
+ al_len = pg_strfold(al, al_sz, a, a_sz, locale);
+ if (al_len + 1 > al_sz)
+ {
+ /* grow buffer if needed and retry */
+ al_sz = al_len + 1;
+ al = repalloc(al, al_sz);
+ al_len = pg_strfold(al, al_sz, a, a_sz, locale);
+ Assert(al_len + 1 <= al_sz);
+ }
+
+ bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
+ if (bl_len + 1 > bl_sz)
+ {
+ /* grow buffer if needed and retry */
+ bl_sz = bl_len + 1;
+ bl = repalloc(bl, bl_sz);
+ bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
+ Assert(bl_len + 1 <= bl_sz);
+ }
+
+ if (al_len > bl_len)
+ res = false;
+ else
+ res = (strncmp(al, bl, al_len) == 0);
pfree(al);
pfree(bl);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 953e185f92d..3a758256591 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -24,6 +24,20 @@
/* use for libc locale names */
#define LOCALE_NAME_BUFLEN 128
+/*
+ * UNICODE_CASEMAP_LEN is the maximum number of output codepoints that a
+ * single input codepoint can be mapped to. UNICODE_CASEMAP_BUFSZ is the
+ * matching buffer size in bytes, assuming MAX_MULTIBYTE_CHAR_LEN (defined
+ * elsewhere) bounds the encoded size of one codepoint. Useful for mapping
+ * and processing a single input codepoint at a time with a
+ * statically-allocated buffer.
+ *
+ * With full case mapping, an input codepoint may be mapped to as many as
+ * three output codepoints. See Unicode 16.0.0, section 5.18.2, "Change in
+ * Length":
+ *
+ * https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G29675
+ *
+ * Note that UNICODE_CASEMAP_BUFSZ is exactly the product below: it reserves
+ * no extra byte for a terminating NUL.
+ */
+#define UNICODE_CASEMAP_LEN 3 /* output codepoints per input codepoint, at most */
+#define UNICODE_CASEMAP_BUFSZ (UNICODE_CASEMAP_LEN * MAX_MULTIBYTE_CHAR_LEN) /* bytes, no NUL */
+
/* GUC settings */
extern PGDLLIMPORT char *locale_messages;
extern PGDLLIMPORT char *locale_monetary;