1 files changed, 74 insertions, 9 deletions
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index e8820666b2d..43d44fe43bd 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -61,6 +61,8 @@ static size_t strupper_icu(char *dest, size_t destsize, const char *src,
 						   ssize_t srclen, pg_locale_t locale);
 static size_t strfold_icu(char *dest, size_t destsize, const char *src,
 						  ssize_t srclen, pg_locale_t locale);
+static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
+								 ssize_t srclen, pg_locale_t locale);
 static int	strncoll_icu(const char *arg1, ssize_t len1,
 						 const char *arg2, ssize_t len2,
 						 pg_locale_t locale);
@@ -121,16 +123,9 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
 									 const char *locale,
 									 UErrorCode *pErrorCode);
 
-static bool
-char_is_cased_icu(char ch, pg_locale_t locale)
-{
-	return IS_HIGHBIT_SET(ch) ||
-		(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
-}
-
 /*
  * XXX: many of the functions below rely on casts directly from pg_wchar to
- * UChar32, which is correct for the UTF-8 encoding, but not in general.
+ * UChar32, which is correct for UTF-8 and LATIN1, but not in general.
  */
 
 static pg_wchar
@@ -234,6 +229,7 @@ static const struct ctype_methods ctype_methods_icu = {
 	.strtitle = strtitle_icu,
 	.strupper = strupper_icu,
 	.strfold = strfold_icu,
+	.downcase_ident = downcase_ident_icu,
 	.wc_isdigit = wc_isdigit_icu,
 	.wc_isalpha = wc_isalpha_icu,
 	.wc_isalnum = wc_isalnum_icu,
@@ -244,11 +240,33 @@ static const struct ctype_methods ctype_methods_icu = {
 	.wc_ispunct = wc_ispunct_icu,
 	.wc_isspace = wc_isspace_icu,
 	.wc_isxdigit = wc_isxdigit_icu,
-	.char_is_cased = char_is_cased_icu,
 	.wc_iscased = wc_iscased_icu,
 	.wc_toupper = toupper_icu,
 	.wc_tolower = tolower_icu,
 };
+
+/*
+ * ICU still depends on libc for compatibility with certain historical
+ * behavior for single-byte encodings.  See downcase_ident_icu().
+ *
+ * XXX: consider fixing by decoding the single byte into a code point, and
+ * using u_tolower().
+ */
+static locale_t
+make_libc_ctype_locale(const char *ctype)
+{
+	locale_t	loc;
+
+#ifndef WIN32
+	loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
+#else
+	loc = _create_locale(LC_ALL, ctype);
+#endif
+	if (!loc)
+		report_newlocale_failure(ctype);
+
+	return loc;
+}
 #endif
 
 pg_locale_t
@@ -259,6 +277,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	const char *iculocstr;
 	const char *icurules = NULL;
 	UCollator  *collator;
+	locale_t	loc = (locale_t) 0;
 	pg_locale_t result;
 
 	if (collid == DEFAULT_COLLATION_OID)
@@ -281,6 +300,18 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 		if (!isnull)
 			icurules = TextDatumGetCString(datum);
 
+		/* libc only needed for default locale and single-byte encoding */
+		if (pg_database_encoding_max_length() == 1)
+		{
+			const char *ctype;
+
+			datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
+										   Anum_pg_database_datctype);
+			ctype = TextDatumGetCString(datum);
+
+			loc = make_libc_ctype_locale(ctype);
+		}
+
 		ReleaseSysCache(tp);
 	}
 	else
@@ -311,6 +342,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
 	result->icu.locale = MemoryContextStrdup(context, iculocstr);
 	result->icu.ucol = collator;
+	result->icu.lt = loc;
 	result->deterministic = deterministic;
 	result->collate_is_c = false;
 	result->ctype_is_c = false;
@@ -573,6 +605,39 @@ strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 }
 
 /*
+ * For historical compatibility, behavior is not multibyte-aware.
+ *
+ * NB: uses libc tolower() for single-byte encodings (also for historical
+ * compatibility), and therefore relies on the global LC_CTYPE setting.
+ */
+static size_t
+downcase_ident_icu(char *dst, size_t dstsize, const char *src,
+				   ssize_t srclen, pg_locale_t locale)
+{
+	int			i;
+	bool		libc_lower;
+	locale_t	lt = locale->icu.lt;
+
+	libc_lower = lt && (pg_database_encoding_max_length() == 1);
+
+	for (i = 0; i < srclen && i < dstsize; i++)
+	{
+		unsigned char ch = (unsigned char) src[i];
+
+		if (ch >= 'A' && ch <= 'Z')
+			ch = pg_ascii_tolower(ch);
+		else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
+			ch = tolower_l(ch, lt);
+		dst[i] = (char) ch;
+	}
+
+	if (i < dstsize)
+		dst[i] = '\0';
+
+	return srclen;
+}
+
+/*
  * strncoll_icu_utf8
  *
  * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given