diff options
| -rw-r--r-- | expected/validate.out | 6 | ||||
| -rw-r--r-- | json.c | 9 | ||||
| -rw-r--r-- | util.c | 7 | ||||
| -rw-r--r-- | util.h | 1 |
4 files changed, 8 insertions, 15 deletions
diff --git a/expected/validate.out b/expected/validate.out index 23fa35b..3ff190b 100644 --- a/expected/validate.out +++ b/expected/validate.out @@ -39,8 +39,8 @@ SELECT json_validate(string), string FROM test_strings; f | "\UD834\UDD1E" f | "\uDB00" f | "\uDB00\uDBFF" - f | "\uFFFE" - f | "\uFFFF" + t | "\uFFFE" + t | "\uFFFF" f | . t | "" t | [] @@ -125,7 +125,7 @@ SELECT json_validate(string), string FROM test_strings; t | "\uD834\uDD1E" t | "\uDBFF\uDFFF" t | "\uFFFD" - f | "\uFFFF" + t | "\uFFFF" f | hello t | [32, 1] f | [32, @@ -14,6 +14,8 @@ #include <ctype.h> +#include "mb/pg_wchar.h" + #define is_internal(node) ((node)->type == JSON_ARRAY || (node)->type == JSON_OBJECT) /* We can't use isspace() because it also accepts \v and \f, which @@ -786,12 +788,11 @@ json_decode_string(const char **sp, size_t *length, bool strict) uc = 0x10000 | ((uc & 0x3FF) << 10) | (lc & 0x3FF); } - /* 0xFFFE and 0xFFFF are invalid Unicode */ - if (uc == 0xFFFE || uc == 0xFFFF) - goto failed; + unicode_to_utf8(uc, (unsigned char *) buf); + len = pg_utf_mblen((unsigned char *) buf); - len = utf8_encode_char(buf, uc); Assert(len > 0); + appendBinaryStringInfo(&ret, buf, len); continue; /* Continue the enclosing while loop to skip @@ -193,13 +193,6 @@ utf8_validate(const char *str, size_t length) return true; } -int -utf8_encode_char(char *out, unsigned int uc) -{ - unicode_to_utf8(uc, (unsigned char *) out); - return pg_utf_mblen((unsigned char *) out); -} - char * server_to_utf8(const char *str, int len) { @@ -60,7 +60,6 @@ size_t utf8_substring(const char *src, size_t srcbytes, const char **out_start, size_t *out_bytes); void utf8_decode_char_nocheck(const char **sp, unsigned int *uc); bool utf8_validate(const char *str, size_t length); -int utf8_encode_char(char *out, unsigned int uc); /* * Adaptations of pg_do_encoding_conversion for simplifying UTF-8 conversions. |
