diff options
Diffstat (limited to 'util.c')
| -rw-r--r-- | util.c | 64 |
1 files changed, 64 insertions, 0 deletions
/*
 * Diff hunk context (@@ -94,3 +94,67 @@): the definitions below are appended
 * after the unchanged tail of utf8_substring(), which reads:
 *
 *         *out_bytes = sub_end - sub_start;
 *         return sub_length;
 *     }
 */

/*
 * Surrogate-acceptance switch.  Not referenced by any of the code visible
 * in this hunk -- NOTE(review): confirm whether another part of the file
 * uses it.
 */
static const bool utf8_allow_surrogates = false;

/*
 * utf8_decode_char_nocheck
 *
 * Decode one UTF-8 sequence starting at *sp, store the resulting code point
 * in *uc, and advance *sp past the bytes consumed.
 *
 * As the name says, nothing is validated: the sequence length is taken from
 * the lead byte alone (lead < 0x80 -> 1 byte, < 0xE0 -> 2, < 0xF0 -> 3,
 * otherwise 4), and the following bytes are read without checking that they
 * are continuation bytes or that they lie inside the caller's buffer.  The
 * caller must therefore only pass input that is already known to be
 * well-formed.
 */
void
utf8_decode_char_nocheck(const char **sp, unsigned int *uc)
{
    /* Payload-bit masks for the lead byte, indexed by continuation count. */
    static const unsigned char lead_mask[4] = {0xFF, 0x1F, 0x0F, 0x07};

    const unsigned char *s = (const unsigned char *) *sp;
    unsigned char lead = *s++;
    unsigned int trailing;
    unsigned int code;

    if (lead < 0x80)
        trailing = 0;
    else if (lead < 0xE0)
        trailing = 1;
    else if (lead < 0xF0)
        trailing = 2;
    else
        trailing = 3;

    /* Start with the lead byte's payload, then append 6 bits per byte. */
    code = lead & lead_mask[trailing];
    while (trailing--)
    {
        code <<= 6;
        code |= *s++ & 0x3F;
    }

    *uc = code;
    *sp = (const char *) s;
}

/*
 * utf8_validate
 *
 * Return true if the 'length' bytes starting at 'str' pass the UTF-8 checks
 * below, false otherwise.  An empty input (length == 0) yields true.
 *
 * Bytes <= 0x7F are accepted directly.  For any other byte the sequence
 * length is obtained from pg_utf_mblen() and the sequence is then checked
 * with pg_utf8_islegal(); a sequence that would run past the end of the
 * buffer is rejected before pg_utf8_islegal() is called, so that helper is
 * never handed bytes outside [str, str + length).
 */
bool
utf8_validate(const char *str, size_t length)
{
    const unsigned char *s = (const unsigned char *) str;
    const unsigned char *e = s + length;

    while (s < e)
    {
        int len;

        /* Fast path: single-byte (ASCII) character. */
        if (*s <= 0x7F)
        {
            s++;
            continue;
        }

        len = pg_utf_mblen(s);

        /*
         * Compare against the remaining byte count rather than computing
         * s + len: forming a pointer beyond one-past-the-end of the buffer
         * is undefined behavior, which is exactly what a truncated final
         * sequence would trigger.
         */
        if (len > e - s)
            return false;

        if (!pg_utf8_islegal(s, len))
            return false;

        s += len;
    }

    return true;
}

/*
 * utf8_encode_char
 *
 * Write the UTF-8 encoding of code point 'uc' into 'out' via
 * unicode_to_utf8() and return the encoded length, determined by calling
 * pg_utf_mblen() on the bytes just written.
 *
 * No buffer size is passed, so 'out' must be large enough for the encoded
 * sequence -- presumably up to 4 bytes; confirm against unicode_to_utf8().
 * No terminating NUL is written by this function itself.
 */
int
utf8_encode_char(char *out, unsigned int uc)
{
    unsigned char *dst = (unsigned char *) out;

    unicode_to_utf8(uc, dst);
    return pg_utf_mblen(dst);
}
