diff options
| author | Joey Adams | 2010-07-23 22:01:36 +0000 |
|---|---|---|
| committer | Joey Adams | 2010-07-23 22:01:36 +0000 |
| commit | 8a019e04b817ffca00ec9759f3d12c60c808e67f (patch) | |
| tree | 59846b7f57946d2b2c9a9208a34435e2ef43a178 /json.c | |
| parent | f5cbbbe875326dbe5d0c43bc3c692f64f77a30bd (diff) | |
Ran pg_indent and made a few purely cosmetic changes (before running pg_indent again).
Diffstat (limited to 'json.c')
| -rw-r--r-- | json.c | 1100 |
1 files changed, 671 insertions, 429 deletions
@@ -29,22 +29,26 @@ #define JSON_malloc palloc /* repalloc and pfree can't take a null pointer, unlike normal realloc and free. */ -static void *JSON_realloc(void *ptr, Size size) +static void * +JSON_realloc(void *ptr, Size size) { if (ptr) return repalloc(ptr, size); else return palloc(size); } -static void JSON_free(void *ptr) +static void +JSON_free(void *ptr) { if (ptr) pfree(ptr); } -static char *JSON_strdup(const char *str, size_t length) +static char * +JSON_strdup(const char *str, size_t length) { - char *ret = JSON_malloc(length + 1); + char *ret = JSON_malloc(length + 1); + memcpy(ret, str, length); ret[length] = 0; return ret; @@ -56,22 +60,29 @@ static char *JSON_strdup(const char *str, size_t length) aren't legal whitespace characters in strict JSON. */ #define is_whitespace(c) ((c)==' ' || (c)=='\t' || (c)=='\n' || (c)=='\r') -static void skip_whitespace(const char **sp) +static void +skip_whitespace(const char **sp) { const char *s = *sp; + while (is_whitespace(*s)) s++; *sp = s; } -static char end_parenthesis(json_node *node) +static char +end_parenthesis(json_node * node) { if (!node) return 0; - switch (node->type) { - case JSON_ARRAY: return ']'; - case JSON_OBJECT: return '}'; - default: return 0; + switch (node->type) + { + case JSON_ARRAY: + return ']'; + case JSON_OBJECT: + return '}'; + default: + return 0; } } @@ -80,15 +91,17 @@ static char end_parenthesis(json_node *node) * Writes the result to *out . * Returns true on success, false on failure. */ -static bool read_hex16(const char *in, unsigned int *out) +static bool +read_hex16(const char *in, unsigned int *out) { unsigned int i; unsigned int tmp; - char c; - + char c; + *out = 0; - - for (i=0; i<4; i++) { + + for (i = 0; i < 4; i++) + { c = *in++; if (c >= '0' && c <= '9') tmp = c - '0'; @@ -98,17 +111,19 @@ static bool read_hex16(const char *in, unsigned int *out) tmp = c - 'a' + 10; else return false; - + *out <<= 4; *out += tmp; } - + return true; } -static void write_hex16(char *out, unsigned int val) +static void +write_hex16(char *out, unsigned int val) { const char *hex = "0123456789ABCDEF"; + *out++ = hex[(val >> 12) & 0xF]; *out++ = hex[(val >> 8) & 0xF]; *out++ = hex[(val >> 4) & 0xF]; @@ -117,16 +132,17 @@ static void write_hex16(char *out, unsigned int val) static bool utf8_validate(const char *str, size_t length); static void utf8_decode_char_nocheck(const char **sp, unsigned int *uc); -static int utf8_encode_char(char *out, unsigned int uc); +static int utf8_encode_char(char *out, unsigned int uc); /*************************** String buffer ***************************/ -typedef struct { - char *buffer; - size_t length; - size_t alloc; -} String[1]; +typedef struct +{ + char *buffer; + size_t length; + size_t alloc; +} String[1]; /* Declare and initialize a String with the given name. */ #define String(name) String name = NewString() @@ -136,12 +152,15 @@ typedef struct { /* Grow the string by @need characters, reallocating if necessary. * Returns a pointer to the uninitialized range where text is to go. * A '\0' terminator is added automatically. */ -static char *string_grow(String str, size_t need) +static char * +string_grow(String str, size_t need) { - size_t end = str->length; + size_t end = str->length; + str->length += need; - if (str->alloc <= str->length) { - str->alloc = str->length*3/2 + 1; + if (str->alloc <= str->length) + { + str->alloc = str->length * 3 / 2 + 1; if (str->alloc < 8) str->alloc = 8; str->buffer = JSON_realloc(str->buffer, str->alloc); @@ -149,35 +168,43 @@ static char *string_grow(String str, size_t need) str->buffer[str->length] = '\0'; return str->buffer + end; } -static char *string_buffer(String str) +static char * +string_buffer(String str) { if (!str->buffer) string_grow(str, 0); return str->buffer; } -static inline void string_append_length(String str, const char *append, size_t len) +static inline void +string_append_length(String str, const char *append, size_t len) { - char *dest = string_grow(str, len); + char *dest = string_grow(str, len); + memcpy(dest, append, len); } -static inline void string_append(String str, const char *append) +static inline void +string_append(String str, const char *append) { string_append_length(str, append, strlen(append)); } -static inline void string_append_range(String str, const char *start, const char *end) +static inline void +string_append_range(String str, const char *start, const char *end) { string_append_length(str, start, end - start); } -static inline void string_append_char(String str, char c) +static inline void +string_append_char(String str, char c) { *string_grow(str, 1) = c; } -static inline void string_trunc(String str, size_t len) +static inline void +string_trunc(String str, size_t len) { str->length = len; str->buffer[len] = '\0'; } -static inline void string_free(String str) +static inline void +string_free(String str) { JSON_free(str->buffer); } @@ -185,34 +212,43 @@ static inline void string_free(String str) /*********** json_node creation, manipulation, and deletion **********/ -json_node *json_mknode(json_type type) +json_node * +json_mknode(json_type type) { - json_node *node = JSON_malloc(sizeof(*node)); + json_node *node = JSON_malloc(sizeof(*node)); + memset(node, 0, sizeof(*node)); node->type = type; return node; } -json_node *json_mkbool(bool v_bool) +json_node * +json_mkbool(bool v_bool) { - json_node *node = json_mknode(JSON_BOOL); + json_node *node = json_mknode(JSON_BOOL); + node->v.v_bool = v_bool; return node; } -json_node *json_mkstring(const char *str, size_t length) +json_node * +json_mkstring(const char *str, size_t length) { - json_node *node = json_mknode(JSON_STRING); - if (str) { + json_node *node = json_mknode(JSON_STRING); + + if (str) + { node->v.string.str = JSON_strdup(str, length); node->v.string.length = length; } return node; } -json_node *json_mknumber(const char *number, size_t length) +json_node * +json_mknumber(const char *number, size_t length) { - json_node *node = json_mknode(JSON_NUMBER); + json_node *node = json_mknode(JSON_NUMBER); + if (number) node->v.number = JSON_strdup(number, length); return node; @@ -224,47 +260,55 @@ json_node *json_mknumber(const char *number, size_t length) * Call json_touch_value so that json_encode(, JSONOPT_ORIG) * will encode the new value rather than using original text. */ -void json_touch_value(json_node *node) +void +json_touch_value(json_node * node) { - while (node && node->orig.value.start) { + while (node && node->orig.value.start) + { node->orig.value.start = NULL; node = node->parent; } } -static void json_append_notouch(json_node *parent, json_node *child) +static void +json_append_notouch(json_node * parent, json_node * child) { - Assert(parent->type==JSON_ARRAY || parent->type==JSON_OBJECT); + Assert(parent->type == JSON_ARRAY || parent->type == JSON_OBJECT); Assert(child->parent == NULL); - + parent->v.children.count++; child->parent = parent; child->prev = parent->v.children.tail; child->next = NULL; - - if (parent->v.children.tail) { + + if (parent->v.children.tail) + { parent->v.children.tail->next = child; parent->v.children.tail = child; - } else { + } + else + { parent->v.children.head = parent->v.children.tail = child; } } -void json_append(json_node *parent, json_node *child) +void +json_append(json_node * parent, json_node * child) { json_append_notouch(parent, child); json_touch_value(parent); } -void json_remove(json_node *node) +void +json_remove(json_node * node) { - json_node *parent = node->parent; - + json_node *parent = node->parent; + if (!parent) return; - Assert(parent->type==JSON_ARRAY || parent->type==JSON_OBJECT); + Assert(parent->type == JSON_ARRAY || parent->type == JSON_OBJECT); Assert(parent->v.children.count > 0); - + if (node->prev) node->prev->next = node->next; else @@ -273,7 +317,7 @@ void json_remove(json_node *node) node->next->prev = node->prev; else parent->v.children.tail = node->prev; - + parent->v.children.count--; node->parent = NULL; node->prev = NULL; @@ -282,17 +326,19 @@ void json_remove(json_node *node) json_touch_value(parent); } -void json_replace_value(json_node *node, json_node *replacement) +void +json_replace_value(json_node * node, json_node * replacement) { - node->type = replacement->type; - node->v = replacement->v; - node->orig.value = replacement->orig.value; + node->type = replacement->type; + node->v = replacement->v; + node->orig.value = replacement->orig.value; if (node->parent) json_touch_value(node->parent); } -const char *json_get_string(json_node *node, size_t *length_out) +const char * +json_get_string(json_node * node, size_t *length_out) { Assert(node->type == JSON_STRING); if (length_out) @@ -300,28 +346,34 @@ const char *json_get_string(json_node *node, size_t *length_out) return node->v.string.str; } -void json_set_string(json_node *node, const char *str, size_t length) +void +json_set_string(json_node * node, const char *str, size_t length) { Assert(node->type == JSON_STRING); if (node->v.string.str) JSON_free(node->v.string.str); - if (str) { + if (str) + { node->v.string.str = JSON_strdup(str, length); node->v.string.length = length; - } else { + } + else + { node->v.string.str = NULL; node->v.string.length = 0; } json_touch_value(node); } -const char *json_get_number(json_node *node) +const char * +json_get_number(json_node * node) { Assert(node->type == JSON_NUMBER); return node->v.number; } -void json_set_number(json_node *node, const char *number, size_t length) +void +json_set_number(json_node * node, const char *number, size_t length) { Assert(node->type == JSON_NUMBER); if (node->v.number) @@ -334,7 +386,8 @@ void json_set_number(json_node *node, const char *number, size_t length) } /* Non-recursively free a node */ -static void free_node(json_node *node) +static void +free_node(json_node * node) { if (node->type == JSON_STRING) JSON_free(node->v.string.str); @@ -345,34 +398,36 @@ static void free_node(json_node *node) JSON_free(node); } -void json_delete(json_node *node) +void +json_delete(json_node * node) { - json_node *parent, *next; - + json_node *parent, + *next; + if (!node) return; - + /* Remove node from parent (if it has one). */ json_remove(node); - + goto descend; - + descend: while (is_internal(node) && node->v.children.head) node = node->v.children.head; goto advance; - + advance: parent = node->parent; next = node->next; free_node(node); node = next; - + if (node) goto descend; else goto ascend; - + ascend: node = parent; if (node) @@ -386,39 +441,46 @@ ascend: static json_node *decode_leaf(const char **sp); static json_node *decode_number(const char **sp); -char *json_decode_string(const char **sp, size_t *length, bool strict); +char *json_decode_string(const char **sp, size_t *length, bool strict); + /* json_decode_string has a different signature than its friends because it's also used to parse object member keys. It's also useful outside of json.c, such as in jsonpath.c . */ -bool json_validate(const char *str) +bool +json_validate(const char *str) { - json_node *node = json_decode(str); + json_node *node = json_decode(str); + if (!node) return false; json_delete(node); return true; } -json_node *json_decode(const char *str) +json_node * +json_decode(const char *str) { - json_node *root = NULL, *parent = NULL, *node = NULL; - const char *s = str; - char *key; - size_t key_length; - struct json_node_orig orig; - bool expect_endp; + json_node *root = NULL, + *parent = NULL, + *node = NULL; + const char *s = str; + char *key; + size_t key_length; + struct json_node_orig orig; + bool expect_endp; if (!str) return NULL; - + if (!utf8_validate(str, strlen(str))) return NULL; expect_endp = false; goto item; - -item: /* Expect a value (set expect_endp before goto item; ) */ + +item: /* Expect a value (set expect_endp before goto + * item; ) */ key = NULL; key_length = 0; memset(&orig, 0, sizeof(orig)); @@ -428,12 +490,14 @@ item: /* Expect a value (set expect_endp before goto item; ) */ skip_whitespace(&s); - if (expect_endp) { + if (expect_endp) + { if (*s == ']' || *s == '}') goto endp; } - - if (parent && parent->type == JSON_OBJECT) { + + if (parent && parent->type == JSON_OBJECT) + { /* Parse member key string. */ orig.key_left_space.end = s; orig.key.start = s; @@ -444,7 +508,7 @@ item: /* Expect a value (set expect_endp before goto item; ) */ orig.key.end = s; orig.key_right_space.start = s; - + /* Eat the " : " */ skip_whitespace(&s); if (*s != ':') @@ -458,17 +522,18 @@ item: /* Expect a value (set expect_endp before goto item; ) */ } /* - * The way orig.value and company are initialized is a bit funky. - * If this node has children, we have to finish parsing the node's - * children before we know where it ends. Hence, initialization - * of orig.value_end and after will be deferred if this node has children. + * The way orig.value and company are initialized is a bit funky. If this + * node has children, we have to finish parsing the node's children before + * we know where it ends. Hence, initialization of orig.value_end and + * after will be deferred if this node has children. */ - + orig.left_space.end = s; orig.value.start = s; node = decode_leaf(&s); - if (!node) { + if (!node) + { if (*s == '[') node = json_mknode(JSON_ARRAY); else if (*s == '{') @@ -477,9 +542,13 @@ item: /* Expect a value (set expect_endp before goto item; ) */ goto failed; s++; - /* orig.value.end and later are dangling (actually NULL) for now, - but will be initialized when we get to state 'endp' . */ - } else { + /* + * orig.value.end and later are dangling (actually NULL) for now, but + * will be initialized when we get to state 'endp' . + */ + } + else + { orig.value.end = s; orig.right_space.start = s; @@ -487,48 +556,54 @@ item: /* Expect a value (set expect_endp before goto item; ) */ orig.right_space.end = s; } - + node->key = key; node->key_length = key_length; - /* The key now belongs to the node. This prevents a double free - on failure (see the failed: label). */ + /* + * The key now belongs to the node. This prevents a double free on + * failure (see the failed: label). + */ key = NULL; node->orig = orig; - + if (parent) json_append_notouch(parent, node); else root = node; - - if (is_internal(node)) { - /* "push" node onto the "stack". Nodes point up to their parents, - which is why this function doesn't need a "stack" per se. */ + + if (is_internal(node)) + { + /* + * "push" node onto the "stack". Nodes point up to their parents, + * which is why this function doesn't need a "stack" per se. + */ parent = node; expect_endp = true; goto item; } - + if (parent) goto comma_endp; else goto end; - -comma_endp: /* Expect a comma or end bracket/brace */ - if (*s == ',') { + +comma_endp: /* Expect a comma or end bracket/brace */ + if (*s == ',') + { s++; - + expect_endp = false; goto item; } if (*s == ']' || *s == '}') goto endp; - + goto failed; -endp: /* Handle an end bracket/brace */ +endp: /* Handle an end bracket/brace */ if (*s != end_parenthesis(parent)) goto failed; s++; @@ -537,26 +612,28 @@ endp: /* Handle an end bracket/brace */ node = parent; parent = parent->parent; - /* The other pointers were set when we started - parsing this node in the 'item' state. */ - node->orig.value.end = s; - node->orig.right_space.start = s; + /* + * The other pointers were set when we started parsing this node in the + * 'item' state. + */ + node->orig.value.end = s; + node->orig.right_space.start = s; skip_whitespace(&s); - node->orig.right_space.end = s; + node->orig.right_space.end = s; if (parent) goto comma_endp; else goto end; -end: /* Expect end of text */ +end: /* Expect end of text */ if (*s) goto failed; return node; - -failed: /* Handle failure */ + +failed: /* Handle failure */ if (key) JSON_free(key); json_delete(root); @@ -569,45 +646,52 @@ failed: /* Handle failure */ * * Returns NULL if next character is '[', '{', or invalid. */ -static json_node *decode_leaf(const char **sp) +static json_node * +decode_leaf(const char **sp) { - char c = **sp; - - if (c == '"') { - size_t length; - char *str = json_decode_string(sp, &length, true); - - if (str) { - json_node *node = json_mknode(JSON_STRING); + char c = **sp; + + if (c == '"') + { + size_t length; + char *str = json_decode_string(sp, &length, true); + + if (str) + { + json_node *node = json_mknode(JSON_STRING); + node->v.string.str = str; node->v.string.length = length; return node; } - + return NULL; } if ((c >= '0' && c <= '9') || c == '-') return decode_number(sp); - if (!strncmp(*sp, "true", 4)) { + if (!strncmp(*sp, "true", 4)) + { (*sp) += 4; return json_mkbool(true); } - if (!strncmp(*sp, "false", 5)) { + if (!strncmp(*sp, "false", 5)) + { (*sp) += 5; return json_mkbool(false); } - if (!strncmp(*sp, "null", 4)) { + if (!strncmp(*sp, "null", 4)) + { (*sp) += 4; return json_mknode(JSON_NULL); } - + return NULL; } /* * The JSON spec says that a number shall follow this precise pattern * (spaces and quotes added for readability): - * '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)? + * '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)? * * However, some JSON parsers are more liberal. For instance, PHP accepts * '.5' and '1.'. JSON.parse accepts '+3'. @@ -615,168 +699,208 @@ static json_node *decode_leaf(const char **sp) * This function takes the strict approach. The user should use * json_clean() to handle liberal JSON text. */ -static bool validate_number(const char **sp) +static bool +validate_number(const char **sp) { const char *s = *sp; - + /* '-'? */ if (*s == '-') s++; - + /* (0 | [1-9][0-9]*) */ - if (*s == '0') { + if (*s == '0') + { s++; - } else { + } + else + { if (!isdigit(*s)) return false; - do s++; while (isdigit(*s)); + do + s++; + while (isdigit(*s)); } /* ('.' [0-9]+)? */ - if (*s == '.') { + if (*s == '.') + { s++; if (!isdigit(*s)) return false; - do s++; while (isdigit(*s)); + do + s++; + while (isdigit(*s)); } - + /* ([Ee] [+-]? [0-9]+)? */ - if (*s=='E' || *s=='e') { + if (*s == 'E' || *s == 'e') + { s++; - if (*s=='+' || *s=='-') + if (*s == '+' || *s == '-') s++; if (!isdigit(*s)) return false; - do s++; while (isdigit(*s)); + do + s++; + while (isdigit(*s)); } - + *sp = s; return true; } -static json_node *decode_number(const char **sp) +static json_node * +decode_number(const char **sp) { - const char *start, *end; - + const char *start, + *end; + start = *sp; if (!validate_number(sp)) return NULL; end = *sp; - + return json_mknumber(start, end - start); } -char *json_decode_string(const char **sp, size_t *length, bool strict) +char * +json_decode_string(const char **sp, size_t *length, bool strict) { const char *s = *sp; + String(ret); - char *out; - size_t size; - char quote; + char *out; + size_t size; + char quote; Assert(length != NULL); - + quote = *s++; - if (strict) { + if (strict) + { if (quote != '"') return NULL; - } else { + } + else + { if (quote != '"' && quote != '\'') return NULL; } - while (*s && *s != quote) { + while (*s && *s != quote) + { unsigned char c = *s++; - unsigned int uc, lc; - - if (c == '\\') { + unsigned int uc, + lc; + + if (c == '\\') + { c = *s++; - switch (c) { + switch (c) + { case '\\': case '/': break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; case 'u': size = ret->length; out = string_grow(ret, 4); - + if (!read_hex16(s, &uc)) goto failed; s += 4; - - if (uc >= 0xD800 && uc <= 0xDFFF) { + + if (uc >= 0xD800 && uc <= 0xDFFF) + { /* Handle UTF-16 surrogate pair. */ - + if (uc >= 0xDC00) - goto failed; /* Second surrogate not preceded by - first surrogate. */ - + goto failed; /* Second surrogate not + * preceded by first + * surrogate. */ + if (s[0] != '\\' || s[1] != 'u' - || !read_hex16(s+2, &lc) - || !(lc >= 0xDC00 && lc <= 0xDFFF)) - goto failed; /* First surrogate not followed by - second surrogate. */ - + || !read_hex16(s + 2, &lc) + || !(lc >= 0xDC00 && lc <= 0xDFFF)) + goto failed; /* First surrogate not + * followed by second + * surrogate. */ + s += 6; - + uc = 0x10000 | ((uc & 0x3FF) << 10) | (lc & 0x3FF); } - + /* 0xFFFE and 0xFFFF are invalid Unicode */ if (uc == 0xFFFE || uc == 0xFFFF) goto failed; - + size += utf8_encode_char(out, uc); string_trunc(ret, size); - - continue; /* Continue the enclosing while loop to skip - the str_append below. */ - default: /* Invalid escape */ + + continue; /* Continue the enclosing while loop to skip + * the str_append below. */ + default: /* Invalid escape */ if (c == quote) break; if (!strict && (c == '"' || c == '\'')) break; - goto failed; /* Invalid escape */ + goto failed; /* Invalid escape */ } - } else if (c <= 0x1F) { + } + else if (c <= 0x1F) + { /* Control characters not allowed in string literals. */ goto failed; } string_append_char(ret, c); } - + if (!*s++) goto failed; - + *length = ret->length; *sp = s; return string_buffer(ret); - + failed: string_free(ret); return NULL; } -json_type json_text_type(const char *str, size_t nbytes) +json_type +json_text_type(const char *str, size_t nbytes) { const char *s = str; const char *e = str + nbytes; - char c; - + char c; + /* Skip whitespace characters. */ while (s < e && is_whitespace(*s)) s++; - + /* Get first non-white character, making sure it's in bounds. */ if (s >= e) return JSON_INVALID; c = *s; - - switch (c) { + + switch (c) + { case 'n': return JSON_NULL; case '"': @@ -798,7 +922,8 @@ json_type json_text_type(const char *str, size_t nbytes) /****************************** Encoding *****************************/ -static bool encode_string(String out, const char *string, size_t length, char quote, bool escape_unicode) +static bool +encode_string(String out, const char *string, size_t length, char quote, bool escape_unicode) { const char *s = string; const char *e = s + length; @@ -807,74 +932,97 @@ static bool encode_string(String out, const char *string, size_t length, char qu return false; string_append_char(out, quote); - - while (s < e) { + + while (s < e) + { unsigned char c = *s++; unsigned char e; - - switch (c) { - case '\\': e = '\\'; break; - case '\b': e = 'b'; break; - case '\f': e = 'f'; break; - case '\n': e = 'n'; break; - case '\r': e = 'r'; break; - case '\t': e = 't'; break; - default: { - if (c == quote) { - e = quote; - break; - } - if (c < 0x1F || (c >= 0x80 && escape_unicode)) { - /* Encode using \u.... */ - unsigned int uc, lc; - char txt[13]; - - s--; - utf8_decode_char_nocheck(&s, &uc); - - txt[0] = '\\'; - txt[1] = 'u'; - txt[6] = '\\'; - txt[7] = 'u'; - if (uc <= 0xFFFF) { - write_hex16(txt+2, uc); - txt[6] = '\0'; - } else { - uc -= 0x10000; - lc = uc & 0x3FF; - uc = uc >> 10; - uc |= 0xD800; - lc |= 0xDC00; - write_hex16(txt+2, uc); - write_hex16(txt+8, lc); - txt[12] = '\0'; + + switch (c) + { + case '\\': + e = '\\'; + break; + case '\b': + e = 'b'; + break; + case '\f': + e = 'f'; + break; + case '\n': + e = 'n'; + break; + case '\r': + e = 'r'; + break; + case '\t': + e = 't'; + break; + default: + { + if (c == quote) + { + e = quote; + break; + } + if (c < 0x1F || (c >= 0x80 && escape_unicode)) + { + /* Encode using \u.... */ + unsigned int uc, + lc; + char txt[13]; + + s--; + utf8_decode_char_nocheck(&s, &uc); + + txt[0] = '\\'; + txt[1] = 'u'; + txt[6] = '\\'; + txt[7] = 'u'; + if (uc <= 0xFFFF) + { + write_hex16(txt + 2, uc); + txt[6] = '\0'; + } + else + { + uc -= 0x10000; + lc = uc & 0x3FF; + uc = uc >> 10; + uc |= 0xD800; + lc |= 0xDC00; + write_hex16(txt + 2, uc); + write_hex16(txt + 8, lc); + txt[12] = '\0'; + } + + string_append(out, txt); + continue; /* Skip backslash-encoding code below. */ } - - string_append(out, txt); - continue; /* Skip backslash-encoding code below. */ + e = 0; } - e = 0; - } } - + string_append_char(out, e ? '\\' : c); if (e) string_append_char(out, e); } - + string_append_char(out, quote); - + return true; } -static bool encode_number(String out, const char *string) +static bool +encode_number(String out, const char *string) { const char *s = string; - const char *start, *end; - + const char *start, + *end; + if (!string) return false; - + /* Validate number, trimming whitespace. */ skip_whitespace(&s); start = s; @@ -884,31 +1032,34 @@ static bool encode_number(String out, const char *string) skip_whitespace(&s); if (*s != '\0') return false; - + /* Append number to out */ - string_append_length(out, start, end-start); - + string_append_length(out, start, end - start); + return true; } -typedef struct { - String str; - bool use_orig; - bool escape_unicode; - bool trim; -} json_encode_ctx; +typedef struct +{ + String str; + bool use_orig; + bool escape_unicode; + bool trim; +} json_encode_ctx; -static bool json_encode_recurse(json_node *node, json_encode_ctx *ctx); +static bool json_encode_recurse(json_node * node, json_encode_ctx * ctx); -char *json_encode(json_node *node, int options) +char * +json_encode(json_node * node, int options) { json_encode_ctx ctx = { NewString(), !!(options & JSONOPT_USE_ORIG), !!(options & JSONOPT_ESCAPE_UNICODE), - !(options & JSONOPT_NO_TRIM)}; + !(options & JSONOPT_NO_TRIM)}; - if (!json_encode_recurse(node, &ctx)) { + if (!json_encode_recurse(node, &ctx)) + { string_free(ctx.str); return NULL; } @@ -916,28 +1067,34 @@ char *json_encode(json_node *node, int options) return string_buffer(ctx.str); } -static bool json_encode_recurse(json_node *node, json_encode_ctx *ctx) +static bool +json_encode_recurse(json_node * node, json_encode_ctx * ctx) { - #define has_orig(field) \ +#define has_orig(field) \ (use_orig && node->orig.field.start) - #define push_orig(field) \ +#define push_orig(field) \ string_append_range(ctx->str, node->orig.field.start, node->orig.field.end) - bool use_orig = ctx->use_orig; - bool trim = ctx->trim; + bool use_orig = ctx->use_orig; + bool trim = ctx->trim; - ctx->trim = false; /* Don't trim internal nodes, just the root node. */ + ctx->trim = false; /* Don't trim internal nodes, just the root + * node. */ if (!trim && has_orig(left_space)) push_orig(left_space); - if (has_orig(value)) { + if (has_orig(value)) + { push_orig(value); - } else { - const char *txt = NULL; - json_node *child; - - switch (node->type) { + } + else + { + const char *txt = NULL; + json_node *child; + + switch (node->type) + { case JSON_NULL: txt = "null"; break; @@ -948,11 +1105,11 @@ static bool json_encode_recurse(json_node *node, json_encode_ctx *ctx) txt = "false"; break; case JSON_STRING: - if (!encode_string( ctx->str, - node->v.string.str, - node->v.string.length, - '"', - ctx->escape_unicode)) + if (!encode_string(ctx->str, + node->v.string.str, + node->v.string.length, + '"', + ctx->escape_unicode)) return false; break; case JSON_NUMBER: @@ -962,7 +1119,8 @@ static bool json_encode_recurse(json_node *node, json_encode_ctx *ctx) case JSON_ARRAY: string_append_char(ctx->str, '['); - json_foreach(child, node) { + json_foreach(child, node) + { json_encode_recurse(child, ctx); if (child->next) string_append_char(ctx->str, ','); @@ -973,23 +1131,29 @@ static bool json_encode_recurse(json_node *node, json_encode_ctx *ctx) case JSON_OBJECT: string_append_char(ctx->str, '{'); - json_foreach(child, node) { - /* Shadows the parent node (assigned to the variable @node) - * so we can use our macros on the child node instead. - * Hurray for lexical scoping! */ - json_node *node = child; + json_foreach(child, node) + { + /* + * Shadows the parent node (assigned to the variable + * @node) so we can use our macros on the child node + * instead. Hurray for lexical scoping! + */ + json_node *node = child; if (has_orig(key_left_space)) push_orig(key_left_space); - if (has_orig(key)) { + if (has_orig(key)) + { push_orig(key); - } else { - if (!encode_string( ctx->str, - node->key, - node->key_length, - '"', - ctx->escape_unicode)) + } + else + { + if (!encode_string(ctx->str, + node->key, + node->key_length, + '"', + ctx->escape_unicode)) return false; } @@ -1019,15 +1183,17 @@ static bool json_encode_recurse(json_node *node, json_encode_ctx *ctx) return true; - #undef has_orig - #undef push_orig +#undef has_orig +#undef push_orig } -char *json_encode_string(const char *str, size_t length, char quote, bool escape_unicode) +char * +json_encode_string(const char *str, size_t length, char quote, bool escape_unicode) { String(ret); - if (!encode_string(ret, str, length, quote, escape_unicode)) { + if (!encode_string(ret, str, length, quote, escape_unicode)) + { string_free(ret); return NULL; } @@ -1038,54 +1204,63 @@ char *json_encode_string(const char *str, size_t length, char quote, bool escape /************************ Liberal JSON support ***********************/ -bool json_validate_liberal(const char *str) +bool +json_validate_liberal(const char *str) { - char *cleaned = json_cleanup(str); - json_node *node = json_decode(cleaned); - bool ret = !!node; + char *cleaned = json_cleanup(str); + json_node *node = json_decode(cleaned); + bool ret = !!node; if (node) json_delete(node); if (cleaned) JSON_free(cleaned); - + return ret; } -char *json_cleanup(const char *str) +char * +json_cleanup(const char *str) { String(ret); const char *p = str; const char *s = str; - int comment_start_width = 0; - char quote_char = 0; - /* flush(): flush content we have scanned, meaning append characters - * from p thru s to ret, then set p to s. */ - #define flush() do { \ + int comment_start_width = 0; + char quote_char = 0; + + /* + * flush(): flush content we have scanned, meaning append characters from + * p thru s to ret, then set p to s. + */ +#define flush() do { \ string_append_length(ret, p, s-p); \ p = s; \ } while(0) - + if (!str) return NULL; - + goto begin; - + begin: - for (;*s; s++) { + for (; *s; s++) + { if (*s == '"' || *s == '\'') goto quote; - if (isdigit(*s) || *s=='-' || *s=='+' || *s=='.') + if (isdigit(*s) || *s == '-' || *s == '+' || *s == '.') goto number; - if (s[0]=='#') { + if (s[0] == '#') + { comment_start_width = 1; goto line_comment; } - if (s[0]=='/' && s[1]=='/') { + if (s[0] == '/' && s[1] == '/') + { comment_start_width = 2; goto line_comment; } - if (s[0]=='/' && s[1]=='*') { + if (s[0] == '/' && s[1] == '*') + { comment_start_width = 2; goto c_comment; } @@ -1095,44 +1270,61 @@ begin: quote: quote_char = *s; - if (*s == '\'') { + if (*s == '\'') + { flush(); string_append_char(ret, '"'); - p = s = s+1; - } else { + p = s = s + 1; + } + else + { s++; } - while (*s) { - if (*s == quote_char) { - if (*s == '\'') { + while (*s) + { + if (*s == quote_char) + { + if (*s == '\'') + { flush(); string_append_char(ret, '"'); - p = s = s+1; - } else { + p = s = s + 1; + } + else + { s++; } break; - } else if (*s == '"') { - /* We're converting single quotes to double quotes, - * so double quotes need to be automatically escaped. */ + } + else if (*s == '"') + { + /* + * We're converting single quotes to double quotes, so double + * quotes need to be automatically escaped. + */ flush(); string_append_char(ret, '\\'); s++; - } else if (*s == '\\') { + } + else if (*s == '\\') + { s++; - switch (*s) { + switch (*s) + { case '\0': break; case '\'': /* Convert \' to \u0027 */ flush(); string_append(ret, "u0027"); - p = s = s+1; + p = s = s + 1; break; default: s++; } - } else { + } + else + { s++; } } @@ -1140,80 +1332,108 @@ quote: number: /* Skip a '-', or remove a '+' if present. */ - if (*s == '-') { + if (*s == '-') + { s++; - } else if (*s == '+') { + } + else if (*s == '+') + { flush(); - p = s = s+1; + p = s = s + 1; } /* Make sure number has at least one digit. */ - if (!isdigit(*s)) { + if (!isdigit(*s)) + { if (*s != '.') goto failed; if (!isdigit(s[1])) goto failed; } - /* Make sure that if first digit before '.' is '0', that it is the only digit. - * Leading 0s are not allowed, and for a good reason: to avoid ambiguity - * between octal and decimal formats. */ - if (*s == '0') { + + /* + * Make sure that if first digit before '.' is '0', that it is the only + * digit. Leading 0s are not allowed, and for a good reason: to avoid + * ambiguity between octal and decimal formats. + */ + if (*s == '0') + { s++; if (isdigit(*s)) goto failed; goto frac; } /* Skip digits, or add a '0' if none are present. */ - if (isdigit(*s)) { - do s++; while (isdigit(*s)); - } else { + if (isdigit(*s)) + { + do + s++; + while (isdigit(*s)); + } + else + { flush(); string_append_char(ret, '0'); } goto frac; - + frac: - if (*s == '.') { + if (*s == '.') + { s++; - if (isdigit(*s)) { - do s++; while (isdigit(*s)); - } else { + if (isdigit(*s)) + { + do + s++; + while (isdigit(*s)); + } + else + { flush(); string_append_char(ret, '0'); } } /* exp: */ - if (*s=='E' || *s=='e') { + if (*s == 'E' || *s == 'e') + { s++; - if (*s=='+' || *s=='-') + if (*s == '+' || *s == '-') s++; if (!isdigit(*s)) goto failed; - do s++; while (isdigit(*s)); + do + s++; + while (isdigit(*s)); } - /* The isdigit check is not needed, but here - * for clarity and safety. */ - if (isdigit(*s) || *s=='-' || *s=='+' || *s=='.') + + /* + * The isdigit check is not needed, but here for clarity and safety. + */ + if (isdigit(*s) || *s == '-' || *s == '+' || *s == '.') goto failed; goto begin; -line_comment: /* Remove all characters up to newline */ +line_comment: /* Remove all characters up to newline */ flush(); s += comment_start_width; /* Skip characters up to newline */ while (*s && !(*s == '\n' || *s == '\r')) s++; /* Skip newline character and its complement (if present) */ - if (s[0]) { - if (s[1] == '\n'+'\r'-s[0]) + if (s[0]) + { + if (s[1] == '\n' + '\r' - s[0]) s++; s++; } - /* Set begin marker so characters skipped are not - * appended to output on next flush. */ + + /* + * Set begin marker so characters skipped are not appended to output on + * next flush. + */ p = s; goto begin; - -c_comment: /* Remove all characters up to star-slash */ + +c_comment: /* Remove all characters up to star-slash */ flush(); s += comment_start_width; /* Skip characters up to and including star-slash */ @@ -1222,17 +1442,20 @@ c_comment: /* Remove all characters up to star-slash */ if (*s) s += 2; else - goto failed; /* No star-slash present */ - /* Set begin marker so characters skipped are not - * appended to output on next flush. */ + goto failed; /* No star-slash present */ + + /* + * Set begin marker so characters skipped are not appended to output on + * next flush. + */ p = s; goto begin; failed: string_free(ret); return NULL; - - #undef flush + +#undef flush } @@ -1240,13 +1463,14 @@ failed: static const bool utf8_allow_surrogates = false; -static void utf8_decode_char_nocheck(const char **sp, unsigned int *uc) +static void +utf8_decode_char_nocheck(const char **sp, unsigned int *uc) { - const unsigned char *s = (const unsigned char *)*sp; + const unsigned char *s = (const unsigned char *) *sp; unsigned char c = *s++; unsigned int len; unsigned char sf[4] = {0xFF, 0x1F, 0xF, 0x7}; - + if (c < 0x80) len = 0; else if (c < 0xE0) @@ -1255,64 +1479,74 @@ static void utf8_decode_char_nocheck(const char **sp, unsigned int *uc) len = 2; else len = 3; - + *uc = c & sf[len]; - while (len--) { + while (len--) + { *uc <<= 6; *uc |= *s++ & 0x3F; } - - *sp = (const char*)s; + + *sp = (const char *) s; } -static bool utf8_validate(const char *str, size_t length) +static bool +utf8_validate(const char *str, size_t length) { - const unsigned char *s = (const unsigned char*)str; + const unsigned char *s = (const unsigned char *) str; const unsigned char *e = s + length; - - while (s < e) { + + while (s < e) + { unsigned char c = *s++; - unsigned int len; /* number of bytes in sequence - 2 */ - + unsigned int len; /* number of bytes in sequence - 2 */ + /* If character is ASCII, move on. */ if (c < 0x80) continue; - + if (s >= e) - return false; /* Missing bytes in sequence. */ - - if (c < 0xE0) { - /* 2-byte sequence, U+0080 to U+07FF - c must be 11000010 or higher - s[0] must be 10xxxxxx */ + return false; /* Missing bytes in sequence. */ + + if (c < 0xE0) + { + /* + * 2-byte sequence, U+0080 to U+07FF c must be 11000010 or higher + * s[0] must be 10xxxxxx + */ len = 0; if (c < 0xC2) return false; - } else if (c < 0xF0) { - /* 3-byte sequence, U+0800 to U+FFFF - Note that the surrogate range is U+D800 to U+DFFF, - and that U+FFFE and U+FFFF are illegal characters. - c must be >= 11100000 (which it is) - If c is 11100000, then s[0] must be >= 10100000 - If the global parameter utf8_allow_surrogates is false: - If c is 11101101 and s[0] is >= 10100000, - then this is a surrogate and we should fail. - If c is 11101111, s[0] is 10111111, and s[1] >= 10111110, - then this is an illegal character and we should fail. - s[0] and s[1] must be 10xxxxxx */ + } + else if (c < 0xF0) + { + /* + * 3-byte sequence, U+0800 to U+FFFF Note that the surrogate range + * is U+D800 to U+DFFF, and that U+FFFE and U+FFFF are illegal + * characters. c must be >= 11100000 (which it is) If c is + * 11100000, then s[0] must be >= 10100000 If the global parameter + * utf8_allow_surrogates is false: If c is 11101101 and s[0] is >= + * 10100000, then this is a surrogate and we should fail. If c is + * 11101111, s[0] is 10111111, and s[1] >= 10111110, then this is + * an illegal character and we should fail. s[0] and s[1] must be + * 10xxxxxx + */ len = 1; if (c == 0xE0 && *s < 0xA0) return false; if (!utf8_allow_surrogates && c == 0xED && *s >= 0xA0) return false; - if (c == 0xEF && s[0] == 0xBF && (s+1 >= e || s[1] >= 0xBE)) + if (c == 0xEF && s[0] == 0xBF && (s + 1 >= e || s[1] >= 0xBE)) return false; - } else { - /* 4-byte sequence, U+010000 to U+10FFFF - c must be >= 11110000 (which it is) and <= 11110100 - If c is 11110000, then s[0] must be >= 10010000 - If c is 11110100, then s[0] must be < 10010000 - s[0], s[1], and s[2] must be 10xxxxxx */ + } + else + { + /* + * 4-byte sequence, U+010000 to U+10FFFF c must be >= 11110000 + * (which it is) and <= 11110100 If c is 11110000, then s[0] must + * be >= 10010000 If c is 11110100, then s[0] must be < 10010000 + * s[0], s[1], and s[2] must be 10xxxxxx + */ len = 2; if (c > 0xF4) return false; @@ -1321,16 +1555,17 @@ static bool utf8_validate(const char *str, size_t length) if (c == 0xF4 && *s >= 0x90) return false; } - + if (s + len >= e) - return false; /* Missing bytes in sequence. */ - - do { + return false; /* Missing bytes in sequence. */ + + do + { if ((*s++ & 0xC0) != 0x80) return false; } while (len--); } - + return true; } @@ -1339,33 +1574,40 @@ static bool utf8_validate(const char *str, size_t length) * to *out and updating *out to point to the end of the UTF-8 sequence. * * If uc is too high, no character will be emitted, and *out will - * not be changed. If uc is in the UTF-16 surrogate range + * not be changed. If uc is in the UTF-16 surrogate range * (U+D800 thru U+DFFF) or is a designated not-a-character * (U+FFFE or U+FFFF), the character will be emitted anyway, * although it is technically invalid UTF-8. * * Returns the number of characters emitted. */ -static int utf8_encode_char(char *out, unsigned int uc) +static int +utf8_encode_char(char *out, unsigned int uc) { - char *start = out; - - if (uc < 0x80) { + char *start = out; + + if (uc < 0x80) + { *out++ = uc & 0x7F; - } else if (uc < 0x800) { + } + else if (uc < 0x800) + { *out++ = 0xC0 | (uc >> 6); *out++ = 0x80 | (uc & 0x3F); - } else if (uc < 0x10000) { + } + else if (uc < 0x10000) + { *out++ = 0xE0 | (uc >> 12); *out++ = 0x80 | ((uc >> 6) & 0x3F); *out++ = 0x80 | (uc & 0x3F); - } else if (uc < 0x110000) { + } + else if (uc < 0x110000) + { *out++ = 0xF0 | ((uc >> 18) & 0x07); *out++ = 0x80 | ((uc >> 12) & 0x3F); *out++ = 0x80 | ((uc >> 6) & 0x3F); *out++ = 0x80 | (uc & 0x3F); } - + return out - start; } - |
