#include "jsonpath.h"

#include <ctype.h>
#include <errno.h>
#include <string.h>

#include "mb/pg_wchar.h"

/* NB: These macros evaluate their argument multiple times. */

/* isalpha() is locale-specific.  This simply matches [A-Za-z] . */
#define isletter(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))

/* Any byte above 127, i.e. part of a multi-byte character. */
#define isextended(c) ((unsigned char)(c) > 127)

/* Note that Unicode characters are allowed in identifiers. */
#define identifier_start(c) (isletter(c) || (c) == '_' || (c) == '$' || isextended(c))

/*
 * The <ctype.h> classifiers require an argument representable as
 * unsigned char (or EOF), so plain char values are cast before the call.
 */
#define identifier_char(c) (identifier_start(c) || isdigit((unsigned char)(c)))
#define integer_start(c) (isdigit((unsigned char)(c)) || (c) == '+' || (c) == '-')

/*
 * In a valid JSONPath list, the first element is always of type JP_ROOT.
 * This element is used so an otherwise empty JSONPath list won't be NULL.
 * This allows us to use NULL to indicate an invalid JSONPath.
 *
 * This function returns the first cell,
 * making sure it is of type JP_ROOT.
 */
static ListCell *jp_root(JSONPath *jp)
{
	ListCell   *cell;
	jp_element *elem;

	Assert(jp != NULL);

	cell = list_head(jp);
	elem = lfirst(cell);
	Assert(elem->type == JP_ROOT);

	return cell;
}

/*
 * This function returns the second cell of a JSONPath list
 * (the first cell after the JP_ROOT).
 */
static ListCell *jp_head(JSONPath *jp)
{
	return lnext(jp_root(jp));
}

/*
 * Advance *sp past any leading whitespace.
 *
 * Note that skip_spaces differs from skip_whitespace in json.c
 * in that this function treats '\f' and '\v' as whitespace.
 * This is because JSON does not accept these characters as
 * whitespace, but since this is JSONPath,
 * we can do whatever we want here :-)
 */
static void skip_spaces(const char **sp)
{
	const char *s = *sp;

	/* Cast: passing a negative char to isspace() is undefined behavior. */
	while (isspace((unsigned char) *s))
		s++;

	*sp = s;
}

/*
 * Allocate a zeroed jp_element with the given type and
 * recursive-descent flag.
 */
static jp_element *mkElement(jp_element_type type, bool rd)
{
	jp_element *elem = palloc0(sizeof(*elem));

	elem->type = type;
	elem->recursive_descent = rd;

	return elem;
}

/* Build the JP_ROOT element that starts every valid JSONPath list. */
static jp_element *mkRoot(void)
{
	jp_element *elem = mkElement(JP_ROOT, false);

	return elem;
}

/* Build a JP_WILDCARD element. */
static jp_element *mkWildcard(bool rd)
{
	jp_element *elem = mkElement(JP_WILDCARD, rd);

	return elem;
}

/*
 * Build a JP_INDEX_SUBSCRIPT element.
 *
 * The index is taken as a long because jp_parse obtains it from strtol()
 * and json_index_subscript consumes it as a long; a narrower parameter
 * would silently truncate large subscripts.
 */
static jp_element *mkIndexSubscript(long index, bool rd)
{
	jp_element *elem = mkElement(JP_INDEX_SUBSCRIPT, rd);

	elem->data.index = index;

	return elem;
}

/*
 * Build a JP_KEY_SUBSCRIPT element.  The key pointer is stored as-is
 * (not copied), together with its length in bytes.
 */
static jp_element *mkKeySubscript(char *key, size_t length, bool rd)
{
	jp_element *elem = mkElement(JP_KEY_SUBSCRIPT, rd);

	elem->data.key.ptr = key;
	elem->data.key.length = length;

	return elem;
}

/* Allocate a zeroed JPRef of the given type. */
static JPRef *mkRef(JPRefType type)
{
	JPRef	   *ref = palloc0(sizeof(*ref));

	ref->type = type;

	return ref;
}

/* Build a JPRef that points at a json_node. */
static JPRef *mkRefNode(json_node *node)
{
	JPRef	   *ref = mkRef(JP_REF_NODE);

	ref->u.node = node;

	return ref;
}

/*
 * Build a JPRef that points at a single character, given as a byte range.
 * The bytes are referenced, not copied.
 */
static JPRef *mkRefChar(const char *bytes, size_t length)
{
	JPRef	   *ref = mkRef(JP_REF_CHAR);

	ref->u.chr.bytes = bytes;
	ref->u.chr.length = length;

	return ref;
}

/*
 * Render a JSONPath list as text: "$" for the root, "[*]" for wildcards,
 * "[N]" for index subscripts and "[<encoded string>]" for key subscripts,
 * each prefixed with ".." when the element uses recursive descent.
 *
 * Returns the buffer of the StringInfo the text was built in.
 */
char *jp_show(JSONPath *jp)
{
	StringInfoData string[1];
	ListCell   *cell;
	jp_element *elem;
	bool		rd;
	char	   *tmp;

	initStringInfo(string);

	foreach(cell, jp)
	{
		elem = lfirst(cell);
		rd = elem->recursive_descent;

		switch (elem->type)
		{
			case JP_ROOT:
				appendStringInfoChar(string, '$');
				break;
			case JP_WILDCARD:
				appendStringInfoString(string, rd ? "..[*]" : "[*]");
				break;
			case JP_INDEX_SUBSCRIPT:
				/* Cast so the argument always matches the %ld conversion. */
				appendStringInfo(string, "%s[%ld]",
								 rd ? ".." : "", (long) elem->data.index);
				break;
			case JP_KEY_SUBSCRIPT:
				tmp = json_encode_string(elem->data.key.ptr,
										 elem->data.key.length, '"', false);
				Assert(tmp != NULL);
				appendStringInfo(string, "%s[%s]", rd ? ".." : "", tmp);
				pfree(tmp);
				break;
			default:
				Assert(false);
		}
	}

	return string->data;
}

/*
 * Parse a JSONPath pattern into a list of jp_element.
 *
 * The parser is a small goto-driven state machine:
 *
 *   begin_element     - expects end of input, "..", "." or "[".
 *   dot_subscript     - after "." or "..": wildcard, integer, identifier,
 *                       quoted string, or "[" (switching to bracket form).
 *   bracket_subscript - after "[": wildcard, integer or quoted string.
 *   next_element      - consumes the closing "]" for bracketed subscripts.
 *
 * A leading "$" is optional; if the pattern starts with neither "$" nor
 * ".", an implicit "." is assumed.
 *
 * Returns the JSONPath list (whose first element is always JP_ROOT), or
 * NULL if the pattern is empty or malformed.
 */
JSONPath *jp_parse(const char *pattern)
{
	JSONPath   *jp = NIL;
	const char *s = pattern;
	const char *p;
	bool		recursive_descent = false;
	bool		bracket = false;

	/*
	 * NOTE(review): err_msg is recorded on one failure path but is not
	 * currently reported to the caller; failures just return NULL.
	 */
	const char *err_msg = NULL;
	long		index;
	char	   *key;
	size_t		key_length;

	skip_spaces(&s);

	/* pattern may not be empty */
	if (!*s)
		return NULL;

	jp = lappend(jp, mkRoot());

	if (*s == '$')
	{
		s++;
		goto begin_element;
	}
	else if (*s != '.')
	{
		goto dot_subscript;		/* implicit '.' at beginning */
	}

begin_element:
	skip_spaces(&s);
	recursive_descent = false;
	bracket = false;

	if (*s == '\0')
		goto end;
	if (s[0] == '.' && s[1] == '.')
	{
		recursive_descent = true;
		s += 2;
		goto dot_subscript;
	}
	if (s[0] == '.')
	{
		s++;
		goto dot_subscript;
	}
	if (s[0] == '[')
	{
		s++;
		goto bracket_subscript;
	}

	goto failed;

next_element:
	if (bracket)
	{
		/* A bracketed subscript must be closed before the next element. */
		skip_spaces(&s);
		if (*s != ']')
			goto failed;
		s++;
	}
	goto begin_element;

dot_subscript:
	skip_spaces(&s);

	if (*s == '*')
		goto wildcard;
	if (integer_start(*s))
		goto integer;
	if (identifier_start(*s))
		goto identifier;
	if (*s == '"' || *s == '\'')
		goto string;
	if (*s == '[')
	{
		s++;
		goto bracket_subscript;
	}

	goto failed;

bracket_subscript:
	skip_spaces(&s);
	bracket = true;

	if (*s == '*')
		goto wildcard;
	if (integer_start(*s))
		goto integer;
	if (identifier_start(*s))
	{
		err_msg = "Identifiers may not be bracketed. This syntax is reserved for future use.";
		goto failed;
	}
	if (*s == '"' || *s == '\'')
		goto string;

	goto failed;

wildcard:
	s++;
	jp = lappend(jp, mkWildcard(recursive_descent));
	goto next_element;

integer:
	p = s;
	errno = 0;
	index = strtol(s, (char **) &p, 10);
	/* Reject input with no digits consumed, or an out-of-range value. */
	if (p <= s || errno != 0)
		goto failed;
	s = p;

	jp = lappend(jp, mkIndexSubscript(index, recursive_descent));
	goto next_element;

identifier:
	p = s;
	while (identifier_char(*p))
		p++;
	key = pnstrdup(s, p - s);
	key_length = p - s;
	s = p;

	jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent));
	goto next_element;

string:
	key = json_decode_string(&s, &key_length, false);
	if (!key)
		goto failed;

	jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent));
	goto next_element;

end:
	return jp;

failed:
	return NULL;
}

/*
 * Locate a substring of a UTF-8 buffer, measured in characters.
 *
 * src/srcbytes  - the buffer and its size in bytes
 * start         - number of characters to skip
 * length        - maximum number of characters to take
 * out_start     - receives a pointer (into src) to the substring
 * out_bytes     - receives the substring's size in bytes
 *
 * Returns the number of characters stepped over for the substring, which
 * is less than length if the buffer ends first.
 */
static size_t utf8_substring(
	const char *src, size_t srcbytes,
	size_t start, size_t length,
	const char **out_start, size_t *out_bytes)
{
	const char *e = src + srcbytes;
	const char *sub_start;
	const char *sub_end;
	size_t		sub_length;

	sub_start = src;
	while (start > 0 && sub_start < e)
	{
		sub_start += pg_utf_mblen((const unsigned char *) sub_start);
		start--;
	}

	sub_end = sub_start;
	sub_length = 0;
	while (sub_length < length && sub_end < e)
	{
		sub_end += pg_utf_mblen((const unsigned char *) sub_end);
		sub_length++;
	}

	/* Make sure the input didn't have a clipped UTF-8 character */
	if (sub_start > e)
	{
		Assert(false);
		sub_start = sub_end = e;
	}
	else if (sub_end > e)
	{
		Assert(false);
		sub_end = e;
	}

	*out_start = sub_start;
	*out_bytes = sub_end - sub_start;
	return sub_length;
}

/*
 * Return the first child of an array or object node,
 * or NULL for any other node type.  parent must not be NULL.
 */
static json_node *json_head(json_node *parent)
{
	switch (parent->type)
	{
		case JSON_ARRAY:
		case JSON_OBJECT:
			return parent->v.children.head;
		default:
			return NULL;
	}
}

/* Iterate over the children of parent (which must not be NULL). */
#define json_foreach(child, parent) \
	for ((child) = json_head(parent); (child) != NULL; (child) = (child)->next)

/*
 * Apply an integer subscript to a reference.
 *
 *  - On a string node, yields a JP_REF_CHAR for the index'th character.
 *  - On an array node, yields a JP_REF_NODE for the index'th child.
 *  - On a JP_REF_CHAR, index 0 yields the reference itself.
 *
 * Returns NULL if the index is negative or out of range, or if the
 * referenced value cannot be subscripted by an integer.
 */
static JPRef *json_index_subscript(JPRef *ref, long index)
{
	json_node  *json;

	if (index < 0)
		return NULL;

	switch (ref->type)
	{
		case JP_REF_NODE:
			json = ref->u.node;
			switch (json->type)
			{
				case JSON_STRING:
					{
						const char *sub_start;
						size_t		sub_bytes;
						size_t		sub_length;

						sub_length = utf8_substring(
							json->v.string.str, json->v.string.length,
							index, 1,
							&sub_start, &sub_bytes);

						if (sub_length != 1)
							return NULL;

						return mkRefChar(sub_start, sub_bytes);
					}

				case JSON_ARRAY:
					{
						json_node  *child;

						if ((size_t) index >= json->v.children.count)
							return NULL;

						/* Walk the child list to the requested position. */
						for (child = json->v.children.head;
							 index && child;
							 child = child->next, index--)
						{
						}

						/* The count said the child exists; a miss is a bug. */
						if (index != 0 || child == NULL)
						{
							Assert(false);
							return NULL;
						}

						return mkRefNode(child);
					}

				default:
					return NULL;
			}
			break;

		case JP_REF_CHAR:
			if (index != 0)
				return NULL;
			return ref;

		default:
			Assert(false);
			return NULL;
	}
}

/*
 * Currently, a lot of JPRef nodes are allocated just to pass json_node
 * pointers to match_recurse.  If this becomes a memory/performance issue
 * in the future, JPRef could be merged with json_node by adding JPRef's
 * specialty types to the json_type enum and json_node union.
 *
 * JPRef is currently not merged with json_node in an attempt to keep
 * the codebase tidy and easier to extend.
 */

/*
 * Match the remaining path (starting at cell `path`) against `ref`,
 * appending every matching JPRef to *results.
 *
 * When the current element has recursive_descent set, the same element is
 * additionally tried against every child of the current node.
 */
static void match_recurse(List **results, ListCell *path, JPRef *ref)
{
	jp_element *elem;
	JPRef	   *child_ref;
	json_node  *json,
			   *child;

	if (path == NULL)
	{
		/* The end of the JSONPath list is the "accept" state. */
		*results = lappend(*results, ref);
		return;
	}

	elem = lfirst(path);

	/* json is NULL when ref is not a node (e.g. a single character). */
	if (ref->type == JP_REF_NODE)
		json = ref->u.node;
	else
		json = NULL;

	switch (elem->type)
	{
		case JP_WILDCARD:
			if (json)
			{
				json_foreach(child, json)
					match_recurse(results, lnext(path), mkRefNode(child));
			}
			break;

		case JP_INDEX_SUBSCRIPT:
			child_ref = json_index_subscript(ref, elem->data.index);
			if (child_ref != NULL)
				match_recurse(results, lnext(path), child_ref);
			break;

		case JP_KEY_SUBSCRIPT:
			/*
			 * A non-node reference has no keys.  Without this guard,
			 * json_foreach would dereference a NULL node.
			 */
			if (json)
			{
				json_foreach(child, json)
				{
					if (child->key != NULL &&
						child->key_length == elem->data.key.length &&
						!memcmp(child->key, elem->data.key.ptr, child->key_length))
					{
						match_recurse(results, lnext(path), mkRefNode(child));
					}
				}
			}
			break;

		default:;
	}

	if (elem->recursive_descent && json)
	{
		json_foreach(child, json)
			match_recurse(results, path, mkRefNode(child));
	}
}

/*
 * Match a parsed JSONPath against a JSON tree.
 *
 * Returns a list of JPRef pointers, one per match (NIL if none).
 * jp must be a valid (non-NULL) JSONPath.
 */
List *jp_match(JSONPath *jp, json_node *json)
{
	ListCell   *lc = jp_head(jp);
	List	   *results = NIL;

	match_recurse(&results, lc, mkRefNode(json));

	return results;
}

/*
 * Encode a match result as JSON text: node references are passed to
 * json_encode, character references to json_encode_string.
 */
char *jpref_encode(JPRef *ref)
{
	switch (ref->type)
	{
		case JP_REF_NODE:
			return json_encode(ref->u.node, JSONOPT_USE_ORIG);

		case JP_REF_CHAR:
			return json_encode_string(ref->u.chr.bytes, ref->u.chr.length, '"', false);

		default:
			Assert(false);
			return NULL;
	}
}