diff options
| -rw-r--r-- | expected/json_path.out | 267 | ||||
| -rw-r--r-- | jsonpath.c | 180 | ||||
| -rw-r--r-- | jsonpath.h | 3 | ||||
| -rw-r--r-- | sql/json_path.sql | 55 |
4 files changed, 345 insertions, 160 deletions
diff --git a/expected/json_path.out b/expected/json_path.out index a77497c..22c0f10 100644 --- a/expected/json_path.out +++ b/expected/json_path.out @@ -301,83 +301,197 @@ $$); $..["\"hello\"\\\"world\""][5]..[3][*][*] (1 row) -SELECT json_path($$"⁰¹\u0000³"$$, '[0]'); +select parse_json_path('$char'); + parse_json_path +----------------- + +(1 row) + +select parse_json_path('$.char'); + parse_json_path +----------------- + $["char"] +(1 row) + +select parse_json_path('$.char()'); + parse_json_path +----------------- + +(1 row) + +select parse_json_path('$.char(5)'); + parse_json_path +----------------- + $.char(5) +(1 row) + +select parse_json_path('$.char( -1 )'); + parse_json_path +----------------- + $.char(-1) +(1 row) + +select parse_json_path('$.char( -1 ) . char(0)'); + parse_json_path +-------------------- + $.char(-1).char(0) +(1 row) + +select parse_json_path('$.char( -1 ) . char ( 0 ) '); + parse_json_path +-------------------- + $.char(-1).char(0) +(1 row) + +select parse_json_path('$.char( +1 ) . char ( 0 ) '); + parse_json_path +------------------- + $.char(1).char(0) +(1 row) + +select parse_json_path('$ . char( +1 ) . char ( 0 ) '); + parse_json_path +------------------- + $.char(1).char(0) +(1 row) + +select parse_json_path('$ .. char( +1 ) . char ( 0 ) '); + parse_json_path +-------------------- + $..char(1).char(0) +(1 row) + +select parse_json_path('$ .. char( +1 ) .. char ( 0 ) '); + parse_json_path +--------------------- + $..char(1)..char(0) +(1 row) + +select parse_json_path('$ . char( +1 ) .. char ( 0 ) '); + parse_json_path +-------------------- + $.char(1)..char(0) +(1 row) + +select parse_json_path('$ . char( +1 ) char ( 0 ) '); + parse_json_path +----------------- + +(1 row) + +select parse_json_path('$ char( +1 ) char ( 0 ) '); + parse_json_path +----------------- + +(1 row) + +select parse_json_path('char( +1 ) . char ( 0 ) '); + parse_json_path +------------------- + $.char(1).char(0) +(1 row) + +select parse_json_path('.char( +1 ) . char ( 0 ) '); + parse_json_path +------------------- + $.char(1).char(0) +(1 row) + +select parse_json_path('..char( +1 ) . char ( 0 ) '); + parse_json_path +-------------------- + $..char(1).char(0) +(1 row) + +select parse_json_path('...char( +1 ) . char ( 0 ) '); + parse_json_path +----------------- + +(1 row) + +select parse_json_path('[char(5)]'); + parse_json_path +----------------- + +(1 row) + +SELECT json_path($$"⁰¹\u0000³"$$, 'char(0)'); json_path ----------- "⁰" (1 row) -SELECT json_path($$"⁰¹\u0000³"$$, '[1]'); +SELECT json_path($$"⁰¹\u0000³"$$, 'char(1)'); json_path ----------- "¹" (1 row) -SELECT json_path($$"⁰¹\u0000³"$$, '[2]'); +SELECT json_path($$"⁰¹\u0000³"$$, 'char(2)'); json_path ----------- "\u0000" (1 row) -SELECT json_path($$"⁰¹\u0000³"$$, '[3]'); +SELECT json_path($$"⁰¹\u0000³"$$, 'char(3)'); json_path ----------- "³" (1 row) -SELECT json_path('"0123"', '[-1]'); +SELECT json_path('"0123"', 'char(-1)'); json_path ----------- (0 rows) -SELECT json_path('"0123"', '[0]'); +SELECT json_path('"0123"', 'char(0)'); json_path ----------- "0" (1 row) -SELECT json_path('"0123"', '[1]'); +SELECT json_path('"0123"', 'char(1)'); json_path ----------- "1" (1 row) -SELECT json_path('"0123"', '[2]'); +SELECT json_path('"0123"', 'char(2)'); json_path ----------- "2" (1 row) -SELECT json_path('"0123"', '[3]'); +SELECT json_path('"0123"', 'char(3)'); json_path ----------- "3" (1 row) -SELECT json_path('"0123"', '[4]'); +SELECT json_path('"0123"', 'char(4)'); json_path ----------- (0 rows) -SELECT json_path('"⁰¹²³"', '[0]'); +SELECT json_path('"⁰¹²³"', 'char(0)'); json_path ----------- "⁰" (1 row) -SELECT json_path('"⁰¹²³"', '[1]'); +SELECT json_path('"⁰¹²³"', 'char(1)'); json_path ----------- "¹" (1 row) -SELECT json_path('"⁰¹²³"', '[2]'); +SELECT json_path('"⁰¹²³"', 'char(2)'); json_path ----------- "²" (1 row) -SELECT json_path('"⁰¹²³"', '[3]'); +SELECT json_path('"⁰¹²³"', 'char(3)'); json_path ----------- "³" @@ -600,26 +714,17 @@ SELECT json_path(json, 'store.toys[*]') FROM sample; SELECT json_path(json, 'store.toys[*][0]') FROM sample; json_path ----------- - "Y" - "B" - "B" -(3 rows) +(0 rows) SELECT json_path(json, 'store.toys[*][1]') FROM sample; json_path ----------- - "o" - "o" - "a" -(3 rows) +(0 rows) SELECT json_path(json, 'store.toys[*][0][0]') FROM sample; json_path ----------- - "Y" - "B" - "B" -(3 rows) +(0 rows) SELECT json_path(json, 'store.toys[*][0][1]') FROM sample; json_path @@ -725,20 +830,11 @@ SELECT json_path(json, '..[0]') FROM sample; "author": "author 0",+ "prices": [1,2,3] + } - "b" - "a" 1 - "b" - "a" 4 - "b" - "a" 7 "Yo-yo" - "Y" - "B" - "B" -(14 rows) +(5 rows) SELECT json_path(json, '..[1]') FROM sample; json_path @@ -748,20 +844,11 @@ SELECT json_path(json, '..[1]') FROM sample; "author": "author 1",+ "prices": [4,5,6] + } - "o" - "u" 2 - "o" - "u" 5 - "o" - "u" 8 "Boomerang" - "o" - "o" - "a" -(14 rows) +(5 rows) SELECT json_path(json, '..[2]') FROM sample; json_path @@ -771,34 +858,16 @@ SELECT json_path(json, '..[2]') FROM sample; "author": "author 2",+ "prices": [7,8,9] + } - "o" - "t" 3 - "o" - "t" 6 - "o" - "t" 9 "Basketball" - "-" - "o" - "s" -(14 rows) +(5 rows) SELECT json_path(json, '..[3]') FROM sample; json_path ----------- - "k" - "h" - "k" - "h" - "k" - "h" - "y" - "m" - "k" -(9 rows) +(0 rows) SELECT json_path(json, '$') FROM sample; json_path @@ -912,3 +981,69 @@ SELECT json_path(json, '..*') FROM sample; "Basketball" (27 rows) +SELECT json_path(json, '..char(-1)') FROM sample; + json_path +----------- +(0 rows) + +SELECT json_path(json, '..char(0)') FROM sample; + json_path +----------- + "b" + "a" + "b" + "a" + "b" + "a" + "Y" + "B" + "B" +(9 rows) + +SELECT json_path(json, '..char(1)') FROM sample; + json_path +----------- + "o" + "u" + "o" + "u" + "o" + "u" + "o" + "o" + "a" +(9 rows) + +SELECT json_path(json, '..char(2)') FROM sample; + json_path +----------- + "o" + "t" + "o" + "t" + "o" + "t" + "-" + "o" + "s" +(9 rows) + +SELECT json_path(json, '..char(3)') FROM sample; + json_path +----------- + "k" + "h" + "k" + "h" + "k" + "h" + "y" + "m" + "k" +(9 rows) + +SELECT json_path(json, '..char(100)') FROM sample; + json_path +----------- +(0 rows) + @@ -81,7 +81,7 @@ static jp_element *mkWildcard(bool rd) return elem; } -static jp_element *mkIndexSubscript(int index, bool rd) +static jp_element *mkIndexSubscript(long index, bool rd) { jp_element *elem = mkElement(JP_INDEX_SUBSCRIPT, rd); elem->data.index = index; @@ -96,6 +96,13 @@ static jp_element *mkKeySubscript(char *key, size_t length, bool rd) return elem; } +static jp_element *mkCallChar(long index, bool rd) +{ + jp_element *elem = mkElement(JP_CALL_CHAR, rd); + elem->data.index = index; + return elem; +} + static JPRef *mkRef(JPRefType type) { JPRef *ref = palloc0(sizeof(*ref)); @@ -148,6 +155,9 @@ char *jp_show(JSONPath *jp) appendStringInfo(string, "%s[%s]", rd ? ".." : "", tmp); pfree(tmp); break; + case JP_CALL_CHAR: + appendStringInfo(string, "%s(%ld)", rd ? "..char" : ".char", elem->data.index); + break; default: Assert(false); } @@ -156,11 +166,25 @@ char *jp_show(JSONPath *jp) return string->data; } +static bool parse_long(const char **s, long *out) +{ + const char *p = *s; + + errno = 0; + *out = strtol(*s, (char**)&p, 10); + if (p <= *s || errno != 0) + return false; + + *s = p; + return true; +} + JSONPath *jp_parse(const char *pattern) { JSONPath *jp = NIL; const char *s = pattern; - const char *p; + const char *start; + const char *end; bool recursive_descent = false; bool bracket = false; const char *err_msg = NULL; @@ -185,6 +209,7 @@ JSONPath *jp_parse(const char *pattern) begin_element: skip_spaces(&s); +begin_element_noskip: recursive_descent = false; bracket = false; @@ -258,26 +283,36 @@ wildcard: goto next_element; integer: - p = s; - errno = 0; - index = strtol(s, (char**)&p, 10); - if (p <= s || errno != 0) + if (!parse_long(&s, &index)) goto failed; - s = p; jp = lappend(jp, mkIndexSubscript(index, recursive_descent)); goto next_element; identifier: - p = s; - while (identifier_char(*p)) - p++; - key = pnstrdup(s, p - s); - key_length = p - s; - s = p; + start = s; + while (identifier_char(*s)) + s++; + end = s; + + skip_spaces(&s); + + if (*s == '(') { + if (end - start == 4 && !memcmp(start, "char", 4)) + { + s++; + skip_spaces(&s); + goto call_char; + } + + goto failed; + } + + key = pnstrdup(start, end - start); + key_length = end - start; jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent)); - goto next_element; + goto begin_element_noskip; string: key = json_decode_string(&s, &key_length, false); @@ -287,6 +322,18 @@ string: jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent)); goto next_element; +call_char: + if (!parse_long(&s, &index)) + goto failed; + + skip_spaces(&s); + + if (*s++ != ')') + goto failed; + + jp = lappend(jp, mkCallChar(index, recursive_descent)); + goto begin_element; + end: return jp; @@ -331,66 +378,6 @@ static size_t utf8_substring( return sub_length; } -static JPRef *json_index_subscript(JPRef *ref, long index) -{ - json_node *json; - - if (index < 0) - return NULL; - - switch (ref->type) { - case JP_REF_NODE: - json = ref->u.node; - - switch (json->type) { - case JSON_STRING: { - const char *sub_start; - size_t sub_bytes; - size_t sub_length; - - sub_length = utf8_substring( - json->v.string.str, json->v.string.length, - index, 1, - &sub_start, &sub_bytes); - - if (sub_length != 1) - return NULL; - - return mkRefChar(sub_start, sub_bytes); - } - case JSON_ARRAY: { - json_node *child; - - if ((size_t)index >= json->v.children.count) - return NULL; - - for (child = json->v.children.head; - index && child; - child = child->next, index--) {} - - if (index != 0 || child == NULL) { - Assert(false); - return NULL; - } - - return mkRefNode(child); - } - default: - return NULL; - } - break; - - case JP_REF_CHAR: - if (index != 0) - return NULL; - return ref; - - default: - Assert(false); - return NULL; - } -} - /* Currently, a lot of JPRef nodes are allocated just to pass json_node pointers to match_recurse. If this becomes a memory/performance issue in the future, @@ -402,7 +389,6 @@ static void match_recurse(void on_match(void *ctx, JPRef *ref), void *ctx, ListCell *path, JPRef *ref) { jp_element *elem; - JPRef *child_ref; json_node *json, *child; if (path == NULL) { @@ -427,13 +413,30 @@ static void match_recurse(void on_match(void *ctx, JPRef *ref), void *ctx, break; case JP_INDEX_SUBSCRIPT: - child_ref = json_index_subscript(ref, elem->data.index); - if (child_ref != NULL) - match_recurse(on_match, ctx, lnext(path), child_ref); + if (json && json->type == JSON_ARRAY) { + size_t i; + size_t index = elem->data.index; + /* Note: elem->data.index is signed (long), + while index is unsigned (size_t). */ + + if (elem->data.index >= 0 && index < json->v.children.count) { + for (child = json->v.children.head, i = 0; + child != NULL && i < index; + child = child->next, i++) + { + } + + /* If this fails, it means json->v.children.count + was greater than the actual number of children. */ + Assert(i == index && child != NULL); + + match_recurse(on_match, ctx, lnext(path), mkRefNode(child)); + } + } break; case JP_KEY_SUBSCRIPT: - if (json->type == JSON_OBJECT) { + if (json && json->type == JSON_OBJECT) { json_foreach(child, json) { if (child->key != NULL && child->key_length == elem->data.key.length && @@ -445,6 +448,25 @@ static void match_recurse(void on_match(void *ctx, JPRef *ref), void *ctx, } break; + case JP_CALL_CHAR: + if (json && json->type == JSON_STRING && elem->data.index >= 0) { + const char *sub_start; + size_t sub_bytes; + size_t sub_length; + + sub_length = utf8_substring( + json->v.string.str, json->v.string.length, + elem->data.index, 1, + &sub_start, &sub_bytes); + + if (sub_length == 1) + match_recurse(on_match, ctx, lnext(path), mkRefChar(sub_start, sub_bytes)); + } else if (ref->type == JP_REF_CHAR && elem->data.index == 0) { + /* char(0) on a character yields itself. */ + match_recurse(on_match, ctx, lnext(path), ref); + } + break; + default:; } @@ -9,7 +9,8 @@ typedef enum { JP_ROOT, JP_WILDCARD, JP_INDEX_SUBSCRIPT, - JP_KEY_SUBSCRIPT + JP_KEY_SUBSCRIPT, + JP_CALL_CHAR } jp_element_type; typedef struct { diff --git a/sql/json_path.sql b/sql/json_path.sql index 72a4c7e..01d972b 100644 --- a/sql/json_path.sql +++ b/sql/json_path.sql @@ -52,20 +52,40 @@ SELECT parse_json_path($$ [ 5 ] .. [3] . * [ * ] $$); -SELECT json_path($$"⁰¹\u0000³"$$, '[0]'); -SELECT json_path($$"⁰¹\u0000³"$$, '[1]'); -SELECT json_path($$"⁰¹\u0000³"$$, '[2]'); -SELECT json_path($$"⁰¹\u0000³"$$, '[3]'); -SELECT json_path('"0123"', '[-1]'); -SELECT json_path('"0123"', '[0]'); -SELECT json_path('"0123"', '[1]'); -SELECT json_path('"0123"', '[2]'); -SELECT json_path('"0123"', '[3]'); -SELECT json_path('"0123"', '[4]'); -SELECT json_path('"⁰¹²³"', '[0]'); -SELECT json_path('"⁰¹²³"', '[1]'); -SELECT json_path('"⁰¹²³"', '[2]'); -SELECT json_path('"⁰¹²³"', '[3]'); +select parse_json_path('$char'); +select parse_json_path('$.char'); +select parse_json_path('$.char()'); +select parse_json_path('$.char(5)'); +select parse_json_path('$.char( -1 )'); +select parse_json_path('$.char( -1 ) . char(0)'); +select parse_json_path('$.char( -1 ) . char ( 0 ) '); +select parse_json_path('$.char( +1 ) . char ( 0 ) '); +select parse_json_path('$ . char( +1 ) . char ( 0 ) '); +select parse_json_path('$ .. char( +1 ) . char ( 0 ) '); +select parse_json_path('$ .. char( +1 ) .. char ( 0 ) '); +select parse_json_path('$ . char( +1 ) .. char ( 0 ) '); +select parse_json_path('$ . char( +1 ) char ( 0 ) '); +select parse_json_path('$ char( +1 ) char ( 0 ) '); +select parse_json_path('char( +1 ) . char ( 0 ) '); +select parse_json_path('.char( +1 ) . char ( 0 ) '); +select parse_json_path('..char( +1 ) . char ( 0 ) '); +select parse_json_path('...char( +1 ) . char ( 0 ) '); +select parse_json_path('[char(5)]'); + +SELECT json_path($$"⁰¹\u0000³"$$, 'char(0)'); +SELECT json_path($$"⁰¹\u0000³"$$, 'char(1)'); +SELECT json_path($$"⁰¹\u0000³"$$, 'char(2)'); +SELECT json_path($$"⁰¹\u0000³"$$, 'char(3)'); +SELECT json_path('"0123"', 'char(-1)'); +SELECT json_path('"0123"', 'char(0)'); +SELECT json_path('"0123"', 'char(1)'); +SELECT json_path('"0123"', 'char(2)'); +SELECT json_path('"0123"', 'char(3)'); +SELECT json_path('"0123"', 'char(4)'); +SELECT json_path('"⁰¹²³"', 'char(0)'); +SELECT json_path('"⁰¹²³"', 'char(1)'); +SELECT json_path('"⁰¹²³"', 'char(2)'); +SELECT json_path('"⁰¹²³"', 'char(3)'); SELECT json_path('[1,2,3]', '$'); SELECT json_path('[1,2,3]', '$.+1'); SELECT json_path('[1,2,3]', '$.-1'); @@ -144,3 +164,10 @@ SELECT json_path(json, '..[3]') FROM sample; SELECT json_path(json, '$') FROM sample; SELECT json_path(json, '..*') FROM sample; + +SELECT json_path(json, '..char(-1)') FROM sample; +SELECT json_path(json, '..char(0)') FROM sample; +SELECT json_path(json, '..char(1)') FROM sample; +SELECT json_path(json, '..char(2)') FROM sample; +SELECT json_path(json, '..char(3)') FROM sample; +SELECT json_path(json, '..char(100)') FROM sample; |
