* JSONPath index subscript no longer extracts chars from strings.

* char() method added to JSONPath for extracting characters from strings. Index subscripts (those using an integer) extract characters from strings in Stefan Goessner's JSONPath, and indexing a string works that way in JavaScript as well, but I believe this behavior is rather illogical and unexpected in the context of JSONPath, and a poor use of the [] real estate. Because extracting characters from strings can still be useful, I have added a char() method for this. I implemented it now so that the existing supporting code for character extraction stays in use rather than wasting away.
author: Joey Adams 2010-07-23 21:13:59 +0000
committer: Joey Adams 2010-07-23 21:13:59 +0000
commit: f5cbbbe875326dbe5d0c43bc3c692f64f77a30bd (patch)
tree: af65a7ecd05468d504710227f4d2f34a75a044a5 /jsonpath.c
parent: d60f4513e9490b2a0734f69f9de0bc92d8a874b4 (diff)
1 files changed, 101 insertions, 79 deletions
diff --git a/jsonpath.c b/jsonpath.c
index 9d53259..b3a0b76 100644
--- a/jsonpath.c
+++ b/jsonpath.c
@@ -81,7 +81,7 @@ static jp_element *mkWildcard(bool rd)
 	return elem;
 }
 
-static jp_element *mkIndexSubscript(int index, bool rd)
+static jp_element *mkIndexSubscript(long index, bool rd)
 {
 	jp_element *elem = mkElement(JP_INDEX_SUBSCRIPT, rd);
 	elem->data.index = index;
@@ -96,6 +96,13 @@ static jp_element *mkKeySubscript(char *key, size_t length, bool rd)
 	return elem;
 }
 
+static jp_element *mkCallChar(long index, bool rd)
+{
+	jp_element *elem = mkElement(JP_CALL_CHAR, rd);
+	elem->data.index = index;
+	return elem;
+}
+
 static JPRef *mkRef(JPRefType type)
 {
 	JPRef *ref = palloc0(sizeof(*ref));
@@ -148,6 +155,9 @@ char *jp_show(JSONPath *jp)
 				appendStringInfo(string, "%s[%s]", rd ? ".." : "", tmp);
 				pfree(tmp);
 				break;
+			case JP_CALL_CHAR:
+				appendStringInfo(string, "%s(%ld)", rd ? "..char" : ".char", elem->data.index);
+				break;
 			default:
 				Assert(false);
 		}
@@ -156,11 +166,25 @@ char *jp_show(JSONPath *jp)
 	return string->data;
 }
 
+static bool parse_long(const char **s, long *out)
+{
+	const char *p = *s;
+
+	errno = 0;
+	*out = strtol(*s, (char**)&p, 10);
+	if (p <= *s || errno != 0)
+		return false;
+
+	*s = p;
+	return true;
+}
+
 JSONPath *jp_parse(const char *pattern)
 {
 	JSONPath	*jp					= NIL;
 	const char	*s					= pattern;
-	const char	*p;
+	const char	*start;
+	const char	*end;
 	bool		recursive_descent 	= false;
 	bool		bracket				= false;
 	const char	*err_msg			= NULL;
@@ -185,6 +209,7 @@ JSONPath *jp_parse(const char *pattern)
 	
 begin_element:
 	skip_spaces(&s);
+begin_element_noskip:
 
 	recursive_descent = false;
 	bracket = false;
@@ -258,26 +283,36 @@ wildcard:
 	goto next_element;
 
 integer:
-	p = s;
-	errno = 0;
-	index = strtol(s, (char**)&p, 10);
-	if (p <= s || errno != 0)
+	if (!parse_long(&s, &index))
 		goto failed;
-	s = p;
 	
 	jp = lappend(jp, mkIndexSubscript(index, recursive_descent));
 	goto next_element;
 
 identifier:
-	p = s;
-	while (identifier_char(*p))
-		p++;
-	key = pnstrdup(s, p - s);
-	key_length = p - s;
-	s = p;
+	start = s;
+	while (identifier_char(*s))
+		s++;
+	end = s;
+
+	skip_spaces(&s);
+
+	if (*s == '(') {
+		if (end - start == 4 && !memcmp(start, "char", 4))
+		{
+			s++;
+			skip_spaces(&s);
+			goto call_char;
+		}
+
+		goto failed;
+	}
+
+	key = pnstrdup(start, end - start);
+	key_length = end - start;
 
 	jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent));
-	goto next_element;
+	goto begin_element_noskip;
 
 string:
 	key = json_decode_string(&s, &key_length, false);
@@ -287,6 +322,18 @@ string:
 	jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent));
 	goto next_element;
 
+call_char:
+	if (!parse_long(&s, &index))
+		goto failed;
+
+	skip_spaces(&s);
+
+	if (*s++ != ')')
+		goto failed;
+
+	jp = lappend(jp, mkCallChar(index, recursive_descent));
+	goto begin_element;
+
 end:
 	return jp;
 
@@ -331,66 +378,6 @@ static size_t utf8_substring(
 	return sub_length;
 }
 
-static JPRef *json_index_subscript(JPRef *ref, long index)
-{
-	json_node *json;
-
-	if (index < 0)
-		return NULL;
-	
-	switch (ref->type) {
-		case JP_REF_NODE:
-			json = ref->u.node;
-
-			switch (json->type) {
-				case JSON_STRING: {
-					const char	*sub_start;
-					size_t		sub_bytes;
-					size_t		sub_length;
-
-					sub_length = utf8_substring(
-						json->v.string.str, json->v.string.length,
-						index, 1,
-						&sub_start, &sub_bytes);
-
-					if (sub_length != 1)
-						return NULL;
-
-					return mkRefChar(sub_start, sub_bytes);
-				}
-				case JSON_ARRAY: {
-					json_node	*child;
-
-					if ((size_t)index >= json->v.children.count)
-						return NULL;
-
-					for (child = json->v.children.head;
-						 index && child;
-						 child = child->next, index--) {}
-
-					if (index != 0 || child == NULL) {
-						Assert(false);
-						return NULL;
-					}
-
-					return mkRefNode(child);
-				}
-				default:
-					return NULL;
-			}
-			break;
-
-		case JP_REF_CHAR:
-			if (index != 0)
-				return NULL;
-			return ref;
-
-		default:
-			Assert(false);
-			return NULL;
-	}
-}
-
 /*
 Currently, a lot of JPRef nodes are allocated just to pass json_node pointers
 to match_recurse.  If this becomes a memory/performance issue in the future,
@@ -402,7 +389,6 @@ static void match_recurse(void on_match(void *ctx, JPRef *ref), void *ctx,
 	ListCell *path, JPRef *ref)
 {
 	jp_element	*elem;
-	JPRef		*child_ref;
 	json_node	*json, *child;
 
 	if (path == NULL) {
@@ -427,13 +413,30 @@ static void match_recurse(void on_match(void *ctx, JPRef *ref), void *ctx,
 			break;
 
 		case JP_INDEX_SUBSCRIPT:
-			child_ref = json_index_subscript(ref, elem->data.index);
-			if (child_ref != NULL)
-				match_recurse(on_match, ctx, lnext(path), child_ref);
+			if (json && json->type == JSON_ARRAY) {
+				size_t	i;
+				size_t	index = elem->data.index;
+				/* Note: elem->data.index is signed (long),
+				   while index is unsigned (size_t). */
+
+				if (elem->data.index >= 0 && index < json->v.children.count) {
+					for (child = json->v.children.head, i = 0;
+					     child != NULL && i < index;
+						 child = child->next, i++)
+					{
+					}
+
+					/* If this fails, it means json->v.children.count
+					   was greater than the actual number of children. */
+					Assert(i == index && child != NULL);
+
+					match_recurse(on_match, ctx, lnext(path), mkRefNode(child));
+				}
+			}
 			break;
 
 		case JP_KEY_SUBSCRIPT:
-			if (json->type == JSON_OBJECT) {
+			if (json && json->type == JSON_OBJECT) {
 				json_foreach(child, json) {
 					if (child->key != NULL &&
 						child->key_length == elem->data.key.length &&
@@ -445,6 +448,25 @@ static void match_recurse(void on_match(void *ctx, JPRef *ref), void *ctx,
 			}
 			break;
 
+		case JP_CALL_CHAR:
+			if (json && json->type == JSON_STRING && elem->data.index >= 0) {
+				const char  *sub_start;
+				size_t      sub_bytes;
+				size_t      sub_length;
+
+				sub_length = utf8_substring(
+					json->v.string.str, json->v.string.length,
+					elem->data.index, 1,
+					&sub_start, &sub_bytes);
+
+				if (sub_length == 1)
+					match_recurse(on_match, ctx, lnext(path), mkRefChar(sub_start, sub_bytes));
+			} else if (ref->type == JP_REF_CHAR && elem->data.index == 0) {
+				/* char(0) on a character yields itself. */
+				match_recurse(on_match, ctx, lnext(path), ref);
+			}
+			break;
+
 		default:;
 	}
author	Joey Adams	2010-07-23 21:13:59 +0000
committer	Joey Adams	2010-07-23 21:13:59 +0000
commit	f5cbbbe875326dbe5d0c43bc3c692f64f77a30bd (patch)
tree	af65a7ecd05468d504710227f4d2f34a75a044a5 /jsonpath.c
parent	d60f4513e9490b2a0734f69f9de0bc92d8a874b4 (diff)