#include "jsonpath.h"

#include <ctype.h>
#include "mb/pg_wchar.h"

/*
 * Character classification macros used by the JSONPath tokenizer.
 *
 * NB: These macros evaluate their argument multiple times.
 *
 * The <ctype.h> functions are only defined for arguments representable as
 * unsigned char (or EOF), so every call below casts its argument first.
 * Without the cast, a byte above 127 stored in a signed char would be
 * passed as a negative value, which is undefined behavior.
 */

/* isalpha() is locale-specific.  This simply matches [A-Za-z] . */
#define isletter(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))

/* True for any byte outside the 7-bit ASCII range. */
#define isextended(c) ((unsigned char)(c) > 127)

/* Note that Unicode characters are allowed in identifiers. */
#define identifier_start(c) (isletter(c) || (c) == '_' || (c) == '$' || isextended(c))
#define identifier_char(c) (identifier_start(c) || isdigit((unsigned char)(c)))

/* A character that may begin an integer subscript: a digit or a sign. */
#define integer_start(c) (isdigit((unsigned char)(c)) || (c) == '+' || (c) == '-')

/*
 * jp_root
 *		Return the first cell of a JSONPath list.
 *
 * Every valid JSONPath list begins with a JP_ROOT element.  That element
 * exists so that a path with no subscripts is still a non-NULL list, which
 * leaves NULL free to mean "invalid JSONPath".
 *
 * Both the non-NULL list and the JP_ROOT type of its first element are
 * checked with assertions only.
 */
static ListCell *jp_root(JSONPath *jp)
{
	ListCell	*first;
	jp_element	*root;

	Assert(jp != NULL);

	first = list_head(jp);
	root = lfirst(first);
	Assert(root->type == JP_ROOT);

	return first;
}

/*
 * jp_head
 *		Return the cell that follows the JP_ROOT cell of a JSONPath list,
 *		i.e. the first real path element (whatever lnext() yields for the
 *		root cell when there is none).
 */
static ListCell *jp_head(JSONPath *jp)
{
	ListCell *root_cell = jp_root(jp);

	return lnext(root_cell);
}

/*
 * skip_spaces
 *		Advance *sp past any leading whitespace, as classified by isspace().
 *
 * Note that skip_spaces differs from skip_whitespace in json.c
 * in that this function treats '\f' and '\v' as whitespace.
 * This is because JSON does not accept these characters as
 * whitespace, but since this is JSONPath,
 * we can do whatever we want here :-)
 *
 * sp: in/out pointer to the current position in a NUL-terminated string.
 *     On return it points at the first non-whitespace character (possibly
 *     the terminating NUL).
 */
static void skip_spaces(const char **sp)
{
	const char *s = *sp;

	/*
	 * isspace() is only defined for values representable as unsigned char
	 * (or EOF).  Cast first so bytes above 127 are not passed as negative
	 * values on platforms where char is signed.
	 */
	while (isspace((unsigned char) *s))
		s++;

	*sp = s;
}

/*
 * mkElement
 *		Allocate a zero-filled jp_element with palloc0 and set its type and
 *		recursive-descent flag.  All other fields are left zeroed for the
 *		caller to fill in.
 */
static jp_element *mkElement(jp_element_type type, bool rd)
{
	jp_element *node;

	node = palloc0(sizeof(jp_element));
	node->recursive_descent = rd;
	node->type = type;

	return node;
}

/*
 * mkRoot
 *		Build the JP_ROOT element that starts every JSONPath list.
 *		The root never carries the recursive-descent flag.
 */
static jp_element *mkRoot(void)
{
	return mkElement(JP_ROOT, false);
}

/*
 * mkWildcard
 *		Build a JP_WILDCARD element; rd is stored as its recursive-descent
 *		flag.
 */
static jp_element *mkWildcard(bool rd)
{
	return mkElement(JP_WILDCARD, rd);
}

/*
 * mkIndexSubscript
 *		Build a JP_INDEX_SUBSCRIPT element holding the given index.
 *
 * index: the subscript value.  It is taken as a long because the parser
 *        obtains it from strtol() and the stored value is later printed
 *        with "%ld" and handed to json_index_subscript() as a long.
 *        Accepting only an int here would silently truncate large
 *        subscripts on platforms where long is wider than int, so that an
 *        out-of-range index could alias a small valid one.
 * rd:    stored as the element's recursive-descent flag.
 */
static jp_element *mkIndexSubscript(long index, bool rd)
{
	jp_element *elem = mkElement(JP_INDEX_SUBSCRIPT, rd);

	elem->data.index = index;
	return elem;
}

/*
 * mkKeySubscript
 *		Build a JP_KEY_SUBSCRIPT element referring to the given key.
 *
 * The key pointer is stored as-is (it is not copied here), together with
 * its length in bytes.  rd is stored as the recursive-descent flag.
 */
static jp_element *mkKeySubscript(char *key, size_t length, bool rd)
{
	jp_element *node = mkElement(JP_KEY_SUBSCRIPT, rd);

	node->data.key.length = length;
	node->data.key.ptr = key;

	return node;
}

/*
 * mkRef
 *		Allocate a zero-filled JPRef with palloc0 and tag it with the given
 *		type.  The payload is left zeroed for the caller to fill in.
 */
static JPRef *mkRef(JPRefType type)
{
	JPRef *result;

	result = palloc0(sizeof(JPRef));
	result->type = type;

	return result;
}

/*
 * mkRefNode
 *		Wrap a json_node pointer in a JP_REF_NODE reference.
 *		The node itself is not copied.
 */
static JPRef *mkRefNode(json_node *node)
{
	JPRef *result = mkRef(JP_REF_NODE);

	result->u.node = node;

	return result;
}

/*
 * mkRefChar
 *		Build a JP_REF_CHAR reference to a run of `length` bytes starting at
 *		`bytes`.  The bytes are referenced in place, not copied.
 */
static JPRef *mkRefChar(const char *bytes, size_t length)
{
	JPRef *result = mkRef(JP_REF_CHAR);

	result->u.chr.length = length;
	result->u.chr.bytes = bytes;

	return result;
}

/*
 * jp_show
 *		Render a JSONPath list as text.
 *
 * Each element is emitted in bracket notation: '$' for the root, "[*]" for
 * a wildcard, "[N]" for an index subscript, and the key encoded by
 * json_encode_string() inside brackets for a key subscript.  Elements
 * flagged for recursive descent are prefixed with "..".
 *
 * Returns the data buffer of a StringInfo initialized here.
 */
char *jp_show(JSONPath *jp)
{
	StringInfoData	out;
	ListCell		*lc;

	initStringInfo(&out);

	foreach(lc, jp) {
		jp_element	*item = lfirst(lc);
		bool		descend = item->recursive_descent;

		if (item->type == JP_ROOT) {
			appendStringInfoChar(&out, '$');
		} else if (item->type == JP_WILDCARD) {
			appendStringInfoString(&out, descend ? "..[*]" : "[*]");
		} else if (item->type == JP_INDEX_SUBSCRIPT) {
			appendStringInfo(&out, "%s[%ld]", descend ? ".." : "", item->data.index);
		} else if (item->type == JP_KEY_SUBSCRIPT) {
			char *quoted;

			quoted = json_encode_string(item->data.key.ptr, item->data.key.length, '"', false);
			Assert(quoted != NULL);
			appendStringInfo(&out, "%s[%s]", descend ? ".." : "", quoted);
			pfree(quoted);
		} else {
			/* Unknown element type: should never be in a JSONPath list. */
			Assert(false);
		}
	}

	return out.data;
}

/*
 * jp_parse
 *		Parse a JSONPath pattern string into a JSONPath list.
 *
 * On success the returned list starts with a JP_ROOT element, followed by
 * one element for each subscript found in the pattern.  Returns NULL if the
 * pattern is empty (after skipping leading whitespace) or does not parse.
 *
 * The parser is written as a goto-driven state machine; each label below is
 * one state.  `s` is the current read position throughout.
 *
 * NOTE(review): err_msg is assigned in bracket_subscript but is never read
 * in this function, so the message is currently not reported to anyone.
 */
JSONPath *jp_parse(const char *pattern)
{
	JSONPath	*jp					= NIL;
	const char	*s					= pattern;
	const char	*p;
	bool		recursive_descent 	= false;
	bool		bracket				= false;
	const char	*err_msg			= NULL;
	long		index;
	char		*key;
	size_t		key_length;
	
	skip_spaces(&s);
	
	/* pattern may not be empty */
	if (!*s)
		return NULL;
	
	/* Every JSONPath list starts with a root element. */
	jp = lappend(jp, mkRoot());
	
	/*
	 * A leading '$' is optional.  If the pattern starts with neither '$'
	 * nor '.', parse it as though a '.' preceded the first subscript.
	 * A leading '.' falls through to begin_element.
	 */
	if (*s == '$') {
		s++;
		goto begin_element;
	} else if (*s != '.') {
		goto dot_subscript; // implicit '.' at beginning
	}
	
/*
 * Start of a path element: expects end of input, "..", ".", or "[".
 * Resets the per-element flags first.
 */
begin_element:
	skip_spaces(&s);

	recursive_descent = false;
	bracket = false;

	if (*s == '\0')
		goto end;
	if (s[0] == '.' && s[1] == '.') {
		recursive_descent = true;
		s += 2;
		goto dot_subscript;
	}
	if (s[0] == '.') {
		s++;
		goto dot_subscript;
	}
	if (s[0] == '[') {
		s++;
		goto bracket_subscript;
	}

	goto failed;

/*
 * A subscript has just been appended.  If it was opened with '[',
 * the matching ']' must follow before the next element may begin.
 */
next_element:
	if (bracket) {
		skip_spaces(&s);
		if (*s != ']')
			goto failed;
		s++;
	}
	goto begin_element;

/*
 * Subscript after "." or "..": wildcard, integer, bare identifier,
 * quoted string, or an opening bracket (so ".[" and "..[" are accepted).
 */
dot_subscript:
	skip_spaces(&s);
	
	if (*s == '*')
		goto wildcard;
	if (integer_start(*s))
		goto integer;
	if (identifier_start(*s))
		goto identifier;
	if (*s == '"' || *s == '\'')
		goto string;
	if (*s == '[') {
		s++;
		goto bracket_subscript;
	}

	goto failed;

/*
 * Subscript inside "[": wildcard, integer, or quoted string.
 * Bare identifiers are explicitly rejected here.
 */
bracket_subscript:
	skip_spaces(&s);

	bracket = true;

	if (*s == '*')
		goto wildcard;
	if (integer_start(*s))
		goto integer;
	if (identifier_start(*s)) {
		err_msg = "Identifiers may not be bracketed.  This syntax is reserved for future use.";
		goto failed;
	}
	if (*s == '"' || *s == '\'')
		goto string;
	
	goto failed;

/* Consume the '*' and append a wildcard element. */
wildcard:
	s++;
	jp = lappend(jp, mkWildcard(recursive_descent));
	goto next_element;

/*
 * Parse a base-10 integer with strtol.  Fails if no characters were
 * consumed or strtol set errno.  The cast on &p only drops const so
 * that strtol can store the end position.
 */
integer:
	p = s;
	errno = 0;
	index = strtol(s, (char**)&p, 10);
	if (p <= s || errno != 0)
		goto failed;
	s = p;
	
	jp = lappend(jp, mkIndexSubscript(index, recursive_descent));
	goto next_element;

/*
 * Scan a run of identifier characters and copy it with pnstrdup to
 * use as the key.
 */
identifier:
	p = s;
	while (identifier_char(*p))
		p++;
	key = pnstrdup(s, p - s);
	key_length = p - s;
	s = p;

	jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent));
	goto next_element;

/*
 * Quoted key.  json_decode_string is given &s and &key_length;
 * presumably it advances s past the closing quote and returns NULL on
 * a malformed string -- TODO confirm against its definition.
 */
string:
	key = json_decode_string(&s, &key_length, false);
	if (!key)
		goto failed;
	
	jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent));
	goto next_element;

/* Reached end of input at an element boundary: success. */
end:
	return jp;

/* Any syntax error: NULL signals an invalid JSONPath. */
failed:
	return NULL;
}

/*
 * utf8_substring
 *		Locate a substring, measured in characters, inside a byte buffer.
 *
 * Character boundaries are found by stepping with pg_utf_mblen(), so the
 * buffer is assumed to hold UTF-8 text.
 *
 * src, srcbytes:  the buffer and its size in bytes.
 * start:          number of characters to skip from the beginning.
 * length:         maximum number of characters to take.
 * out_start:      receives a pointer to the first byte of the substring.
 * out_bytes:      receives the size of the substring in bytes.
 *
 * Returns the number of characters stepped over for the substring, which
 * may be fewer than `length` if the buffer ends first.
 *
 * If a multibyte step runs past the end of the buffer (a clipped
 * character), an assertion fires and the result is clamped to the buffer.
 */
static size_t utf8_substring(
	const char *src, size_t srcbytes,
	size_t start, size_t length,
	const char **out_start, size_t *out_bytes)
{
	const char	*limit = src + srcbytes;
	const char	*first = src;
	const char	*last;
	size_t		taken = 0;

	/* Skip the leading `start` characters, or stop at the buffer end. */
	for (; start > 0 && first < limit; start--)
		first += pg_utf_mblen((const unsigned char *) first);

	/* Step over up to `length` characters from there. */
	for (last = first; taken < length && last < limit; taken++)
		last += pg_utf_mblen((const unsigned char *) last);

	/* Make sure the input didn't have a clipped UTF-8 character */
	if (first > limit) {
		Assert(false);
		first = limit;
		last = limit;
	} else if (last > limit) {
		Assert(false);
		last = limit;
	}

	*out_bytes = last - first;
	*out_start = first;

	return taken;
}

/*
 * json_head
 *		Return the first child of an array or object node.
 *		Nodes of any other type have no children, so NULL is returned.
 */
static json_node *json_head(json_node *parent)
{
	if (parent->type == JSON_ARRAY || parent->type == JSON_OBJECT)
		return parent->v.children.head;

	return NULL;
}

/*
 * json_foreach
 *		Loop header that sets `child` to each child of `parent` in turn,
 *		starting at json_head(parent) and following `next` until NULL.
 *
 * Because json_head() returns NULL for anything other than an array or
 * object, the loop body does not run for other node types.
 * `parent` is passed straight to json_head(), which dereferences it.
 * NB: `child` is evaluated multiple times.
 */
#define json_foreach(child, parent) \
	for ((child) = json_head(parent); (child) != NULL; (child) = (child)->next)

/*
 * json_index_subscript
 *		Apply an integer subscript to a reference.
 *
 * - For a string node, the result is a JP_REF_CHAR reference to the
 *   character at position `index`, as located by utf8_substring().
 * - For an array node, the result is a JP_REF_NODE reference to the
 *   element at position `index`.
 * - For a character reference, index 0 yields the same reference back.
 *
 * Returns NULL when the index is negative or out of range, or when the
 * referenced value is of any other type.
 */
static JPRef *json_index_subscript(JPRef *ref, long index)
{
	json_node *node;

	/* Negative subscripts never match. */
	if (index < 0)
		return NULL;

	/* A single character has exactly one position: itself at index 0. */
	if (ref->type == JP_REF_CHAR)
		return (index == 0) ? ref : NULL;

	if (ref->type != JP_REF_NODE) {
		Assert(false);
		return NULL;
	}

	node = ref->u.node;

	if (node->type == JSON_STRING) {
		const char	*char_start;
		size_t		char_bytes;
		size_t		found;

		/* Ask for exactly one character starting at position `index`. */
		found = utf8_substring(
			node->v.string.str, node->v.string.length,
			index, 1,
			&char_start, &char_bytes);

		if (found != 1)
			return NULL;

		return mkRefChar(char_start, char_bytes);
	}

	if (node->type == JSON_ARRAY) {
		json_node *item;

		if ((size_t) index >= node->v.children.count)
			return NULL;

		/* Walk the child list `index` steps from the head. */
		item = node->v.children.head;
		while (index != 0 && item != NULL) {
			item = item->next;
			index--;
		}

		/* The count check above should make this unreachable. */
		if (index != 0 || item == NULL) {
			Assert(false);
			return NULL;
		}

		return mkRefNode(item);
	}

	return NULL;
}

/*
 * match_recurse
 *		Match the path elements starting at `path` against `ref`, appending
 *		every reference that reaches the end of the path to *results.
 *
 * results: list that accepted references are appended to.
 * path:    current cell of the JSONPath list; NULL means the whole path
 *          has been consumed, so `ref` itself is a match.
 * ref:     the value being examined.  It is either a JSON node or a single
 *          character of a string (JP_REF_CHAR), which has no node.
 *
 * If the current element is flagged for recursive descent, the same
 * element is additionally tried against every child of the node.
 *
 * Currently, a lot of JPRef nodes are allocated just to pass json_node
 * pointers to match_recurse.  If this becomes a memory/performance issue in
 * the future, JPRef could be merged with json_node by adding JPRef's
 * specialty types to the json_type enum and json_node union.  JPRef is
 * currently not merged with json_node in an attempt to keep the codebase
 * tidy and easier to extend.
 */
static void match_recurse(List **results, ListCell *path, JPRef *ref)
{
	jp_element	*elem;
	JPRef		*child_ref;
	json_node	*json, *child;

	if (path == NULL) {
		/* The end of the JSONPath list is the "accept" state. */
		*results = lappend(*results, ref);
		return;
	}

	elem = lfirst(path);

	/* Only node references have a json_node; character references do not. */
	if (ref->type == JP_REF_NODE)
		json = ref->u.node;
	else
		json = NULL;

	switch (elem->type) {
		case JP_WILDCARD:
			if (json) {
				json_foreach(child, json)
					match_recurse(results, lnext(path), mkRefNode(child));
			}
			break;

		case JP_INDEX_SUBSCRIPT:
			/* Works on both node and character references. */
			child_ref = json_index_subscript(ref, elem->data.index);
			if (child_ref != NULL)
				match_recurse(results, lnext(path), child_ref);
			break;

		case JP_KEY_SUBSCRIPT:
			/*
			 * A character reference has no node and therefore no keyed
			 * children.  json_foreach dereferences its parent argument,
			 * so it must not be handed a NULL node (this happens for a
			 * path such as $[0].key applied to a string).
			 */
			if (json) {
				json_foreach(child, json) {
					if (child->key != NULL &&
						child->key_length == elem->data.key.length &&
						!memcmp(child->key, elem->data.key.ptr, child->key_length))
					{
						match_recurse(results, lnext(path), mkRefNode(child));
					}
				}
			}
			break;

		default:
			/* Other element types (e.g. JP_ROOT) match nothing here. */
			break;
	}

	/* Recursive descent: retry this same element on every child. */
	if (elem->recursive_descent && json) {
		json_foreach(child, json)
			match_recurse(results, path, mkRefNode(child));
	}
}

/*
 * jp_match
 *		Evaluate a JSONPath against a JSON tree.
 *
 * Matching starts at the first element after the root of `jp`, with `json`
 * as the initial value.  Returns the list of references collected by
 * match_recurse(), or NIL if nothing matched.
 */
List *jp_match(JSONPath *jp, json_node *json)
{
	ListCell	*first_elem;
	List		*matches;

	first_elem = jp_head(jp);
	matches = NIL;

	match_recurse(&matches, first_elem, mkRefNode(json));

	return matches;
}

/*
 * jpref_encode
 *		Encode a match reference as JSON text.
 *
 * A node reference is encoded with json_encode(); a character reference is
 * encoded as a double-quoted string with json_encode_string().
 * Any other reference type trips an assertion and yields NULL.
 */
char *jpref_encode(JPRef *ref)
{
	if (ref->type == JP_REF_NODE)
		return json_encode(ref->u.node, JSONOPT_USE_ORIG);

	if (ref->type == JP_REF_CHAR)
		return json_encode_string(ref->u.chr.bytes, ref->u.chr.length, '"', false);

	Assert(false);
	return NULL;
}