/*
  Copyright (C) 2010 Joseph A. Adams (joeyadams3.14159@gmail.com)
  All rights reserved.

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
*/

#include "json.h"
#include "util.h"

#include <ctype.h>

/* True if the node is a container (JSON array or object), i.e. the kind of
   node whose v.children member is in use.  Evaluates its argument twice. */
#define is_internal(node) ((node)->type == JSON_ARRAY || (node)->type == JSON_OBJECT)

/* We can't use isspace() because it also accepts \v and \f, which
   aren't legal whitespace characters in strict JSON.
   Note that the argument is evaluated up to four times. */
#define is_whitespace(c) ((c)==' ' || (c)=='\t' || (c)=='\n' || (c)=='\r')

/*
 * Advance *sp past a run of JSON whitespace (space, tab, LF, CR).
 *
 * On return *sp points at the first character that is not whitespace,
 * which may be the terminating NUL.
 */
static void
skip_whitespace(const char **sp)
{
	const char *cursor;

	for (cursor = *sp; is_whitespace(*cursor); cursor++)
		;

	*sp = cursor;
}

/*
 * Return the character that closes the given container node:
 * ']' for an array, '}' for an object.
 *
 * Returns 0 if node is NULL or is not a container.
 */
static char
end_parenthesis(json_node * node)
{
	if (node == NULL)
		return 0;

	if (node->type == JSON_ARRAY)
		return ']';
	if (node->type == JSON_OBJECT)
		return '}';

	return 0;
}

/*
 * Parse exactly four hexadecimal digits (upper or lower case) from @in.
 *
 * The value is accumulated in *out.  On failure *out holds whatever was
 * accumulated from the digits that preceded the offending character
 * (0 if the very first character was bad).
 *
 * Returns true if all four characters were hex digits, false otherwise.
 * Reading stops at the first non-hex character, so a string shorter than
 * four characters is rejected at its NUL terminator.
 */
static bool
read_hex16(const char *in, unsigned int *out)
{
	int			remaining;

	*out = 0;

	for (remaining = 4; remaining > 0; remaining--)
	{
		char		ch = *in++;
		unsigned int digit;

		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else if (ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else
			return false;

		*out = (*out << 4) + digit;
	}

	return true;
}

/*
 * Write the low 16 bits of @val to @out as exactly four uppercase
 * hexadecimal digits, most significant nibble first.
 *
 * No NUL terminator is written; @out must have room for four bytes.
 */
static void
write_hex16(char *out, unsigned int val)
{
	static const char digits[] = "0123456789ABCDEF";
	int			shift;

	for (shift = 12; shift >= 0; shift -= 4)
		*out++ = digits[(val >> shift) & 0xF];
}


/*********** json_node creation, manipulation, and deletion **********/

/*
 * Allocate a new node of the given type.
 *
 * Every field other than the type is zero-filled, so the node starts
 * out with no parent, siblings, key or value.
 */
json_node *
json_mknode(json_type type)
{
	json_node  *fresh;

	fresh = palloc(sizeof(json_node));
	memset(fresh, 0, sizeof(json_node));
	fresh->type = type;

	return fresh;
}

/* Create a JSON_BOOL node holding the given truth value. */
json_node *
json_mkbool(bool v_bool)
{
	json_node  *result;

	result = json_mknode(JSON_BOOL);
	result->v.v_bool = v_bool;

	return result;
}

/*
 * Create a JSON_STRING node.
 *
 * If @str is non-NULL the node receives its own copy, made with
 * pnstrdup(str, length), and records @length.  If @str is NULL the
 * string fields are left zeroed, as json_mknode initialized them.
 */
json_node *
json_mkstring(const char *str, size_t length)
{
	json_node  *result = json_mknode(JSON_STRING);

	if (str == NULL)
		return result;

	result->v.string.length = length;
	result->v.string.str = pnstrdup(str, length);

	return result;
}

/*
 * Create a JSON_NUMBER node.
 *
 * The number is kept as text: if @number is non-NULL the node receives
 * its own copy, made with pnstrdup(number, length).  The text is not
 * validated here.  If @number is NULL the value is left NULL.
 */
json_node *
json_mknumber(const char *number, size_t length)
{
	json_node  *result = json_mknode(JSON_NUMBER);

	if (number != NULL)
		result->v.number = pnstrdup(number, length);

	return result;
}

/*
 * Record that a node's value has changed.
 *
 * Clears orig.value.start on the node and on each of its ancestors in
 * turn, stopping at the first node whose orig.value.start is already
 * NULL (or at the root).
 *
 * json_encode with JSONOPT_USE_ORIG reuses the original text of a value
 * only while orig.value.start is set, so after this call it re-encodes
 * the node and its ancestors from their current values instead.
 */
void
json_touch_value(json_node * node)
{
	for (; node != NULL && node->orig.value.start != NULL; node = node->parent)
		node->orig.value.start = NULL;
}

/*
 * Link @child in as the last child of @parent without marking @parent
 * as modified.
 *
 * @parent must be an array or object and @child must not already have
 * a parent (both are checked with Assert).
 */
static void
json_append_notouch(json_node * parent, json_node * child)
{
	json_node  *last;

	Assert(parent->type == JSON_ARRAY || parent->type == JSON_OBJECT);
	Assert(child->parent == NULL);

	last = parent->v.children.tail;

	child->parent = parent;
	child->prev = last;
	child->next = NULL;

	/* An empty list gets a new head; otherwise extend the old tail. */
	if (last == NULL)
		parent->v.children.head = child;
	else
		last->next = child;

	parent->v.children.tail = child;
	parent->v.children.count++;
}

/*
 * Append @child as the last child of @parent, then call json_touch_value
 * on @parent so that its recorded original text is no longer reused.
 */
void
json_append(json_node * parent, json_node * child)
{
	json_append_notouch(parent, child);
	json_touch_value(parent);
}

/*
 * Unlink @node from its parent's child list.
 *
 * Does nothing if the node has no parent.  Otherwise the node's parent,
 * prev and next links are cleared, the parent's child count is reduced,
 * and the parent is marked as modified via json_touch_value.  The node
 * itself is not freed.
 */
void
json_remove(json_node * node)
{
	json_node  *owner = node->parent;
	json_node  *before;
	json_node  *after;

	if (owner == NULL)
		return;

	Assert(owner->type == JSON_ARRAY || owner->type == JSON_OBJECT);
	Assert(owner->v.children.count > 0);

	before = node->prev;
	after = node->next;

	/* Bridge over the node, fixing up head/tail at the list ends. */
	if (before != NULL)
		before->next = after;
	else
		owner->v.children.head = after;

	if (after != NULL)
		after->prev = before;
	else
		owner->v.children.tail = before;

	owner->v.children.count--;

	node->next = NULL;
	node->prev = NULL;
	node->parent = NULL;

	json_touch_value(owner);
}

/*
 * Make @node take on the type, value and original-value span of
 * @replacement, then mark @node's parent (if any) as modified.
 *
 * This is a shallow struct copy: whatever replacement->v points to
 * (string buffer, number text, child list) ends up referenced by both
 * nodes.  The value @node held before is not freed here, and the parent
 * pointers of any children are not updated.
 */
void
json_replace_value(json_node * node, json_node * replacement)
{
	json_node  *owner;

	node->orig.value = replacement->orig.value;
	node->v = replacement->v;
	node->type = replacement->type;

	owner = node->parent;
	if (owner != NULL)
		json_touch_value(owner);
}

/*
 * Return the string held by a JSON_STRING node (may be NULL).
 *
 * If @length_out is non-NULL, the string's length is stored through it.
 * The returned pointer refers to the node's own buffer.
 */
const char *
json_get_string(json_node * node, size_t *length_out)
{
	Assert(node->type == JSON_STRING);

	if (length_out != NULL)
		*length_out = node->v.string.length;

	return node->v.string.str;
}

/*
 * Replace the value of a JSON_STRING node.
 *
 * If @str is non-NULL the node receives its own copy, made with
 * pnstrdup(str, length); if @str is NULL the node's string becomes
 * NULL with length 0.  The previous buffer is freed and the node is
 * marked as modified via json_touch_value.
 *
 * The new value is copied before the old buffer is released, so @str
 * may point into the node's current string (for example a pointer
 * obtained from json_get_string).
 */
void
json_set_string(json_node * node, const char *str, size_t length)
{
	char	   *old_str;
	char	   *new_str = NULL;
	size_t		new_length = 0;

	Assert(node->type == JSON_STRING);

	old_str = node->v.string.str;

	/* Copy first: @str may alias the buffer we are about to free. */
	if (str)
	{
		new_str = pnstrdup(str, length);
		new_length = length;
	}

	node->v.string.str = new_str;
	node->v.string.length = new_length;

	if (old_str)
		pfree(old_str);

	json_touch_value(node);
}

/*
 * Return the textual value held by a JSON_NUMBER node (may be NULL).
 * The returned pointer refers to the node's own buffer.
 */
const char *
json_get_number(json_node * node)
{
	const char *text;

	Assert(node->type == JSON_NUMBER);

	text = node->v.number;
	return text;
}

/*
 * Replace the value of a JSON_NUMBER node.
 *
 * If @number is non-NULL the node receives its own copy, made with
 * pnstrdup(number, length); otherwise the value becomes NULL.  The
 * text is not validated here.  The previous buffer is freed and the
 * node is marked as modified via json_touch_value.
 *
 * The new value is copied before the old buffer is released, so
 * @number may point into the node's current text (for example a
 * pointer obtained from json_get_number).
 */
void
json_set_number(json_node * node, const char *number, size_t length)
{
	char	   *old_number;
	char	   *new_number = NULL;

	Assert(node->type == JSON_NUMBER);

	old_number = node->v.number;

	/* Copy first: @number may alias the buffer we are about to free. */
	if (number)
		new_number = pnstrdup(number, length);

	node->v.number = new_number;

	if (old_number)
		pfree(old_number);

	json_touch_value(node);
}

/*
 * Free a single node: its string or number buffer (if any), its key
 * (if any), and the node structure itself.
 *
 * Children are not visited; see json_delete for freeing a whole tree.
 */
static void
free_node(json_node * node)
{
	switch (node->type)
	{
		case JSON_STRING:
			if (node->v.string.str != NULL)
				pfree(node->v.string.str);
			break;
		case JSON_NUMBER:
			if (node->v.number != NULL)
				pfree(node->v.number);
			break;
		default:
			break;
	}

	if (node->key != NULL)
		pfree(node->key);

	pfree(node);
}

/*
 * Free @node and all of its descendants.
 *
 * The node is first detached from its parent (if it has one).  The tree
 * is then walked without recursion: descend to the deepest first child,
 * free it, move on to its next sibling, and once a sibling list is
 * exhausted free the parent and continue from the parent's sibling.
 *
 * Passing NULL is a no-op.
 */
void
json_delete(json_node * node)
{
	if (node == NULL)
		return;

	/* Remove node from parent (if it has one). */
	json_remove(node);

	for (;;)
	{
		json_node  *up;
		json_node  *sibling;

		/* Descend to the leftmost node that has no children. */
		while (is_internal(node) && node->v.children.head)
			node = node->v.children.head;

		/*
		 * Free nodes, climbing towards the root, until one of them has a
		 * following sibling that still needs to be visited.
		 */
		for (;;)
		{
			up = node->parent;
			sibling = node->next;
			free_node(node);

			if (sibling != NULL)
			{
				node = sibling;
				break;
			}
			if (up == NULL)
				return;

			node = up;
		}
	}
}


/*********************** Parsing and validation **********************/

/* Forward declarations for the parsing helpers defined further down. */
static json_node *decode_leaf(const char **sp);
static json_node *decode_number(const char **sp);
char	   *json_decode_string(const char **sp, size_t *length, bool strict);

/* json_decode_string has a different signature than its friends
   (it returns the decoded text rather than a node)
   because it's also used to parse object member keys.
   It's also useful outside of json.c, such as in jsonpath.c . */

/*
 * json_validate
 *    Report whether the given UTF-8 string is valid JSON.
 *
 *    The text is fully decoded with json_decode and the resulting tree
 *    is discarded again.  A NULL @str is reported as invalid.
 */
bool
json_validate(const char *str)
{
	json_node  *parsed = json_decode(str);

	if (parsed == NULL)
		return false;

	json_delete(parsed);
	return true;
}

/*
 * json_decode
 *    Convert a JSON-encoded string to a JSON node.
 *    @str must be valid UTF-8 (this is only checked with an Assert).
 *
 *    Returns the root node of the decoded tree, or NULL if @str is NULL
 *    or is not valid JSON.  On failure, everything built so far is freed.
 *
 *    The parser is a non-recursive state machine whose states are the
 *    labels item, comma_endp, endp, end and failed.  While parsing, each
 *    node's orig member is filled in with pointers into @str that delimit
 *    the node's key, its value, and the whitespace around them.
 */
json_node *
json_decode(const char *str)
{
	json_node  *root = NULL,
			   *parent = NULL,
			   *node = NULL;
	const char *s = str;
	char	   *key;
	size_t		key_length;
	struct json_node_orig orig;
	bool		expect_endp;

	if (!str)
		return NULL;

	Assert(utf8_validate(str, strlen(str)));

	expect_endp = false;
	goto item;

item:							/* Expect a value (set expect_endp before goto
								 * item; ) */
	key = NULL;
	key_length = 0;
	memset(&orig, 0, sizeof(orig));

	orig.key_left_space.start = s;
	orig.left_space.start = s;

	skip_whitespace(&s);

	/*
	 * Directly after an opening bracket/brace the container may be empty,
	 * so a closing bracket/brace is acceptable in place of a value.
	 */
	if (expect_endp)
	{
		if (*s == ']' || *s == '}')
			goto endp;
	}

	if (parent && parent->type == JSON_OBJECT)
	{
		/* Parse member key string. */
		orig.key_left_space.end = s;
		orig.key.start = s;

		key = json_decode_string(&s, &key_length, true);
		if (!key)
			goto failed;

		orig.key.end = s;
		orig.key_right_space.start = s;

		/* Eat the " : " */
		skip_whitespace(&s);
		if (*s != ':')
			goto failed;

		orig.key_right_space.end = s;
		s++;
		/* For object members, the value's left space starts after the ':' */
		orig.left_space.start = s;

		skip_whitespace(&s);
	}

	/*
	 * The way orig.value and company are initialized is a bit funky. If this
	 * node has children, we have to finish parsing the node's children before
	 * we know where it ends.  Hence, initialization of orig.value.end and
	 * after will be deferred if this node has children.
	 */

	orig.left_space.end = s;
	orig.value.start = s;

	node = decode_leaf(&s);
	if (!node)
	{
		/* Not a leaf: it must open an array or object, or it is an error. */
		if (*s == '[')
			node = json_mknode(JSON_ARRAY);
		else if (*s == '{')
			node = json_mknode(JSON_OBJECT);
		else
			goto failed;
		s++;

		/*
		 * orig.value.end and later are dangling (actually NULL) for now, but
		 * will be initialized when we get to state 'endp' .
		 */
	}
	else
	{
		orig.value.end = s;
		orig.right_space.start = s;

		skip_whitespace(&s);

		orig.right_space.end = s;
	}

	node->key = key;
	node->key_length = key_length;

	/*
	 * The key now belongs to the node.  This prevents a double free on
	 * failure (see the failed: label).
	 */
	key = NULL;

	node->orig = orig;

	/* Link the node into the tree; the first node parsed becomes the root. */
	if (parent)
		json_append_notouch(parent, node);
	else
		root = node;

	if (is_internal(node))
	{
		/*
		 * "push" node onto the "stack".  Nodes point up to their parents,
		 * which is why this function doesn't need a "stack" per se.
		 */
		parent = node;

		expect_endp = true;
		goto item;
	}

	if (parent)
		goto comma_endp;
	else
		goto end;

comma_endp:						/* Expect a comma or end bracket/brace */
	if (*s == ',')
	{
		s++;

		/* After a comma another value is mandatory. */
		expect_endp = false;
		goto item;
	}
	if (*s == ']' || *s == '}')
		goto endp;

	goto failed;

endp:							/* Handle an end bracket/brace */
	/* The closer must match the kind of container being closed. */
	if (*s != end_parenthesis(parent))
		goto failed;
	s++;

	/* "pop" a node from the "stack" */
	node = parent;
	parent = parent->parent;

	/*
	 * The other pointers were set when we started parsing this node in the
	 * 'item' state.
	 */
	node->orig.value.end = s;
	node->orig.right_space.start = s;

	skip_whitespace(&s);

	node->orig.right_space.end = s;

	if (parent)
		goto comma_endp;
	else
		goto end;

end:							/* Expect end of text */
	if (*s)
		goto failed;
	/* parent is NULL here, so node is the root of the tree. */
	return node;

failed:					/* Handle failure */
	/* key is non-NULL only if it has not been handed over to a node yet. */
	if (key)
		pfree(key);
	json_delete(root);
	return NULL;
}

/*
 * Decode a value that has no children (string, number, true, false or
 * null) and advance *sp past it.
 *
 * Leading whitespace is not skipped; the main decode loop does that.
 *
 * Returns NULL if the text at *sp does not start such a value -- which
 * includes the case where the next character is '[' or '{' -- or if the
 * string or number it starts is malformed.
 */
static json_node *
decode_leaf(const char **sp)
{
	const char *text = *sp;

	switch (*text)
	{
		case '"':
			{
				size_t		length;
				char	   *str = json_decode_string(sp, &length, true);
				json_node  *node;

				if (str == NULL)
					return NULL;

				/* The node takes ownership of the decoded buffer. */
				node = json_mknode(JSON_STRING);
				node->v.string.str = str;
				node->v.string.length = length;
				return node;
			}

		case '-':
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			return decode_number(sp);

		case 't':
			if (strncmp(text, "true", 4) == 0)
			{
				*sp = text + 4;
				return json_mkbool(true);
			}
			break;

		case 'f':
			if (strncmp(text, "false", 5) == 0)
			{
				*sp = text + 5;
				return json_mkbool(false);
			}
			break;

		case 'n':
			if (strncmp(text, "null", 4) == 0)
			{
				*sp = text + 4;
				return json_mknode(JSON_NULL);
			}
			break;

		default:
			break;
	}

	return NULL;
}

/*
 * Consume one or more decimal digits at *sp.
 *
 * Returns false, leaving *sp alone, if *sp is not at a digit.
 *
 * The character is cast to unsigned char before being handed to
 * isdigit(): the input is UTF-8, and passing a negative char value to a
 * <ctype.h> function is undefined behaviour.
 */
static bool
validate_number_digits(const char **sp)
{
	const char *s = *sp;

	if (!isdigit((unsigned char) *s))
		return false;
	do
		s++;
	while (isdigit((unsigned char) *s));

	*sp = s;
	return true;
}

/*
 * The JSON spec says that a number shall follow this precise pattern
 * (spaces and quotes added for readability):
 *	 '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
 *
 * However, some JSON parsers are more liberal.  For instance, PHP accepts
 * '.5' and '1.'.  JSON.parse accepts '+3'.
 *
 * This function takes the strict approach.
 *
 * On success, returns true and advances *sp past the longest prefix that
 * matches the pattern; whatever follows is left for the caller to check
 * (so "012" matches just the leading "0").  On failure, returns false and
 * leaves *sp untouched.
 */
static bool
validate_number(const char **sp)
{
	const char *s = *sp;

	/* '-'? */
	if (*s == '-')
		s++;

	/* (0 | [1-9][0-9]*) */
	if (*s == '0')
		s++;
	else if (!validate_number_digits(&s))
		return false;

	/* ('.' [0-9]+)? */
	if (*s == '.')
	{
		s++;
		if (!validate_number_digits(&s))
			return false;
	}

	/* ([Ee] [+-]? [0-9]+)? */
	if (*s == 'E' || *s == 'e')
	{
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!validate_number_digits(&s))
			return false;
	}

	*sp = s;
	return true;
}

/*
 * Decode a JSON number at *sp into a JSON_NUMBER node holding a copy of
 * its text, advancing *sp past the number.
 *
 * Returns NULL if the text does not pass validate_number.
 */
static json_node *
decode_number(const char **sp)
{
	const char *first = *sp;

	if (validate_number(sp))
		return json_mknumber(first, *sp - first);

	return NULL;
}

/*
 * json_decode_string
 *    If you're interested in decoding JSON in general, see json_decode.
 *
 *    Decodes a JSON string literal (e.g. "\"hello\"").
 *
 *    If strict is true, string must be double-quoted,
 *    as is required by the JSON RFC.
 *    Otherwise (e.g. if parsing something JSON-like, such as JSONPath),
 *    the string may be single- or double-quoted.
 *
 *    Also, no whitespace skipping is done, so the caller should only
 *    call this function when it expects **sp to be either " or '
 *
 *    On success, returns the decoded string, passes that string's length
 *    through *length (which must not be NULL), and advances *sp to point
 *    to the end of string literal (including the quote character).
 *
 *    On failure (parse error), returns NULL and
 *    leaves *length and *sp untouched.
 */
char *
json_decode_string(const char **sp, size_t *length, bool strict)
{
	const char		*s = *sp;
	StringInfoData	ret;
	char			buf[4];
	int				len;
	char			quote;

	Assert(length != NULL);

	/*
	 * Check the opening quote before allocating the output buffer, so that
	 * rejecting the input here does not leak the buffer.
	 */
	quote = *s++;
	if (quote != '"' && (strict || quote != '\''))
		return NULL;

	initStringInfo(&ret);

	while (*s && *s != quote)
	{
		unsigned char c = *s++;
		unsigned int uc;
		unsigned int lc;

		if (c == '\\')
		{
			c = *s++;
			switch (c)
			{
				case '\\':
				case '/':
					break;
				case 'b':
					c = '\b';
					break;
				case 'f':
					c = '\f';
					break;
				case 'n':
					c = '\n';
					break;
				case 'r':
					c = '\r';
					break;
				case 't':
					c = '\t';
					break;
				case 'u':
					if (!read_hex16(s, &uc))
						goto failed;
					s += 4;

					if (uc >= 0xD800 && uc <= 0xDFFF)
					{
						/* Handle UTF-16 surrogate pair. */

						if (uc >= 0xDC00)
							goto failed;		/* Second surrogate not
												 * preceded by first
												 * surrogate. */

						if (s[0] != '\\' || s[1] != 'u'
							|| !read_hex16(s + 2, &lc)
							|| !(lc >= 0xDC00 && lc <= 0xDFFF))
							goto failed;		/* First surrogate not
												 * followed by second
												 * surrogate. */

						s += 6;

						/*
						 * The pair encodes (code point - 0x10000) in 20
						 * bits.  The offset must be added, not OR-ed: the
						 * high ten bits shifted left by 10 can themselves
						 * have bit 16 set (any first surrogate >= 0xD840).
						 */
						uc = 0x10000 + (((uc & 0x3FF) << 10) | (lc & 0x3FF));
					}

					/* 0xFFFE and 0xFFFF are invalid Unicode */
					if (uc == 0xFFFE || uc == 0xFFFF)
						goto failed;

					len = utf8_encode_char(buf, uc);
					Assert(len > 0);
					appendBinaryStringInfo(&ret, buf, len);

					continue;	/* Continue the enclosing while loop to skip
								 * the single-character append below. */
				default:
					/* An escaped quote character stands for itself. */
					if (c == quote)
						break;
					if (!strict && (c == '"' || c == '\''))
						break;
					goto failed;	/* Invalid escape */
			}
		}
		else if (c <= 0x1F)
		{
			/* Control characters not allowed in string literals. */
			goto failed;
		}
		appendStringInfoChar(&ret, c);
	}

	/* The loop stops at the closing quote or at NUL (unterminated). */
	if (!*s++)
		goto failed;

	*length = ret.len;
	*sp = s;
	return ret.data;

failed:
	pfree(ret.data);
	return NULL;
}

/*
 * Guess the type of a JSON text by looking only at its first
 * non-whitespace character.  The text is not validated.
 *
 * @str need not be NUL-terminated; at most @nbytes bytes are examined.
 * Returns JSON_INVALID if the text is empty, consists solely of
 * whitespace, or starts with a character that cannot begin a JSON value.
 */
json_type
json_text_type(const char *str, size_t nbytes)
{
	size_t		pos = 0;
	char		first;

	/* Skip whitespace characters. */
	while (pos < nbytes && is_whitespace(str[pos]))
		pos++;

	/* Nothing left to look at? */
	if (pos >= nbytes)
		return JSON_INVALID;

	first = str[pos];

	if (first == 'n')
		return JSON_NULL;
	if (first == '"')
		return JSON_STRING;
	if (first == 't' || first == 'f')
		return JSON_BOOL;
	if (first == '{')
		return JSON_OBJECT;
	if (first == '[')
		return JSON_ARRAY;
	if (first == '-' || (first >= '0' && first <= '9'))
		return JSON_NUMBER;

	return JSON_INVALID;
}


/****************************** Encoding *****************************/

/*
 * Append @string (@length bytes of valid UTF-8, possibly containing NUL
 * bytes) to @out as a quoted, escaped string literal.
 *
 * @quote is the delimiter to use and must not be a backslash; it is
 * backslash-escaped wherever it occurs inside the string.  Backslash,
 * \b, \f, \n, \r and \t use their two-character escapes.  All other
 * control characters (U+0000 through U+001F) are written as \uXXXX.
 * If @escape_unicode is true, every non-ASCII character is written as
 * \uXXXX too, using a surrogate pair for characters above U+FFFF.
 */
static void
encode_string(StringInfo out, const char *string, size_t length, char quote,
			bool escape_unicode)
{
	const char *s = string;
	const char *end = s + length;

	Assert(utf8_validate(string, length));
	Assert(quote != '\\');

	appendStringInfoChar(out, quote);

	while (s < end)
	{
		unsigned char c = *s++;
		unsigned char esc;		/* character to follow a backslash, or 0 to
								 * emit c unchanged */

		switch (c)
		{
			case '\\':
				esc = '\\';
				break;
			case '\b':
				esc = 'b';
				break;
			case '\f':
				esc = 'f';
				break;
			case '\n':
				esc = 'n';
				break;
			case '\r':
				esc = 'r';
				break;
			case '\t':
				esc = 't';
				break;
			default:
				{
					if (c == quote)
					{
						esc = quote;
						break;
					}

					/*
					 * JSON requires every control character up to and
					 * including U+001F to be escaped.
					 */
					if (c <= 0x1F || (c >= 0x80 && escape_unicode))
					{
						/* Encode using \u.... */
						unsigned int uc,
									lc;
						char		txt[13];

						/* Re-read the whole (possibly multi-byte) character. */
						s--;
						utf8_decode_char_nocheck(&s, &uc);

						txt[0] = '\\';
						txt[1] = 'u';
						txt[6] = '\\';
						txt[7] = 'u';
						if (uc <= 0xFFFF)
						{
							write_hex16(txt + 2, uc);
							txt[6] = '\0';
						}
						else
						{
							/* Split into a UTF-16 surrogate pair. */
							uc -= 0x10000;
							lc = uc & 0x3FF;
							uc = uc >> 10;
							uc |= 0xD800;
							lc |= 0xDC00;
							write_hex16(txt + 2, uc);
							write_hex16(txt + 8, lc);
							txt[12] = '\0';
						}

						appendStringInfoString(out, txt);
						continue;		/* Skip backslash-encoding code below. */
					}
					esc = 0;
				}
		}

		appendStringInfoChar(out, esc ? '\\' : c);
		if (esc)
			appendStringInfoChar(out, esc);
	}

	appendStringInfoChar(out, quote);
}

/*
 * Append the number held in @string to @out.
 *
 * The text must consist of a single strictly valid JSON number,
 * optionally surrounded by JSON whitespace; only the number itself is
 * appended.  Returns false, appending nothing, if @string is NULL or
 * does not have that form.
 */
static bool
encode_number(StringInfo out, const char *string)
{
	const char *cursor;
	const char *first;
	const char *last;

	if (string == NULL)
		return false;

	/* Locate the number between any leading and trailing whitespace. */
	cursor = string;
	skip_whitespace(&cursor);
	first = cursor;

	if (!validate_number(&cursor))
		return false;
	last = cursor;

	/* Nothing but whitespace may follow the number. */
	skip_whitespace(&cursor);
	if (*cursor != '\0')
		return false;

	appendBinaryStringInfo(out, first, last - first);
	return true;
}

/*
 * State shared by json_encode and json_encode_recurse while encoding
 * a tree.
 */
typedef struct
{
	/* Output accumulated so far. */
	StringInfoData	str;
	/* Set from JSONOPT_USE_ORIG: reuse recorded original text if present. */
	bool			use_orig;
	/* Set from JSONOPT_ESCAPE_UNICODE: passed on to encode_string. */
	bool			escape_unicode;
	/* Set unless JSONOPT_NO_TRIM: suppress whitespace around the root. */
	bool			trim;
}	json_encode_ctx;

/* Defined after json_encode, which is its only outside caller. */
static bool json_encode_recurse(json_node * node, json_encode_ctx * ctx);

/*
 * Encode the tree rooted at @node as JSON text.
 *
 * @options is a bitmask of JSONOPT_USE_ORIG, JSONOPT_ESCAPE_UNICODE and
 * JSONOPT_NO_TRIM.
 *
 * Returns the encoded text in a newly allocated buffer, or NULL if
 * json_encode_recurse reports failure (the partial output is freed).
 */
char *
json_encode(json_node * node, int options)
{
	json_encode_ctx ctx;

	initStringInfo(&ctx.str);
	ctx.trim = (options & JSONOPT_NO_TRIM) == 0;
	ctx.escape_unicode = (options & JSONOPT_ESCAPE_UNICODE) != 0;
	ctx.use_orig = (options & JSONOPT_USE_ORIG) != 0;

	if (json_encode_recurse(node, &ctx))
		return ctx.str.data;

	pfree(ctx.str.data);
	return NULL;
}

/*
 * Append the encoding of @node and its descendants to ctx->str.
 *
 * When ctx->use_orig is set, the pieces of original text recorded in
 * node->orig (value, keys and surrounding whitespace) are copied
 * verbatim wherever they are still present; everything else is
 * generated from the node's current contents.
 *
 * ctx->trim applies to the node this call was entered with only: it
 * suppresses that node's surrounding whitespace and is cleared before
 * any children are visited.
 *
 * Returns true on success.  Returns false if this node or any of its
 * descendants cannot be encoded (unknown node type, or a number whose
 * text is not a valid JSON number); ctx->str then holds incomplete
 * output that the caller should discard.
 */
static bool
json_encode_recurse(json_node * node, json_encode_ctx * ctx)
{
#define has_orig(field) \
		(use_orig && node->orig.field.start)
#define push_orig(field) \
		appendBinaryStringInfo(&ctx->str, node->orig.field.start, \
							node->orig.field.end - node->orig.field.start)

	bool		use_orig = ctx->use_orig;
	bool		trim = ctx->trim;

	ctx->trim = false;			/* Don't trim internal nodes, just the root
								 * node. */

	if (!trim && has_orig(left_space))
		push_orig(left_space);

	if (has_orig(value))
	{
		push_orig(value);
	}
	else
	{
		const char *txt = NULL;
		json_node  *child;

		switch (node->type)
		{
			case JSON_NULL:
				txt = "null";
				break;
			case JSON_BOOL:
				if (node->v.v_bool)
					txt = "true";
				else
					txt = "false";
				break;
			case JSON_STRING:
				encode_string(&ctx->str,
						node->v.string.str, node->v.string.length,
						'"', ctx->escape_unicode);
				break;
			case JSON_NUMBER:
				if (!encode_number(&ctx->str, node->v.number))
					return false;
				break;
			case JSON_ARRAY:
				appendStringInfoChar(&ctx->str, '[');

				json_foreach(child, node)
				{
					/* A child that cannot be encoded fails the whole tree. */
					if (!json_encode_recurse(child, ctx))
						return false;
					if (child->next)
						appendStringInfoChar(&ctx->str, ',');
				}

				appendStringInfoChar(&ctx->str, ']');
				break;
			case JSON_OBJECT:
				appendStringInfoChar(&ctx->str, '{');

				json_foreach(child, node)
				{
					/*
					 * Shadows the parent node (assigned to the variable
					 * @node) so we can use our macros on the child node
					 * instead. Hurray for lexical scoping!
					 */
					json_node  *node = child;

					if (has_orig(key_left_space))
						push_orig(key_left_space);

					if (has_orig(key))
						push_orig(key);
					else
						encode_string(&ctx->str, node->key, node->key_length,
										'"', ctx->escape_unicode);

					if (has_orig(key_right_space))
						push_orig(key_right_space);

					appendStringInfoChar(&ctx->str, ':');

					/* A member that cannot be encoded fails the whole tree. */
					if (!json_encode_recurse(node, ctx))
						return false;

					if (node->next)
						appendStringInfoChar(&ctx->str, ',');
				}

				appendStringInfoChar(&ctx->str, '}');
				break;
			default:
				return false;
		}

		if (txt)
			appendStringInfoString(&ctx->str, txt);
	}

	if (!trim && has_orig(right_space))
		push_orig(right_space);

	return true;

#undef has_orig
#undef push_orig
}

/*
 * json_encode_string
 *    If you're interested in encoding JSON in general, see json_encode .
 *
 *    Produces a quoted, escaped string literal in a newly allocated
 *    buffer, delimited by the given quote character.  Any quote other
 *    than '"' yields text that is not valid JSON.
 *
 *    @str must be valid UTF-8, though it may contain null characters
 *       (hence the length argument).
 *    @quote must not be a backslash.
 *    @escape_unicode is passed through to encode_string.
 */
char *
json_encode_string(const char *str, size_t length, char quote,
					bool escape_unicode)
{
	StringInfoData encoded;

	initStringInfo(&encoded);
	encode_string(&encoded, str, length, quote, escape_unicode);

	return encoded.data;
}