/*
  Copyright (C) 2010 Joseph A. Adams (joeyadams3.14159@gmail.com)
  All rights reserved.

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
*/

#include "json.h"

#include <ctype.h>


/* When true, encode_string() emits every non-ASCII character as a \uXXXX
   escape (a surrogate pair for code points above U+FFFF) rather than
   copying the raw UTF-8 bytes through. */
bool json_escape_unicode = false;

/* Allocation primitive used throughout this file; maps directly to palloc. */
#define JSON_malloc palloc

/*
 * realloc()-style wrapper: a NULL @ptr means "allocate a fresh chunk".
 * This is needed because repalloc, unlike normal realloc, can't take a
 * null pointer.
 */
static void *JSON_realloc(void *ptr, Size size)
{
	return ptr ? repalloc(ptr, size) : palloc(size);
}
/* free()-style wrapper: quietly ignores NULL, which pfree can't take. */
static void JSON_free(void *ptr)
{
	if (ptr == NULL)
		return;
	pfree(ptr);
}

/*
 * Copy the first @length bytes of @str into a newly allocated buffer and
 * terminate the copy with a zero byte.  Returns the new buffer.
 */
static char *JSON_strdup(const char *str, size_t length)
{
	char *copy = JSON_malloc(length + 1);

	copy[length] = 0;
	memcpy(copy, str, length);
	return copy;
}

/* True if @node is a container (array or object), i.e. a node that can have
   children.  Note that @node is evaluated twice. */
#define is_internal(node) ((node)->type == JSON_ARRAY || (node)->type == JSON_OBJECT)

/* isspace() is unsuitable here: it also matches \v and \f, whereas strict
   JSON only permits space, tab, line feed and carriage return. */
#define is_whitespace(c) ((c)==' ' || (c)=='\t' || (c)=='\n' || (c)=='\r')

/* Advance *sp past any run of JSON whitespace it currently points at. */
static void skip_whitespace(const char **sp)
{
	const char *cursor;

	for (cursor = *sp; is_whitespace(*cursor); cursor++)
		;
	*sp = cursor;
}

/*
 * Return the closing bracket that matches @node's container type:
 * ']' for an array, '}' for an object.  Returns 0 when @node is NULL
 * or is not a container.
 */
static char end_parenthesis(json_node *node)
{
	if (node != NULL) {
		if (node->type == JSON_ARRAY)
			return ']';
		if (node->type == JSON_OBJECT)
			return '}';
	}
	return 0;
}

/*
 * Parse exactly four hexadecimal digits (either case) starting at @in.
 *
 * *out is reset to 0 first and then built up one digit at a time, so on
 * failure it holds whatever was accumulated before the offending character.
 *
 * Returns true if all four characters were hex digits, false otherwise.
 */
static bool read_hex16(const char *in, unsigned int *out)
{
	int remaining;
	
	*out = 0;
	
	for (remaining = 4; remaining > 0; remaining--) {
		char ch = *in++;
		unsigned int digit;
		
		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else if (ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else
			return false;
		
		*out = (*out << 4) + digit;
	}
	
	return true;
}

/*
 * Write the low 16 bits of @val to @out as four uppercase hex digits,
 * most significant nibble first.  No terminator is written.
 */
static void write_hex16(char *out, unsigned int val)
{
	static const char digits[] = "0123456789ABCDEF";
	int shift;
	
	for (shift = 12; shift >= 0; shift -= 4)
		*out++ = digits[(val >> shift) & 0xF];
}

/* Forward declarations of the UTF-8 helpers; their definitions are in the
   Unicode section of this file. */
static bool utf8_validate(const char *str, size_t length);
static void utf8_decode_char_nocheck(const char **sp, unsigned int *uc);
static int utf8_encode_char(char *out, unsigned int uc);


/*************************** String buffer ***************************/

/*
 * Growable character buffer.
 *
 * The type is a one-element array of the struct, so a String variable
 * decays to a pointer when passed to the string_* functions; they can
 * therefore modify the caller's buffer and access fields with ->.
 */
typedef struct {
	char *buffer;	/* storage; NULL until the first string_grow() call */
	size_t length;	/* bytes in use, not counting the '\0' terminator */
	size_t alloc;	/* bytes currently allocated for buffer */
} String[1];

/* Declare and initialize a String with the given name. */
#define String(name) String name = {{NULL, 0, 0}}

/*
 * Extend @str by @need bytes, enlarging the allocation when the new length
 * (plus terminator) no longer fits.  New capacity is 1.5x the new length
 * plus one, with a floor of 8 bytes.
 *
 * The buffer is always '\0'-terminated at the new length.  Returns a
 * pointer to the start of the newly added (uninitialized) region.
 */
static char *string_grow(String str, size_t need)
{
	size_t old_length = str->length;
	size_t new_length = old_length + need;
	
	str->length = new_length;
	if (new_length >= str->alloc) {
		size_t capacity = new_length*3/2 + 1;
		
		if (capacity < 8)
			capacity = 8;
		str->alloc = capacity;
		str->buffer = JSON_realloc(str->buffer, capacity);
	}
	str->buffer[new_length] = '\0';
	return str->buffer + old_length;
}
/*
 * Return the string's buffer, allocating an empty '\0'-terminated one
 * first if nothing has been appended yet (so the result is never NULL).
 */
static char *string_buffer(String str)
{
	if (str->buffer != NULL)
		return str->buffer;
	
	string_grow(str, 0);
	return str->buffer;
}
/* Append exactly @len bytes from @append to @str. */
static inline void string_append_length(String str, const char *append, size_t len)
{
	memcpy(string_grow(str, len), append, len);
}
/* Append the '\0'-terminated C string @append to @str. */
static inline void string_append(String str, const char *append)
{
	size_t append_len = strlen(append);
	
	string_append_length(str, append, append_len);
}
/* Append the single character @c to @str. */
static inline void string_append_char(String str, char c)
{
	char *slot = string_grow(str, 1);
	
	slot[0] = c;
}
/*
 * Set the length of @str to @len and re-terminate it there.
 * The buffer must already be allocated; no reallocation happens here.
 */
static inline void string_trunc(String str, size_t len)
{
	str->buffer[len] = '\0';
	str->length = len;
}
/* Release the storage owned by @str (a no-op if nothing was allocated).
   The String's fields are left untouched. */
static inline void string_free(String str)
{
	char *owned = str->buffer;
	
	JSON_free(owned);
}


/*********** json_node creation, manipulation, and deletion **********/

/*
 * Allocate a zero-filled node of the given @type.  The node starts out
 * detached: no parent, no siblings, no key and no value.
 */
json_node *json_mknode(json_type type)
{
	json_node *fresh = JSON_malloc(sizeof *fresh);
	
	memset(fresh, 0, sizeof *fresh);
	fresh->type = type;
	return fresh;
}

/* Create a detached JSON_BOOL node holding @v_bool. */
json_node *json_mkbool(bool v_bool)
{
	json_node *result = json_mknode(JSON_BOOL);
	
	result->v.v_bool = v_bool;
	return result;
}

/*
 * Create a detached JSON_STRING node holding a private copy of the first
 * @length bytes of @str.  If @str is NULL the node's string stays unset
 * (NULL pointer, zero length).
 */
json_node *json_mkstring(const char *str, size_t length)
{
	json_node *result = json_mknode(JSON_STRING);
	
	if (str == NULL)
		return result;
	
	result->v.string.str = JSON_strdup(str, length);
	result->v.string.length = length;
	return result;
}

/*
 * Create a detached JSON_NUMBER node holding a private, '\0'-terminated
 * copy of the first @length bytes of @number.  The text is not validated
 * here.  If @number is NULL the node's number stays unset.
 */
json_node *json_mknumber(const char *number, size_t length)
{
	json_node *result = json_mknode(JSON_NUMBER);
	
	if (number != NULL)
		result->v.number = JSON_strdup(number, length);
	
	return result;
}

/*
 * Attach @child as the last child of @parent.
 * @parent must be an array or object, and @child must currently be
 * detached (no parent); both conditions are checked with Assert.
 */
void json_append(json_node *parent, json_node *child)
{
	json_node *last;
	
	Assert(parent->type==JSON_ARRAY || parent->type==JSON_OBJECT);
	Assert(child->parent == NULL);
	
	last = parent->v.children.tail;
	
	child->parent = parent;
	child->prev = last;
	child->next = NULL;
	
	/* Hook the child in after the current tail, or as the only child. */
	if (last == NULL)
		parent->v.children.head = child;
	else
		last->next = child;
	parent->v.children.tail = child;
	
	parent->v.children.count++;
}

/*
 * Unlink @node from its parent's child list, leaving it detached
 * (parent, prev and next all NULL).  Nothing is freed.
 * Does nothing if @node has no parent.
 */
void json_remove(json_node *node)
{
	json_node *parent = node->parent;
	json_node *before;
	json_node *after;
	
	if (parent == NULL)
		return;
	Assert(parent->type==JSON_ARRAY || parent->type==JSON_OBJECT);
	Assert(parent->v.children.count > 0);
	
	before = node->prev;
	after = node->next;
	
	/* Patch the forward link (or the list head). */
	if (before != NULL)
		before->next = after;
	else
		parent->v.children.head = after;
	
	/* Patch the backward link (or the list tail). */
	if (after != NULL)
		after->prev = before;
	else
		parent->v.children.tail = before;
	
	node->parent = NULL;
	node->prev = NULL;
	node->next = NULL;
	parent->v.children.count--;
}

/*
 * Return the string stored in a JSON_STRING node (may be NULL if unset).
 * If @length_out is non-NULL, the string's byte length is stored there.
 * The returned pointer refers to the node's own storage.
 */
const char *json_get_string(json_node *node, size_t *length_out)
{
	Assert(node->type == JSON_STRING);
	
	if (length_out != NULL)
		length_out[0] = node->v.string.length;
	
	return node->v.string.str;
}

/*
 * Replace the contents of a JSON_STRING node with a private copy of the
 * first @length bytes of @str.  The previous string is freed first.
 * Passing NULL for @str clears the node (NULL pointer, zero length).
 */
void json_set_string(json_node *node, const char *str, size_t length)
{
	Assert(node->type == JSON_STRING);
	
	/* JSON_free ignores NULL, so no guard is needed. */
	JSON_free(node->v.string.str);
	
	if (str == NULL) {
		node->v.string.str = NULL;
		node->v.string.length = 0;
		return;
	}
	
	node->v.string.str = JSON_strdup(str, length);
	node->v.string.length = length;
}

/*
 * Return the textual number stored in a JSON_NUMBER node (may be NULL if
 * unset).  The returned pointer refers to the node's own storage.
 */
const char *json_get_number(json_node *node)
{
	const char *text;
	
	Assert(node->type == JSON_NUMBER);
	text = node->v.number;
	return text;
}

/*
 * Replace the text of a JSON_NUMBER node with a private copy of the first
 * @length bytes of @number.  The previous text is freed first.  The new
 * text is not validated here.  Passing NULL for @number clears the node.
 */
void json_set_number(json_node *node, const char *number, size_t length)
{
	Assert(node->type == JSON_NUMBER);
	
	/* JSON_free ignores NULL, so no guard is needed. */
	JSON_free(node->v.number);
	
	node->v.number = (number != NULL) ? JSON_strdup(number, length) : NULL;
}

/*
 * Free a single node together with the value text and key it owns.
 * Children are NOT visited; the caller is responsible for them.
 */
static void free_node(json_node *node)
{
	switch (node->type) {
		case JSON_STRING:
			JSON_free(node->v.string.str);
			break;
		case JSON_NUMBER:
			JSON_free(node->v.number);
			break;
		default:
			break;
	}
	
	/* JSON_free ignores NULL, so a missing key needs no special case. */
	JSON_free(node->key);
	JSON_free(node);
}

/*
 * Detach @node from its parent (if any) and free it along with all of its
 * descendants.  Accepts NULL, in which case nothing happens.
 *
 * The traversal is iterative: it walks down to the first leaf, frees nodes
 * left to right, and frees each container once its last child is gone.
 */
void json_delete(json_node *node)
{
	if (!node)
		return;
	
	/* After this the subtree root has no parent and no siblings, which is
	   what eventually terminates the walk below. */
	json_remove(node);
	
	for (;;) {
		/* Descend to the first leaf beneath the current node. */
		while (is_internal(node) && node->v.children.head)
			node = node->v.children.head;
		
		/* Free the current node; then either move to its next sibling
		   (and descend again) or climb and free the parent. */
		for (;;) {
			json_node *up = node->parent;
			json_node *right = node->next;
			
			free_node(node);
			
			if (right) {
				node = right;
				break;
			}
			if (!up)
				return;
			node = up;
		}
	}
}


/*********************** Parsing and validation **********************/

/* Forward declarations of the parsing helpers used by json_decode().
   Each takes a pointer to the read cursor and advances it past the
   text it consumed on success. */
static json_node *decode_leaf(const char **sp);
static json_node *decode_number(const char **sp);
static char *decode_string(const char **sp, size_t *length);
/* decode_string has a different signature than its friends
   because it's also used to parse object member keys. */

/*
 * Report whether @str is strictly valid JSON.  This is done by fully
 * decoding the text and discarding the resulting tree.
 */
bool json_validate(const char *str)
{
	json_node *tree = json_decode(str);
	bool ok = (tree != NULL);
	
	if (ok)
		json_delete(tree);
	return ok;
}

/*
 * Parse the '\0'-terminated JSON text @str into a tree of json_node.
 *
 * Returns the root node on success.  Returns NULL if @str is NULL, is not
 * valid UTF-8 (as judged by utf8_validate), or is not well-formed JSON; in
 * the failure case any partially built tree and any pending member key are
 * freed before returning.
 *
 * Any value is accepted at the top level, not only arrays and objects.
 *
 * The parser is an iterative state machine with one label per state, not a
 * recursive descent.  @parent is the array/object currently being filled,
 * or NULL while at the top level.
 */
json_node *json_decode(const char *str)
{
	json_node *root = NULL, *parent = NULL, *node = NULL;
	const char *s = str;
	char *key = NULL;	/* member key parsed but not yet attached to a node */
	size_t key_length = 0;
	
	if (!str)
		return NULL;
	
	if (!utf8_validate(str, strlen(str)))
		return NULL;
	
	goto item;
	
item: /* Expect a value */
	skip_whitespace(&s);
	
	if (parent && parent->type == JSON_OBJECT) {
		/* Parse member key string. */
		key = decode_string(&s, &key_length);
		if (!key)
			goto failed;
		
		/* Eat the " : " */
		skip_whitespace(&s);
		if (*s != ':')
			goto failed;
		s++;
		skip_whitespace(&s);
	} else {
		key = NULL;
	}
	
	/* Try a leaf first; decode_leaf returns NULL both for containers and
	   for invalid text, so the opening bracket is checked afterwards. */
	node = decode_leaf(&s);
	if (!node) {
		if (*s == '[')
			node = json_mknode(JSON_ARRAY);
		else if (*s == '{')
			node = json_mknode(JSON_OBJECT);
		else
			goto failed;
		s++;
	}
	
	/* Ownership of the key moves to the node, so clear the local to keep
	   the failure path from freeing it a second time. */
	if (key) {
		node->key = key;
		node->key_length = key_length;
		key = NULL;
	}
	
	if (parent)
		json_append(parent, node);
	else
		root = node;
	
	/* A new container becomes the current parent. */
	if (is_internal(node)) {
		parent = node;
		goto item_endp;
	}
	
	if (parent)
		goto comma_endp;
	else
		goto end;
	
comma_endp: /* Expect a comma or end bracket/brace */
	skip_whitespace(&s);
	
	if (*s == ',') {
		s++;
		goto item;
	}
	if (*s == ']' || *s == '}')
		goto endp;
	
	goto failed;

item_endp: /* Expect a value or end bracket/brace */
	skip_whitespace(&s);
	if (*s == ']' || *s == '}')
		goto endp;
	goto item;

endp: /* Handle an end bracket/brace */
	/* The closer must match the kind of container being closed. */
	if (*s != end_parenthesis(parent))
		goto failed;
	s++;
	/* Pop one nesting level; the closed container becomes the current node. */
	node = parent;
	parent = parent->parent;
	if (parent)
		goto comma_endp;
	else
		goto end;

end: /* Expect end of text */
	skip_whitespace(&s);
	if (*s)
		goto failed;
	/* At this point node is the top-level value. */
	return node;
	
failed: /* Handle failure */
	if (key)
		JSON_free(key);
	json_delete(root);
	return NULL;
}

/*
 * Decode one childless value (string, number, true, false or null) at *sp
 * and advance *sp past it.  Leading whitespace is NOT skipped; the main
 * decode loop does that.
 *
 * Returns the new node, or NULL if the text at *sp is '[', '{', or not a
 * valid leaf.  The keyword checks only compare the keyword's own letters;
 * whatever follows is left for the caller to examine.
 */
static json_node *decode_leaf(const char **sp)
{
	char first = **sp;
	
	if (first == '"') {
		size_t length;
		char *str = decode_string(sp, &length);
		json_node *node;
		
		if (str == NULL)
			return NULL;
		
		/* The node takes ownership of the decoded buffer. */
		node = json_mknode(JSON_STRING);
		node->v.string.str = str;
		node->v.string.length = length;
		return node;
	}
	
	if (first == '-' || (first >= '0' && first <= '9'))
		return decode_number(sp);
	
	if (strncmp(*sp, "null", 4) == 0) {
		*sp += 4;
		return json_mknode(JSON_NULL);
	}
	if (strncmp(*sp, "true", 4) == 0) {
		*sp += 4;
		return json_mkbool(true);
	}
	if (strncmp(*sp, "false", 5) == 0) {
		*sp += 5;
		return json_mkbool(false);
	}
	
	return NULL;
}

/*
 * The JSON spec says that a number shall follow this precise pattern
 * (spaces and quotes added for readability):
 *   '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
 *
 * However, some JSON parsers are more liberal.  For instance, PHP accepts
 * '.5' and '1.'.  JSON.parse accepts '+3'.
 *
 * This function takes the strict approach.  The user should use
 * json_cleanup() to handle liberal JSON text.
 *
 * On success, returns true and advances *sp past the longest prefix that
 * matches the pattern; whatever follows is left for the caller to check
 * (for example, "012" matches just "0").  On failure, returns false and
 * leaves *sp unchanged.
 *
 * Every character is converted to unsigned char before being handed to
 * isdigit(): passing a negative value (a byte >= 0x80 where plain char is
 * signed) is undefined behavior.
 */
static bool validate_number(const char **sp)
{
	const char *s = *sp;
	
	/* '-'? */
	if (*s == '-')
		s++;
	
	/* (0 | [1-9][0-9]*) */
	if (*s == '0') {
		s++;
	} else {
		if (!isdigit((unsigned char)*s))
			return false;
		do s++; while (isdigit((unsigned char)*s));
	}

	/* ('.' [0-9]+)? */
	if (*s == '.') {
		s++;
		if (!isdigit((unsigned char)*s))
			return false;
		do s++; while (isdigit((unsigned char)*s));
	}
	
	/* ([Ee] [+-]? [0-9]+)? */
	if (*s=='E' || *s=='e') {
		s++;
		if (*s=='+' || *s=='-')
			s++;
		if (!isdigit((unsigned char)*s))
			return false;
		do s++; while (isdigit((unsigned char)*s));
	}
	
	*sp = s;
	return true;
}

/*
 * Decode a strictly formatted JSON number at *sp into a JSON_NUMBER node
 * holding a copy of the number's text, and advance *sp past it.
 * Returns NULL if the text is not a valid number.
 */
static json_node *decode_number(const char **sp)
{
	const char *begin = *sp;
	
	if (validate_number(sp))
		return json_mknumber(begin, *sp - begin);
	
	return NULL;
}

/*
 * Decode a double-quoted JSON string literal starting at *sp.
 *
 * On success, returns a newly allocated, '\0'-terminated buffer holding the
 * unescaped UTF-8 text, stores its byte length in *length (the text may
 * contain embedded zero bytes produced by \u0000), and advances *sp past
 * the closing quote.
 *
 * Returns NULL, leaving *sp and *length untouched, if the literal does not
 * start with '"', is unterminated, contains a raw control character
 * (<= 0x1F), an unknown escape, a malformed \u escape, an unpaired
 * surrogate, or the code points U+FFFE / U+FFFF.
 */
static char *decode_string(const char **sp, size_t *length)
{
	const char *s = *sp;
	String(ret);
	char *out;
	size_t size;
	
	if (*s++ != '"')
		return NULL;
	
	while (*s && *s != '"') {
		unsigned char c = *s++;
		unsigned int uc, lc;
		
		if (c == '\\') {
			c = *s++;
			switch (c) {
				case '"':
				case '\\':
				case '/':
					break;
				case 'b': c = '\b'; break;
				case 'f': c = '\f'; break;
				case 'n': c = '\n'; break;
				case 'r': c = '\r'; break;
				case 't': c = '\t'; break;
				case 'u':
					/* Reserve 4 bytes, the longest UTF-8 sequence
					   utf8_encode_char can emit; the string is cut
					   back to the real size afterwards. */
					size = ret->length;
					out = string_grow(ret, 4);
					
					if (!read_hex16(s, &uc))
						goto failed;
					s += 4;
					
					if (uc >= 0xD800 && uc <= 0xDFFF) {
						/* Handle UTF-16 surrogate pair. */
						
						if (uc >= 0xDC00)
							goto failed; /* Second surrogate not preceded by
							                first surrogate. */
						
						if (s[0] != '\\' || s[1] != 'u'
						|| !read_hex16(s+2, &lc)
						|| !(lc >= 0xDC00 && lc <= 0xDFFF))
							goto failed; /* First surrogate not followed by
							                second surrogate. */
						
						s += 6;
						
						/* The 20 payload bits are an offset from U+10000.
						   This must be an addition: the offset can itself
						   have bit 16 set (e.g. D840 DC00 is U+20000), so
						   OR-ing 0x10000 in would lose that bit. */
						uc = 0x10000 + (((uc & 0x3FF) << 10) | (lc & 0x3FF));
					}
					
					/* 0xFFFE and 0xFFFF are invalid Unicode */
					if (uc == 0xFFFE || uc == 0xFFFF)
						goto failed;
					
					size += utf8_encode_char(out, uc);
					string_trunc(ret, size);
					
					continue; /* Continue the enclosing while loop to skip
					             the string_append_char below. */
				default: /* Invalid escape */
					goto failed;
			}
		} else if (c <= 0x1F) {
			/* Control characters not allowed in string literals. */
			goto failed;
		}
		string_append_char(ret, c);
	}
	
	/* The loop stops at either the closing quote or the end of input;
	   the latter means the literal was never terminated. */
	if (!*s++)
		goto failed;
	
	*length = ret->length;
	*sp = s;
	return string_buffer(ret);
	
failed:
	string_free(ret);
	return NULL;
}

/*
 * Guess the type of the JSON value in the @nbytes-long buffer @str by
 * looking only at its first non-whitespace character.  The text is not
 * validated; e.g. anything starting with 'n' is reported as JSON_NULL.
 *
 * Returns JSON_INVALID if the buffer is empty, all whitespace, or starts
 * with a character that cannot begin a JSON value.
 */
json_type json_text_type(const char *str, size_t nbytes)
{
	const char *cur;
	const char *end = str + nbytes;
	
	/* Find the first non-whitespace character without leaving the buffer. */
	for (cur = str; cur < end && is_whitespace(*cur); cur++)
		;
	
	if (cur >= end)
		return JSON_INVALID;
	
	switch (*cur) {
		case '{':
			return JSON_OBJECT;
		case '[':
			return JSON_ARRAY;
		case '"':
			return JSON_STRING;
		case 't':
		case 'f':
			return JSON_BOOL;
		case 'n':
			return JSON_NULL;
		case '-':
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			return JSON_NUMBER;
		default:
			return JSON_INVALID;
	}
}


/****************************** Encoding *****************************/

/*
 * Append @string (@length bytes of UTF-8) to @out as a double-quoted JSON
 * string literal.
 *
 * '"' and '\\' are backslash-escaped; \b \f \n \r \t use their short
 * escapes; every other control character U+0000..U+001F is written as
 * \u00XX.  When json_escape_unicode is set, all non-ASCII characters are
 * written as \uXXXX too (as a surrogate pair above U+FFFF).
 *
 * Returns false, appending nothing, if the input is not valid UTF-8;
 * returns true otherwise.
 */
static bool encode_string(String out, const char *string, size_t length)
{
	const char *s = string;
	const char *end = s + length;
	
	if (!utf8_validate(string, length))
		return false;
	
	string_append_char(out, '"');
	
	while (s < end) {
		unsigned char c = *s++;
		char escape = 0;	/* letter following the backslash, or 0 */
		
		switch (c) {
			case '"': escape = '"'; break;
			case '\\': escape = '\\'; break;
			case '\b': escape = 'b'; break;
			case '\f': escape = 'f'; break;
			case '\n': escape = 'n'; break;
			case '\r': escape = 'r'; break;
			case '\t': escape = 't'; break;
			default:
				/* JSON forbids raw control characters up to and
				   including 0x1F, so the bound is inclusive. */
				if (c <= 0x1F || (c >= 0x80 && json_escape_unicode)) {
					/* Encode using \u.... */
					unsigned int uc, lc;
					char txt[13];
					
					/* Step back and decode the whole character. */
					s--;
					utf8_decode_char_nocheck(&s, &uc);
					
					txt[0] = '\\';
					txt[1] = 'u';
					if (uc <= 0xFFFF) {
						write_hex16(txt+2, uc);
						txt[6] = '\0';
					} else {
						/* Split into a UTF-16 surrogate pair. */
						uc -= 0x10000;
						lc = 0xDC00 | (uc & 0x3FF);
						uc = 0xD800 | (uc >> 10);
						write_hex16(txt+2, uc);
						txt[6] = '\\';
						txt[7] = 'u';
						write_hex16(txt+8, lc);
						txt[12] = '\0';
					}
					
					string_append(out, txt);
					continue; /* Skip backslash-encoding code below. */
				}
				break;
		}
		
		if (escape) {
			string_append_char(out, '\\');
			string_append_char(out, escape);
		} else {
			string_append_char(out, c);
		}
	}
	
	string_append_char(out, '"');
	
	return true;
}

/*
 * Append the number text @string to @out after checking that it is a
 * strictly valid JSON number.  Whitespace around the number is tolerated
 * but not copied.
 *
 * Returns false, appending nothing, if @string is NULL, is not a valid
 * number, or has anything other than whitespace after the number.
 */
static bool encode_number(String out, const char *string)
{
	const char *cursor = string;
	const char *first;
	const char *last;
	
	if (string == NULL)
		return false;
	
	/* Locate the number between optional runs of whitespace. */
	skip_whitespace(&cursor);
	first = cursor;
	if (!validate_number(&cursor))
		return false;
	last = cursor;
	
	/* Only whitespace may follow the number. */
	skip_whitespace(&cursor);
	if (*cursor)
		return false;
	
	string_append_length(out, first, last - first);
	return true;
}

/*
 * Serialize @node and its descendants to compact JSON text.
 *
 * Only the subtree rooted at @node is written: its own key (if it is an
 * object member) is not emitted, and its siblings and ancestors are never
 * visited, so a node that is still attached to a parent encodes to a
 * single JSON value.
 *
 * Returns a newly allocated '\0'-terminated string, or NULL if @node is
 * NULL, a node has an unknown type, a string or key is not valid UTF-8,
 * or a number node does not hold a valid JSON number.
 *
 * The traversal is an iterative state machine with one label per state.
 */
char *json_encode(json_node *node)
{
	String(ret);
	const char *txt;
	json_node *root = node;	/* where the traversal must stop */
	
	if (!node)
		return NULL;
	
	goto begin_nokey;
	
begin: /* Encode entire node, or (if it's an array or object)
          the beginning of it. */
	
	if (node->key) {
		if (!encode_string(ret, node->key, node->key_length))
			goto failed;
		string_append_char(ret, ':');
	}
	goto begin_nokey;
	
begin_nokey:
	
	txt = NULL;
	switch (node->type) {
		case JSON_NULL:
			txt = "null";
			break;
		case JSON_BOOL:
			if (node->v.v_bool)
				txt = "true";
			else
				txt = "false";
			break;
		case JSON_STRING:
			if (!encode_string(ret, node->v.string.str, node->v.string.length))
				goto failed;
			break;
		case JSON_NUMBER:
			if (!encode_number(ret, node->v.number))
				goto failed;
			break;
		case JSON_ARRAY:
			txt = "[";
			break;
		case JSON_OBJECT:
			txt = "{";
			break;
		default:
			goto failed;
	}
	if (txt)
		string_append(ret, txt);
	
	if (is_internal(node) && node->v.children.head) {
		node = node->v.children.head;
		goto begin;
	} else {
		goto finish;
	}
	
finish: /* Finish a node and move to the next one. */
	if (node->type == JSON_ARRAY)
		string_append_char(ret, ']');
	else if (node->type == JSON_OBJECT)
		string_append_char(ret, '}');
	
	/* Stop as soon as the starting node is complete.  This check must
	   come before looking at node->next; otherwise a node that has
	   following siblings would drag them into the output. */
	if (node == root)
		goto end;
	
	if (node->next) {
		string_append_char(ret, ',');
		node = node->next;
		goto begin;
	}
	
	/* Last child done: close its parent.  The parent is non-NULL here
	   because node is a strict descendant of root. */
	node = node->parent;
	goto finish;

end: /* All nodes finished being serialized. */
	return string_buffer(ret);
	
failed: /* Handle error. */
	string_free(ret);
	return NULL;
}


/************************ Liberal JSON support ***********************/

/*
 * Report whether @str is acceptable as liberal JSON, i.e. whether it
 * decodes successfully via json_decode_liberal().  The decoded tree is
 * discarded.
 */
bool json_validate_liberal(const char *str)
{
	json_node *tree = json_decode_liberal(str);
	bool ok = (tree != NULL);
	
	if (ok)
		json_delete(tree);
	return ok;
}

/*
 * Decode liberal JSON: the text is first normalized by json_cleanup() and
 * the result is then parsed strictly by json_decode().
 * Returns the root node, or NULL if either step fails.
 */
json_node *json_decode_liberal(const char *str)
{
	char *cleaned = json_cleanup(str);
	json_node *tree;
	
	/* json_decode(NULL) would return NULL as well; short-circuit it. */
	if (cleaned == NULL)
		return NULL;
	
	tree = json_decode(cleaned);
	JSON_free(cleaned);
	return tree;
}

/*
 * Rewrite liberal JSON text into something closer to strict JSON:
 *
 *   - single-quoted strings become double-quoted strings (embedded '"'
 *     characters get a backslash, and \' becomes \u0027);
 *   - a leading '+' on a number is dropped, and a missing digit before or
 *     after the decimal point is filled in with '0' (".5" -> "0.5",
 *     "1." -> "1.0");
 *   - '#' and '//' line comments and C-style block comments are removed.
 *
 * Everything else is copied through unchanged, so the result still has to
 * be checked by json_decode().
 *
 * Returns a newly allocated '\0'-terminated string, or NULL if @str is
 * NULL, a number is malformed beyond repair (e.g. leading zeros), or a
 * block comment is never closed.
 *
 * @p marks the start of the text scanned but not yet copied to the output;
 * @s is the scan position.
 */
char *json_cleanup(const char *str)
{
	String(ret);
	const char *p = str;
	const char *s = str;
	int comment_start_width = 0;
	char quote_char = 0;
	/* flush(): flush content we have scanned, meaning append characters
	 *          from p thru s to ret, then set p to s. */
	#define flush() do { \
			string_append_length(ret, p, s-p); \
			p = s; \
		} while(0)
	/* is_json_digit(): isdigit() on a plain char.  The input has not been
	 *          validated, so bytes >= 0x80 may occur; these are negative
	 *          where char is signed and must be converted to unsigned
	 *          char first, or the call is undefined behavior. */
	#define is_json_digit(c) isdigit((unsigned char)(c))
	
	if (!str)
		return NULL;
	
	goto begin;
	
begin:
	/* Scan ordinary text until something that needs rewriting shows up. */
	for (;*s; s++) {
		if (*s == '"' || *s == '\'')
			goto quote;
		if (is_json_digit(*s) || *s=='-' || *s=='+' || *s=='.')
			goto number;
		if (s[0]=='#') {
			comment_start_width = 1;
			goto line_comment;
		}
		if (s[0]=='/' && s[1]=='/') {
			comment_start_width = 2;
			goto line_comment;
		}
		if (s[0]=='/' && s[1]=='*') {
			comment_start_width = 2;
			goto c_comment;
		}
	}
	flush();
	return ret->buffer;

quote:
	quote_char = *s;
	if (*s == '\'') {
		/* Replace the opening single quote with a double quote. */
		flush();
		string_append_char(ret, '"');
		p = s = s+1;
	} else {
		s++;
	}
	while (*s) {
		if (*s == quote_char) {
			if (*s == '\'') {
				/* Replace the closing single quote as well. */
				flush();
				string_append_char(ret, '"');
				p = s = s+1;
			} else {
				s++;
			}
			break;
		} else if (*s == '"') {
			/* We're converting single quotes to double quotes,
			 * so double quotes need to be automatically escaped. */
			flush();
			string_append_char(ret, '\\');
			s++;
		} else if (*s == '\\') {
			s++;
			switch (*s) {
				case '\0':
					break;
				case '\'':
					/* Convert \' to \u0027 */
					flush();
					string_append(ret, "u0027");
					p = s = s+1;
					break;
				default:
					s++;
			}
		} else {
			s++;
		}
	}
	goto begin;

number:
	/* Skip a '-', or remove a '+' if present. */
	if (*s == '-') {
		s++;
	} else if (*s == '+') {
		flush();
		p = s = s+1;
	}
	/* Make sure number has at least one digit. */
	if (!is_json_digit(*s)) {
		if (*s != '.')
			goto failed;
		if (!is_json_digit(s[1]))
			goto failed;
	}
	/* Make sure that if first digit before '.' is '0', that it is the only digit.
	 * Leading 0s are not allowed, and for a good reason: to avoid ambiguity
	 * between octal and decimal formats. */
	if (*s == '0') {
		s++;
		if (is_json_digit(*s))
			goto failed;
		goto frac;
	}
	/* Skip digits, or add a '0' if none are present. */
	if (is_json_digit(*s)) {
		do s++; while (is_json_digit(*s));
	} else {
		flush();
		string_append_char(ret, '0');
	}
	goto frac;
	
frac:
	if (*s == '.') {
		s++;
		if (is_json_digit(*s)) {
			do s++; while (is_json_digit(*s));
		} else {
			/* Nothing after the point: supply a '0'. */
			flush();
			string_append_char(ret, '0');
		}
	}
/* exp: */
	if (*s=='E' || *s=='e') {
		s++;
		if (*s=='+' || *s=='-')
			s++;
		if (!is_json_digit(*s))
			goto failed;
		do s++; while (is_json_digit(*s));
	}
	/* The digit check is not needed, but here
	 * for clarity and safety. */
	if (is_json_digit(*s) || *s=='-' || *s=='+' || *s=='.')
		goto failed;
	goto begin;

line_comment: /* Remove all characters up to newline */
	flush();
	s += comment_start_width;
	/* Skip characters up to newline */
	while (*s && !(*s == '\n' || *s == '\r'))
		s++;
	/* Skip newline character and its complement (if present).
	 * '\n'+'\r'-s[0] is '\r' when s[0] is '\n' and vice versa. */
	if (s[0]) {
		if (s[1] == '\n'+'\r'-s[0])
			s++;
		s++;
	}
	/* Set begin marker so characters skipped are not
	 * appended to output on next flush. */
	p = s;
	goto begin;
	
c_comment: /* Remove all characters up to star-slash */
	flush();
	s += comment_start_width;
	/* Skip characters up to and including star-slash */
	while (*s && !(s[0] == '*' && s[1] == '/'))
		s++;
	if (*s)
		s += 2;
	else
		goto failed; /* No star-slash present */
	/* Set begin marker so characters skipped are not
	 * appended to output on next flush. */
	p = s;
	goto begin;

failed:
	string_free(ret);
	return NULL;
	
	#undef is_json_digit
	#undef flush
}


/****************************** Unicode ******************************/

/* When false, utf8_validate() rejects three-byte sequences that encode a
   UTF-16 surrogate (lead byte 0xED followed by a byte >= 0xA0, i.e.
   U+D800 thru U+DFFF). */
static const bool utf8_allow_surrogates = false;

/*
 * Decode one UTF-8 sequence at *sp into *uc and advance *sp past it.
 *
 * No validation is performed: the sequence length is taken from the lead
 * byte alone and the continuation bytes are read unconditionally, so the
 * input must already be known to be valid UTF-8.
 */
static void utf8_decode_char_nocheck(const char **sp, unsigned int *uc)
{
	const unsigned char *p = (const unsigned char *)*sp;
	unsigned char lead = *p++;
	unsigned int value;
	unsigned int trailing;	/* continuation bytes still to read */
	
	/* The lead byte gives both the sequence length and the payload mask. */
	if (lead < 0x80) {
		trailing = 0;
		value = lead;
	} else if (lead < 0xE0) {
		trailing = 1;
		value = lead & 0x1F;
	} else if (lead < 0xF0) {
		trailing = 2;
		value = lead & 0xF;
	} else {
		trailing = 3;
		value = lead & 0x7;
	}
	
	/* Each continuation byte contributes its low six bits. */
	for (; trailing > 0; trailing--)
		value = (value << 6) | (*p++ & 0x3F);
	
	*uc = value;
	*sp = (const char*)p;
}

/*
 * Check that the @length bytes at @str form well-formed UTF-8.
 *
 * Rejected: stray continuation bytes, truncated sequences, overlong
 * encodings, lead bytes above 0xF4, code points above U+10FFFF, the
 * non-characters U+FFFE and U+FFFF, and (unless utf8_allow_surrogates is
 * set) encoded UTF-16 surrogates U+D800 thru U+DFFF.
 *
 * Returns true if the whole buffer is valid (an empty buffer is valid).
 */
static bool utf8_validate(const char *str, size_t length)
{
	const unsigned char *p = (const unsigned char*)str;
	const unsigned char *end = p + length;
	
	while (p < end) {
		unsigned char lead = *p++;
		unsigned int extra;	/* continuation bytes this lead requires */
		unsigned int i;
		
		/* Plain ASCII needs no further checks. */
		if (lead < 0x80)
			continue;
		
		/* Every multi-byte sequence has at least one more byte; the
		   range checks below look at it as p[0]. */
		if (p >= end)
			return false;
		
		if (lead >= 0xF0) {
			/* 4-byte sequence, U+010000 to U+10FFFF.
			   The lead must not exceed 0xF4.  A lead of 0xF0 needs a
			   second byte >= 0x90 (else overlong); a lead of 0xF4
			   needs a second byte < 0x90 (else above U+10FFFF). */
			if (lead > 0xF4)
				return false;
			if (lead == 0xF0 && p[0] < 0x90)
				return false;
			if (lead == 0xF4 && p[0] >= 0x90)
				return false;
			extra = 3;
		} else if (lead >= 0xE0) {
			/* 3-byte sequence, U+0800 to U+FFFF.
			   A lead of 0xE0 needs a second byte >= 0xA0 (else
			   overlong).  0xED with a second byte >= 0xA0 is a
			   surrogate.  0xEF 0xBF followed by a byte >= 0xBE is
			   U+FFFE or U+FFFF (a missing third byte fails too). */
			if (lead == 0xE0 && p[0] < 0xA0)
				return false;
			if (!utf8_allow_surrogates && lead == 0xED && p[0] >= 0xA0)
				return false;
			if (lead == 0xEF && p[0] == 0xBF && (p+1 >= end || p[1] >= 0xBE))
				return false;
			extra = 2;
		} else {
			/* 2-byte sequence, U+0080 to U+07FF.
			   Leads below 0xC2 are either continuation bytes or
			   would produce an overlong encoding. */
			if (lead < 0xC2)
				return false;
			extra = 1;
		}
		
		/* All continuation bytes must be inside the buffer... */
		if ((size_t)(end - p) < extra)
			return false;
		
		/* ...and each must have the form 10xxxxxx. */
		for (i = 0; i < extra; i++) {
			if ((p[i] & 0xC0) != 0x80)
				return false;
		}
		p += extra;
	}
	
	return true;
}

/*
 * Encodes the Unicode character uc as UTF-8, writing the bytes to the
 * buffer at out (which needs room for up to 4 bytes).  No terminator is
 * written.
 *
 * If uc is too high (above U+10FFFF), nothing is written.  If uc is in
 * the UTF-16 surrogate range (U+D800 thru U+DFFF) or is a designated
 * not-a-character (U+FFFE or U+FFFF), the character will be emitted
 * anyway, although it is technically invalid UTF-8.
 *
 * Returns the number of bytes written (0 to 4).
 */
static int utf8_encode_char(char *out, unsigned int uc)
{
	if (uc < 0x80) {
		out[0] = uc & 0x7F;
		return 1;
	}
	if (uc < 0x800) {
		out[0] = 0xC0 | (uc >> 6);
		out[1] = 0x80 | (uc & 0x3F);
		return 2;
	}
	if (uc < 0x10000) {
		out[0] = 0xE0 | (uc >> 12);
		out[1] = 0x80 | ((uc >> 6) & 0x3F);
		out[2] = 0x80 | (uc & 0x3F);
		return 3;
	}
	if (uc < 0x110000) {
		out[0] = 0xF0 | ((uc >> 18) & 0x07);
		out[1] = 0x80 | ((uc >> 12) & 0x3F);
		out[2] = 0x80 | ((uc >> 6) & 0x3F);
		out[3] = 0x80 | (uc & 0x3F);
		return 4;
	}
	
	return 0;
}