Skip to content

Commit 3459499

Browse files
committed
Add allow_control_characters parsing option
While unescaped control characters inside strings are not allowed by the JSON spec, some parsers such as Oj do accept them, and rejecting them can block a transition to this parser. Having this option can help people migrate.
1 parent 1da3fd9 commit 3459499

File tree

6 files changed

+143
-100
lines changed

6 files changed

+143
-100
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
### Unreleased
44

5+
* Add `:allow_control_characters` parser option, to allow JSON strings containing unescaped ASCII control characters (e.g. newlines).
6+
57
### 2025-12-04 (2.17.1)
68

79
* Fix a regression in parsing of unicode surrogate pairs (`\uXX\uXX`) that could cause an invalid string to be returned.

ext/json/ext/parser/parser.c

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ static VALUE CNaN, CInfinity, CMinusInfinity;
77

88
static ID i_new, i_try_convert, i_uminus, i_encode;
99

10-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
10+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze,
1111
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
1212

1313
static int binary_encindex;
@@ -335,6 +335,7 @@ typedef struct JSON_ParserStruct {
335335
int max_nesting;
336336
bool allow_nan;
337337
bool allow_trailing_comma;
338+
bool allow_control_characters;
338339
bool symbolize_names;
339340
bool freeze;
340341
} JSON_ParserConfig;
@@ -752,12 +753,15 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
752753
break;
753754
default:
754755
if ((unsigned char)*pe < 0x20) {
755-
if (*pe == '\n') {
756-
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
756+
if (!config->allow_control_characters) {
757+
if (*pe == '\n') {
758+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
759+
}
760+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
757761
}
758-
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
762+
} else {
763+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
759764
}
760-
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
761765
break;
762766
}
763767
}
@@ -1009,7 +1013,9 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
10091013
break;
10101014
}
10111015
default:
1012-
raise_parse_error("invalid ASCII control character in string: %s", state);
1016+
if (!config->allow_control_characters) {
1017+
raise_parse_error("invalid ASCII control character in string: %s", state);
1018+
}
10131019
break;
10141020
}
10151021

@@ -1430,14 +1436,15 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
14301436
{
14311437
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
14321438

1433-
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1434-
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1435-
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1436-
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1437-
else if (key == sym_freeze) { config->freeze = RTEST(val); }
1438-
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1439-
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1440-
else if (key == sym_decimal_class) {
1439+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1440+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1441+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1442+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
1443+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1444+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
1445+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1446+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1447+
else if (key == sym_decimal_class) {
14411448
if (RTEST(val)) {
14421449
if (rb_respond_to(val, i_try_convert)) {
14431450
config->decimal_class = val;
@@ -1650,6 +1657,7 @@ void Init_parser(void)
16501657
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
16511658
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
16521659
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1660+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
16531661
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
16541662
sym_freeze = ID2SYM(rb_intern("freeze"));
16551663
sym_on_load = ID2SYM(rb_intern("on_load"));

0 commit comments

Comments
 (0)