Handle unexpected tokens properly

Also, add null and boolean values to the JSON grammar
This commit is contained in:
Max Brunsfeld 2014-03-22 15:46:58 -07:00
parent f248ece3aa
commit c40411b4d1
5 changed files with 158 additions and 40 deletions

View file

@ -23,7 +23,10 @@ namespace test_grammars {
sym("object"),
sym("array"),
sym("string"),
sym("number") }) },
sym("number"),
sym("true"),
sym("false"),
sym("null"), }) },
{ "object", seq({
_sym("left_brace"),
comma_sep(err(seq({
@ -43,6 +46,9 @@ namespace test_grammars {
{ "right_bracket", str("]") },
{ "left_brace", str("{") },
{ "right_brace", str("}") },
{ "null", str("null") },
{ "true", str("true") },
{ "false", str("false") },
});
}
}

View file

@ -1,12 +1,15 @@
#include "tree_sitter/parser.h"
#define TS_SYMBOL_COUNT 15
#define TS_SYMBOL_COUNT 18
enum {
ts_sym_array,
ts_sym_false,
ts_sym_null,
ts_sym_number,
ts_sym_object,
ts_sym_string,
ts_sym_true,
ts_sym_value,
ts_sym_colon,
ts_sym_comma,
@ -20,9 +23,12 @@ enum {
SYMBOL_NAMES = {
"array",
"false",
"null",
"number",
"object",
"string",
"true",
"value",
"colon",
"comma",
@ -72,8 +78,14 @@ LEX_FN() {
ADVANCE(15);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(16);
if (LOOKAHEAD_CHAR() == '{')
if (LOOKAHEAD_CHAR() == 'f')
ADVANCE(17);
if (LOOKAHEAD_CHAR() == 'n')
ADVANCE(22);
if (LOOKAHEAD_CHAR() == 't')
ADVANCE(26);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(30);
LEX_ERROR();
case 9:
if (!((LOOKAHEAD_CHAR() == '\"') ||
@ -134,20 +146,66 @@ LEX_FN() {
case 16:
ACCEPT_TOKEN(ts_sym_left_bracket);
case 17:
ACCEPT_TOKEN(ts_sym_left_brace);
if (LOOKAHEAD_CHAR() == 'a')
ADVANCE(18);
LEX_ERROR();
case 18:
if (LOOKAHEAD_CHAR() == ':')
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(19);
LEX_ERROR();
case 19:
ACCEPT_TOKEN(ts_sym_colon);
if (LOOKAHEAD_CHAR() == 's')
ADVANCE(20);
LEX_ERROR();
case 20:
if (LOOKAHEAD_CHAR() == 'e')
ADVANCE(21);
LEX_ERROR();
case 21:
ACCEPT_TOKEN(ts_sym_false);
case 22:
if (LOOKAHEAD_CHAR() == 'u')
ADVANCE(23);
LEX_ERROR();
case 23:
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(24);
LEX_ERROR();
case 24:
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(25);
LEX_ERROR();
case 25:
ACCEPT_TOKEN(ts_sym_null);
case 26:
if (LOOKAHEAD_CHAR() == 'r')
ADVANCE(27);
LEX_ERROR();
case 27:
if (LOOKAHEAD_CHAR() == 'u')
ADVANCE(28);
LEX_ERROR();
case 28:
if (LOOKAHEAD_CHAR() == 'e')
ADVANCE(29);
LEX_ERROR();
case 29:
ACCEPT_TOKEN(ts_sym_true);
case 30:
ACCEPT_TOKEN(ts_sym_left_brace);
case 31:
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(32);
LEX_ERROR();
case 32:
ACCEPT_TOKEN(ts_sym_colon);
case 33:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(3);
LEX_ERROR();
case 21:
case 34:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
@ -156,42 +214,54 @@ LEX_FN() {
ADVANCE(16);
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(6);
if (LOOKAHEAD_CHAR() == '{')
if (LOOKAHEAD_CHAR() == 'f')
ADVANCE(17);
if (LOOKAHEAD_CHAR() == 'n')
ADVANCE(22);
if (LOOKAHEAD_CHAR() == 't')
ADVANCE(26);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(30);
LEX_ERROR();
case 22:
case 35:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
LEX_ERROR();
case 23:
case 36:
ACCEPT_TOKEN(ts_sym_comma);
case 24:
case 37:
ACCEPT_TOKEN(ts_sym_colon);
case 25:
case 38:
ACCEPT_TOKEN(ts_sym_left_bracket);
case 26:
case 39:
ACCEPT_TOKEN(ts_sym_right_bracket);
case 27:
case 40:
ACCEPT_TOKEN(ts_sym_left_brace);
case 28:
case 41:
ACCEPT_TOKEN(ts_sym_right_brace);
case ts_lex_state_error:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(23);
ADVANCE(36);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(15);
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(24);
ADVANCE(37);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(25);
ADVANCE(38);
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(39);
if (LOOKAHEAD_CHAR() == 'f')
ADVANCE(17);
if (LOOKAHEAD_CHAR() == 'n')
ADVANCE(22);
if (LOOKAHEAD_CHAR() == 't')
ADVANCE(26);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(27);
ADVANCE(40);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(28);
ADVANCE(41);
LEX_ERROR();
default:
LEX_PANIC();
@ -204,9 +274,12 @@ PARSE_TABLE() {
STATE(0);
SET_LEX_STATE(8);
SHIFT(ts_sym_array, 1)
SHIFT(ts_sym_false, 1)
SHIFT(ts_sym_null, 1)
SHIFT(ts_sym_number, 1)
SHIFT(ts_sym_object, 1)
SHIFT(ts_sym_string, 1)
SHIFT(ts_sym_true, 1)
SHIFT(ts_sym_value, 2)
SHIFT(ts_sym_left_brace, 3)
SHIFT(ts_sym_left_bracket, 55)
@ -223,23 +296,26 @@ PARSE_TABLE() {
END_STATE();
STATE(3);
SET_LEX_STATE(20);
SET_LEX_STATE(33);
SHIFT(ts_sym_string, 4)
SHIFT(ts_sym_right_brace, 51)
SHIFT(ts_builtin_sym_error, 52)
END_STATE();
STATE(4);
SET_LEX_STATE(18);
SET_LEX_STATE(31);
SHIFT(ts_sym_colon, 5)
END_STATE();
STATE(5);
SET_LEX_STATE(8);
SHIFT(ts_sym_array, 6)
SHIFT(ts_sym_false, 6)
SHIFT(ts_sym_null, 6)
SHIFT(ts_sym_number, 6)
SHIFT(ts_sym_object, 6)
SHIFT(ts_sym_string, 6)
SHIFT(ts_sym_true, 6)
SHIFT(ts_sym_value, 7)
SHIFT(ts_sym_left_brace, 13)
SHIFT(ts_sym_left_bracket, 19)
@ -259,22 +335,25 @@ PARSE_TABLE() {
END_STATE();
STATE(8);
SET_LEX_STATE(22);
SET_LEX_STATE(35);
SHIFT(ts_sym_string, 9)
SHIFT(ts_builtin_sym_error, 47)
END_STATE();
STATE(9);
SET_LEX_STATE(18);
SET_LEX_STATE(31);
SHIFT(ts_sym_colon, 10)
END_STATE();
STATE(10);
SET_LEX_STATE(8);
SHIFT(ts_sym_array, 6)
SHIFT(ts_sym_false, 6)
SHIFT(ts_sym_null, 6)
SHIFT(ts_sym_number, 6)
SHIFT(ts_sym_object, 6)
SHIFT(ts_sym_string, 6)
SHIFT(ts_sym_true, 6)
SHIFT(ts_sym_value, 11)
SHIFT(ts_sym_left_brace, 13)
SHIFT(ts_sym_left_bracket, 19)
@ -293,23 +372,26 @@ PARSE_TABLE() {
END_STATE();
STATE(13);
SET_LEX_STATE(20);
SET_LEX_STATE(33);
SHIFT(ts_sym_string, 14)
SHIFT(ts_sym_right_brace, 43)
SHIFT(ts_builtin_sym_error, 44)
END_STATE();
STATE(14);
SET_LEX_STATE(18);
SET_LEX_STATE(31);
SHIFT(ts_sym_colon, 15)
END_STATE();
STATE(15);
SET_LEX_STATE(8);
SHIFT(ts_sym_array, 6)
SHIFT(ts_sym_false, 6)
SHIFT(ts_sym_null, 6)
SHIFT(ts_sym_number, 6)
SHIFT(ts_sym_object, 6)
SHIFT(ts_sym_string, 6)
SHIFT(ts_sym_true, 6)
SHIFT(ts_sym_value, 16)
SHIFT(ts_sym_left_brace, 13)
SHIFT(ts_sym_left_bracket, 19)
@ -334,11 +416,14 @@ PARSE_TABLE() {
END_STATE();
STATE(19);
SET_LEX_STATE(21);
SET_LEX_STATE(34);
SHIFT(ts_sym_array, 20)
SHIFT(ts_sym_false, 20)
SHIFT(ts_sym_null, 20)
SHIFT(ts_sym_number, 20)
SHIFT(ts_sym_object, 20)
SHIFT(ts_sym_string, 20)
SHIFT(ts_sym_true, 20)
SHIFT(ts_sym_value, 21)
SHIFT(ts_sym_left_brace, 25)
SHIFT(ts_sym_left_bracket, 35)
@ -362,9 +447,12 @@ PARSE_TABLE() {
STATE(22);
SET_LEX_STATE(8);
SHIFT(ts_sym_array, 20)
SHIFT(ts_sym_false, 20)
SHIFT(ts_sym_null, 20)
SHIFT(ts_sym_number, 20)
SHIFT(ts_sym_object, 20)
SHIFT(ts_sym_string, 20)
SHIFT(ts_sym_true, 20)
SHIFT(ts_sym_value, 23)
SHIFT(ts_sym_left_brace, 25)
SHIFT(ts_sym_left_bracket, 35)
@ -384,23 +472,26 @@ PARSE_TABLE() {
END_STATE();
STATE(25);
SET_LEX_STATE(20);
SET_LEX_STATE(33);
SHIFT(ts_sym_string, 26)
SHIFT(ts_sym_right_brace, 31)
SHIFT(ts_builtin_sym_error, 32)
END_STATE();
STATE(26);
SET_LEX_STATE(18);
SET_LEX_STATE(31);
SHIFT(ts_sym_colon, 27)
END_STATE();
STATE(27);
SET_LEX_STATE(8);
SHIFT(ts_sym_array, 6)
SHIFT(ts_sym_false, 6)
SHIFT(ts_sym_null, 6)
SHIFT(ts_sym_number, 6)
SHIFT(ts_sym_object, 6)
SHIFT(ts_sym_string, 6)
SHIFT(ts_sym_true, 6)
SHIFT(ts_sym_value, 28)
SHIFT(ts_sym_left_brace, 13)
SHIFT(ts_sym_left_bracket, 19)
@ -449,11 +540,14 @@ PARSE_TABLE() {
END_STATE();
STATE(35);
SET_LEX_STATE(21);
SET_LEX_STATE(34);
SHIFT(ts_sym_array, 20)
SHIFT(ts_sym_false, 20)
SHIFT(ts_sym_null, 20)
SHIFT(ts_sym_number, 20)
SHIFT(ts_sym_object, 20)
SHIFT(ts_sym_string, 20)
SHIFT(ts_sym_true, 20)
SHIFT(ts_sym_value, 36)
SHIFT(ts_sym_left_brace, 25)
SHIFT(ts_sym_left_bracket, 35)
@ -571,11 +665,14 @@ PARSE_TABLE() {
END_STATE();
STATE(55);
SET_LEX_STATE(21);
SET_LEX_STATE(34);
SHIFT(ts_sym_array, 20)
SHIFT(ts_sym_false, 20)
SHIFT(ts_sym_null, 20)
SHIFT(ts_sym_number, 20)
SHIFT(ts_sym_object, 20)
SHIFT(ts_sym_string, 20)
SHIFT(ts_sym_true, 20)
SHIFT(ts_sym_value, 56)
SHIFT(ts_sym_left_brace, 25)
SHIFT(ts_sym_left_bracket, 35)

View file

@ -351,7 +351,11 @@ static int ts_lr_parser_handle_error(ts_lr_parser *parser) {
for (;;) {
ts_tree_release(parser->lookahead);
size_t position = ts_lexer_position(&parser->lexer);
parser->lookahead = ts_lex(&parser->lexer, ts_lex_state_error);
if (ts_lexer_position(&parser->lexer) == position)
ts_lexer_advance(&parser->lexer);
if (parser->lookahead->symbol == ts_builtin_sym_end) {
parser->stack.entries[0].node = error;
return 0;

View file

@ -1,13 +1,20 @@
==============================
==========================================
recovers from top-level errors
==============================
==========================================
[}
---
(ERROR)
==================================
==========================================
recovers from unexpected tokens
==========================================
barf
---
(ERROR)
==========================================
recovers from errors inside arrays
==================================
==========================================
[1,,2]
---
(value (array
@ -15,9 +22,9 @@ recovers from errors inside arrays
(ERROR)
(value (number))))
==================================
==========================================
recovers from errors inside objects
==================================
==========================================
{ "key1": 1, 5 }
---
(value (object (string) (value (number)) (ERROR)))

View file

@ -16,14 +16,18 @@ parses empty arrays
parses arrays
===================
[
1, 2, 3,
333,
null,
true,
false,
{ "stuff": "good" }
]
---
(value (array
(value (number))
(value (number))
(value (number))
(value (null))
(value (true))
(value (false))
(value (object
(string) (value (string))
))