From fd0d77ef8b4a80e03c74e9564d2742cf016bf512 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 28 Jan 2014 13:27:30 -0800 Subject: [PATCH] Separate auxiliary rules from user-specified rules --- spec/compiler/expand_repeats_spec.cpp | 7 +- .../prepare_grammar/extract_tokens_spec.cpp | 14 +- spec/fixtures/parsers/arithmetic.c | 262 +++---- spec/fixtures/parsers/json.c | 732 ++++++++++++------ spec/runtime/arithmetic_spec.cpp | 2 +- spec/runtime/json_spec.cpp | 13 +- src/compiler/generate_code/c_code.cpp | 5 +- src/compiler/grammar.cpp | 43 +- src/compiler/grammar.h | 11 +- .../prepare_grammar/expand_repeats.cpp | 12 +- .../prepare_grammar/extract_tokens.cpp | 22 +- src/compiler/prepare_grammar/perform.cpp | 6 +- src/compiler/rules/rules.cpp | 6 +- src/compiler/rules/rules.h | 1 + src/compiler/rules/symbol.cpp | 15 +- src/compiler/rules/symbol.h | 2 + 16 files changed, 741 insertions(+), 412 deletions(-) diff --git a/spec/compiler/expand_repeats_spec.cpp b/spec/compiler/expand_repeats_spec.cpp index 9e280e47..92cd2d0e 100644 --- a/spec/compiler/expand_repeats_spec.cpp +++ b/spec/compiler/expand_repeats_spec.cpp @@ -16,16 +16,17 @@ describe("expanding repeat rules in a grammar", []() { }) }, })); - AssertThat(result, Equals(Grammar({ + AssertThat(result, Equals(Grammar("rule1", { { "rule1", seq({ sym("x"), - sym("repeat_helper1"), + aux_sym("repeat_helper1"), sym("y") }) }, + }, { { "repeat_helper1", seq({ seq({ sym("a"), sym("b") }), choice({ - sym("repeat_helper1") , + aux_sym("repeat_helper1") , blank() }), }) } diff --git a/spec/compiler/prepare_grammar/extract_tokens_spec.cpp b/spec/compiler/prepare_grammar/extract_tokens_spec.cpp index 3b64dd52..c26d2772 100644 --- a/spec/compiler/prepare_grammar/extract_tokens_spec.cpp +++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cpp @@ -22,15 +22,15 @@ describe("preparing a grammar", []() { AssertThat(result.first, Equals(Grammar({ { "rule1", seq({ - sym("1"), + aux_sym("token1"), seq({ sym("rule2"), sym("rule3") }), - sym("1") }) } + aux_sym("token1") }) } }))); - AssertThat(result.second, Equals(Grammar("", { - { "1", rules::seq({ + AssertThat(result.second, Equals(Grammar("", {}, { + { "token1", rules::seq({ rules::character('a'), rules::character('b') }) }, }))); @@ -68,14 +68,14 @@ describe("preparing a grammar", []() { AssertThat(result.first, Equals(Grammar({ { "rule1", seq({ choice({ - repeat(choice({ sym("1"), sym("a") })), + repeat(choice({ aux_sym("token1"), sym("a") })), sym("b"), }), sym("c") }) } }))); - AssertThat(result.second, Equals(Grammar("", { - { "1", str("stuff") }, + AssertThat(result.second, Equals(Grammar("", {}, { + { "token1", str("stuff") }, }))); }); }); diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index 212897ef..70a91561 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -2,28 +2,28 @@ #include enum ts_symbol { - ts_symbol_factor, - ts_symbol_1, ts_symbol_number, + ts_symbol_expression, ts_symbol_variable, + ts_symbol_factor, + ts_aux_token1, + ts_symbol_plus, + ts_aux_token2, ts_symbol_times, ts_symbol_term, - ts_symbol_expression, - ts_symbol_plus, - ts_symbol_2, ts_symbol___END__, }; static const char *ts_symbol_names[] = { - "factor", - "1", "number", + "expression", "variable", + "factor", + "token1", + "plus", + "token2", "times", "term", - "expression", - "plus", - "2", "__END__", }; @@ -49,43 +49,43 @@ static void ts_lex(TSParser *parser) { ADVANCE(5); LEX_ERROR(1, EXPECT({"')'"})); case 5: - ACCEPT_TOKEN(ts_symbol_2); + ACCEPT_TOKEN(ts_aux_token2); case 6: - if (LOOKAHEAD_CHAR() == '*') - ADVANCE(3); if (LOOKAHEAD_CHAR() == ')') ADVANCE(5); - LEX_ERROR(2, EXPECT({"')'", "'*'"})); + if (LOOKAHEAD_CHAR() == '*') + ADVANCE(3); + LEX_ERROR(2, EXPECT({"'*'", "')'"})); case 7: + if (LOOKAHEAD_CHAR() == ')') + ADVANCE(5); if (LOOKAHEAD_CHAR() == '*') ADVANCE(3); if (LOOKAHEAD_CHAR() == '+') ADVANCE(8); - if (LOOKAHEAD_CHAR() == ')') - ADVANCE(5); - LEX_ERROR(3, EXPECT({"')'", "'+'", "'*'"})); + LEX_ERROR(3, EXPECT({"'+'", "'*'", "')'"})); case 8: ACCEPT_TOKEN(ts_symbol_plus); case 9: - if (LOOKAHEAD_CHAR() == '+') - ADVANCE(8); if (LOOKAHEAD_CHAR() == ')') ADVANCE(5); - LEX_ERROR(2, EXPECT({"')'", "'+'"})); + if (LOOKAHEAD_CHAR() == '+') + ADVANCE(8); + LEX_ERROR(2, EXPECT({"'+'", "')'"})); case 10: if (isalnum(LOOKAHEAD_CHAR())) ADVANCE(13); - if (isdigit(LOOKAHEAD_CHAR())) - ADVANCE(12); if (LOOKAHEAD_CHAR() == '(') - ADVANCE(11); - LEX_ERROR(3, EXPECT({"'('", "", ""})); - case 11: - ACCEPT_TOKEN(ts_symbol_1); - case 12: - if (isdigit(LOOKAHEAD_CHAR())) ADVANCE(12); + if (isdigit(LOOKAHEAD_CHAR())) + ADVANCE(11); + LEX_ERROR(3, EXPECT({"", "'('", ""})); + case 11: + if (isdigit(LOOKAHEAD_CHAR())) + ADVANCE(11); ACCEPT_TOKEN(ts_symbol_number); + case 12: + ACCEPT_TOKEN(ts_aux_token1); case 13: if (isalnum(LOOKAHEAD_CHAR())) ADVANCE(13); @@ -118,18 +118,18 @@ static TSParseResult ts_parse(const char *input) { switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(45); - case ts_symbol_1: + case ts_aux_token1: SHIFT(42); case ts_symbol_number: SHIFT(41); + case ts_symbol_expression: + SHIFT(1); case ts_symbol_variable: SHIFT(41); case ts_symbol_term: SHIFT(2); - case ts_symbol_expression: - SHIFT(1); default: - PARSE_ERROR(6, EXPECT({"expression", "factor", "variable", "number", "1", "term"})); + PARSE_ERROR(6, EXPECT({"term", "expression", "variable", "number", "token1", "factor"})); } case 1: SET_LEX_STATE(0); @@ -152,18 +152,18 @@ static TSParseResult ts_parse(const char *input) { case 3: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_1: + case ts_symbol_factor: + SHIFT(34); + case ts_aux_token1: SHIFT(6); case ts_symbol_number: SHIFT(5); case ts_symbol_variable: SHIFT(5); - case ts_symbol_factor: - SHIFT(34); case ts_symbol_term: SHIFT(4); default: - PARSE_ERROR(5, EXPECT({"term", "factor", "variable", "number", "1"})); + PARSE_ERROR(5, EXPECT({"term", "variable", "number", "token1", "factor"})); } case 4: SET_LEX_STATE(0); @@ -186,314 +186,314 @@ static TSParseResult ts_parse(const char *input) { case 6: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_variable: + SHIFT(12); case ts_symbol_expression: SHIFT(32); case ts_symbol_factor: SHIFT(16); - case ts_symbol_1: + case ts_aux_token1: SHIFT(13); case ts_symbol_number: SHIFT(12); - case ts_symbol_variable: - SHIFT(12); case ts_symbol_term: SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "expression", "number", "token1", "factor"})); } case 7: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: REDUCE(ts_symbol_expression, 1); case ts_symbol_plus: SHIFT(8); default: - PARSE_ERROR(2, EXPECT({"plus", "2"})); + PARSE_ERROR(2, EXPECT({"plus", "token2"})); } case 8: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_1: + case ts_symbol_factor: + SHIFT(25); + case ts_aux_token1: SHIFT(11); case ts_symbol_number: SHIFT(10); case ts_symbol_variable: SHIFT(10); - case ts_symbol_factor: - SHIFT(25); case ts_symbol_term: SHIFT(9); default: - PARSE_ERROR(5, EXPECT({"term", "factor", "variable", "number", "1"})); + PARSE_ERROR(5, EXPECT({"term", "variable", "number", "token1", "factor"})); } case 9: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: REDUCE(ts_symbol_expression, 3); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 10: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_factor, 1); case ts_symbol_times: REDUCE(ts_symbol_factor, 1); - case ts_symbol_2: - REDUCE(ts_symbol_factor, 1); default: - PARSE_ERROR(2, EXPECT({"2", "times"})); + PARSE_ERROR(2, EXPECT({"times", "token2"})); } case 11: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_variable: + SHIFT(12); case ts_symbol_expression: SHIFT(23); case ts_symbol_factor: SHIFT(16); - case ts_symbol_1: + case ts_aux_token1: SHIFT(13); case ts_symbol_number: SHIFT(12); - case ts_symbol_variable: - SHIFT(12); case ts_symbol_term: SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "expression", "number", "token1", "factor"})); } case 12: SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_factor, 1); case ts_symbol_times: REDUCE(ts_symbol_factor, 1); case ts_symbol_plus: REDUCE(ts_symbol_factor, 1); - case ts_symbol_2: - REDUCE(ts_symbol_factor, 1); default: - PARSE_ERROR(3, EXPECT({"2", "plus", "times"})); + PARSE_ERROR(3, EXPECT({"plus", "times", "token2"})); } case 13: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_variable: + SHIFT(12); case ts_symbol_expression: SHIFT(14); case ts_symbol_factor: SHIFT(16); - case ts_symbol_1: + case ts_aux_token1: SHIFT(13); case ts_symbol_number: SHIFT(12); - case ts_symbol_variable: - SHIFT(12); case ts_symbol_term: SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "expression", "number", "token1", "factor"})); } case 14: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: SHIFT(15); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 15: SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_factor, 3); case ts_symbol_times: REDUCE(ts_symbol_factor, 3); case ts_symbol_plus: REDUCE(ts_symbol_factor, 3); - case ts_symbol_2: - REDUCE(ts_symbol_factor, 3); default: - PARSE_ERROR(3, EXPECT({"2", "plus", "times"})); + PARSE_ERROR(3, EXPECT({"plus", "times", "token2"})); } case 16: SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_symbol_plus: REDUCE(ts_symbol_term, 1); - case ts_symbol_2: + case ts_aux_token2: REDUCE(ts_symbol_term, 1); case ts_symbol_times: SHIFT(17); default: - PARSE_ERROR(3, EXPECT({"times", "2", "plus"})); + PARSE_ERROR(3, EXPECT({"times", "token2", "plus"})); } case 17: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(22); - case ts_symbol_1: + case ts_aux_token1: SHIFT(19); case ts_symbol_number: SHIFT(18); case ts_symbol_variable: SHIFT(18); default: - PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"})); + PARSE_ERROR(4, EXPECT({"variable", "number", "token1", "factor"})); } case 18: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_factor, 1); case ts_symbol_plus: REDUCE(ts_symbol_factor, 1); - case ts_symbol_2: - REDUCE(ts_symbol_factor, 1); default: - PARSE_ERROR(2, EXPECT({"2", "plus"})); + PARSE_ERROR(2, EXPECT({"plus", "token2"})); } case 19: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_variable: + SHIFT(12); case ts_symbol_expression: SHIFT(20); case ts_symbol_factor: SHIFT(16); - case ts_symbol_1: + case ts_aux_token1: SHIFT(13); case ts_symbol_number: SHIFT(12); - case ts_symbol_variable: - SHIFT(12); case ts_symbol_term: SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "expression", "number", "token1", "factor"})); } case 20: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: SHIFT(21); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 21: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_factor, 3); case ts_symbol_plus: REDUCE(ts_symbol_factor, 3); - case ts_symbol_2: - REDUCE(ts_symbol_factor, 3); default: - PARSE_ERROR(2, EXPECT({"2", "plus"})); + PARSE_ERROR(2, EXPECT({"plus", "token2"})); } case 22: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_term, 3); case ts_symbol_plus: REDUCE(ts_symbol_term, 3); - case ts_symbol_2: - REDUCE(ts_symbol_term, 3); default: - PARSE_ERROR(2, EXPECT({"2", "plus"})); + PARSE_ERROR(2, EXPECT({"plus", "token2"})); } case 23: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: SHIFT(24); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 24: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_factor, 3); case ts_symbol_times: REDUCE(ts_symbol_factor, 3); - case ts_symbol_2: - REDUCE(ts_symbol_factor, 3); default: - PARSE_ERROR(2, EXPECT({"2", "times"})); + PARSE_ERROR(2, EXPECT({"times", "token2"})); } case 25: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: REDUCE(ts_symbol_term, 1); case ts_symbol_times: SHIFT(26); default: - PARSE_ERROR(2, EXPECT({"times", "2"})); + PARSE_ERROR(2, EXPECT({"times", "token2"})); } case 26: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(31); - case ts_symbol_1: + case ts_aux_token1: SHIFT(28); case ts_symbol_number: SHIFT(27); case ts_symbol_variable: SHIFT(27); default: - PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"})); + PARSE_ERROR(4, EXPECT({"variable", "number", "token1", "factor"})); } case 27: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: REDUCE(ts_symbol_factor, 1); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 28: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_variable: + SHIFT(12); case ts_symbol_expression: SHIFT(29); case ts_symbol_factor: SHIFT(16); - case ts_symbol_1: + case ts_aux_token1: SHIFT(13); case ts_symbol_number: SHIFT(12); - case ts_symbol_variable: - SHIFT(12); case ts_symbol_term: SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "expression", "number", "token1", "factor"})); } case 29: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: SHIFT(30); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 30: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: REDUCE(ts_symbol_factor, 3); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 31: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: REDUCE(ts_symbol_term, 3); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 32: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: SHIFT(33); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 33: SET_LEX_STATE(2); @@ -520,14 +520,14 @@ static TSParseResult ts_parse(const char *input) { switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(40); - case ts_symbol_1: + case ts_aux_token1: SHIFT(37); case ts_symbol_number: SHIFT(36); case ts_symbol_variable: SHIFT(36); default: - PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"})); + PARSE_ERROR(4, EXPECT({"variable", "number", "token1", "factor"})); } case 36: SET_LEX_STATE(0); @@ -540,28 +540,28 @@ static TSParseResult ts_parse(const char *input) { case 37: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_variable: + SHIFT(12); case ts_symbol_expression: SHIFT(38); case ts_symbol_factor: SHIFT(16); - case ts_symbol_1: + case ts_aux_token1: SHIFT(13); case ts_symbol_number: SHIFT(12); - case ts_symbol_variable: - SHIFT(12); case ts_symbol_term: SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "expression", "number", "token1", "factor"})); } case 38: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: SHIFT(39); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 39: SET_LEX_STATE(0); @@ -594,28 +594,28 @@ static TSParseResult ts_parse(const char *input) { case 42: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_variable: + SHIFT(12); case ts_symbol_expression: SHIFT(43); case ts_symbol_factor: SHIFT(16); - case ts_symbol_1: + case ts_aux_token1: SHIFT(13); case ts_symbol_number: SHIFT(12); - case ts_symbol_variable: - SHIFT(12); case ts_symbol_term: SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "expression", "number", "token1", "factor"})); } case 43: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: SHIFT(44); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 44: SET_LEX_STATE(15); @@ -646,14 +646,14 @@ static TSParseResult ts_parse(const char *input) { switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(51); - case ts_symbol_1: + case ts_aux_token1: SHIFT(48); case ts_symbol_number: SHIFT(47); case ts_symbol_variable: SHIFT(47); default: - PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"})); + PARSE_ERROR(4, EXPECT({"variable", "number", "token1", "factor"})); } case 47: SET_LEX_STATE(14); @@ -668,28 +668,28 @@ static TSParseResult ts_parse(const char *input) { case 48: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_variable: + SHIFT(12); case ts_symbol_expression: SHIFT(49); case ts_symbol_factor: SHIFT(16); - case ts_symbol_1: + case ts_aux_token1: SHIFT(13); case ts_symbol_number: SHIFT(12); - case ts_symbol_variable: - SHIFT(12); case ts_symbol_term: SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "expression", "number", "token1", "factor"})); } case 49: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_2: + case ts_aux_token2: SHIFT(50); default: - PARSE_ERROR(1, EXPECT({"2"})); + PARSE_ERROR(1, EXPECT({"token2"})); } case 50: SET_LEX_STATE(14); diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index 5e5ab323..8fcc3744 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -2,38 +2,38 @@ #include enum ts_symbol { + ts_aux_token6, ts_symbol_number, ts_symbol_string, - ts_symbol_object, - ts_symbol_repeat_helper2, - ts_symbol_repeat_helper1, - ts_symbol_3, - ts_symbol_6, - ts_symbol_7, - ts_symbol_4, - ts_symbol___END__, ts_symbol_array, - ts_symbol_2, - ts_symbol_5, - ts_symbol_1, + ts_symbol_object, + ts_aux_repeat_helper1, + ts_aux_token7, + ts_aux_repeat_helper2, + ts_aux_token4, + ts_aux_token5, + ts_aux_token1, + ts_symbol___END__, + ts_aux_token3, + ts_aux_token2, ts_symbol_value, }; static const char *ts_symbol_names[] = { + "token6", "number", "string", - "object", - "repeat_helper2", - "repeat_helper1", - "3", - "6", - "7", - "4", - "__END__", "array", - "2", - "5", - "1", + "object", + "repeat_helper1", + "token7", + "repeat_helper2", + "token4", + "token5", + "token1", + "__END__", + "token3", + "token2", "value", }; @@ -49,35 +49,43 @@ static void ts_lex(TSParser *parser) { case 2: if (LOOKAHEAD_CHAR() == ',') ADVANCE(3); - ACCEPT_TOKEN(ts_symbol_2); + ACCEPT_TOKEN(ts_aux_token3); case 3: - ACCEPT_TOKEN(ts_symbol_7); + ACCEPT_TOKEN(ts_aux_token2); case 4: if (LOOKAHEAD_CHAR() == ']') ADVANCE(5); LEX_ERROR(1, EXPECT({"']'"})); case 5: - ACCEPT_TOKEN(ts_symbol_3); + ACCEPT_TOKEN(ts_aux_token4); case 6: - if (LOOKAHEAD_CHAR() == '}') - ADVANCE(7); - LEX_ERROR(1, EXPECT({"'}'"})); + if (LOOKAHEAD_CHAR() == ']') + ADVANCE(5); + if (LOOKAHEAD_CHAR() == ',') + ADVANCE(3); + LEX_ERROR(2, EXPECT({"','", "']'"})); case 7: - ACCEPT_TOKEN(ts_symbol_6); + if (LOOKAHEAD_CHAR() == '}') + ADVANCE(8); + LEX_ERROR(1, EXPECT({"'}'"})); case 8: + ACCEPT_TOKEN(ts_aux_token7); + case 9: + if (LOOKAHEAD_CHAR() == '}') + ADVANCE(8); + if (LOOKAHEAD_CHAR() == ',') + ADVANCE(3); + LEX_ERROR(2, EXPECT({"','", "'}'"})); + case 10: + if (LOOKAHEAD_CHAR() == '{') + ADVANCE(16); + if (LOOKAHEAD_CHAR() == '[') + ADVANCE(15); if (LOOKAHEAD_CHAR() == '\"') ADVANCE(12); if (isdigit(LOOKAHEAD_CHAR())) ADVANCE(11); - if (LOOKAHEAD_CHAR() == '{') - ADVANCE(10); - if (LOOKAHEAD_CHAR() == '[') - ADVANCE(9); - LEX_ERROR(4, EXPECT({"'['", "'{'", "", "'\"'"})); - case 9: - ACCEPT_TOKEN(ts_symbol_1); - case 10: - ACCEPT_TOKEN(ts_symbol_4); + LEX_ERROR(4, EXPECT({"", "'\"'", "'['", "'{'"})); case 11: if (isdigit(LOOKAHEAD_CHAR())) ADVANCE(11); @@ -95,12 +103,16 @@ static void ts_lex(TSParser *parser) { case 14: ACCEPT_TOKEN(ts_symbol_string); case 15: - if (LOOKAHEAD_CHAR() == ':') - ADVANCE(16); - LEX_ERROR(1, EXPECT({"':'"})); + ACCEPT_TOKEN(ts_aux_token1); case 16: - ACCEPT_TOKEN(ts_symbol_5); + ACCEPT_TOKEN(ts_aux_token5); case 17: + if (LOOKAHEAD_CHAR() == ':') + ADVANCE(18); + LEX_ERROR(1, EXPECT({"':'"})); + case 18: + ACCEPT_TOKEN(ts_aux_token6); + case 19: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(12); LEX_ERROR(1, EXPECT({"'\"'"})); @@ -114,24 +126,24 @@ static TSParseResult ts_parse(const char *input) { START_PARSER(); switch (PARSE_STATE()) { case 0: - SET_LEX_STATE(8); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: - SHIFT(31); - case ts_symbol_4: - SHIFT(25); - case ts_symbol_number: - SHIFT(31); - case ts_symbol_1: + SHIFT(53); + case ts_symbol_object: + SHIFT(53); + case ts_aux_token1: SHIFT(2); case ts_symbol_string: - SHIFT(31); - case ts_symbol_object: - SHIFT(31); + SHIFT(53); + case ts_aux_token5: + SHIFT(47); + case ts_symbol_number: + SHIFT(53); case ts_symbol_value: SHIFT(1); default: - PARSE_ERROR(7, EXPECT({"value", "object", "string", "number", "1", "4", "array"})); + PARSE_ERROR(7, EXPECT({"value", "number", "token5", "string", "token1", "object", "array"})); } case 1: SET_LEX_STATE(0); @@ -142,268 +154,520 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"__END__"})); } case 2: - SET_LEX_STATE(8); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: - SHIFT(19); - case ts_symbol_string: - SHIFT(19); + SHIFT(25); case ts_symbol_object: - SHIFT(19); - case ts_symbol_value: - SHIFT(22); - case ts_symbol_4: - SHIFT(8); + SHIFT(25); + case ts_symbol_string: + SHIFT(25); + case ts_aux_token5: + SHIFT(12); case ts_symbol_number: - SHIFT(19); - case ts_symbol_1: + SHIFT(25); + case ts_symbol_value: + SHIFT(44); + case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); + PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "number", "object", "array"})); } case 3: - SET_LEX_STATE(8); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: - SHIFT(19); - case ts_symbol_string: - SHIFT(19); + SHIFT(25); case ts_symbol_object: - SHIFT(19); + SHIFT(25); + case ts_symbol_string: + SHIFT(25); + case ts_aux_token5: + SHIFT(12); + case ts_symbol_number: + SHIFT(25); case ts_symbol_value: SHIFT(4); - case ts_symbol_4: - SHIFT(8); - case ts_symbol_number: - SHIFT(19); - case ts_symbol_1: + case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); + PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "number", "object", "array"})); } case 4: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: + case ts_aux_token2: SHIFT(7); - case ts_symbol_2: + case ts_aux_token3: SHIFT(5); - case ts_symbol_repeat_helper1: + case ts_aux_repeat_helper2: SHIFT(5); default: - PARSE_ERROR(3, EXPECT({"repeat_helper1", "2", "7"})); + PARSE_ERROR(3, EXPECT({"repeat_helper2", "token3", "token2"})); } case 5: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_3: + case ts_aux_token4: SHIFT(6); default: - PARSE_ERROR(1, EXPECT({"3"})); + PARSE_ERROR(1, EXPECT({"token4"})); } case 6: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: + case ts_aux_token3: REDUCE(ts_symbol_array, 4); - case ts_symbol_2: + case ts_aux_token2: REDUCE(ts_symbol_array, 4); default: - PARSE_ERROR(2, EXPECT({"2", "7"})); + PARSE_ERROR(2, EXPECT({"token2", "token3"})); } case 7: - SET_LEX_STATE(8); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: - SHIFT(19); - case ts_symbol_string: - SHIFT(19); + SHIFT(43); case ts_symbol_object: - SHIFT(19); - case ts_symbol_value: - SHIFT(20); - case ts_symbol_4: - SHIFT(8); + SHIFT(43); case ts_symbol_number: - SHIFT(19); - case ts_symbol_1: - SHIFT(3); + SHIFT(43); + case ts_symbol_value: + SHIFT(41); + case ts_symbol_string: + SHIFT(43); + case ts_aux_token5: + SHIFT(35); + case ts_aux_token1: + SHIFT(8); default: - PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); + PARSE_ERROR(7, EXPECT({"token1", "number", "value", "token5", "string", "object", "array"})); } case 8: - SET_LEX_STATE(17); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { + case ts_symbol_array: + SHIFT(25); + case ts_symbol_object: + SHIFT(25); case ts_symbol_string: + SHIFT(25); + case ts_aux_token5: + SHIFT(12); + case ts_symbol_number: + SHIFT(25); + case ts_symbol_value: SHIFT(9); + case ts_aux_token1: + SHIFT(3); default: - PARSE_ERROR(1, EXPECT({"string"})); + PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "number", "object", "array"})); } case 9: - SET_LEX_STATE(15); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_5: + case ts_aux_token2: + SHIFT(7); + case ts_aux_token3: + SHIFT(10); + case ts_aux_repeat_helper2: SHIFT(10); default: - PARSE_ERROR(1, EXPECT({"5"})); + PARSE_ERROR(3, EXPECT({"repeat_helper2", "token3", "token2"})); } case 10: - SET_LEX_STATE(8); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_array: - SHIFT(19); - case ts_symbol_string: - SHIFT(19); - case ts_symbol_object: - SHIFT(19); - case ts_symbol_value: + case ts_aux_token4: SHIFT(11); - case ts_symbol_4: - SHIFT(8); - case ts_symbol_number: - SHIFT(19); - case ts_symbol_1: - SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); + PARSE_ERROR(1, EXPECT({"token4"})); } case 11: - SET_LEX_STATE(2); - switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: - SHIFT(14); - case ts_symbol_2: - SHIFT(12); - case ts_symbol_repeat_helper2: - SHIFT(12); - default: - PARSE_ERROR(3, EXPECT({"repeat_helper2", "2", "7"})); - } - case 12: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_symbol_6: - SHIFT(13); + case ts_aux_token4: + REDUCE(ts_symbol_array, 4); + case ts_aux_token2: + REDUCE(ts_symbol_array, 4); default: - PARSE_ERROR(1, EXPECT({"6"})); + PARSE_ERROR(2, EXPECT({"token2", "token4"})); } - case 13: - SET_LEX_STATE(2); - switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: - REDUCE(ts_symbol_object, 6); - case ts_symbol_2: - REDUCE(ts_symbol_object, 6); - default: - PARSE_ERROR(2, EXPECT({"2", "7"})); - } - case 14: - SET_LEX_STATE(17); + case 12: + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { case ts_symbol_string: - SHIFT(15); + SHIFT(13); default: PARSE_ERROR(1, EXPECT({"string"})); } - case 15: - SET_LEX_STATE(15); + case 13: + SET_LEX_STATE(17); switch (LOOKAHEAD_SYM()) { - case ts_symbol_5: - SHIFT(16); + case ts_aux_token6: + SHIFT(14); default: - PARSE_ERROR(1, EXPECT({"5"})); + PARSE_ERROR(1, EXPECT({"token6"})); } - case 16: - SET_LEX_STATE(8); + case 14: + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: - SHIFT(19); - case ts_symbol_string: - SHIFT(19); + SHIFT(25); case ts_symbol_object: - SHIFT(19); - case ts_symbol_value: - SHIFT(17); - case ts_symbol_4: - SHIFT(8); + SHIFT(25); case ts_symbol_number: - SHIFT(19); - case ts_symbol_1: + SHIFT(25); + case ts_symbol_value: + SHIFT(15); + case ts_symbol_string: + SHIFT(25); + case ts_aux_token5: + SHIFT(12); + case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); + PARSE_ERROR(7, EXPECT({"token1", "number", "value", "token5", "string", "object", "array"})); + } + case 15: + SET_LEX_STATE(2); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + SHIFT(18); + case ts_aux_token3: + SHIFT(16); + case ts_aux_repeat_helper1: + SHIFT(16); + default: + PARSE_ERROR(3, EXPECT({"repeat_helper1", "token3", "token2"})); + } + case 16: + SET_LEX_STATE(7); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token7: + SHIFT(17); + default: + PARSE_ERROR(1, EXPECT({"token7"})); } case 17: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: - SHIFT(14); - case ts_symbol_2: - SHIFT(18); - case ts_symbol_repeat_helper2: - SHIFT(18); + case ts_aux_token3: + REDUCE(ts_symbol_object, 6); + case ts_aux_token2: + REDUCE(ts_symbol_object, 6); default: - PARSE_ERROR(3, EXPECT({"repeat_helper2", "2", "7"})); + PARSE_ERROR(2, EXPECT({"token2", "token3"})); } case 18: - SET_LEX_STATE(6); + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_symbol_6: - REDUCE(ts_symbol_repeat_helper2, 5); + case ts_symbol_string: + SHIFT(19); default: - PARSE_ERROR(1, EXPECT({"6"})); + PARSE_ERROR(1, EXPECT({"string"})); } case 19: - SET_LEX_STATE(2); + SET_LEX_STATE(17); switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: - REDUCE(ts_symbol_value, 1); - case ts_symbol_2: - REDUCE(ts_symbol_value, 1); + case ts_aux_token6: + SHIFT(20); default: - PARSE_ERROR(2, EXPECT({"2", "7"})); + PARSE_ERROR(1, EXPECT({"token6"})); } case 20: - SET_LEX_STATE(2); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: - SHIFT(7); - case ts_symbol_2: - SHIFT(21); - case ts_symbol_repeat_helper1: + case ts_symbol_array: + SHIFT(34); + case ts_symbol_object: + SHIFT(34); + case ts_symbol_number: + SHIFT(34); + case ts_symbol_value: + SHIFT(32); + case ts_symbol_string: + SHIFT(34); + case ts_aux_token5: + SHIFT(26); + case ts_aux_token1: SHIFT(21); default: - PARSE_ERROR(3, EXPECT({"repeat_helper1", "2", "7"})); + PARSE_ERROR(7, EXPECT({"token1", "number", "value", "token5", "string", "object", "array"})); } case 21: - SET_LEX_STATE(4); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_3: - REDUCE(ts_symbol_repeat_helper1, 3); + case ts_symbol_array: + SHIFT(25); + case ts_symbol_object: + SHIFT(25); + case ts_symbol_string: + SHIFT(25); + case ts_aux_token5: + SHIFT(12); + case ts_symbol_number: + SHIFT(25); + case ts_symbol_value: + SHIFT(22); + case ts_aux_token1: + SHIFT(3); default: - PARSE_ERROR(1, EXPECT({"3"})); + PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "number", "object", "array"})); } case 22: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: + case ts_aux_token2: SHIFT(7); - case ts_symbol_2: + case ts_aux_token3: SHIFT(23); - case ts_symbol_repeat_helper1: + case ts_aux_repeat_helper2: SHIFT(23); default: - PARSE_ERROR(3, EXPECT({"repeat_helper1", "2", "7"})); + PARSE_ERROR(3, EXPECT({"repeat_helper2", "token3", "token2"})); } case 23: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { - case ts_symbol_3: + case ts_aux_token4: SHIFT(24); default: - PARSE_ERROR(1, EXPECT({"3"})); + PARSE_ERROR(1, EXPECT({"token4"})); } case 24: + SET_LEX_STATE(9); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token7: + REDUCE(ts_symbol_array, 4); + case ts_aux_token2: + REDUCE(ts_symbol_array, 4); + default: + PARSE_ERROR(2, EXPECT({"token2", "token7"})); + } + case 25: + SET_LEX_STATE(2); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token3: + REDUCE(ts_symbol_value, 1); + case ts_aux_token2: + REDUCE(ts_symbol_value, 1); + default: + PARSE_ERROR(2, EXPECT({"token2", "token3"})); + } + case 26: + SET_LEX_STATE(19); + switch (LOOKAHEAD_SYM()) { + case ts_symbol_string: + SHIFT(27); + default: + PARSE_ERROR(1, EXPECT({"string"})); + } + case 27: + SET_LEX_STATE(17); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token6: + SHIFT(28); + default: + PARSE_ERROR(1, EXPECT({"token6"})); + } + case 28: + SET_LEX_STATE(10); + switch (LOOKAHEAD_SYM()) { + case ts_symbol_array: + SHIFT(25); + case ts_symbol_object: + SHIFT(25); + case ts_symbol_number: + SHIFT(25); + case ts_symbol_value: + SHIFT(29); + case ts_symbol_string: + SHIFT(25); + case ts_aux_token5: + SHIFT(12); + case ts_aux_token1: + SHIFT(3); + default: + PARSE_ERROR(7, EXPECT({"token1", "number", "value", "token5", "string", "object", "array"})); + } + case 29: + SET_LEX_STATE(2); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + SHIFT(18); + case ts_aux_token3: + SHIFT(30); + case ts_aux_repeat_helper1: + SHIFT(30); + default: + PARSE_ERROR(3, EXPECT({"repeat_helper1", "token3", "token2"})); + } + case 30: + SET_LEX_STATE(7); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token7: + SHIFT(31); + default: + PARSE_ERROR(1, EXPECT({"token7"})); + } + case 31: + SET_LEX_STATE(9); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token7: + REDUCE(ts_symbol_object, 6); + case ts_aux_token2: + REDUCE(ts_symbol_object, 6); + default: + PARSE_ERROR(2, EXPECT({"token2", "token7"})); + } + case 32: + SET_LEX_STATE(9); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token7: + REDUCE(ts_aux_repeat_helper1, 4); + case ts_aux_token2: + SHIFT(18); + case ts_aux_repeat_helper1: + SHIFT(33); + default: + PARSE_ERROR(3, EXPECT({"repeat_helper1", "token2", "token7"})); + } + case 33: + SET_LEX_STATE(7); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token7: + REDUCE(ts_aux_repeat_helper1, 5); + default: + PARSE_ERROR(1, EXPECT({"token7"})); + } + case 34: + SET_LEX_STATE(9); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token7: + REDUCE(ts_symbol_value, 1); + case ts_aux_token2: + REDUCE(ts_symbol_value, 1); + default: + PARSE_ERROR(2, EXPECT({"token2", "token7"})); + } + case 35: + SET_LEX_STATE(19); + switch (LOOKAHEAD_SYM()) { + case ts_symbol_string: + SHIFT(36); + default: + PARSE_ERROR(1, EXPECT({"string"})); + } + case 36: + SET_LEX_STATE(17); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token6: + SHIFT(37); + default: + PARSE_ERROR(1, EXPECT({"token6"})); + } + case 37: + SET_LEX_STATE(10); + switch (LOOKAHEAD_SYM()) { + case ts_symbol_array: + SHIFT(25); + case ts_symbol_object: + SHIFT(25); + case ts_symbol_number: + SHIFT(25); + case ts_symbol_value: + SHIFT(38); + case ts_symbol_string: + SHIFT(25); + case ts_aux_token5: + SHIFT(12); + case ts_aux_token1: + SHIFT(3); + default: + PARSE_ERROR(7, EXPECT({"token1", "number", "value", "token5", "string", "object", "array"})); + } + case 38: + SET_LEX_STATE(2); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + SHIFT(18); + case ts_aux_token3: + SHIFT(39); + case ts_aux_repeat_helper1: + SHIFT(39); + default: + PARSE_ERROR(3, EXPECT({"repeat_helper1", "token3", "token2"})); + } + case 39: + SET_LEX_STATE(7); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token7: + SHIFT(40); + default: + PARSE_ERROR(1, EXPECT({"token7"})); + } + case 40: + SET_LEX_STATE(6); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token4: + REDUCE(ts_symbol_object, 6); + case ts_aux_token2: + REDUCE(ts_symbol_object, 6); + default: + PARSE_ERROR(2, EXPECT({"token2", "token4"})); + } + case 41: + SET_LEX_STATE(6); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token4: + REDUCE(ts_aux_repeat_helper2, 2); + case ts_aux_token2: + SHIFT(7); + case ts_aux_repeat_helper2: + SHIFT(42); + default: + PARSE_ERROR(3, EXPECT({"repeat_helper2", "token2", "token4"})); + } + case 42: + SET_LEX_STATE(4); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token4: + REDUCE(ts_aux_repeat_helper2, 3); + default: + PARSE_ERROR(1, EXPECT({"token4"})); + } + case 43: + SET_LEX_STATE(6); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token4: + REDUCE(ts_symbol_value, 1); + case ts_aux_token2: + REDUCE(ts_symbol_value, 1); + default: + PARSE_ERROR(2, EXPECT({"token2", "token4"})); + } + case 44: + SET_LEX_STATE(2); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + SHIFT(7); + case ts_aux_token3: + SHIFT(45); + case ts_aux_repeat_helper2: + SHIFT(45); + default: + PARSE_ERROR(3, EXPECT({"repeat_helper2", "token3", "token2"})); + } + case 45: + SET_LEX_STATE(4); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token4: + SHIFT(46); + default: + PARSE_ERROR(1, EXPECT({"token4"})); + } + case 46: SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: @@ -411,63 +675,63 @@ static TSParseResult ts_parse(const char *input) { default: PARSE_ERROR(1, EXPECT({"__END__"})); } - case 25: - SET_LEX_STATE(17); + case 47: + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { case ts_symbol_string: - SHIFT(26); + SHIFT(48); default: PARSE_ERROR(1, EXPECT({"string"})); } - case 26: - SET_LEX_STATE(15); + case 48: + SET_LEX_STATE(17); switch (LOOKAHEAD_SYM()) { - case ts_symbol_5: - SHIFT(27); + case ts_aux_token6: + SHIFT(49); default: - PARSE_ERROR(1, EXPECT({"5"})); + PARSE_ERROR(1, EXPECT({"token6"})); } - case 27: - SET_LEX_STATE(8); + case 49: + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: - SHIFT(19); - case ts_symbol_string: - SHIFT(19); + SHIFT(25); case ts_symbol_object: - SHIFT(19); - case ts_symbol_value: - SHIFT(28); - case ts_symbol_4: - SHIFT(8); + SHIFT(25); case ts_symbol_number: - SHIFT(19); - case ts_symbol_1: + SHIFT(25); + case ts_symbol_value: + SHIFT(50); + case ts_symbol_string: + SHIFT(25); + case ts_aux_token5: + SHIFT(12); + case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); + PARSE_ERROR(7, EXPECT({"token1", "number", "value", "token5", "string", "object", "array"})); } - case 28: + case 50: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_7: - SHIFT(14); - case ts_symbol_2: - SHIFT(29); - case ts_symbol_repeat_helper2: - SHIFT(29); + case ts_aux_token2: + SHIFT(18); + case ts_aux_token3: + SHIFT(51); + case ts_aux_repeat_helper1: + SHIFT(51); default: - PARSE_ERROR(3, EXPECT({"repeat_helper2", "2", "7"})); + PARSE_ERROR(3, EXPECT({"repeat_helper1", "token3", "token2"})); } - case 29: - SET_LEX_STATE(6); + case 51: + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { - case ts_symbol_6: - SHIFT(30); + case ts_aux_token7: + SHIFT(52); default: - PARSE_ERROR(1, EXPECT({"6"})); + PARSE_ERROR(1, EXPECT({"token7"})); } - case 30: + case 52: SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: @@ -475,7 +739,7 @@ static TSParseResult ts_parse(const char *input) { default: PARSE_ERROR(1, EXPECT({"__END__"})); } - case 31: + case 53: SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: diff --git a/spec/runtime/arithmetic_spec.cpp b/spec/runtime/arithmetic_spec.cpp index 82cb0d93..4732a8e0 100644 --- a/spec/runtime/arithmetic_spec.cpp +++ b/spec/runtime/arithmetic_spec.cpp @@ -36,7 +36,7 @@ describe("arithmetic", []() { TSDocumentSetText(document, "x*(y+z)"); AssertThat(string(TSDocumentToString(document)), Equals( - "(expression (term (factor (variable)) (times) (factor (1) (expression (term (factor (variable))) (plus) (term (factor (variable)))) (2))))")); + "(expression (term (factor (variable)) (times) (factor (token1) (expression (term (factor (variable))) (plus) (term (factor (variable)))) (token2))))")); }); }); diff --git a/spec/runtime/json_spec.cpp b/spec/runtime/json_spec.cpp index 44e620eb..419637f8 100644 --- a/spec/runtime/json_spec.cpp +++ b/spec/runtime/json_spec.cpp @@ -19,11 +19,16 @@ describe("json", []() { }); it("parses objects", [&]() { - TSDocumentSetText(document, "{\"key1\":1}"); - AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (2) (6)))")); - TSDocumentSetText(document, "{\"key1\":1,\"key2\":2}"); - AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (repeat_helper2 (7) (string) (5) (value (number)) (2)) (6)))")); + AssertThat(string(TSDocumentToString(document)), Equals("(value (object (token5) (string) (token6) (value (number)) (repeat_helper1 (token2) (string) (token6) (value (number))) (token7)))")); + + TSDocumentSetText(document, "{\"key1\":1}"); + AssertThat(string(TSDocumentToString(document)), Equals("(value (object (token5) (string) (token6) (value (number)) (token3) (token7)))")); + }); + + it("parses arrays", [&]() { + TSDocumentSetText(document, "[1,2,3]"); + AssertThat(string(TSDocumentToString(document)), Equals("(value (array (token1) (value (number)) (repeat_helper2 (token2) (value (number)) (repeat_helper2 (token2) (value (number)))) (token4)))")); }); }); diff --git a/src/compiler/generate_code/c_code.cpp b/src/compiler/generate_code/c_code.cpp index 2ae8485e..98627495 100644 --- a/src/compiler/generate_code/c_code.cpp +++ b/src/compiler/generate_code/c_code.cpp @@ -84,7 +84,10 @@ namespace tree_sitter { {} string symbol_id(rules::Symbol symbol) { - return "ts_symbol_" + symbol.name; + if (symbol.is_auxiliary) + return "ts_aux_" + symbol.name; + else + return "ts_symbol_" + symbol.name; } string character_code(char character) { diff --git a/src/compiler/grammar.cpp b/src/compiler/grammar.cpp index 097c6611..9967ca7c 100644 --- a/src/compiler/grammar.cpp +++ b/src/compiler/grammar.cpp @@ -16,11 +16,18 @@ namespace tree_sitter { rules(rules), start_rule_name(start_rule_name) {} + Grammar::Grammar(std::string start_rule_name, rule_map &rules, rule_map &aux_rules) : + rules(rules), + aux_rules(aux_rules), + start_rule_name(start_rule_name) {} + const rules::rule_ptr Grammar::rule(const rules::Symbol &symbol) const { - auto iter = rules.find(symbol.name); - return (iter == rules.end()) ? - rules::rule_ptr(nullptr) : - iter->second; + auto map = symbol.is_auxiliary ? aux_rules : rules; + auto iter = map.find(symbol.name); + if (iter != map.end()) + return iter->second; + else + return rules::rule_ptr(); } vector Grammar::rule_names() const { @@ -34,21 +41,30 @@ namespace tree_sitter { bool Grammar::operator==(const Grammar &other) const { if (other.start_rule_name != start_rule_name) return false; if (other.rules.size() != rules.size()) return false; + if (other.aux_rules.size() != aux_rules.size()) return false; + for (auto pair : rules) { auto other_pair = other.rules.find(pair.first); if (other_pair == other.rules.end()) return false; - auto orr = other_pair->second->to_string();; if (!other_pair->second->operator==(*pair.second)) return false; } + for (auto pair : aux_rules) { + auto other_pair = other.aux_rules.find(pair.first); + if (other_pair == other.aux_rules.end()) return false; + if (!other_pair->second->operator==(*pair.second)) return false; + } + return true; } bool Grammar::has_definition(const rules::Symbol &symbol) const { - return rules.find(symbol.name) != rules.end(); + return rule(symbol).get() != nullptr; } ostream& operator<<(ostream &stream, const Grammar &grammar) { - stream << string("# "); + stream << pair.second; + started = true; + } + stream << string("}"); + return stream << string(">"); } } diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h index 71cd1d14..8d743ebd 100644 --- a/src/compiler/grammar.h +++ b/src/compiler/grammar.h @@ -8,16 +8,21 @@ namespace tree_sitter { class Grammar { typedef std::initializer_list> rule_map_init_list; + typedef const std::unordered_map rule_map; + public: Grammar(const rule_map_init_list &rules); - Grammar(std::string start_rule_name, const std::unordered_map &rules); - const rules::rule_ptr rule(const rules::Symbol &) const; + Grammar(std::string start_rule_name, rule_map &rules); + Grammar(std::string start_rule_name, rule_map &rules, rule_map &aux_rules); + const std::string start_rule_name; std::vector rule_names() const; bool operator==(const Grammar &other) const; bool has_definition(const rules::Symbol &symbol) const; + const rules::rule_ptr rule(const rules::Symbol &symbol) const; - const std::unordered_map rules; + rule_map rules; + rule_map aux_rules; }; std::ostream& operator<<(std::ostream &stream, const Grammar &grammar); diff --git a/src/compiler/prepare_grammar/expand_repeats.cpp b/src/compiler/prepare_grammar/expand_repeats.cpp index 2405f8a8..a837343d 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cpp +++ b/src/compiler/prepare_grammar/expand_repeats.cpp @@ -13,7 +13,7 @@ namespace tree_sitter { rule_ptr value; unordered_map aux_rules; - rule_ptr apply(const rule_ptr rule) { + rule_ptr apply(const rule_ptr &rule) { rule->accept(*this); return value; } @@ -21,7 +21,7 @@ namespace tree_sitter { rule_ptr make_repeat_helper(string name, const rule_ptr &rule) { return seq({ rule, - choice({ sym(name), blank() }) + choice({ aux_sym(name), blank() }) }); } @@ -29,7 +29,7 @@ namespace tree_sitter { rule_ptr inner_rule = apply(rule->content); string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1); aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) }); - value = sym(helper_rule_name); + value = aux_sym(helper_rule_name); } void visit(const Seq *rule) { @@ -48,11 +48,11 @@ namespace tree_sitter { Grammar expand_repeats(const Grammar &grammar) { unordered_map result; RepeatExpander visitor; + for (auto pair : grammar.rules) result.insert({ pair.first, visitor.apply(pair.second) }); - for (auto pair : visitor.aux_rules) - result.insert(pair); - return Grammar(grammar.start_rule_name, result); + + return Grammar(grammar.start_rule_name, result, visitor.aux_rules); } } } \ No newline at end of file diff --git a/src/compiler/prepare_grammar/extract_tokens.cpp b/src/compiler/prepare_grammar/extract_tokens.cpp index 6287e32c..ea9438b7 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cpp +++ b/src/compiler/prepare_grammar/extract_tokens.cpp @@ -29,7 +29,7 @@ namespace tree_sitter { return value; } else { string token_name = add_token(rule); - return sym(token_name); + return aux_sym(token_name); } } @@ -37,7 +37,7 @@ namespace tree_sitter { for (auto pair : tokens) if (*pair.second == *rule) return pair.first; - string name = to_string(tokens.size() + 1); + string name = "token" + to_string(tokens.size() + 1); tokens.insert({ name, rule }); return name; } @@ -62,6 +62,7 @@ namespace tree_sitter { pair extract_tokens(const Grammar &input_grammar) { TokenExtractor extractor; unordered_map rules; + unordered_map aux_rules; unordered_map tokens; for (auto pair : input_grammar.rules) { @@ -73,13 +74,20 @@ namespace tree_sitter { else tokens.insert({ name, rule }); } - - for (auto pair : extractor.tokens) - tokens.insert(pair); - + + for (auto pair : input_grammar.aux_rules) { + string name = pair.first; + rule_ptr rule = pair.second; + auto new_rule = extractor.initial_apply(rule); + if (new_rule.get()) + aux_rules.insert({ name, new_rule }); + else + tokens.insert({ name, rule }); + } + return { Grammar(input_grammar.start_rule_name, rules), - Grammar("", tokens) + Grammar("", tokens, extractor.tokens) }; } } diff --git a/src/compiler/prepare_grammar/perform.cpp b/src/compiler/prepare_grammar/perform.cpp index 9606ab76..22d1ddfc 100644 --- a/src/compiler/prepare_grammar/perform.cpp +++ b/src/compiler/prepare_grammar/perform.cpp @@ -7,8 +7,10 @@ using std::pair; namespace tree_sitter { namespace prepare_grammar { pair perform(const Grammar &input_grammar) { - auto rule_grammar = expand_repeats(input_grammar); - return prepare_grammar::extract_tokens(rule_grammar); + auto grammars = prepare_grammar::extract_tokens(input_grammar); + auto rule_grammar = expand_repeats(grammars.first); + auto lex_grammar = grammars.second; + return { rule_grammar, lex_grammar }; } } } diff --git a/src/compiler/rules/rules.cpp b/src/compiler/rules/rules.cpp index a1f95433..de94e54d 100644 --- a/src/compiler/rules/rules.cpp +++ b/src/compiler/rules/rules.cpp @@ -47,7 +47,11 @@ namespace tree_sitter { } rule_ptr sym(const string &name) { - return make_shared(name); + return make_shared(name, false); + } + + rule_ptr aux_sym(const string &name) { + return make_shared(name, true); } } } diff --git a/src/compiler/rules/rules.h b/src/compiler/rules/rules.h index 46732977..1d8a86bb 100644 --- a/src/compiler/rules/rules.h +++ b/src/compiler/rules/rules.h @@ -24,6 +24,7 @@ namespace tree_sitter { rule_ptr seq(const std::initializer_list &rules); rule_ptr str(const std::string &value); rule_ptr sym(const std::string &name); + rule_ptr aux_sym(const std::string &name); } } diff --git a/src/compiler/rules/symbol.cpp b/src/compiler/rules/symbol.cpp index 2bc7e837..5ec8811d 100644 --- a/src/compiler/rules/symbol.cpp +++ b/src/compiler/rules/symbol.cpp @@ -5,15 +5,16 @@ using std::hash; namespace tree_sitter { namespace rules { - Symbol::Symbol(const std::string &name) : name(name) {}; + Symbol::Symbol(const std::string &name) : name(name), is_auxiliary(false) {}; + Symbol::Symbol(const std::string &name, bool is_auxiliary) : name(name), is_auxiliary(is_auxiliary) {}; bool Symbol::operator==(const Rule &rule) const { const Symbol *other = dynamic_cast(&rule); - return other && (other->name == name); + return other && (other->name == name) && (other->is_auxiliary == is_auxiliary); } size_t Symbol::hash_code() const { - return typeid(this).hash_code() ^ hash()(name); + return typeid(this).hash_code() ^ hash()(name) ^ hash()(is_auxiliary); } rule_ptr Symbol::copy() const { @@ -21,11 +22,15 @@ namespace tree_sitter { } string Symbol::to_string() const { - return string("#"; + return is_auxiliary ? + string("#" : + string("#"; } bool Symbol::operator<(const Symbol &other) const { - return name < other.name; + if (name < other.name) return true; + if (other.name < name) return false; + return is_auxiliary < other.is_auxiliary; } void Symbol::accept(Visitor &visitor) const { diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index 0ad5bbdf..ba8b15c2 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -8,6 +8,7 @@ namespace tree_sitter { class Symbol : public Rule { public: Symbol(const std::string &name); + Symbol(const std::string &name, bool is_auxiliary); bool operator==(const Rule& other) const; size_t hash_code() const; @@ -17,6 +18,7 @@ namespace tree_sitter { bool operator<(const Symbol &other) const; std::string name; + bool is_auxiliary; }; } }