From 70173512f1cae61684450b19d51b848fbbaa8075 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 12 Feb 2014 18:31:57 -0800 Subject: [PATCH] Don't extract isolated blanks from grammars into tokens --- spec/compiler/prepare_grammar_spec.cpp | 12 + spec/fixtures/parsers/json.c | 634 ++++++++---------- .../prepare_grammar/extract_tokens.cpp | 2 +- 3 files changed, 292 insertions(+), 356 deletions(-) diff --git a/spec/compiler/prepare_grammar_spec.cpp b/spec/compiler/prepare_grammar_spec.cpp index e93d5d43..996ca0b8 100644 --- a/spec/compiler/prepare_grammar_spec.cpp +++ b/spec/compiler/prepare_grammar_spec.cpp @@ -102,6 +102,18 @@ describe("preparing a grammar", []() { { "token1", repeat(seq({ str("a"), str("b") })) }, }))); }); + + it("does not extract blanks into tokens", [&]() { + pair result = perform(Grammar({ + { "rule1", choice({ sym("rule2"), blank() }) }, + })); + + AssertThat(result.first, Equals(Grammar("rule1", { + { "rule1", choice({ sym("rule2"), blank() }) }, + }))); + + AssertThat(result.second, Equals(Grammar("", map()))); + }); }); END_TEST \ No newline at end of file diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index 7ac34365..8208391f 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -16,7 +16,6 @@ enum ts_symbol { ts_aux_token4, ts_aux_token5, ts_aux_token6, - ts_aux_token7, }; static const char *ts_symbol_names[] = { @@ -34,7 +33,6 @@ static const char *ts_symbol_names[] = { "token4", "token5", "token6", - "token7", }; static void ts_lex(TSParser *parser) { @@ -49,111 +47,107 @@ static void ts_lex(TSParser *parser) { case 2: if (LOOKAHEAD_CHAR() == ',') ADVANCE(3); - ACCEPT_TOKEN(ts_aux_token3); + if (LOOKAHEAD_CHAR() == ']') + ADVANCE(4); + LEX_ERROR(2, EXPECT({",", "]"})); case 3: ACCEPT_TOKEN(ts_aux_token2); case 4: - if (LOOKAHEAD_CHAR() == ']') - ADVANCE(5); - LEX_ERROR(1, EXPECT({"]"})); + ACCEPT_TOKEN(ts_aux_token3); case 5: - ACCEPT_TOKEN(ts_aux_token4); + if (LOOKAHEAD_CHAR() == ']') + ADVANCE(4); + LEX_ERROR(1, EXPECT({"]"})); case 6: if (LOOKAHEAD_CHAR() == ',') ADVANCE(3); - if (LOOKAHEAD_CHAR() == ']') - ADVANCE(5); - LEX_ERROR(2, EXPECT({",", "]"})); - case 7: if (LOOKAHEAD_CHAR() == '}') - ADVANCE(8); - LEX_ERROR(1, EXPECT({"}"})); - case 8: - ACCEPT_TOKEN(ts_aux_token7); - case 9: - if (LOOKAHEAD_CHAR() == ',') - ADVANCE(3); - if (LOOKAHEAD_CHAR() == '}') - ADVANCE(8); + ADVANCE(7); LEX_ERROR(2, EXPECT({",", "}"})); - case 10: + case 7: + ACCEPT_TOKEN(ts_aux_token6); + case 8: + if (LOOKAHEAD_CHAR() == '}') + ADVANCE(7); + LEX_ERROR(1, EXPECT({"}"})); + case 9: if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(11); + ADVANCE(10); if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(17); + ADVANCE(16); if (LOOKAHEAD_CHAR() == '[') - ADVANCE(18); + ADVANCE(17); if (LOOKAHEAD_CHAR() == '{') - ADVANCE(19); + ADVANCE(18); LEX_ERROR(4, EXPECT({"\"", "0-9", "[", "{"})); + case 10: + if (!((LOOKAHEAD_CHAR() == '\"') || + (LOOKAHEAD_CHAR() == '\\'))) + ADVANCE(11); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(13); + if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') + ADVANCE(15); + LEX_ERROR(2, EXPECT({"-!", "#-"})); case 11: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) + ADVANCE(11); + if (LOOKAHEAD_CHAR() == '\"') ADVANCE(12); if (LOOKAHEAD_CHAR() == '\\') - ADVANCE(14); + ADVANCE(13); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(16); - LEX_ERROR(2, EXPECT({"-!", "#-"})); + ADVANCE(15); + LEX_ERROR(1, EXPECT({""})); case 12: + ACCEPT_TOKEN(ts_symbol_string); + case 13: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(12); + ADVANCE(11); if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(13); - if (LOOKAHEAD_CHAR() == '\\') ADVANCE(14); + if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') + ADVANCE(11); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(13); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(16); + ADVANCE(15); LEX_ERROR(1, EXPECT({""})); - case 13: - ACCEPT_TOKEN(ts_symbol_string); case 14: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(12); + ADVANCE(11); if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(15); - if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') ADVANCE(12); if (LOOKAHEAD_CHAR() == '\\') - ADVANCE(14); - if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(16); - LEX_ERROR(1, EXPECT({""})); - case 15: - if (!((LOOKAHEAD_CHAR() == '\"') || - (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(12); - if (LOOKAHEAD_CHAR() == '\"') ADVANCE(13); - if (LOOKAHEAD_CHAR() == '\\') - ADVANCE(14); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(16); + ADVANCE(15); ACCEPT_TOKEN(ts_symbol_string); - case 16: - if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(12); - LEX_ERROR(1, EXPECT({"\""})); - case 17: - if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(17); - ACCEPT_TOKEN(ts_symbol_number); - case 18: - ACCEPT_TOKEN(ts_aux_token1); - case 19: - ACCEPT_TOKEN(ts_aux_token5); - case 20: - if (LOOKAHEAD_CHAR() == ':') - ADVANCE(21); - LEX_ERROR(1, EXPECT({":"})); - case 21: - ACCEPT_TOKEN(ts_aux_token6); - case 22: + case 15: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(11); LEX_ERROR(1, EXPECT({"\""})); + case 16: + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(16); + ACCEPT_TOKEN(ts_symbol_number); + case 17: + ACCEPT_TOKEN(ts_aux_token1); + case 18: + ACCEPT_TOKEN(ts_aux_token4); + case 19: + if (LOOKAHEAD_CHAR() == ':') + ADVANCE(20); + LEX_ERROR(1, EXPECT({":"})); + case 20: + ACCEPT_TOKEN(ts_aux_token5); + case 21: + if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(10); + LEX_ERROR(1, EXPECT({"\""})); default: LEX_PANIC(); } @@ -164,7 +158,7 @@ static TSParseResult ts_parse(const char *input) { START_PARSER(); switch (PARSE_STATE()) { case 0: - SET_LEX_STATE(10); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: SHIFT(1); @@ -178,8 +172,8 @@ static TSParseResult ts_parse(const char *input) { SHIFT(2); case ts_aux_token1: SHIFT(3); - case ts_aux_token5: - SHIFT(48); + case ts_aux_token4: + SHIFT(42); default: PARSE_PANIC(); } @@ -200,7 +194,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_PANIC(); } case 3: - SET_LEX_STATE(10); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: SHIFT(4); @@ -213,9 +207,9 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_value: SHIFT(5); case ts_aux_token1: + SHIFT(11); + case ts_aux_token4: SHIFT(16); - case ts_aux_token5: - SHIFT(20); default: PARSE_PANIC(); } @@ -237,14 +231,14 @@ static TSParseResult ts_parse(const char *input) { case ts_aux_token2: SHIFT(8); case ts_aux_token3: - SHIFT(6); + SHIFT(41); default: PARSE_PANIC(); } case 6: - SET_LEX_STATE(4); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { - case ts_aux_token4: + case ts_aux_token3: SHIFT(7); default: PARSE_PANIC(); @@ -258,57 +252,47 @@ static TSParseResult ts_parse(const char *input) { PARSE_PANIC(); } case 8: - SET_LEX_STATE(10); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: - SHIFT(9); + SHIFT(4); case ts_symbol_number: - SHIFT(9); + SHIFT(4); case ts_symbol_object: - SHIFT(9); + SHIFT(4); case ts_symbol_string: - SHIFT(9); + SHIFT(4); case ts_symbol_value: - SHIFT(10); + SHIFT(9); case ts_aux_token1: - SHIFT(12); - case ts_aux_token5: - SHIFT(42); + SHIFT(11); + case ts_aux_token4: + SHIFT(16); default: PARSE_PANIC(); } case 9: - SET_LEX_STATE(6); - switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - case ts_aux_token4: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - default: - PARSE_PANIC(); - } - case 10: - SET_LEX_STATE(6); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { case ts_aux_repeat_helper1: - SHIFT(11); + SHIFT(10); case ts_aux_token2: SHIFT(8); - case ts_aux_token4: + case ts_aux_token3: REDUCE(ts_aux_repeat_helper1, 2, COLLAPSE({1, 0})); default: PARSE_PANIC(); } - case 11: - SET_LEX_STATE(4); + case 10: + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { - case ts_aux_token4: + case ts_aux_token3: REDUCE(ts_aux_repeat_helper1, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } - case 12: - SET_LEX_STATE(10); + case 11: + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: SHIFT(4); @@ -319,104 +303,114 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_string: SHIFT(4); case ts_symbol_value: - SHIFT(13); + SHIFT(12); case ts_aux_token1: + SHIFT(11); + case ts_aux_token4: SHIFT(16); - case ts_aux_token5: - SHIFT(20); + default: + PARSE_PANIC(); + } + case 12: + SET_LEX_STATE(2); + switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper1: + SHIFT(13); + case ts_aux_token2: + SHIFT(8); + case ts_aux_token3: + SHIFT(15); default: PARSE_PANIC(); } case 13: - SET_LEX_STATE(2); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { - case ts_aux_repeat_helper1: - SHIFT(14); - case ts_aux_token2: - SHIFT(8); case ts_aux_token3: SHIFT(14); default: PARSE_PANIC(); } case 14: - SET_LEX_STATE(4); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_token4: - SHIFT(15); + case ts_aux_token2: + REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + case ts_aux_token3: + REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); default: PARSE_PANIC(); } case 15: - SET_LEX_STATE(6); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { case ts_aux_token2: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); - case ts_aux_token4: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + REDUCE(ts_symbol_array, 3, COLLAPSE({1, 0, 1})); + case ts_aux_token3: + REDUCE(ts_symbol_array, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } case 16: - SET_LEX_STATE(10); + SET_LEX_STATE(21); switch (LOOKAHEAD_SYM()) { - case ts_symbol_array: - SHIFT(4); - case ts_symbol_number: - SHIFT(4); - case ts_symbol_object: - SHIFT(4); case ts_symbol_string: - SHIFT(4); - case ts_symbol_value: SHIFT(17); - case ts_aux_token1: - SHIFT(16); - case ts_aux_token5: - SHIFT(20); default: PARSE_PANIC(); } case 17: - SET_LEX_STATE(2); + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_aux_repeat_helper1: - SHIFT(18); - case ts_aux_token2: - SHIFT(8); - case ts_aux_token3: + case ts_aux_token5: SHIFT(18); default: PARSE_PANIC(); } case 18: - SET_LEX_STATE(4); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token4: + case ts_symbol_array: SHIFT(19); + case ts_symbol_number: + SHIFT(19); + case ts_symbol_object: + SHIFT(19); + case ts_symbol_string: + SHIFT(19); + case ts_symbol_value: + SHIFT(20); + case ts_aux_token1: + SHIFT(28); + case ts_aux_token4: + SHIFT(33); default: PARSE_PANIC(); } case 19: - SET_LEX_STATE(2); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_aux_token2: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); - case ts_aux_token3: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + REDUCE(ts_symbol_value, 1, COLLAPSE({0})); + case ts_aux_token6: + REDUCE(ts_symbol_value, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 20: - SET_LEX_STATE(22); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: + case ts_aux_repeat_helper2: SHIFT(21); + case ts_aux_token2: + SHIFT(23); + case ts_aux_token6: + SHIFT(40); default: PARSE_PANIC(); } case 21: - SET_LEX_STATE(20); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_aux_token6: SHIFT(22); @@ -424,123 +418,73 @@ static TSParseResult ts_parse(const char *input) { PARSE_PANIC(); } case 22: - SET_LEX_STATE(10); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_array: - SHIFT(4); - case ts_symbol_number: - SHIFT(4); - case ts_symbol_object: - SHIFT(4); - case ts_symbol_string: - SHIFT(4); - case ts_symbol_value: - SHIFT(23); - case ts_aux_token1: - SHIFT(16); - case ts_aux_token5: - SHIFT(20); + case ts_aux_token2: + REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); + case ts_aux_token3: + REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: PARSE_PANIC(); } case 23: - SET_LEX_STATE(2); + SET_LEX_STATE(21); switch (LOOKAHEAD_SYM()) { - case ts_aux_repeat_helper2: - SHIFT(24); - case ts_aux_token2: - SHIFT(26); - case ts_aux_token3: + case ts_symbol_string: SHIFT(24); default: PARSE_PANIC(); } case 24: - SET_LEX_STATE(7); + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_aux_token7: + case ts_aux_token5: SHIFT(25); default: PARSE_PANIC(); } case 25: - SET_LEX_STATE(2); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); - case ts_aux_token3: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); + case ts_symbol_array: + SHIFT(19); + case ts_symbol_number: + SHIFT(19); + case ts_symbol_object: + SHIFT(19); + case ts_symbol_string: + SHIFT(19); + case ts_symbol_value: + SHIFT(26); + case ts_aux_token1: + SHIFT(28); + case ts_aux_token4: + SHIFT(33); default: PARSE_PANIC(); } case 26: - SET_LEX_STATE(22); - switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(27); - default: - PARSE_PANIC(); - } - case 27: - SET_LEX_STATE(20); - switch (LOOKAHEAD_SYM()) { - case ts_aux_token6: - SHIFT(28); - default: - PARSE_PANIC(); - } - case 28: - SET_LEX_STATE(10); - switch (LOOKAHEAD_SYM()) { - case ts_symbol_array: - SHIFT(29); - case ts_symbol_number: - SHIFT(29); - case ts_symbol_object: - SHIFT(29); - case ts_symbol_string: - SHIFT(29); - case ts_symbol_value: - SHIFT(30); - case ts_aux_token1: - SHIFT(32); - case ts_aux_token5: - SHIFT(36); - default: - PARSE_PANIC(); - } - case 29: - SET_LEX_STATE(9); - switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - case ts_aux_token7: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - default: - PARSE_PANIC(); - } - case 30: - SET_LEX_STATE(9); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_aux_repeat_helper2: - SHIFT(31); + SHIFT(27); case ts_aux_token2: - SHIFT(26); - case ts_aux_token7: + SHIFT(23); + case ts_aux_token6: REDUCE(ts_aux_repeat_helper2, 4, COLLAPSE({1, 0, 1, 0})); default: PARSE_PANIC(); } - case 31: - SET_LEX_STATE(7); + case 27: + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_aux_token7: + case ts_aux_token6: REDUCE(ts_aux_repeat_helper2, 5, COLLAPSE({1, 0, 1, 0, 1})); default: PARSE_PANIC(); } - case 32: - SET_LEX_STATE(10); + case 28: + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: SHIFT(4); @@ -551,30 +495,66 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_string: SHIFT(4); case ts_symbol_value: - SHIFT(33); + SHIFT(29); case ts_aux_token1: + SHIFT(11); + case ts_aux_token4: SHIFT(16); - case ts_aux_token5: - SHIFT(20); + default: + PARSE_PANIC(); + } + case 29: + SET_LEX_STATE(2); + switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper1: + SHIFT(30); + case ts_aux_token2: + SHIFT(8); + case ts_aux_token3: + SHIFT(32); + default: + PARSE_PANIC(); + } + case 30: + SET_LEX_STATE(5); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token3: + SHIFT(31); + default: + PARSE_PANIC(); + } + case 31: + SET_LEX_STATE(6); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + case ts_aux_token6: + REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + default: + PARSE_PANIC(); + } + case 32: + SET_LEX_STATE(6); + switch (LOOKAHEAD_SYM()) { + case ts_aux_token2: + REDUCE(ts_symbol_array, 3, COLLAPSE({1, 0, 1})); + case ts_aux_token6: + REDUCE(ts_symbol_array, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } case 33: - SET_LEX_STATE(2); + SET_LEX_STATE(21); switch (LOOKAHEAD_SYM()) { - case ts_aux_repeat_helper1: - SHIFT(34); - case ts_aux_token2: - SHIFT(8); - case ts_aux_token3: + case ts_symbol_string: SHIFT(34); default: PARSE_PANIC(); } case 34: - SET_LEX_STATE(4); + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_aux_token4: + case ts_aux_token5: SHIFT(35); default: PARSE_PANIC(); @@ -582,23 +562,37 @@ static TSParseResult ts_parse(const char *input) { case 35: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); - case ts_aux_token7: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + case ts_symbol_array: + SHIFT(19); + case ts_symbol_number: + SHIFT(19); + case ts_symbol_object: + SHIFT(19); + case ts_symbol_string: + SHIFT(19); + case ts_symbol_value: + SHIFT(36); + case ts_aux_token1: + SHIFT(28); + case ts_aux_token4: + SHIFT(33); default: PARSE_PANIC(); } case 36: - SET_LEX_STATE(22); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: + case ts_aux_repeat_helper2: SHIFT(37); + case ts_aux_token2: + SHIFT(23); + case ts_aux_token6: + SHIFT(39); default: PARSE_PANIC(); } case 37: - SET_LEX_STATE(20); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_aux_token6: SHIFT(38); @@ -606,57 +600,45 @@ static TSParseResult ts_parse(const char *input) { PARSE_PANIC(); } case 38: - SET_LEX_STATE(10); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_symbol_array: - SHIFT(4); - case ts_symbol_number: - SHIFT(4); - case ts_symbol_object: - SHIFT(4); - case ts_symbol_string: - SHIFT(4); - case ts_symbol_value: - SHIFT(39); - case ts_aux_token1: - SHIFT(16); - case ts_aux_token5: - SHIFT(20); + case ts_aux_token2: + REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); + case ts_aux_token6: + REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: PARSE_PANIC(); } case 39: - SET_LEX_STATE(2); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_repeat_helper2: - SHIFT(40); case ts_aux_token2: - SHIFT(26); - case ts_aux_token3: - SHIFT(40); + REDUCE(ts_symbol_object, 5, COLLAPSE({1, 0, 1, 0, 1})); + case ts_aux_token6: + REDUCE(ts_symbol_object, 5, COLLAPSE({1, 0, 1, 0, 1})); default: PARSE_PANIC(); } case 40: - SET_LEX_STATE(7); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_token7: - SHIFT(41); + case ts_aux_token2: + REDUCE(ts_symbol_object, 5, COLLAPSE({1, 0, 1, 0, 1})); + case ts_aux_token3: + REDUCE(ts_symbol_object, 5, COLLAPSE({1, 0, 1, 0, 1})); default: PARSE_PANIC(); } case 41: - SET_LEX_STATE(9); + SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); - case ts_aux_token7: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); + case ts_aux_end: + REDUCE(ts_symbol_array, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } case 42: - SET_LEX_STATE(22); + SET_LEX_STATE(21); switch (LOOKAHEAD_SYM()) { case ts_symbol_string: SHIFT(43); @@ -664,124 +646,66 @@ static TSParseResult ts_parse(const char *input) { PARSE_PANIC(); } case 43: - SET_LEX_STATE(20); + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_aux_token6: + case ts_aux_token5: SHIFT(44); default: PARSE_PANIC(); } case 44: - SET_LEX_STATE(10); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_array: - SHIFT(4); + SHIFT(19); case ts_symbol_number: - SHIFT(4); + SHIFT(19); case ts_symbol_object: - SHIFT(4); + SHIFT(19); case ts_symbol_string: - SHIFT(4); + SHIFT(19); case ts_symbol_value: SHIFT(45); case ts_aux_token1: - SHIFT(16); - case ts_aux_token5: - SHIFT(20); + SHIFT(28); + case ts_aux_token4: + SHIFT(33); default: PARSE_PANIC(); } case 45: - SET_LEX_STATE(2); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_aux_repeat_helper2: SHIFT(46); case ts_aux_token2: - SHIFT(26); - case ts_aux_token3: - SHIFT(46); + SHIFT(23); + case ts_aux_token6: + SHIFT(48); default: PARSE_PANIC(); } case 46: - SET_LEX_STATE(7); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_aux_token7: + case ts_aux_token6: SHIFT(47); default: PARSE_PANIC(); } case 47: - SET_LEX_STATE(6); + SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); - case ts_aux_token4: + case ts_aux_end: REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: PARSE_PANIC(); } case 48: - SET_LEX_STATE(22); - switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(49); - default: - PARSE_PANIC(); - } - case 49: - SET_LEX_STATE(20); - switch (LOOKAHEAD_SYM()) { - case ts_aux_token6: - SHIFT(50); - default: - PARSE_PANIC(); - } - case 50: - SET_LEX_STATE(10); - switch (LOOKAHEAD_SYM()) { - case ts_symbol_array: - SHIFT(4); - case ts_symbol_number: - SHIFT(4); - case ts_symbol_object: - SHIFT(4); - case ts_symbol_string: - SHIFT(4); - case ts_symbol_value: - SHIFT(51); - case ts_aux_token1: - SHIFT(16); - case ts_aux_token5: - SHIFT(20); - default: - PARSE_PANIC(); - } - case 51: - SET_LEX_STATE(2); - switch (LOOKAHEAD_SYM()) { - case ts_aux_repeat_helper2: - SHIFT(52); - case ts_aux_token2: - SHIFT(26); - case ts_aux_token3: - SHIFT(52); - default: - PARSE_PANIC(); - } - case 52: - SET_LEX_STATE(7); - switch (LOOKAHEAD_SYM()) { - case ts_aux_token7: - SHIFT(53); - default: - PARSE_PANIC(); - } - case 53: SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { case ts_aux_end: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); + REDUCE(ts_symbol_object, 5, COLLAPSE({1, 0, 1, 0, 1})); default: PARSE_PANIC(); } diff --git a/src/compiler/prepare_grammar/extract_tokens.cpp b/src/compiler/prepare_grammar/extract_tokens.cpp index 6d7b62b2..2e9f8f43 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cpp +++ b/src/compiler/prepare_grammar/extract_tokens.cpp @@ -24,7 +24,7 @@ namespace tree_sitter { } rule_ptr apply(const rule_ptr rule) { - if (search_for_symbols(rule)) { + if (search_for_symbols(rule) || rule->operator==(Blank())) { rule->accept(*this); return value; } else {