diff --git a/spec/compiler/build_tables/perform_spec.cpp b/spec/compiler/build_tables/perform_spec.cpp index 552a11c8..44f9ba80 100644 --- a/spec/compiler/build_tables/perform_spec.cpp +++ b/spec/compiler/build_tables/perform_spec.cpp @@ -61,7 +61,6 @@ describe("building parse and lex tables", []() { { "term", parse_actions({ ParseAction::Shift(2) }) }, { "number", parse_actions({ ParseAction::Shift(5) }) }, { "variable", parse_actions({ ParseAction::Shift(5) }) }, - { "left-paren", parse_actions({ ParseAction::Shift(6) }) }, { "variable", parse_actions({ ParseAction::Shift(9) }) }, { "number", parse_actions({ ParseAction::Shift(10) }) }, @@ -89,6 +88,7 @@ describe("building parse and lex tables", []() { it("has the right next states", [&]() { AssertThat(parse_state(2).actions, Equals(unordered_map({ { "plus", parse_actions({ ParseAction::Shift(3) }) }, + { "__END__", parse_actions({ ParseAction::Reduce("expression", 1) }) }, }))); }); }); diff --git a/spec/compiler/prepare_grammar_spec.cpp b/spec/compiler/prepare_grammar_spec.cpp index 9b6bcbfc..cef0ecbc 100644 --- a/spec/compiler/prepare_grammar_spec.cpp +++ b/spec/compiler/prepare_grammar_spec.cpp @@ -32,7 +32,8 @@ describe("preparing a grammar", []() { AssertThat(result.second, Equals(Grammar("", { { "1", rules::seq({ rules::character('a'), - rules::character('b') }) } + rules::character('b') }) }, + { "__END__", character('\0') }, }))); }); @@ -51,7 +52,8 @@ describe("preparing a grammar", []() { AssertThat(result.second, Equals(Grammar("", { { "rule2", seq({ character('a'), - character('b') }) } + character('b') }) }, + { "__END__", character('\0') }, }))); }); }); diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index 795d127b..cfe0124c 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -10,8 +10,8 @@ enum ts_symbol { ts_symbol_1, ts_symbol_2, ts_symbol_number, + ts_symbol___END__, ts_symbol_variable, - ts_symbol___END__ }; static const char *ts_symbol_names[] = { @@ -23,85 +23,95 @@ static const char *ts_symbol_names[] = { "1", "2", "number", + "__END__", "variable", - "__END__" }; static void ts_lex(TSParser *parser) { START_LEXER(); switch (LEX_STATE()) { case 0: - LEX_ERROR(0, EXPECT({})); + if (LOOKAHEAD_CHAR() == '\0') + ADVANCE(1); + LEX_ERROR(1, EXPECT({"''"})); case 1: - if (LOOKAHEAD_CHAR() == '*') - ADVANCE(2); - LEX_ERROR(1, EXPECT({"'*'"})); + ACCEPT_TOKEN(ts_symbol___END__); case 2: - ACCEPT_TOKEN(ts_symbol_3); + if (LOOKAHEAD_CHAR() == '\0') + ADVANCE(1); + if (LOOKAHEAD_CHAR() == '*') + ADVANCE(3); + LEX_ERROR(2, EXPECT({"'*'", "''"})); case 3: - if (LOOKAHEAD_CHAR() == ')') - ADVANCE(4); - LEX_ERROR(1, EXPECT({"')'"})); + ACCEPT_TOKEN(ts_symbol_3); case 4: - ACCEPT_TOKEN(ts_symbol_2); + if (LOOKAHEAD_CHAR() == ')') + ADVANCE(5); + LEX_ERROR(1, EXPECT({"')'"})); case 5: - if (LOOKAHEAD_CHAR() == '*') - ADVANCE(2); - if (LOOKAHEAD_CHAR() == ')') - ADVANCE(4); - LEX_ERROR(2, EXPECT({"')'", "'*'"})); + ACCEPT_TOKEN(ts_symbol_2); case 6: - if (LOOKAHEAD_CHAR() == '+') - ADVANCE(7); if (LOOKAHEAD_CHAR() == '*') - ADVANCE(2); + ADVANCE(3); if (LOOKAHEAD_CHAR() == ')') - ADVANCE(4); - LEX_ERROR(3, EXPECT({"')'", "'*'", "'+'"})); + ADVANCE(5); + LEX_ERROR(2, EXPECT({"')'", "'*'"})); case 7: - ACCEPT_TOKEN(ts_symbol_4); - case 8: if (LOOKAHEAD_CHAR() == '+') - ADVANCE(7); + ADVANCE(8); + if (LOOKAHEAD_CHAR() == '*') + ADVANCE(3); if (LOOKAHEAD_CHAR() == ')') - ADVANCE(4); - LEX_ERROR(2, EXPECT({"')'", "'+'"})); + ADVANCE(5); + LEX_ERROR(3, EXPECT({"')'", "'*'", "'+'"})); + case 8: + ACCEPT_TOKEN(ts_symbol_4); case 9: - if (isalnum(LOOKAHEAD_CHAR())) - ADVANCE(12); - if (LOOKAHEAD_CHAR() == '(') - ADVANCE(11); - if (isdigit(LOOKAHEAD_CHAR())) - ADVANCE(10); - LEX_ERROR(3, EXPECT({"", "'('", ""})); + if (LOOKAHEAD_CHAR() == '+') + ADVANCE(8); + if (LOOKAHEAD_CHAR() == ')') + ADVANCE(5); + LEX_ERROR(2, EXPECT({"')'", "'+'"})); case 10: - if (isdigit(LOOKAHEAD_CHAR())) - ADVANCE(10); - ACCEPT_TOKEN(ts_symbol_number); - case 11: - ACCEPT_TOKEN(ts_symbol_1); - case 12: if (isalnum(LOOKAHEAD_CHAR())) + ADVANCE(13); + if (LOOKAHEAD_CHAR() == '(') ADVANCE(12); - ACCEPT_TOKEN(ts_symbol_variable); + if (isdigit(LOOKAHEAD_CHAR())) + ADVANCE(11); + LEX_ERROR(3, EXPECT({"", "'('", ""})); + case 11: + if (isdigit(LOOKAHEAD_CHAR())) + ADVANCE(11); + ACCEPT_TOKEN(ts_symbol_number); + case 12: + ACCEPT_TOKEN(ts_symbol_1); case 13: if (isalnum(LOOKAHEAD_CHAR())) - ADVANCE(12); - if (LOOKAHEAD_CHAR() == '(') - ADVANCE(11); - if (isdigit(LOOKAHEAD_CHAR())) - ADVANCE(10); - LEX_ERROR(3, EXPECT({"", "'('", ""})); + ADVANCE(13); + ACCEPT_TOKEN(ts_symbol_variable); case 14: - if (LOOKAHEAD_CHAR() == '+') - ADVANCE(7); - LEX_ERROR(1, EXPECT({"'+'"})); + if (isalnum(LOOKAHEAD_CHAR())) + ADVANCE(13); + if (LOOKAHEAD_CHAR() == '(') + ADVANCE(12); + if (isdigit(LOOKAHEAD_CHAR())) + ADVANCE(11); + LEX_ERROR(3, EXPECT({"", "'('", ""})); case 15: + if (LOOKAHEAD_CHAR() == '\0') + ADVANCE(1); if (LOOKAHEAD_CHAR() == '+') - ADVANCE(7); + ADVANCE(8); + LEX_ERROR(2, EXPECT({"'+'", "''"})); + case 16: + if (LOOKAHEAD_CHAR() == '\0') + ADVANCE(1); + if (LOOKAHEAD_CHAR() == '+') + ADVANCE(8); if (LOOKAHEAD_CHAR() == '*') - ADVANCE(2); - LEX_ERROR(2, EXPECT({"'*'", "'+'"})); + ADVANCE(3); + LEX_ERROR(3, EXPECT({"'*'", "'+'", "''"})); default: LEX_PANIC(); } @@ -112,7 +122,7 @@ static TSParseResult ts_parse(const char *input) { START_PARSER(); switch (PARSE_STATE()) { case 0: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(45); @@ -138,7 +148,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"__END__"})); } case 2: - SET_LEX_STATE(14); + SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_expression, 1); @@ -148,7 +158,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"4", "__END__"})); } case 3: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(34); @@ -172,7 +182,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"__END__"})); } case 5: - SET_LEX_STATE(1); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_factor, 1); @@ -182,7 +192,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"3", "__END__"})); } case 6: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(16); @@ -200,7 +210,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); } case 7: - SET_LEX_STATE(8); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: REDUCE(ts_symbol_expression, 1); @@ -210,7 +220,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"4", "2"})); } case 8: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(25); @@ -226,7 +236,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(5, EXPECT({"term", "number", "1", "variable", "factor"})); } case 9: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: REDUCE(ts_symbol_expression, 3); @@ -234,7 +244,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 10: - SET_LEX_STATE(5); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_symbol_3: REDUCE(ts_symbol_factor, 1); @@ -244,7 +254,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"2", "3"})); } case 11: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(16); @@ -262,7 +272,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); } case 12: - SET_LEX_STATE(6); + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_symbol_4: REDUCE(ts_symbol_factor, 1); @@ -274,7 +284,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(3, EXPECT({"2", "3", "4"})); } case 13: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(16); @@ -292,7 +302,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); } case 14: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: SHIFT(15); @@ -300,7 +310,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 15: - SET_LEX_STATE(6); + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_symbol_4: REDUCE(ts_symbol_factor, 3); @@ -312,7 +322,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(3, EXPECT({"2", "3", "4"})); } case 16: - SET_LEX_STATE(6); + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_symbol_4: REDUCE(ts_symbol_term, 1); @@ -324,7 +334,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(3, EXPECT({"3", "2", "4"})); } case 17: - SET_LEX_STATE(13); + SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(22); @@ -338,7 +348,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"})); } case 18: - SET_LEX_STATE(8); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_4: REDUCE(ts_symbol_factor, 1); @@ -348,7 +358,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"2", "4"})); } case 19: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(16); @@ -366,7 +376,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); } case 20: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: SHIFT(21); @@ -374,7 +384,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 21: - SET_LEX_STATE(8); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_4: REDUCE(ts_symbol_factor, 3); @@ -384,7 +394,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"2", "4"})); } case 22: - SET_LEX_STATE(8); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { case ts_symbol_4: REDUCE(ts_symbol_term, 3); @@ -394,7 +404,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"2", "4"})); } case 23: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: SHIFT(24); @@ -402,7 +412,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 24: - SET_LEX_STATE(5); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_symbol_3: REDUCE(ts_symbol_factor, 3); @@ -412,7 +422,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"2", "3"})); } case 25: - SET_LEX_STATE(5); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: REDUCE(ts_symbol_term, 1); @@ -422,7 +432,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"3", "2"})); } case 26: - SET_LEX_STATE(13); + SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(31); @@ -436,7 +446,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"})); } case 27: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: REDUCE(ts_symbol_factor, 1); @@ -444,7 +454,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 28: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(16); @@ -462,7 +472,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); } case 29: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: SHIFT(30); @@ -470,7 +480,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 30: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: REDUCE(ts_symbol_factor, 3); @@ -478,7 +488,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 31: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: REDUCE(ts_symbol_term, 3); @@ -486,7 +496,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 32: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: SHIFT(33); @@ -494,7 +504,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 33: - SET_LEX_STATE(1); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_factor, 3); @@ -504,7 +514,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"3", "__END__"})); } case 34: - SET_LEX_STATE(1); + SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_term, 1); @@ -514,7 +524,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"3", "__END__"})); } case 35: - SET_LEX_STATE(13); + SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(40); @@ -536,7 +546,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"__END__"})); } case 37: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(16); @@ -554,7 +564,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); } case 38: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: SHIFT(39); @@ -578,7 +588,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"__END__"})); } case 41: - SET_LEX_STATE(15); + SET_LEX_STATE(16); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_factor, 1); @@ -590,7 +600,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(3, EXPECT({"3", "4", "__END__"})); } case 42: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(16); @@ -608,7 +618,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); } case 43: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: SHIFT(44); @@ -616,7 +626,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 44: - SET_LEX_STATE(15); + SET_LEX_STATE(16); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_factor, 3); @@ -628,7 +638,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(3, EXPECT({"3", "4", "__END__"})); } case 45: - SET_LEX_STATE(15); + SET_LEX_STATE(16); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_term, 1); @@ -640,7 +650,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(3, EXPECT({"3", "4", "__END__"})); } case 46: - SET_LEX_STATE(13); + SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(51); @@ -654,7 +664,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"})); } case 47: - SET_LEX_STATE(14); + SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_factor, 1); @@ -664,7 +674,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"4", "__END__"})); } case 48: - SET_LEX_STATE(9); + SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(16); @@ -682,7 +692,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); } case 49: - SET_LEX_STATE(3); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: SHIFT(50); @@ -690,7 +700,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 50: - SET_LEX_STATE(14); + SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_factor, 3); @@ -700,7 +710,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(2, EXPECT({"4", "__END__"})); } case 51: - SET_LEX_STATE(14); + SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { case ts_symbol___END__: REDUCE(ts_symbol_term, 3); diff --git a/src/compiler/generate_code/c_code.cpp b/src/compiler/generate_code/c_code.cpp index 4cc4e824..7363fe92 100644 --- a/src/compiler/generate_code/c_code.cpp +++ b/src/compiler/generate_code/c_code.cpp @@ -87,6 +87,15 @@ namespace tree_sitter { return "ts_symbol_" + symbol_name; } + string character_code(char character) { + switch (character) { + case '\0': + return "\\0"; + default: + return string() + character; + } + } + string condition_for_char_match(const CharMatch &char_match) { auto value = "LOOKAHEAD_CHAR()"; switch (char_match.type) { @@ -98,7 +107,7 @@ namespace tree_sitter { return string("isalnum(") + value + ")"; } case CharMatchTypeSpecific: - return string(value) + " == '" + char_match.value.character + "'"; + return string(value) + " == '" + character_code(char_match.value.character) + "'"; default: return ""; } @@ -201,16 +210,14 @@ namespace tree_sitter { string result = "enum ts_symbol {\n"; for (string rule_name : rule_names) result += indent(symbol_id(rule_name)) + ",\n"; - result += indent(symbol_id(ParseTable::END_OF_INPUT)); - return result + "\n};"; + return result + "};"; } string rule_names_list() { string result = "static const char *ts_symbol_names[] = {\n"; for (string rule_name : rule_names) result += indent(string("\"") + rule_name) + "\",\n"; - result += indent(string("\"") + ParseTable::END_OF_INPUT + "\""); - return result + "\n};"; + return result + "};"; } string includes() { diff --git a/src/compiler/prepare_grammar/extract_tokens.cpp b/src/compiler/prepare_grammar/extract_tokens.cpp index 47378388..5b42cd65 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cpp +++ b/src/compiler/prepare_grammar/extract_tokens.cpp @@ -71,6 +71,8 @@ namespace tree_sitter { rules.insert({ name, new_rule }); } + extractor.tokens.insert({ "__END__", rules::character('\0') }); + return { Grammar(input_grammar.start_rule_name, rules), Grammar("", extractor.tokens)