diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index e3878f33..b362e535 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -13,6 +13,7 @@ typedef enum { TSCompileErrorTypeInvalidUbiquitousToken, TSCompileErrorTypeLexConflict, TSCompileErrorTypeParseConflict, + TSCompileErrorTypeEpsilonRule, } TSCompileErrorType; typedef struct { diff --git a/spec/integration/compile_grammar_spec.cc b/spec/integration/compile_grammar_spec.cc index 6dbf1e02..36f5647e 100644 --- a/spec/integration/compile_grammar_spec.cc +++ b/spec/integration/compile_grammar_spec.cc @@ -326,6 +326,33 @@ describe("compile_grammar", []() { }); }); + describe("when the grammar contains rules that match the empty string", [&]() { + it("reports an error", [&]() { + TSCompileResult result = ts_compile_grammar(R"JSON( + { + "name": "empty_rules", + + "rules": { + "rule_1": {"type": "SYMBOL", "name": "rule_2"}, + + "rule_2": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "rule_1"}, + {"type": "BLANK"} + ] + } + } + } + )JSON"); + + AssertThat(result.error_message, Equals(dedent(R"MESSAGE( + The rule `rule_2` matches the empty string. + Tree-sitter currently does not support syntactic rules that match the empty string. + )MESSAGE"))); + }); + }); + describe("when the grammar's start symbol is a token", [&]() { it("parses the token", [&]() { TSCompileResult result = ts_compile_grammar(R"JSON( diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index a263a497..2047b4ba 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -177,7 +177,7 @@ class ParseTableBuilder { parse_table.add_terminal_action(state_id, lookahead, action); } else { ParseAction &existing_action = entry.actions[0]; - if (allow_any_conflict) { + if (existing_action.type == ParseActionTypeAccept || allow_any_conflict) { entry.actions.push_back(action); } else { int existing_precedence = existing_action.precedence(); diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index fa253718..ddba9a5f 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -13,6 +13,7 @@ namespace tree_sitter { namespace prepare_grammar { using std::find; +using std::pair; using std::vector; class FlattenRule : public rules::RuleFn { @@ -87,16 +88,35 @@ SyntaxVariable flatten_rule(const Variable &variable) { return SyntaxVariable(variable.name, variable.type, productions); } -SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) { +pair flatten_grammar(const InitialSyntaxGrammar &grammar) { SyntaxGrammar result; result.expected_conflicts = grammar.expected_conflicts; result.extra_tokens = grammar.extra_tokens; + bool is_start = true; for (const Variable &variable : grammar.variables) { - result.variables.push_back(flatten_rule(variable)); + SyntaxVariable syntax_variable = flatten_rule(variable); + + if (!is_start) { + for (const Production &production : syntax_variable.productions) { + if (production.empty()) { + return { + result, + CompileError( + TSCompileErrorTypeEpsilonRule, + "The rule `" + variable.name + "` matches the empty string.\n" + + "Tree-sitter currently does not support syntactic rules that match the empty string.\n" + ) + }; + } + } + } + + result.variables.push_back(syntax_variable); + is_start = false; } - return result; + return {result, CompileError::none()}; } } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/flatten_grammar.h b/src/compiler/prepare_grammar/flatten_grammar.h index 1fb6eb9d..b5501fb4 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.h +++ b/src/compiler/prepare_grammar/flatten_grammar.h @@ -3,6 +3,7 @@ #include #include "tree_sitter/compiler.h" +#include "compiler/compile_error.h" #include "compiler/syntax_grammar.h" namespace tree_sitter { @@ -11,7 +12,7 @@ namespace prepare_grammar { struct InitialSyntaxGrammar; SyntaxVariable flatten_rule(const Variable &variable); -SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &); +std::pair flatten_grammar(const InitialSyntaxGrammar &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc index f0e4ee9f..81750b58 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.cc +++ b/src/compiler/prepare_grammar/prepare_grammar.cc @@ -51,7 +51,11 @@ tuple prepare_grammar( /* * Flatten syntax rules into lists of productions. */ - SyntaxGrammar syntax_grammar = flatten_grammar(syntax_grammar1); + auto flatten_result = flatten_grammar(syntax_grammar1); + SyntaxGrammar syntax_grammar = flatten_result.first; + error = flatten_result.second; + if (error.type) + return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); /* * Ensure all lexical rules are in a consistent format.