Disallow syntax rules that match the empty string (for now)

This commit is contained in:
Max Brunsfeld 2016-11-30 22:47:25 -08:00
parent d627042fa6
commit 996ca91e70
6 changed files with 59 additions and 6 deletions

View file

@ -13,6 +13,7 @@ typedef enum {
TSCompileErrorTypeInvalidUbiquitousToken,
TSCompileErrorTypeLexConflict,
TSCompileErrorTypeParseConflict,
TSCompileErrorTypeEpsilonRule,
} TSCompileErrorType;
typedef struct {

View file

@ -326,6 +326,33 @@ describe("compile_grammar", []() {
});
});
// Compiles a grammar in which `rule_2` is a CHOICE with a BLANK member (so it
// can match the empty string) and checks that compilation reports an error
// naming that rule.
describe("when the grammar contains rules that match the empty string", [&]() {
it("reports an error", [&]() {
// `rule_1` and `rule_2` refer to each other; only `rule_2` has the BLANK
// alternative.
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "empty_rules",
"rules": {
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
"rule_2": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "rule_1"},
{"type": "BLANK"}
]
}
}
}
)JSON");
// The expected message is compared after passing the raw string through
// `dedent`; the text must match the compiler's error message exactly.
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
The rule `rule_2` matches the empty string.
Tree-sitter currently does not support syntactic rules that match the empty string.
)MESSAGE")));
});
});
describe("when the grammar's start symbol is a token", [&]() {
it("parses the token", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(

View file

@ -177,7 +177,7 @@ class ParseTableBuilder {
parse_table.add_terminal_action(state_id, lookahead, action);
} else {
ParseAction &existing_action = entry.actions[0];
if (allow_any_conflict) {
if (existing_action.type == ParseActionTypeAccept || allow_any_conflict) {
entry.actions.push_back(action);
} else {
int existing_precedence = existing_action.precedence();

View file

@ -13,6 +13,7 @@ namespace tree_sitter {
namespace prepare_grammar {
using std::find;
using std::pair;
using std::vector;
class FlattenRule : public rules::RuleFn<void> {
@ -87,16 +88,35 @@ SyntaxVariable flatten_rule(const Variable &variable) {
return SyntaxVariable(variable.name, variable.type, productions);
}
// Builds a SyntaxGrammar from `grammar` by running `flatten_rule` on each of
// its variables, copying `expected_conflicts` and `extra_tokens` unchanged.
//
// In the pair-returning form, the second element is a CompileError:
//  - `CompileError::none()` when every variable was flattened and appended;
//  - a `TSCompileErrorTypeEpsilonRule` error as soon as any variable other
//    than the first one has an empty production. In that case the first
//    element holds only the variables appended before the offending one.
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &grammar) {
SyntaxGrammar result;
result.expected_conflicts = grammar.expected_conflicts;
result.extra_tokens = grammar.extra_tokens;
// True only while processing the first variable, which is exempt from the
// empty-production check below (presumably because it is the grammar's start
// rule -- confirm against how `grammar.variables` is ordered).
bool is_start = true;
for (const Variable &variable : grammar.variables) {
result.variables.push_back(flatten_rule(variable));
SyntaxVariable syntax_variable = flatten_rule(variable);
if (!is_start) {
// Reject the grammar on the first empty production found.
for (const Production &production : syntax_variable.productions) {
if (production.empty()) {
return {
result,
CompileError(
TSCompileErrorTypeEpsilonRule,
"The rule `" + variable.name + "` matches the empty string.\n" +
"Tree-sitter currently does not support syntactic rules that match the empty string.\n"
)
};
}
}
}
result.variables.push_back(syntax_variable);
is_start = false;
}
return result;
return {result, CompileError::none()};
}
} // namespace prepare_grammar

View file

@ -3,6 +3,7 @@
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
@ -11,7 +12,7 @@ namespace prepare_grammar {
struct InitialSyntaxGrammar;
SyntaxVariable flatten_rule(const Variable &variable);
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &);
std::pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -51,7 +51,11 @@ tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
/*
* Flatten syntax rules into lists of productions.
*/
SyntaxGrammar syntax_grammar = flatten_grammar(syntax_grammar1);
auto flatten_result = flatten_grammar(syntax_grammar1);
SyntaxGrammar syntax_grammar = flatten_result.first;
error = flatten_result.second;
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
* Ensure all lexical rules are in a consistent format.