Never move the start rule of a grammar into the lexical grammar

This preserves a useful invariant that the root node of the AST is never
a token.
This commit is contained in:
Max Brunsfeld 2017-12-07 11:40:41 -08:00
parent 48681c3f0e
commit 493db39363
3 changed files with 34 additions and 13 deletions

View file

@ -83,10 +83,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
ParseStateId error_state_id = add_parse_state({}, ParseItemSet{});
// Add the starting state.
Symbol start_symbol = grammar.variables.empty() ?
Symbol::terminal(0) :
Symbol::non_terminal(0);
Symbol start_symbol = Symbol::non_terminal(0);
Production start_production({{start_symbol, 0, rules::AssociativityNone, rules::Alias{}}}, 0);
add_parse_state({}, ParseItemSet{{

View file

@ -210,7 +210,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
size_t i = -1;
for (const auto &variable : processed_variables) {
i++;
if (variable.rule.is<Symbol>()) {
if (i > 0 && variable.rule.is<Symbol>()) {
auto symbol = variable.rule.get_unchecked<Symbol>();
if (symbol.is_terminal() && extractor.token_usage_counts[symbol.index] == 1) {
lexical_grammar.variables[symbol.index].type = variable.type;

View file

@ -249,39 +249,63 @@ describe("extract_tokens", []() {
}));
});
// Test for the start-rule invariant: the grammar's only variable, "rule_a",
// is a bare string. The assertions check that extract_tokens still reports
// one syntax-grammar variable alongside one lexical-grammar variable, i.e.
// the first rule is not removed from the syntax grammar.
it("does not move the start rule into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{
"rule_a",
VariableTypeNamed,
String{"a"}
},
},
{}, {}, {}, {}
});
// extract_tokens returns a tuple: element 0 is the syntax grammar,
// element 1 is the lexical grammar.
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
// The start rule must still be present as a syntax variable ...
AssertThat(syntax_grammar.variables.size(), Equals(1u));
// ... and exactly one lexical variable is expected (presumably the token
// for the string "a" — confirm against extract_tokens).
AssertThat(lexical_grammar.variables.size(), Equals(1u));
});
// NOTE(review): this listing appears to be a diff whose +/- markers were
// lost, so each changed line shows up twice: the old form immediately
// followed by its replacement (e.g. "rule_A" then "rule_a", or the two
// variables.size() assertions below). Read the second line of each pair as
// the current code — confirm against the actual test file.
//
// In the updated form, the expected conflict pair
// { non_terminal(2), non_terminal(3) } passed in is asserted to come back as
// { non_terminal(1), non_terminal(2) }, with 3 variables left in the syntax
// grammar. Presumably this is because the string rule "rule_b" is moved to
// the lexical grammar and the later symbols shift down by one — TODO confirm.
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{
"rule_A",
"rule_a",
VariableTypeNamed,
Symbol::non_terminal(2)
},
Variable{
"rule_b",
VariableTypeNamed,
String{"ok"}
},
Variable{
"rule_B",
"rule_c",
VariableTypeNamed,
Repeat{Symbol::non_terminal(0)}
Repeat{Symbol::non_terminal(1)}
},
Variable{
"rule_C",
"rule_d",
VariableTypeNamed,
Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}}
Repeat{Seq{Symbol::non_terminal(1), Symbol::non_terminal(1)}}
},
},
{
String{" "}
},
{
{ Symbol::non_terminal(1), Symbol::non_terminal(2) }
{ Symbol::non_terminal(2), Symbol::non_terminal(3) }
},
{}, {}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(3));
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
{ Symbol::non_terminal(0), Symbol::non_terminal(1) },
{ Symbol::non_terminal(1), Symbol::non_terminal(2) },
})));
});