Never move the start rule of a grammar into the lexical grammar
This preserves a useful invariant that the root node of the AST is never a token.
This commit is contained in:
parent
48681c3f0e
commit
493db39363
3 changed files with 34 additions and 13 deletions
|
|
@ -83,10 +83,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
ParseStateId error_state_id = add_parse_state({}, ParseItemSet{});
|
||||
|
||||
// Add the starting state.
|
||||
Symbol start_symbol = grammar.variables.empty() ?
|
||||
Symbol::terminal(0) :
|
||||
Symbol::non_terminal(0);
|
||||
|
||||
Symbol start_symbol = Symbol::non_terminal(0);
|
||||
Production start_production({{start_symbol, 0, rules::AssociativityNone, rules::Alias{}}}, 0);
|
||||
|
||||
add_parse_state({}, ParseItemSet{{
|
||||
|
|
|
|||
|
|
@ -210,7 +210,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
size_t i = -1;
|
||||
for (const auto &variable : processed_variables) {
|
||||
i++;
|
||||
if (variable.rule.is<Symbol>()) {
|
||||
if (i > 0 && variable.rule.is<Symbol>()) {
|
||||
auto symbol = variable.rule.get_unchecked<Symbol>();
|
||||
if (symbol.is_terminal() && extractor.token_usage_counts[symbol.index] == 1) {
|
||||
lexical_grammar.variables[symbol.index].type = variable.type;
|
||||
|
|
|
|||
|
|
@ -249,39 +249,63 @@ describe("extract_tokens", []() {
|
|||
}));
|
||||
});
|
||||
|
||||
// Regression test: the input grammar has a single variable, "rule_a", whose
// rule is just the string "a". Because it is the first variable (index 0),
// it must stay in the syntax grammar rather than being moved wholesale into
// the lexical grammar.
it("does not move the start rule into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{
|
||||
"rule_a",
|
||||
VariableTypeNamed,
|
||||
String{"a"}
|
||||
},
|
||||
},
|
||||
{}, {}, {}, {}
|
||||
});
|
||||
|
||||
// Element 0 of the returned tuple is the syntax grammar, element 1 the
// lexical grammar.
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
// The start rule is still present as a syntax variable...
AssertThat(syntax_grammar.variables.size(), Equals(1u));
|
||||
// ...and the lexical grammar holds exactly one variable (presumably the
// token extracted for the string "a" -- only the count is asserted here).
AssertThat(lexical_grammar.variables.size(), Equals(1u));
|
||||
});
|
||||
|
||||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{
|
||||
"rule_A",
|
||||
"rule_a",
|
||||
VariableTypeNamed,
|
||||
Symbol::non_terminal(2)
|
||||
},
|
||||
Variable{
|
||||
"rule_b",
|
||||
VariableTypeNamed,
|
||||
String{"ok"}
|
||||
},
|
||||
Variable{
|
||||
"rule_B",
|
||||
"rule_c",
|
||||
VariableTypeNamed,
|
||||
Repeat{Symbol::non_terminal(0)}
|
||||
Repeat{Symbol::non_terminal(1)}
|
||||
},
|
||||
Variable{
|
||||
"rule_C",
|
||||
"rule_d",
|
||||
VariableTypeNamed,
|
||||
Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}}
|
||||
Repeat{Seq{Symbol::non_terminal(1), Symbol::non_terminal(1)}}
|
||||
},
|
||||
},
|
||||
{
|
||||
String{" "}
|
||||
},
|
||||
{
|
||||
{ Symbol::non_terminal(1), Symbol::non_terminal(2) }
|
||||
{ Symbol::non_terminal(2), Symbol::non_terminal(3) }
|
||||
},
|
||||
{}, {}
|
||||
});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
|
||||
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(3));
|
||||
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
|
||||
{ Symbol::non_terminal(0), Symbol::non_terminal(1) },
|
||||
{ Symbol::non_terminal(1), Symbol::non_terminal(2) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue