From 493db393632fd81f0b15c97b5bac321b4711bc32 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 Dec 2017 11:40:41 -0800 Subject: [PATCH] Never move the start rule of a grammar into the lexical grammar This preserves a useful invariant that the root node of the AST is never a token. --- .../build_tables/parse_table_builder.cc | 5 +-- .../prepare_grammar/extract_tokens.cc | 2 +- .../prepare_grammar/extract_tokens_test.cc | 40 +++++++++++++++---- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index 8f4b0fb1..9e5e1bf1 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -83,10 +83,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { ParseStateId error_state_id = add_parse_state({}, ParseItemSet{}); // Add the starting state. - Symbol start_symbol = grammar.variables.empty() ? - Symbol::terminal(0) : - Symbol::non_terminal(0); - + Symbol start_symbol = Symbol::non_terminal(0); Production start_production({{start_symbol, 0, rules::AssociativityNone, rules::Alias{}}}, 0); add_parse_state({}, ParseItemSet{{ diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 73d3d866..61435008 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -210,7 +210,7 @@ tuple extract_tokens( size_t i = -1; for (const auto &variable : processed_variables) { i++; - if (variable.rule.is()) { + if (i > 0 && variable.rule.is()) { auto symbol = variable.rule.get_unchecked(); if (symbol.is_terminal() && extractor.token_usage_counts[symbol.index] == 1) { lexical_grammar.variables[symbol.index].type = variable.type; diff --git a/test/compiler/prepare_grammar/extract_tokens_test.cc b/test/compiler/prepare_grammar/extract_tokens_test.cc index 2e515e89..0f9be780 100644 --- a/test/compiler/prepare_grammar/extract_tokens_test.cc +++ b/test/compiler/prepare_grammar/extract_tokens_test.cc @@ -249,39 +249,63 @@ describe("extract_tokens", []() { })); }); + it("does not move the start rule into the lexical grammar", [&]() { + auto result = extract_tokens(InternedGrammar{ + { + Variable{ + "rule_a", + VariableTypeNamed, + String{"a"} + }, + }, + {}, {}, {}, {} + }); + + InitialSyntaxGrammar &syntax_grammar = get<0>(result); + LexicalGrammar &lexical_grammar = get<1>(result); + + AssertThat(syntax_grammar.variables.size(), Equals(1u)); + AssertThat(lexical_grammar.variables.size(), Equals(1u)); + }); + it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() { auto result = extract_tokens(InternedGrammar{ { Variable{ - "rule_A", + "rule_a", + VariableTypeNamed, + Symbol::non_terminal(2) + }, + Variable{ + "rule_b", VariableTypeNamed, String{"ok"} }, Variable{ - "rule_B", + "rule_c", VariableTypeNamed, - Repeat{Symbol::non_terminal(0)} + Repeat{Symbol::non_terminal(1)} }, Variable{ - "rule_C", + "rule_d", VariableTypeNamed, - Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}} + Repeat{Seq{Symbol::non_terminal(1), Symbol::non_terminal(1)}} }, }, { String{" "} }, { - { Symbol::non_terminal(1), Symbol::non_terminal(2) } + { Symbol::non_terminal(2), Symbol::non_terminal(3) } }, {}, {} }); InitialSyntaxGrammar &syntax_grammar = get<0>(result); - AssertThat(syntax_grammar.variables.size(), Equals(2)); + AssertThat(syntax_grammar.variables.size(), Equals(3)); AssertThat(syntax_grammar.expected_conflicts, Equals(set>({ - { Symbol::non_terminal(0), Symbol::non_terminal(1) }, + { Symbol::non_terminal(1), Symbol::non_terminal(2) }, }))); });