From b7d0606fbd3dc1e9087f1015ea2aaf0a819d6953 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 16 Mar 2018 14:56:57 -0700 Subject: [PATCH] Be less conservative in merging parse states with external tokens Also, clean up the internal representation of external tokens --- .../build_tables/parse_table_builder.cc | 30 +++++++------------ src/compiler/grammar.h | 2 +- src/compiler/parse_grammar.cc | 10 +------ .../prepare_grammar/extract_tokens.cc | 8 ++--- .../prepare_grammar/intern_symbols.cc | 26 ++++++++++++---- .../prepare_grammar/interned_grammar.h | 1 + .../prepare_grammar/intern_symbols_test.cc | 16 +++------- 7 files changed, 42 insertions(+), 51 deletions(-) diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index e91c74d7..cd2f2aff 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -478,28 +478,20 @@ class ParseTableBuilderImpl : public ParseTableBuilder { if (entry.actions.back().type != ParseActionTypeReduce) return false; if (!has_actions(state, entry)) return false; - // Do not add external tokens; they could conflict lexically with any - // of the state's existing lookahead tokens. + // Do not add external tokens; they could conflict lexically with any of the state's + // existing lookahead tokens. if (new_token.is_external()) return false; + // Do not add tokens which are both internal and external. Their validity could + // influence the behavior of the external scanner. + for (const ExternalToken &external_token : grammar.external_tokens) { + if (external_token.corresponding_internal_token == new_token) return false; + } + + // Do not add a token if it conflicts with an existing token. if (!new_token.is_built_in()) { - const auto &incompatible_tokens = lex_table_builder->get_incompatible_tokens(new_token.index); - if (!incompatible_tokens.empty()) { - for (const auto &pair : state.terminal_entries) { - const Symbol &existing_token = pair.first; - - // Do not add a token if it conflicts with any token in the follow set - // of an existing external token. - if (existing_token.is_external()) { - const LookaheadSet &following_tokens = following_tokens_by_token[existing_token]; - for (auto &incompatible_token : incompatible_tokens) { - if (following_tokens.contains(incompatible_token)) return false; - } - } - - // Do not add a token if it conflicts with an existing token. - if (incompatible_tokens.count(existing_token)) return false; - } + for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(new_token.index)) { + if (state.terminal_entries.count(incompatible_token)) return false; } } diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h index 54fe69e9..6c63340c 100644 --- a/src/compiler/grammar.h +++ b/src/compiler/grammar.h @@ -30,7 +30,7 @@ struct InputGrammar { std::vector variables; std::vector extra_tokens; std::vector> expected_conflicts; - std::vector external_tokens; + std::vector external_tokens; std::unordered_set variables_to_inline; }; diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc index 4764542a..345fb94f 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_grammar.cc @@ -354,15 +354,7 @@ ParseGrammarResult parse_grammar(const string &input) { error_message = "Invalid external token: " + result.error_message; goto error; } - - grammar.external_tokens.push_back(result.rule.match( - [](rules::NamedSymbol named_symbol) { - return Variable{named_symbol.value, VariableTypeNamed, named_symbol}; - }, - [](auto rule) { - return Variable{"", VariableTypeAnonymous, rule}; - } - )); + grammar.external_tokens.push_back(result.rule); } } diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 61435008..61016687 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -179,10 +179,10 @@ tuple extract_tokens( vector processed_external_tokens; for (const auto &external_token : grammar.external_tokens) { - processed_external_tokens.push_back({ + processed_external_tokens.push_back(Variable{ external_token.name, external_token.type, - extractor.apply(external_token.rule) + extractor.apply(external_token.rule), }); } @@ -312,13 +312,13 @@ tuple extract_tokens( syntax_grammar.external_tokens.push_back(ExternalToken{ external_token.name, external_token.type, - rules::NONE() + rules::NONE(), }); } else { syntax_grammar.external_tokens.push_back(ExternalToken{ lexical_grammar.variables[symbol.index].name, external_token.type, - symbol + symbol, }); } } diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index 0d515fad..4e610960 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -72,7 +72,7 @@ class SymbolInterner { } for (size_t i = 0; i < grammar.external_tokens.size(); i++) { - if (grammar.external_tokens[i].name == named_symbol.value) { + if (grammar.external_tokens[i] == named_symbol) { return Symbol::external(i); } } @@ -96,16 +96,30 @@ pair intern_symbols(const InputGrammar &grammar) SymbolInterner interner(grammar); - for (auto &external_token : grammar.external_tokens) { - auto new_rule = interner.apply(external_token.rule); + for (const Rule &external_token : grammar.external_tokens) { + string external_token_name; + VariableType external_token_type = VariableTypeAnonymous; + external_token.match( + [&](rules::NamedSymbol named_symbol) { + external_token_name = named_symbol.value; + if (external_token_name[0] == '_') { + external_token_type = VariableTypeHidden; + } else { + external_token_type =VariableTypeNamed; + } + }, + [](auto rule) {} + ); + + auto new_rule = interner.apply(external_token); if (!interner.missing_rule_name.empty()) { return { result, missing_rule_error(interner.missing_rule_name) }; } result.external_tokens.push_back(Variable{ - external_token.name, - external_token.name[0] == '_' ? VariableTypeHidden : external_token.type, - new_rule + external_token_name, + external_token_type, + new_rule, }); } diff --git a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h index c96dfa66..83117ced 100644 --- a/src/compiler/prepare_grammar/interned_grammar.h +++ b/src/compiler/prepare_grammar/interned_grammar.h @@ -15,6 +15,7 @@ struct InternedGrammar { std::vector extra_tokens; std::set> expected_conflicts; std::vector external_tokens; + std::set blank_external_tokens; std::set variables_to_inline; }; diff --git a/test/compiler/prepare_grammar/intern_symbols_test.cc b/test/compiler/prepare_grammar/intern_symbols_test.cc index 22944f53..65bad45e 100644 --- a/test/compiler/prepare_grammar/intern_symbols_test.cc +++ b/test/compiler/prepare_grammar/intern_symbols_test.cc @@ -75,16 +75,8 @@ describe("intern_symbols", []() { {}, {}, { - Variable{ - "w", - VariableTypeNamed, - NamedSymbol{"w"} - }, - Variable{ - "z", - VariableTypeNamed, - NamedSymbol{"z"} - }, + NamedSymbol{"w"}, + NamedSymbol{"z"}, }, {} }; @@ -95,12 +87,12 @@ describe("intern_symbols", []() { Variable{ "w", VariableTypeNamed, - Symbol::external(0) + Symbol::external(0), }, Variable{ "z", VariableTypeNamed, - Symbol::non_terminal(2) + Symbol::non_terminal(2), }, })) });