diff --git a/src/compiler/build_tables/lex_table_builder.cc b/src/compiler/build_tables/lex_table_builder.cc index e59d3f2d..d0f363d1 100644 --- a/src/compiler/build_tables/lex_table_builder.cc +++ b/src/compiler/build_tables/lex_table_builder.cc @@ -49,6 +49,19 @@ using rules::Symbol; using rules::Metadata; using rules::Seq; +enum ConflictStatus { + DoesNotMatch = 0, + MatchesShorterStringWithinSeparators = 1 << 0, + MatchesSameString = 1 << 1, + MatchesLongerString = 1 << 2, + MatchesLongerStringWithValidNextChar = 1 << 3, + CannotDistinguish = ( + MatchesShorterStringWithinSeparators | + MatchesSameString | + MatchesLongerStringWithValidNextChar + ), +}; + static const std::unordered_set EMPTY; bool CoincidentTokenIndex::contains(Symbol a, Symbol b) const { @@ -294,6 +307,18 @@ class LexTableBuilderImpl : public LexTableBuilder { return {main_lex_table, keyword_lex_table, word_rule}; } + bool does_token_shadow_other(Symbol token, Symbol shadowed_token) const { + if (token == word_rule && keyword_symbols.contains(shadowed_token)) return false; + return get_conflict_status(shadowed_token, token) & ( + MatchesShorterStringWithinSeparators | + MatchesLongerStringWithValidNextChar + ); + } + + bool does_token_match_same_string_as_other(Symbol token, Symbol shadowed_token) const { + return get_conflict_status(shadowed_token, token) & MatchesSameString; + } + ConflictStatus get_conflict_status(Symbol shadowed_token, Symbol other_token) const { if (shadowed_token.is_built_in() || other_token.is_built_in() || @@ -621,8 +646,12 @@ LexTableBuilder::BuildResult LexTableBuilder::build() { return static_cast(this)->build(); } -ConflictStatus LexTableBuilder::get_conflict_status(Symbol a, Symbol b) const { - return static_cast(this)->get_conflict_status(a, b); +bool LexTableBuilder::does_token_shadow_other(Symbol a, Symbol b) const { + return static_cast(this)->does_token_shadow_other(a, b); +} + +bool LexTableBuilder::does_token_match_same_string_as_other(Symbol a, Symbol b) const { + return static_cast(this)->does_token_match_same_string_as_other(a, b); } } // namespace build_tables diff --git a/src/compiler/build_tables/lex_table_builder.h b/src/compiler/build_tables/lex_table_builder.h index 4ec4f22b..d69b996b 100644 --- a/src/compiler/build_tables/lex_table_builder.h +++ b/src/compiler/build_tables/lex_table_builder.h @@ -30,19 +30,6 @@ namespace build_tables { class LookaheadSet; -enum ConflictStatus { - DoesNotMatch = 0, - MatchesShorterStringWithinSeparators = 1 << 0, - MatchesSameString = 1 << 1, - MatchesLongerString = 1 << 2, - MatchesLongerStringWithValidNextChar = 1 << 3, - CannotDistinguish = ( - MatchesShorterStringWithinSeparators | - MatchesSameString | - MatchesLongerStringWithValidNextChar - ), -}; - struct CoincidentTokenIndex { std::unordered_map< std::pair, @@ -69,7 +56,8 @@ class LexTableBuilder { BuildResult build(); - ConflictStatus get_conflict_status(rules::Symbol, rules::Symbol) const; + bool does_token_shadow_other(rules::Symbol, rules::Symbol) const; + bool does_token_match_same_string_as_other(rules::Symbol, rules::Symbol) const; protected: LexTableBuilder() = default; diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index 0e6b4247..26dae5b7 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -134,11 +134,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder { } void build_error_parse_state(ParseStateId state_id) { - unsigned CannotMerge = ( - MatchesShorterStringWithinSeparators | - MatchesLongerStringWithValidNextChar - ); - parse_table.states[state_id].terminal_entries.clear(); // First, identify the conflict-free tokens. @@ -149,7 +144,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { for (unsigned j = 0; j < lexical_grammar.variables.size(); j++) { Symbol other_token = Symbol::terminal(j); if (!coincident_token_index.contains(token, other_token) && - (lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) { + lex_table_builder->does_token_shadow_other(token, other_token)) { conflicts_with_other_tokens = true; break; } @@ -171,7 +166,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { bool conflicts_with_other_tokens = false; conflict_free_tokens.for_each([&](Symbol other_token) { if (!coincident_token_index.contains(token, other_token) && - (lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) { + lex_table_builder->does_token_shadow_other(token, other_token)) { LOG( "exclude %s: conflicts with %s", symbol_name(token).c_str(), @@ -517,7 +512,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder { // Do not add a token if it conflicts with an existing token. if (!new_token.is_built_in()) { for (const auto &entry : state.terminal_entries) { - if (lex_table_builder->get_conflict_status(entry.first, new_token) & CannotDistinguish) { + if (lex_table_builder->does_token_shadow_other(new_token, entry.first) || + lex_table_builder->does_token_match_same_string_as_other(new_token, entry.first)) { LOG_IF( logged_conflict_tokens.insert({entry.first, new_token}).second, "cannot merge parse states due to token conflict: %s and %s",