diff --git a/src/compiler/build_tables/lex_table_builder.cc b/src/compiler/build_tables/lex_table_builder.cc index 37705849..b43f9d09 100644 --- a/src/compiler/build_tables/lex_table_builder.cc +++ b/src/compiler/build_tables/lex_table_builder.cc @@ -169,21 +169,43 @@ class LexTableBuilderImpl : public LexTableBuilder { } } + // Find a 'keyword capture token' that matches all of the identified keywords. for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) { Symbol symbol = Symbol::terminal(i); - bool matches_all_keywords = true; keyword_symbols.for_each([&](Symbol keyword_symbol) { if (!shadowed_tokens_by_token[keyword_symbol.index].count(symbol)) { matches_all_keywords = false; } }); + if (!matches_all_keywords) continue; - if (matches_all_keywords && ( - keyword_capture_token == rules::NONE() || - shadowed_tokens_by_token[symbol.index].size() < - shadowed_tokens_by_token[keyword_capture_token.index].size() - )) keyword_capture_token = symbol; + // Don't use a token to capture keywords if it overlaps with separator characters. + AllCharacterAggregator capture_aggregator; + capture_aggregator.apply(grammar.variables[i].rule); + if (capture_aggregator.result.includes_all) continue; + if (capture_aggregator.result.intersects(separator_start_characters)) continue; + + // Don't use a token to capture keywords if it conflicts with other tokens + // that occur in the same state as a keyword. + bool shadows_other_tokens = false; + for (auto shadowed_token : shadowed_tokens_by_token[i]) { + if (!keyword_symbols.contains(shadowed_token) && + keyword_symbols.intersects(coincident_tokens_by_token[shadowed_token.index])) { + shadows_other_tokens = true; + break; + } + } + if (shadows_other_tokens) continue; + + // If multiple keyword capture tokens are found, don't bother extracting + // the keywords into their own function. 
+ if (keyword_capture_token == rules::NONE()) { + keyword_capture_token = symbol; + } else { + keyword_capture_token = rules::NONE(); + break; + } } } diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index 7b33cca2..8b16add4 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -32,6 +32,14 @@ bool LookaheadSet::contains(const Symbol &symbol) const { return bits.size() > static_cast(symbol.index) && bits[symbol.index]; } +bool LookaheadSet::intersects(const LookaheadSet &other) const { /* Returns true when at least one symbol visited by this set's for_each is also contained in `other`; false otherwise. */ + bool result = false; + for_each([&](Symbol symbol) { + if (other.contains(symbol)) result = true; /* records a shared symbol; the callback itself does not stop the traversal */ + }); + return result; +} + size_t LookaheadSet::size() const { size_t result = 0; for (bool bit : external_bits) if (bit) result++; diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h index 8b824873..6898b5a2 100644 --- a/src/compiler/build_tables/lookahead_set.h +++ b/src/compiler/build_tables/lookahead_set.h @@ -22,6 +22,7 @@ class LookaheadSet { bool contains(const rules::Symbol &) const; bool insert_all(const LookaheadSet &); bool insert(const rules::Symbol &); + bool intersects(const LookaheadSet &) const; /* true if this set and the argument share any symbol */ template void for_each(const Callback &callback) const {