Ensure keyword capture tokens aren't too loosely defined

This commit is contained in:
Max Brunsfeld 2018-03-07 13:29:06 -08:00
parent c0cc35ff07
commit 53cd89c614
3 changed files with 37 additions and 6 deletions

View file

@ -169,21 +169,43 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
}
// Find a 'keyword capture token' that matches all of the identified keywords.
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol symbol = Symbol::terminal(i);
bool matches_all_keywords = true;
keyword_symbols.for_each([&](Symbol keyword_symbol) {
if (!shadowed_tokens_by_token[keyword_symbol.index].count(symbol)) {
matches_all_keywords = false;
}
});
if (!matches_all_keywords) continue;
if (matches_all_keywords && (
keyword_capture_token == rules::NONE() ||
shadowed_tokens_by_token[symbol.index].size() <
shadowed_tokens_by_token[keyword_capture_token.index].size()
)) keyword_capture_token = symbol;
// Don't use a token to capture keywords if it overlaps with separator characters.
AllCharacterAggregator capture_aggregator;
capture_aggregator.apply(grammar.variables[i].rule);
if (capture_aggregator.result.includes_all) continue;
if (capture_aggregator.result.intersects(separator_start_characters)) continue;
// Don't use a token to capture keywords if it conflicts with other tokens
// that occur in the same state as a keyword.
bool shadows_other_tokens = false;
for (auto shadowed_token : shadowed_tokens_by_token[i]) {
if (!keyword_symbols.contains(shadowed_token) &&
keyword_symbols.intersects(coincident_tokens_by_token[shadowed_token.index])) {
shadows_other_tokens = true;
break;
}
}
if (shadows_other_tokens) continue;
// If multiple keyword capture tokens are found, don't bother extracting
// the keywords into their own function.
if (keyword_capture_token == rules::NONE()) {
keyword_capture_token = symbol;
} else {
keyword_capture_token = rules::NONE();
break;
}
}
}

View file

@ -32,6 +32,14 @@ bool LookaheadSet::contains(const Symbol &symbol) const {
return bits.size() > static_cast<size_t>(symbol.index) && bits[symbol.index];
}
// Reports whether this set shares at least one symbol with `other`.
//
// Each symbol that for_each passes to the callback is looked up in `other`
// via contains(); the answer becomes true as soon as one lookup succeeds and
// stays true for the remainder of the walk.
bool LookaheadSet::intersects(const LookaheadSet &other) const {
  bool found_common = false;
  for_each([&other, &found_common](Symbol candidate) {
    // Once a shared symbol has been seen, the flag simply stays set.
    found_common = found_common || other.contains(candidate);
  });
  return found_common;
}
size_t LookaheadSet::size() const {
size_t result = 0;
for (bool bit : external_bits) if (bit) result++;

View file

@ -22,6 +22,7 @@ class LookaheadSet {
bool contains(const rules::Symbol &) const;
bool insert_all(const LookaheadSet &);
bool insert(const rules::Symbol &);
bool intersects(const LookaheadSet &) const;
template <typename Callback>
void for_each(const Callback &callback) const {