Update parse state merging logic for explicit word tokens
Co-Authored-By: Ashi Krishnan <queerviolet@github.com>
This commit is contained in:
parent
30b0d1c833
commit
91e3bc3e55
3 changed files with 37 additions and 24 deletions
|
|
@ -49,6 +49,19 @@ using rules::Symbol;
|
|||
using rules::Metadata;
|
||||
using rules::Seq;
|
||||
|
||||
// Result type of get_conflict_status() for a pair of tokens.
// DoesNotMatch is zero; every other basic enumerator occupies its own bit
// (bits 0 through 3), so values can be OR-ed together and tested with `&`.
enum ConflictStatus {
|
||||
DoesNotMatch = 0,
|
||||
MatchesShorterStringWithinSeparators = 1 << 0,
|
||||
MatchesSameString = 1 << 1,
|
||||
MatchesLongerString = 1 << 2,
|
||||
MatchesLongerStringWithValidNextChar = 1 << 3,
|
||||
// Composite mask of three of the flags above. MatchesLongerString (bit 2)
// is deliberately not part of it.
CannotDistinguish = (
|
||||
MatchesShorterStringWithinSeparators |
|
||||
MatchesSameString |
|
||||
MatchesLongerStringWithValidNextChar
|
||||
),
|
||||
};
|
||||
|
||||
static const std::unordered_set<ParseStateId> EMPTY;
|
||||
|
||||
bool CoincidentTokenIndex::contains(Symbol a, Symbol b) const {
|
||||
|
|
@ -294,6 +307,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
return {main_lex_table, keyword_lex_table, word_rule};
|
||||
}
|
||||
|
||||
// Reports whether `token` shadows `shadowed_token`.
//
// Returns false immediately when `token` is the word rule and
// `shadowed_token` is contained in keyword_symbols. Otherwise returns true
// iff the status from get_conflict_status has the
// MatchesShorterStringWithinSeparators bit or the
// MatchesLongerStringWithValidNextChar bit set.
//
// Note the argument order: get_conflict_status is called with
// (shadowed_token, token), i.e. swapped relative to this function's
// parameter order.
bool does_token_shadow_other(Symbol token, Symbol shadowed_token) const {
|
||||
if (token == word_rule && keyword_symbols.contains(shadowed_token)) return false;
|
||||
return get_conflict_status(shadowed_token, token) & (
|
||||
MatchesShorterStringWithinSeparators |
|
||||
MatchesLongerStringWithValidNextChar
|
||||
);
|
||||
}
|
||||
|
||||
// Returns true iff get_conflict_status(shadowed_token, token) has the
// MatchesSameString bit set. The arguments are passed to get_conflict_status
// in swapped order relative to this function's parameters.
bool does_token_match_same_string_as_other(Symbol token, Symbol shadowed_token) const {
|
||||
return get_conflict_status(shadowed_token, token) & MatchesSameString;
|
||||
}
|
||||
|
||||
ConflictStatus get_conflict_status(Symbol shadowed_token, Symbol other_token) const {
|
||||
if (shadowed_token.is_built_in() ||
|
||||
other_token.is_built_in() ||
|
||||
|
|
@ -621,8 +646,12 @@ LexTableBuilder::BuildResult LexTableBuilder::build() {
|
|||
return static_cast<LexTableBuilderImpl *>(this)->build();
|
||||
}
|
||||
|
||||
ConflictStatus LexTableBuilder::get_conflict_status(Symbol a, Symbol b) const {
|
||||
return static_cast<const LexTableBuilderImpl *>(this)->get_conflict_status(a, b);
|
||||
// Forwards to LexTableBuilderImpl::does_token_shadow_other with the same
// arguments. The downcast is an unchecked static_cast of `this`.
// NOTE(review): this is only valid if the object really is a
// LexTableBuilderImpl — confirm that no other subclass is ever constructed.
bool LexTableBuilder::does_token_shadow_other(Symbol a, Symbol b) const {
|
||||
return static_cast<const LexTableBuilderImpl *>(this)->does_token_shadow_other(a, b);
|
||||
}
|
||||
|
||||
// Forwards to LexTableBuilderImpl::does_token_match_same_string_as_other
// with the same arguments. The downcast is an unchecked static_cast of `this`.
// NOTE(review): this is only valid if the object really is a
// LexTableBuilderImpl — confirm that no other subclass is ever constructed.
bool LexTableBuilder::does_token_match_same_string_as_other(Symbol a, Symbol b) const {
|
||||
return static_cast<const LexTableBuilderImpl *>(this)->does_token_match_same_string_as_other(a, b);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -30,19 +30,6 @@ namespace build_tables {
|
|||
|
||||
class LookaheadSet;
|
||||
|
||||
// Flag set describing a conflict between two tokens.
// DoesNotMatch is zero; every other basic enumerator occupies its own bit
// (bits 0 through 3), so values can be OR-ed together and tested with `&`.
enum ConflictStatus {
|
||||
DoesNotMatch = 0,
|
||||
MatchesShorterStringWithinSeparators = 1 << 0,
|
||||
MatchesSameString = 1 << 1,
|
||||
MatchesLongerString = 1 << 2,
|
||||
MatchesLongerStringWithValidNextChar = 1 << 3,
|
||||
// Composite mask of three of the flags above. MatchesLongerString (bit 2)
// is deliberately not part of it.
CannotDistinguish = (
|
||||
MatchesShorterStringWithinSeparators |
|
||||
MatchesSameString |
|
||||
MatchesLongerStringWithValidNextChar
|
||||
),
|
||||
};
|
||||
|
||||
struct CoincidentTokenIndex {
|
||||
std::unordered_map<
|
||||
std::pair<rules::Symbol::Index, rules::Symbol::Index>,
|
||||
|
|
@ -69,7 +56,8 @@ class LexTableBuilder {
|
|||
|
||||
BuildResult build();
|
||||
|
||||
ConflictStatus get_conflict_status(rules::Symbol, rules::Symbol) const;
|
||||
bool does_token_shadow_other(rules::Symbol, rules::Symbol) const;
|
||||
bool does_token_match_same_string_as_other(rules::Symbol, rules::Symbol) const;
|
||||
|
||||
protected:
|
||||
LexTableBuilder() = default;
|
||||
|
|
|
|||
|
|
@ -134,11 +134,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
}
|
||||
|
||||
void build_error_parse_state(ParseStateId state_id) {
|
||||
unsigned CannotMerge = (
|
||||
MatchesShorterStringWithinSeparators |
|
||||
MatchesLongerStringWithValidNextChar
|
||||
);
|
||||
|
||||
parse_table.states[state_id].terminal_entries.clear();
|
||||
|
||||
// First, identify the conflict-free tokens.
|
||||
|
|
@ -149,7 +144,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
for (unsigned j = 0; j < lexical_grammar.variables.size(); j++) {
|
||||
Symbol other_token = Symbol::terminal(j);
|
||||
if (!coincident_token_index.contains(token, other_token) &&
|
||||
(lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) {
|
||||
lex_table_builder->does_token_shadow_other(token, other_token)) {
|
||||
conflicts_with_other_tokens = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -171,7 +166,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
bool conflicts_with_other_tokens = false;
|
||||
conflict_free_tokens.for_each([&](Symbol other_token) {
|
||||
if (!coincident_token_index.contains(token, other_token) &&
|
||||
(lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) {
|
||||
lex_table_builder->does_token_shadow_other(token, other_token)) {
|
||||
LOG(
|
||||
"exclude %s: conflicts with %s",
|
||||
symbol_name(token).c_str(),
|
||||
|
|
@ -517,7 +512,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
// Do not add a token if it conflicts with an existing token.
|
||||
if (!new_token.is_built_in()) {
|
||||
for (const auto &entry : state.terminal_entries) {
|
||||
if (lex_table_builder->get_conflict_status(entry.first, new_token) & CannotDistinguish) {
|
||||
if (lex_table_builder->does_token_shadow_other(new_token, entry.first) ||
|
||||
lex_table_builder->does_token_match_same_string_as_other(new_token, entry.first)) {
|
||||
LOG_IF(
|
||||
logged_conflict_tokens.insert({entry.first, new_token}).second,
|
||||
"cannot merge parse states due to token conflict: %s and %s",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue