Be less conservative in merging parse states with external tokens
Also, clean up the internal representation of external tokens
This commit is contained in:
parent
fe29173d5f
commit
b7d0606fbd
7 changed files with 42 additions and 51 deletions
|
|
@ -478,28 +478,20 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
if (entry.actions.back().type != ParseActionTypeReduce) return false;
|
||||
if (!has_actions(state, entry)) return false;
|
||||
|
||||
// Do not add external tokens; they could conflict lexically with any
|
||||
// of the state's existing lookahead tokens.
|
||||
// Do not add external tokens; they could conflict lexically with any of the state's
|
||||
// existing lookahead tokens.
|
||||
if (new_token.is_external()) return false;
|
||||
|
||||
// Do not add tokens which are both internal and external. Their validity could
|
||||
// influence the behavior of the external scanner.
|
||||
for (const ExternalToken &external_token : grammar.external_tokens) {
|
||||
if (external_token.corresponding_internal_token == new_token) return false;
|
||||
}
|
||||
|
||||
// Do not add a token if it conflicts with an existing token.
|
||||
if (!new_token.is_built_in()) {
|
||||
const auto &incompatible_tokens = lex_table_builder->get_incompatible_tokens(new_token.index);
|
||||
if (!incompatible_tokens.empty()) {
|
||||
for (const auto &pair : state.terminal_entries) {
|
||||
const Symbol &existing_token = pair.first;
|
||||
|
||||
// Do not add a token if it conflicts with any token in the follow set
|
||||
// of an existing external token.
|
||||
if (existing_token.is_external()) {
|
||||
const LookaheadSet &following_tokens = following_tokens_by_token[existing_token];
|
||||
for (auto &incompatible_token : incompatible_tokens) {
|
||||
if (following_tokens.contains(incompatible_token)) return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Do not add a token if it conflicts with an existing token.
|
||||
if (incompatible_tokens.count(existing_token)) return false;
|
||||
}
|
||||
for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(new_token.index)) {
|
||||
if (state.terminal_entries.count(incompatible_token)) return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ struct InputGrammar {
|
|||
std::vector<Variable> variables;
|
||||
std::vector<rules::Rule> extra_tokens;
|
||||
std::vector<std::unordered_set<rules::NamedSymbol>> expected_conflicts;
|
||||
std::vector<Variable> external_tokens;
|
||||
std::vector<rules::Rule> external_tokens;
|
||||
std::unordered_set<rules::NamedSymbol> variables_to_inline;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -354,15 +354,7 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
error_message = "Invalid external token: " + result.error_message;
|
||||
goto error;
|
||||
}
|
||||
|
||||
grammar.external_tokens.push_back(result.rule.match(
|
||||
[](rules::NamedSymbol named_symbol) {
|
||||
return Variable{named_symbol.value, VariableTypeNamed, named_symbol};
|
||||
},
|
||||
[](auto rule) {
|
||||
return Variable{"", VariableTypeAnonymous, rule};
|
||||
}
|
||||
));
|
||||
grammar.external_tokens.push_back(result.rule);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -179,10 +179,10 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
|
||||
vector<Variable> processed_external_tokens;
|
||||
for (const auto &external_token : grammar.external_tokens) {
|
||||
processed_external_tokens.push_back({
|
||||
processed_external_tokens.push_back(Variable{
|
||||
external_token.name,
|
||||
external_token.type,
|
||||
extractor.apply(external_token.rule)
|
||||
extractor.apply(external_token.rule),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -312,13 +312,13 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
syntax_grammar.external_tokens.push_back(ExternalToken{
|
||||
external_token.name,
|
||||
external_token.type,
|
||||
rules::NONE()
|
||||
rules::NONE(),
|
||||
});
|
||||
} else {
|
||||
syntax_grammar.external_tokens.push_back(ExternalToken{
|
||||
lexical_grammar.variables[symbol.index].name,
|
||||
external_token.type,
|
||||
symbol
|
||||
symbol,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class SymbolInterner {
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
|
||||
if (grammar.external_tokens[i].name == named_symbol.value) {
|
||||
if (grammar.external_tokens[i] == named_symbol) {
|
||||
return Symbol::external(i);
|
||||
}
|
||||
}
|
||||
|
|
@ -96,16 +96,30 @@ pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar)
|
|||
|
||||
SymbolInterner interner(grammar);
|
||||
|
||||
for (auto &external_token : grammar.external_tokens) {
|
||||
auto new_rule = interner.apply(external_token.rule);
|
||||
for (const Rule &external_token : grammar.external_tokens) {
|
||||
string external_token_name;
|
||||
VariableType external_token_type = VariableTypeAnonymous;
|
||||
external_token.match(
|
||||
[&](rules::NamedSymbol named_symbol) {
|
||||
external_token_name = named_symbol.value;
|
||||
if (external_token_name[0] == '_') {
|
||||
external_token_type = VariableTypeHidden;
|
||||
} else {
|
||||
external_token_type =VariableTypeNamed;
|
||||
}
|
||||
},
|
||||
[](auto rule) {}
|
||||
);
|
||||
|
||||
auto new_rule = interner.apply(external_token);
|
||||
if (!interner.missing_rule_name.empty()) {
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
}
|
||||
|
||||
result.external_tokens.push_back(Variable{
|
||||
external_token.name,
|
||||
external_token.name[0] == '_' ? VariableTypeHidden : external_token.type,
|
||||
new_rule
|
||||
external_token_name,
|
||||
external_token_type,
|
||||
new_rule,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ struct InternedGrammar {
|
|||
std::vector<rules::Rule> extra_tokens;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
std::vector<Variable> external_tokens;
|
||||
std::set<rules::Symbol> blank_external_tokens;
|
||||
std::set<rules::Symbol> variables_to_inline;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -75,16 +75,8 @@ describe("intern_symbols", []() {
|
|||
{},
|
||||
{},
|
||||
{
|
||||
Variable{
|
||||
"w",
|
||||
VariableTypeNamed,
|
||||
NamedSymbol{"w"}
|
||||
},
|
||||
Variable{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
NamedSymbol{"z"}
|
||||
},
|
||||
NamedSymbol{"w"},
|
||||
NamedSymbol{"z"},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
|
@ -95,12 +87,12 @@ describe("intern_symbols", []() {
|
|||
Variable{
|
||||
"w",
|
||||
VariableTypeNamed,
|
||||
Symbol::external(0)
|
||||
Symbol::external(0),
|
||||
},
|
||||
Variable{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
Symbol::non_terminal(2)
|
||||
Symbol::non_terminal(2),
|
||||
},
|
||||
}))
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue