Be less conservative in merging parse states with external tokens

Also, clean up the internal representation of external tokens
This commit is contained in:
Max Brunsfeld 2018-03-16 14:56:57 -07:00
parent fe29173d5f
commit b7d0606fbd
7 changed files with 42 additions and 51 deletions

View file

@ -478,28 +478,20 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (entry.actions.back().type != ParseActionTypeReduce) return false;
if (!has_actions(state, entry)) return false;
// Do not add external tokens; they could conflict lexically with any
// of the state's existing lookahead tokens.
// Do not add external tokens; they could conflict lexically with any of the state's
// existing lookahead tokens.
if (new_token.is_external()) return false;
// Do not add tokens which are both internal and external. Their validity could
// influence the behavior of the external scanner.
for (const ExternalToken &external_token : grammar.external_tokens) {
if (external_token.corresponding_internal_token == new_token) return false;
}
// Do not add a token if it conflicts with an existing token.
if (!new_token.is_built_in()) {
const auto &incompatible_tokens = lex_table_builder->get_incompatible_tokens(new_token.index);
if (!incompatible_tokens.empty()) {
for (const auto &pair : state.terminal_entries) {
const Symbol &existing_token = pair.first;
// Do not add a token if it conflicts with any token in the follow set
// of an existing external token.
if (existing_token.is_external()) {
const LookaheadSet &following_tokens = following_tokens_by_token[existing_token];
for (auto &incompatible_token : incompatible_tokens) {
if (following_tokens.contains(incompatible_token)) return false;
}
}
// Do not add a token if it conflicts with an existing token.
if (incompatible_tokens.count(existing_token)) return false;
}
for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(new_token.index)) {
if (state.terminal_entries.count(incompatible_token)) return false;
}
}

View file

@ -30,7 +30,7 @@ struct InputGrammar {
std::vector<Variable> variables;
std::vector<rules::Rule> extra_tokens;
std::vector<std::unordered_set<rules::NamedSymbol>> expected_conflicts;
std::vector<Variable> external_tokens;
std::vector<rules::Rule> external_tokens;
std::unordered_set<rules::NamedSymbol> variables_to_inline;
};

View file

@ -354,15 +354,7 @@ ParseGrammarResult parse_grammar(const string &input) {
error_message = "Invalid external token: " + result.error_message;
goto error;
}
grammar.external_tokens.push_back(result.rule.match(
[](rules::NamedSymbol named_symbol) {
return Variable{named_symbol.value, VariableTypeNamed, named_symbol};
},
[](auto rule) {
return Variable{"", VariableTypeAnonymous, rule};
}
));
grammar.external_tokens.push_back(result.rule);
}
}

View file

@ -179,10 +179,10 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
vector<Variable> processed_external_tokens;
for (const auto &external_token : grammar.external_tokens) {
processed_external_tokens.push_back({
processed_external_tokens.push_back(Variable{
external_token.name,
external_token.type,
extractor.apply(external_token.rule)
extractor.apply(external_token.rule),
});
}
@ -312,13 +312,13 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
syntax_grammar.external_tokens.push_back(ExternalToken{
external_token.name,
external_token.type,
rules::NONE()
rules::NONE(),
});
} else {
syntax_grammar.external_tokens.push_back(ExternalToken{
lexical_grammar.variables[symbol.index].name,
external_token.type,
symbol
symbol,
});
}
}

View file

@ -72,7 +72,7 @@ class SymbolInterner {
}
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
if (grammar.external_tokens[i].name == named_symbol.value) {
if (grammar.external_tokens[i] == named_symbol) {
return Symbol::external(i);
}
}
@ -96,16 +96,30 @@ pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar)
SymbolInterner interner(grammar);
for (auto &external_token : grammar.external_tokens) {
auto new_rule = interner.apply(external_token.rule);
for (const Rule &external_token : grammar.external_tokens) {
string external_token_name;
VariableType external_token_type = VariableTypeAnonymous;
external_token.match(
[&](rules::NamedSymbol named_symbol) {
external_token_name = named_symbol.value;
if (external_token_name[0] == '_') {
external_token_type = VariableTypeHidden;
} else {
external_token_type =VariableTypeNamed;
}
},
[](auto rule) {}
);
auto new_rule = interner.apply(external_token);
if (!interner.missing_rule_name.empty()) {
return { result, missing_rule_error(interner.missing_rule_name) };
}
result.external_tokens.push_back(Variable{
external_token.name,
external_token.name[0] == '_' ? VariableTypeHidden : external_token.type,
new_rule
external_token_name,
external_token_type,
new_rule,
});
}

View file

@ -15,6 +15,7 @@ struct InternedGrammar {
std::vector<rules::Rule> extra_tokens;
std::set<std::set<rules::Symbol>> expected_conflicts;
std::vector<Variable> external_tokens;
std::set<rules::Symbol> blank_external_tokens;
std::set<rules::Symbol> variables_to_inline;
};

View file

@ -75,16 +75,8 @@ describe("intern_symbols", []() {
{},
{},
{
Variable{
"w",
VariableTypeNamed,
NamedSymbol{"w"}
},
Variable{
"z",
VariableTypeNamed,
NamedSymbol{"z"}
},
NamedSymbol{"w"},
NamedSymbol{"z"},
},
{}
};
@ -95,12 +87,12 @@ describe("intern_symbols", []() {
Variable{
"w",
VariableTypeNamed,
Symbol::external(0)
Symbol::external(0),
},
Variable{
"z",
VariableTypeNamed,
Symbol::non_terminal(2)
Symbol::non_terminal(2),
},
}))
});