Fix unit reduction elimination bugs
* Handle 'chains' of unit reductions starting in a single state
* Avoid eliminating rules which will later receive aliases
This commit is contained in:
parent
72849787b1
commit
7183f8d3e7
4 changed files with 112 additions and 35 deletions
|
|
@ -382,6 +382,16 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
}
|
||||
|
||||
void eliminate_unit_reductions() {
|
||||
set<Symbol::Index> aliased_symbols;
|
||||
for (auto &variable : grammar.variables) {
|
||||
for (auto &production : variable.productions) {
|
||||
for (auto &step : production) {
|
||||
if (!step.alias.value.empty()) {
|
||||
aliased_symbols.insert(step.symbol.index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find all the "unit reduction states" - states whose only actions are unit reductions,
|
||||
// all of which reduce by the same symbol. Store the symbols along with the state indices.
|
||||
|
|
@ -391,12 +401,15 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
bool only_unit_reductions = true;
|
||||
Symbol::Index unit_reduction_symbol = -1;
|
||||
|
||||
if (!state.nonterminal_entries.empty()) continue;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
for (ParseAction &action : entry.second.actions) {
|
||||
if (action.extra) continue;
|
||||
if (action.type == ParseActionTypeReduce &&
|
||||
action.consumed_symbol_count == 1 &&
|
||||
action.alias_sequence_id == 0 &&
|
||||
!aliased_symbols.count(action.symbol.index) &&
|
||||
grammar.variables[action.symbol.index].type != VariableTypeNamed &&
|
||||
(unit_reduction_symbol == -1 || unit_reduction_symbol == action.symbol.index)
|
||||
) {
|
||||
|
|
@ -414,44 +427,21 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
}
|
||||
|
||||
// Update each parse state so that the parser never enters these "unit reduction states".
|
||||
// If a shift action points to a unit reduction state, update it to point directly at
|
||||
// the same state as the shift action that's associated with the unit reduction's
|
||||
// non-terminal.
|
||||
for (ParseState &state : parse_table.states) {
|
||||
|
||||
// Update all of the shift actions associated with terminals. If a shift action
|
||||
// points to a unit reduction state, update it to point directly at the same state
|
||||
// as the shift action that's associated with the unit reduction state's non-terminal.
|
||||
for (auto entry = state.nonterminal_entries.begin();
|
||||
entry != state.nonterminal_entries.end();) {
|
||||
const auto &unit_reduction_entry = unit_reduction_states.find(entry->second);
|
||||
if (unit_reduction_entry != unit_reduction_states.end() &&
|
||||
unit_reduction_entry->first == entry->second) {
|
||||
auto entry_for_reduced_symbol = state.nonterminal_entries.find(unit_reduction_entry->second);
|
||||
if (entry_for_reduced_symbol != state.nonterminal_entries.end()) {
|
||||
entry->second = entry_for_reduced_symbol->second;
|
||||
} else {
|
||||
entry = state.nonterminal_entries.erase(entry);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
++entry;
|
||||
}
|
||||
|
||||
// Update all of the shift actions associated with non-terminals in the same way.
|
||||
for (auto entry = state.terminal_entries.begin(); entry != state.terminal_entries.end();) {
|
||||
auto &last_action = entry->second.actions.back();
|
||||
if (last_action.type == ParseActionTypeShift) {
|
||||
const auto &unit_reduction_entry = unit_reduction_states.find(last_action.state_index);
|
||||
if (unit_reduction_entry != unit_reduction_states.end() &&
|
||||
unit_reduction_entry->first == last_action.state_index) {
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
done = true;
|
||||
state.each_referenced_state([&](ParseStateId *state_id) {
|
||||
const auto &unit_reduction_entry = unit_reduction_states.find(*state_id);
|
||||
if (unit_reduction_entry != unit_reduction_states.end()) {
|
||||
auto entry_for_reduced_symbol = state.nonterminal_entries.find(unit_reduction_entry->second);
|
||||
if (entry_for_reduced_symbol != state.nonterminal_entries.end()) {
|
||||
last_action.state_index = entry_for_reduced_symbol->second;
|
||||
} else {
|
||||
entry = state.terminal_entries.erase(entry);
|
||||
continue;
|
||||
}
|
||||
*state_id = entry_for_reduced_symbol->second;
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
++entry;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
12
test/fixtures/test_grammars/aliased_unit_reductions/corpus.txt
vendored
Normal file
12
test/fixtures/test_grammars/aliased_unit_reductions/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
==========================================
|
||||
Aliases on rules that are unit reductions
|
||||
==========================================
|
||||
|
||||
one two three;
|
||||
|
||||
---
|
||||
|
||||
(statement
|
||||
(identifier)
|
||||
(b_prime (identifier))
|
||||
(c_prime (identifier)))
|
||||
70
test/fixtures/test_grammars/aliased_unit_reductions/grammar.json
vendored
Normal file
70
test/fixtures/test_grammars/aliased_unit_reductions/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
{
|
||||
"name": "aliased_unit_reductions",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_a"},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"named": true,
|
||||
"value": "b_prime",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_b"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"named": true,
|
||||
"value": "c_prime",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_c"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ";"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"_a": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_A"
|
||||
},
|
||||
|
||||
"_A": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
|
||||
"_b": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_B"
|
||||
},
|
||||
|
||||
"_B": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
|
||||
"_c": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_C"
|
||||
},
|
||||
|
||||
"_C": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
|
||||
"identifier": {"type": "PATTERN", "value": "[a-z]+"}
|
||||
}
|
||||
}
|
||||
5
test/fixtures/test_grammars/aliased_unit_reductions/readme.md
vendored
Normal file
5
test/fixtures/test_grammars/aliased_unit_reductions/readme.md
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
Normally, when there are invisible rules (rules whose names start with an `_`) that simply
|
||||
wrap another rule, there is an optimization at parser-generation time called *Unit Reduction Elimination* that avoids creating nodes for those rules at runtime. One case where this
|
||||
optimization must *not* be applied is when those invisible rules are going to be aliased
|
||||
within their parent rule. In that situation, eliminating the invisible node could cause
|
||||
the alias to be incorrectly applied to its child.
|
||||
Loading…
Add table
Add a link
Reference in a new issue