Eliminate non-user-visible unit reductions from parse tables

This commit is contained in:
Max Brunsfeld 2018-03-08 12:53:28 -08:00
parent f932d592d6
commit 128edbebd6
2 changed files with 117 additions and 22 deletions

View file

@ -110,6 +110,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
build_error_parse_state(error_state_id);
remove_precedence_values();
remove_duplicate_parse_states();
eliminate_unit_reductions();
populate_used_terminals();
auto lex_table_result = lex_table_builder->build(&parse_table);
return {
@ -222,7 +224,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
// Only add the highest-precedence Reduce actions to the parse table.
// If other lower-precedence actions are possible, ignore them.
if (entry.actions.empty()) {
parse_table.add_terminal_action(state_id, lookahead, action);
entry.actions.push_back(action);
} else {
ParseAction &existing_action = entry.actions[0];
if (existing_action.type == ParseActionTypeAccept) {
@ -376,30 +378,96 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
}
}
vector<ParseStateId> new_state_ids(parse_table.states.size());
size_t deleted_state_count = 0;
auto deleted_state_iter = deleted_states.begin();
for (ParseStateId i = 0; i < new_state_ids.size(); i++) {
while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) {
deleted_state_count++;
deleted_state_iter++;
delete_parse_states(deleted_states);
}
void eliminate_unit_reductions() {
// Find all the "unit reduction states" - states whose only actions are unit reductions,
// all of which reduce by the same symbol. Store the symbols along with the state indices.
unordered_map<ParseStateId, Symbol::Index> unit_reduction_states;
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
ParseState &state = parse_table.states[i];
bool only_unit_reductions = true;
Symbol::Index unit_reduction_symbol = -1;
for (auto &entry : state.terminal_entries) {
for (ParseAction &action : entry.second.actions) {
if (action.extra) continue;
if (action.type == ParseActionTypeReduce &&
action.consumed_symbol_count == 1 &&
action.alias_sequence_id == 0 &&
grammar.variables[action.symbol.index].type != VariableTypeNamed &&
(unit_reduction_symbol == -1 || unit_reduction_symbol == action.symbol.index)
) {
unit_reduction_symbol = action.symbol.index;
} else {
only_unit_reductions = false;
break;
}
}
if (!only_unit_reductions) break;
}
new_state_ids[i] = i - deleted_state_count;
if (only_unit_reductions) unit_reduction_states[i] = unit_reduction_symbol;
}
ParseStateId original_state_index = 0;
auto iter = parse_table.states.begin();
while (iter != parse_table.states.end()) {
if (deleted_states.count(original_state_index)) {
iter = parse_table.states.erase(iter);
} else {
ParseState &state = *iter;
state.each_referenced_state([&new_state_ids](ParseStateId *state_index) {
*state_index = new_state_ids[*state_index];
});
++iter;
// Update each parse state so that the parser never enters these "unit reduction states".
for (ParseState &state : parse_table.states) {
// Update all of the shift actions associated with terminals. If a shift action
// points to a unit reduction state, update it to point directly at the same state
// as the shift action that's associated with the unit reduction state's non-terminal.
for (auto entry = state.nonterminal_entries.begin();
entry != state.nonterminal_entries.end();) {
const auto &unit_reduction_entry = unit_reduction_states.find(entry->second);
if (unit_reduction_entry != unit_reduction_states.end() &&
unit_reduction_entry->first == entry->second) {
auto entry_for_reduced_symbol = state.nonterminal_entries.find(unit_reduction_entry->second);
if (entry_for_reduced_symbol != state.nonterminal_entries.end()) {
entry->second = entry_for_reduced_symbol->second;
} else {
entry = state.nonterminal_entries.erase(entry);
continue;
}
}
++entry;
}
// Update all of the shift actions associated with non-terminals in the same way.
for (auto entry = state.terminal_entries.begin(); entry != state.terminal_entries.end();) {
auto &last_action = entry->second.actions.back();
if (last_action.type == ParseActionTypeShift) {
const auto &unit_reduction_entry = unit_reduction_states.find(last_action.state_index);
if (unit_reduction_entry != unit_reduction_states.end() &&
unit_reduction_entry->first == last_action.state_index) {
auto entry_for_reduced_symbol = state.nonterminal_entries.find(unit_reduction_entry->second);
if (entry_for_reduced_symbol != state.nonterminal_entries.end()) {
last_action.state_index = entry_for_reduced_symbol->second;
} else {
entry = state.terminal_entries.erase(entry);
continue;
}
}
}
++entry;
}
}
// Remove the unit reduction states from the parse table.
set<ParseStateId> states_to_delete;
for (auto &entry : unit_reduction_states) {
if (entry.first != 1) states_to_delete.insert(entry.first);
}
delete_parse_states(states_to_delete);
}
void populate_used_terminals() {
for (const ParseState &state : parse_table.states) {
for (auto &entry : state.terminal_entries) {
parse_table.symbols.insert(entry.first);
}
original_state_index++;
}
}
@ -697,6 +765,34 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
return description;
}
void delete_parse_states(const set<ParseStateId> deleted_states) {
vector<ParseStateId> new_state_ids(parse_table.states.size());
size_t deleted_state_count = 0;
auto deleted_state_iter = deleted_states.begin();
for (ParseStateId i = 0; i < new_state_ids.size(); i++) {
while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) {
deleted_state_count++;
deleted_state_iter++;
}
new_state_ids[i] = i - deleted_state_count;
}
ParseStateId original_state_index = 0;
auto iter = parse_table.states.begin();
while (iter != parse_table.states.end()) {
if (deleted_states.count(original_state_index)) {
iter = parse_table.states.erase(iter);
} else {
ParseState &state = *iter;
state.each_referenced_state([&new_state_ids](ParseStateId *state_index) {
*state_index = new_state_ids[*state_index];
});
++iter;
}
original_state_index++;
}
}
string symbol_name(const rules::Symbol &symbol) const {
if (symbol.is_built_in()) {
if (symbol == END_OF_INPUT())

View file

@ -146,7 +146,6 @@ bool ParseState::operator==(const ParseState &other) const {
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
Symbol lookahead,
ParseAction action) {
symbols.insert(lookahead);
ParseTableEntry &entry = states[state_id].terminal_entries[lookahead];
entry.actions.push_back(action);
return *entry.actions.rbegin();