From 128edbebd6210a20814e92fc2abdf50862e545e6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 8 Mar 2018 12:53:28 -0800 Subject: [PATCH] Eliminate non-user-visible unit reductions from parse tables --- .../build_tables/parse_table_builder.cc | 138 +++++++++++++++--- src/compiler/parse_table.cc | 1 - 2 files changed, 117 insertions(+), 22 deletions(-) diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index 9e581512..caa24d44 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -110,6 +110,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder { build_error_parse_state(error_state_id); remove_precedence_values(); remove_duplicate_parse_states(); + eliminate_unit_reductions(); + populate_used_terminals(); auto lex_table_result = lex_table_builder->build(&parse_table); return { @@ -222,7 +224,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { // Only add the highest-precedence Reduce actions to the parse table. // If other lower-precedence actions are possible, ignore them. 
if (entry.actions.empty()) { - parse_table.add_terminal_action(state_id, lookahead, action); + entry.actions.push_back(action); } else { ParseAction &existing_action = entry.actions[0]; if (existing_action.type == ParseActionTypeAccept) { @@ -376,30 +378,96 @@ class ParseTableBuilderImpl : public ParseTableBuilder { } } - vector new_state_ids(parse_table.states.size()); - size_t deleted_state_count = 0; - auto deleted_state_iter = deleted_states.begin(); - for (ParseStateId i = 0; i < new_state_ids.size(); i++) { - while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) { - deleted_state_count++; - deleted_state_iter++; + delete_parse_states(deleted_states); + } + + void eliminate_unit_reductions() { + + // Find all the "unit reduction states" - states whose only actions are unit reductions, + // all of which reduce by the same symbol. Store the symbols along with the state indices. + unordered_map unit_reduction_states; + for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) { + ParseState &state = parse_table.states[i]; + bool only_unit_reductions = true; + Symbol::Index unit_reduction_symbol = -1; + + for (auto &entry : state.terminal_entries) { + for (ParseAction &action : entry.second.actions) { + if (action.extra) continue; + if (action.type == ParseActionTypeReduce && + action.consumed_symbol_count == 1 && + action.alias_sequence_id == 0 && + grammar.variables[action.symbol.index].type != VariableTypeNamed && + (unit_reduction_symbol == -1 || unit_reduction_symbol == action.symbol.index) + ) { + unit_reduction_symbol = action.symbol.index; + } else { + only_unit_reductions = false; + break; + } + } + + if (!only_unit_reductions) break; } - new_state_ids[i] = i - deleted_state_count; + + if (only_unit_reductions) unit_reduction_states[i] = unit_reduction_symbol; } - ParseStateId original_state_index = 0; - auto iter = parse_table.states.begin(); - while (iter != parse_table.states.end()) { - if 
(deleted_states.count(original_state_index)) { - iter = parse_table.states.erase(iter); - } else { - ParseState &state = *iter; - state.each_referenced_state([&new_state_ids](ParseStateId *state_index) { - *state_index = new_state_ids[*state_index]; - }); - ++iter; + // Update each parse state so that the parser never enters these "unit reduction states". + for (ParseState &state : parse_table.states) { + + // Update all of the shift actions associated with non-terminals. If a shift action + // points to a unit reduction state, update it to point directly at the same state + // as the shift action that's associated with the unit reduction state's non-terminal. + for (auto entry = state.nonterminal_entries.begin(); + entry != state.nonterminal_entries.end();) { + const auto &unit_reduction_entry = unit_reduction_states.find(entry->second); + if (unit_reduction_entry != unit_reduction_states.end() && + unit_reduction_entry->first == entry->second) { + auto entry_for_reduced_symbol = state.nonterminal_entries.find(unit_reduction_entry->second); + if (entry_for_reduced_symbol != state.nonterminal_entries.end()) { + entry->second = entry_for_reduced_symbol->second; + } else { + entry = state.nonterminal_entries.erase(entry); + continue; + } + } + ++entry; + } + + // Update all of the shift actions associated with terminals in the same way. 
+ for (auto entry = state.terminal_entries.begin(); entry != state.terminal_entries.end();) { + auto &last_action = entry->second.actions.back(); + if (last_action.type == ParseActionTypeShift) { + const auto &unit_reduction_entry = unit_reduction_states.find(last_action.state_index); + if (unit_reduction_entry != unit_reduction_states.end() && + unit_reduction_entry->first == last_action.state_index) { + auto entry_for_reduced_symbol = state.nonterminal_entries.find(unit_reduction_entry->second); + if (entry_for_reduced_symbol != state.nonterminal_entries.end()) { + last_action.state_index = entry_for_reduced_symbol->second; + } else { + entry = state.terminal_entries.erase(entry); + continue; + } + } + } + ++entry; + } + } + + // Remove the unit reduction states from the parse table. + set states_to_delete; + for (auto &entry : unit_reduction_states) { + if (entry.first != 1) states_to_delete.insert(entry.first); + } + delete_parse_states(states_to_delete); + } + + void populate_used_terminals() { + for (const ParseState &state : parse_table.states) { + for (auto &entry : state.terminal_entries) { + parse_table.symbols.insert(entry.first); } - original_state_index++; } } @@ -697,6 +765,34 @@ class ParseTableBuilderImpl : public ParseTableBuilder { return description; } + void delete_parse_states(const set deleted_states) { + vector new_state_ids(parse_table.states.size()); + size_t deleted_state_count = 0; + auto deleted_state_iter = deleted_states.begin(); + for (ParseStateId i = 0; i < new_state_ids.size(); i++) { + while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) { + deleted_state_count++; + deleted_state_iter++; + } + new_state_ids[i] = i - deleted_state_count; + } + + ParseStateId original_state_index = 0; + auto iter = parse_table.states.begin(); + while (iter != parse_table.states.end()) { + if (deleted_states.count(original_state_index)) { + iter = parse_table.states.erase(iter); + } else { + ParseState &state = *iter; + 
state.each_referenced_state([&new_state_ids](ParseStateId *state_index) { + *state_index = new_state_ids[*state_index]; + }); + ++iter; + } + original_state_index++; + } + } + string symbol_name(const rules::Symbol &symbol) const { if (symbol.is_built_in()) { if (symbol == END_OF_INPUT()) diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 0f11a8a4..0252e7ad 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -146,7 +146,6 @@ bool ParseState::operator==(const ParseState &other) const { ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, Symbol lookahead, ParseAction action) { - symbols.insert(lookahead); ParseTableEntry &entry = states[state_id].terminal_entries[lookahead]; entry.actions.push_back(action); return *entry.actions.rbegin();