From a199b217f35b4e36f93c3a7f2522a5a2d83ba344 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2017 12:54:29 -0700 Subject: [PATCH] Optimize ParseTableBuilder for non-terminals w/ many productions --- .../build_tables/parse_item_set_builder.cc | 131 +++++++++--------- .../build_tables/parse_item_set_builder.h | 1 - 2 files changed, 66 insertions(+), 66 deletions(-) diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index 77fde864..236be6f0 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -1,6 +1,7 @@ #include "compiler/build_tables/parse_item_set_builder.h" #include #include +#include #include #include #include "compiler/syntax_grammar.h" @@ -12,10 +13,10 @@ namespace build_tables { using std::vector; using std::set; -using std::map; using std::get; using std::pair; using std::tuple; +using std::unordered_map; using std::make_tuple; using rules::Symbol; using rules::NONE; @@ -79,58 +80,62 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, } } } + last_sets.insert({symbol, last_set}); } - vector components_to_process; + struct NonTerminalQueueEntry { + Symbol::Index non_terminal; + LookaheadSet lookaheads; + bool propagates_lookaheads; + }; + + vector non_terminal_queue_entry; for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { Symbol symbol = Symbol::non_terminal(i); - map> cache_entry; - components_to_process.clear(); - for (const Production &production : grammar.variables[i].productions) { - components_to_process.push_back(ParseItemSetComponent{ - ParseItem(symbol, production, 0), - LookaheadSet(), - true - }); - } + unordered_map> cached_lookaheads_by_non_terminal; - while (!components_to_process.empty()) { - ParseItemSetComponent component = components_to_process.back(); - ParseItem &item = component.item; - LookaheadSet &lookaheads = component.lookaheads; - 
components_to_process.pop_back(); + non_terminal_queue_entry.clear(); + non_terminal_queue_entry.push_back({ + symbol.index, + LookaheadSet(), + true + }); - bool component_is_new; - if (component.propagates_lookaheads) { - component_is_new = !cache_entry[item].second; - cache_entry[item].second = true; + while (!non_terminal_queue_entry.empty()) { + NonTerminalQueueEntry queue_entry = non_terminal_queue_entry.back(); + non_terminal_queue_entry.pop_back(); + + bool queue_entry_is_new; + auto &cache_entry = cached_lookaheads_by_non_terminal[queue_entry.non_terminal]; + if (queue_entry.propagates_lookaheads) { + queue_entry_is_new = !cache_entry.second; + cache_entry.second = true; } else { - component_is_new = cache_entry[item].first.insert_all(lookaheads); + queue_entry_is_new = cache_entry.first.insert_all(queue_entry.lookaheads); } - if (component_is_new) { - Symbol next_symbol = item.next_symbol(); - if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) - continue; + if (queue_entry_is_new) { + for (const Production &production : grammar.variables[queue_entry.non_terminal].productions) { + if (production.empty()) continue; + Symbol next_symbol = production.at(0).symbol; + if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) continue; - LookaheadSet next_lookaheads; - bool propagates_lookaheads; - size_t next_step = item.step_index + 1; - if (next_step == item.production->size()) { - next_lookaheads = lookaheads; - propagates_lookaheads = component.propagates_lookaheads; - } else { - Symbol symbol_after_next = item.production->at(next_step).symbol; - next_lookaheads = first_sets.find(symbol_after_next)->second; - propagates_lookaheads = false; - } + LookaheadSet next_lookaheads; + bool propagates_lookaheads; + if (production.size() == 1) { + next_lookaheads = queue_entry.lookaheads; + propagates_lookaheads = queue_entry.propagates_lookaheads; + } else { + Symbol symbol_after_next = production.at(1).symbol; + next_lookaheads = 
first_sets.find(symbol_after_next)->second; + propagates_lookaheads = false; + } - for (const Production &production : grammar.variables[next_symbol.index].productions) { - components_to_process.push_back(ParseItemSetComponent{ - ParseItem(next_symbol, production, 0), + non_terminal_queue_entry.push_back({ + next_symbol.index, next_lookaheads, propagates_lookaheads }); @@ -138,45 +143,41 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, } } - for (auto &pair : cache_entry) { - component_cache[symbol.index].push_back(ParseItemSetComponent{ - pair.first, - pair.second.first, - pair.second.second - }); + for (auto &pair : cached_lookaheads_by_non_terminal) { + for (const Production &production : grammar.variables[pair.first].productions) { + component_cache[i].push_back({ + ParseItem(Symbol::non_terminal(pair.first), production, 0), + pair.second.first, + pair.second.second + }); + } } } } void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { - item_set_buffer.clear(); - for (const auto &pair : item_set->entries) { const ParseItem &item = pair.first; const LookaheadSet &lookaheads = pair.second; + if (item.lhs() != rules::START() && item.step_index == 0) continue; const Symbol &next_symbol = item.next_symbol(); - if (next_symbol.is_non_terminal() && !next_symbol.is_built_in()) { - LookaheadSet next_lookaheads; - size_t next_step = item.step_index + 1; - if (next_step == item.production->size()) { - next_lookaheads = lookaheads; - } else { - Symbol symbol_after_next = item.production->at(next_step).symbol; - next_lookaheads = first_sets.find(symbol_after_next)->second; - } + if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) continue; - for (const ParseItemSetComponent &component : component_cache[next_symbol.index]) { - item_set_buffer.push_back({component.item, component.lookaheads}); - if (component.propagates_lookaheads) { - item_set_buffer.push_back({component.item, next_lookaheads}); - } - } + 
LookaheadSet next_lookaheads; + size_t next_step = item.step_index + 1; + if (next_step == item.production->size()) { + next_lookaheads = lookaheads; + } else { + Symbol symbol_after_next = item.production->at(next_step).symbol; + next_lookaheads = first_sets.find(symbol_after_next)->second; } - } - for (const auto &buffer_entry : item_set_buffer) { - item_set->entries[buffer_entry.first].insert_all(buffer_entry.second); + for (const ParseItemSetComponent &component : component_cache[next_symbol.index]) { + LookaheadSet &current_lookaheads = item_set->entries[component.item]; + current_lookaheads.insert_all(component.lookaheads); + if (component.propagates_lookaheads) current_lookaheads.insert_all(next_lookaheads); + } } } diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index b0334e68..5357a385 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -22,7 +22,6 @@ class ParseItemSetBuilder { std::map first_sets; std::map last_sets; std::map> component_cache; - std::vector> item_set_buffer; public: ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);