From 06215607d13773fdb4762012ca566b05543b9ba2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 19 Nov 2016 22:35:23 -0800 Subject: [PATCH] Precompute transitive closure contributions by grammar symbol --- spec/helpers/stream_methods.cc | 6 +- .../build_tables/parse_item_set_builder.cc | 155 +++++++++++------- .../build_tables/parse_item_set_builder.h | 4 +- 3 files changed, 105 insertions(+), 60 deletions(-) diff --git a/spec/helpers/stream_methods.cc b/spec/helpers/stream_methods.cc index cf35512d..4d411d66 100644 --- a/spec/helpers/stream_methods.cc +++ b/spec/helpers/stream_methods.cc @@ -132,7 +132,11 @@ std::ostream &operator<<(std::ostream &stream, const ParseItemSet &item_set) { } std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) { - return stream << *set.entries; + if (set.entries.get()) { + return stream << *set.entries; + } else { + return stream << "{}"; + } } } // namespace build_tables diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index 45406f88..24408e6d 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -13,6 +13,7 @@ using std::vector; using std::set; using std::map; using std::get; +using std::pair; using std::tuple; using std::make_tuple; using std::shared_ptr; @@ -20,28 +21,36 @@ using std::make_shared; using rules::Symbol; using rules::NONE; -ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, - const LexicalGrammar &lexical_grammar) : - grammar{&grammar} { - vector symbol_stack; - set processed_symbols; +static Symbol::Index PROPAGATE = -5; - for (size_t i = 0; i < grammar.variables.size(); i++) { +ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, + const LexicalGrammar &lexical_grammar) { + vector> items_to_process; + vector symbols_to_process; + set processed_non_terminals; + + for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) { + Symbol symbol(i, true); + first_sets.insert({symbol, LookaheadSet({ static_cast(i) })}); + } + + for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { Symbol symbol(i); LookaheadSet first_set; - processed_symbols.clear(); - symbol_stack.clear(); - symbol_stack.push_back(symbol); - while (!symbol_stack.empty()) { - Symbol current_symbol = symbol_stack.back(); - symbol_stack.pop_back(); + processed_non_terminals.clear(); + symbols_to_process.clear(); + symbols_to_process.push_back(symbol); + while (!symbols_to_process.empty()) { + Symbol current_symbol = symbols_to_process.back(); + symbols_to_process.pop_back(); + if (current_symbol.is_token) { first_set.insert(current_symbol.index); - } else if (processed_symbols.insert(current_symbol).second) { + } else if (processed_non_terminals.insert(current_symbol.index).second) { for (const Production &production : grammar.productions(current_symbol)) { if (!production.empty()) { - symbol_stack.push_back(production[0].symbol); + symbols_to_process.push_back(production[0].symbol); } } } @@ -50,55 +59,87 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, first_sets.insert({symbol, first_set}); } - for (size_t i = 0; i < lexical_grammar.variables.size(); i++) { - Symbol symbol(i, true); - first_sets.insert({symbol, LookaheadSet({ static_cast(i) })}); + for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { + Symbol symbol(i); + ParseItemSet item_set; + + items_to_process.clear(); + for (const Production &production : grammar.productions(symbol)) { + items_to_process.push_back({ + ParseItem(symbol, production, 0), + LookaheadSet({ PROPAGATE }), + }); + } + + while (!items_to_process.empty()) { + ParseItem item = items_to_process.back().first; + LookaheadSet lookaheads = items_to_process.back().second; + items_to_process.pop_back(); + + if (item_set.entries[item].insert_all(lookaheads)) { + Symbol next_symbol = item.next_symbol(); + if (next_symbol.is_built_in() || next_symbol.is_token) + continue; + + LookaheadSet next_lookaheads; + size_t next_step = item.step_index + 1; + if (next_step == item.production->size()) { + next_lookaheads = lookaheads; + } else { + Symbol symbol_after_next = item.production->at(next_step).symbol; + next_lookaheads = first_sets.find(symbol_after_next)->second; + } + + for (const Production &production : grammar.productions(next_symbol)) { + items_to_process.push_back({ + ParseItem(next_symbol, production, 0), + next_lookaheads, + }); + } + } + } + + cached_item_sets.insert({symbol.index, item_set}); } } void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { - items_to_process.clear(); - for (const auto &entry : item_set->entries) { - items_to_process.push_back(make_tuple(entry.first, entry.second, true)); + item_set_buffer.clear(); + for (const auto &pair : item_set->entries) { + const ParseItem &item = pair.first; + const LookaheadSet &lookaheads = pair.second; + + const Symbol &next_symbol = item.next_symbol(); + if (!next_symbol.is_token && !next_symbol.is_built_in()) { + LookaheadSet next_lookaheads; + size_t next_step = item.step_index + 1; + if (next_step == item.production->size()) { + next_lookaheads = lookaheads; + } else { + Symbol symbol_after_next = item.production->at(next_step).symbol; + next_lookaheads = first_sets.find(symbol_after_next)->second; + } + + for (const auto &cached_pair : cached_item_sets[next_symbol.index].entries) { + const ParseItem &cached_item = cached_pair.first; + const LookaheadSet &cached_lookaheads = cached_pair.second; + + LookaheadSet new_lookaheads; + for (auto entry : *cached_lookaheads.entries) { + if (entry == PROPAGATE) { + new_lookaheads.insert_all(next_lookaheads); + } else { + new_lookaheads.insert(entry); + } + } + + item_set_buffer.push_back({cached_item, new_lookaheads}); + } + } } - while (!items_to_process.empty()) { - ParseItem item = get<0>(items_to_process.back()); - LookaheadSet lookahead_symbols = get<1>(items_to_process.back()); - bool from_original_set = get<2>(items_to_process.back()); - items_to_process.pop_back(); - - // Add the parse-item and lookahead symbols to the item set. - // If they were already present, skip to the next item. - if (!from_original_set && !item_set->entries[item].insert_all(lookahead_symbols)) - continue; - - // If the next symbol in the production is not a non-terminal, skip to the - // next item. - Symbol next_symbol = item.next_symbol(); - if (next_symbol == NONE() || next_symbol.is_token || - next_symbol.is_built_in()) - continue; - - // If the next symbol is the last symbol in the item's production, then the - // lookahead symbols for the new items are the same as for the current item. - // Otherwise, they are the FOLLOW set of the symbol in this production. - LookaheadSet next_lookahead_symbols; - size_t next_step = item.step_index + 1; - if (next_step == item.production->size()) { - next_lookahead_symbols = lookahead_symbols; - } else { - Symbol symbol_after_next = item.production->at(next_step).symbol; - next_lookahead_symbols = first_sets.find(symbol_after_next)->second; - } - - // Add each of the next symbol's productions to be processed recursively. - for (const Production &production : grammar->productions(next_symbol)) - items_to_process.push_back(make_tuple( - ParseItem(next_symbol, production, 0), - next_lookahead_symbols, - false - )); + for (const auto &pair : item_set_buffer) { + item_set->entries[pair.first].insert_all(pair.second); } } diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index db3ca930..6279798a 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -13,9 +13,9 @@ struct LexicalGrammar; namespace build_tables { class ParseItemSetBuilder { - const SyntaxGrammar *grammar; std::map first_sets; - std::vector> items_to_process; + std::map cached_item_sets; + std::vector> item_set_buffer; public: ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);