diff --git a/spec/helpers/stream_methods.cc b/spec/helpers/stream_methods.cc index cf35512d..4d411d66 100644 --- a/spec/helpers/stream_methods.cc +++ b/spec/helpers/stream_methods.cc @@ -132,7 +132,11 @@ std::ostream &operator<<(std::ostream &stream, const ParseItemSet &item_set) { } std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) { - return stream << *set.entries; + if (set.entries.get()) { + return stream << *set.entries; + } else { + return stream << "{}"; + } } } // namespace build_tables diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index 45406f88..34b347fe 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -13,6 +13,7 @@ using std::vector; using std::set; using std::map; using std::get; +using std::pair; using std::tuple; using std::make_tuple; using std::shared_ptr; @@ -21,27 +22,32 @@ using rules::Symbol; using rules::NONE; ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, - const LexicalGrammar &lexical_grammar) : - grammar{&grammar} { - vector symbol_stack; - set processed_symbols; + const LexicalGrammar &lexical_grammar) { + vector symbols_to_process; + set processed_non_terminals; - for (size_t i = 0; i < grammar.variables.size(); i++) { + for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) { + Symbol symbol(i, true); + first_sets.insert({symbol, LookaheadSet({ static_cast(i) })}); + } + + for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { Symbol symbol(i); LookaheadSet first_set; - processed_symbols.clear(); - symbol_stack.clear(); - symbol_stack.push_back(symbol); - while (!symbol_stack.empty()) { - Symbol current_symbol = symbol_stack.back(); - symbol_stack.pop_back(); + processed_non_terminals.clear(); + symbols_to_process.clear(); + symbols_to_process.push_back(symbol); + while (!symbols_to_process.empty()) { + Symbol current_symbol = symbols_to_process.back(); + symbols_to_process.pop_back(); + if (current_symbol.is_token) { first_set.insert(current_symbol.index); - } else if (processed_symbols.insert(current_symbol).second) { + } else if (processed_non_terminals.insert(current_symbol.index).second) { for (const Production &production : grammar.productions(current_symbol)) { if (!production.empty()) { - symbol_stack.push_back(production[0].symbol); + symbols_to_process.push_back(production[0].symbol); } } } @@ -50,55 +56,101 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, first_sets.insert({symbol, first_set}); } - for (size_t i = 0; i < lexical_grammar.variables.size(); i++) { - Symbol symbol(i, true); - first_sets.insert({symbol, LookaheadSet({ static_cast(i) })}); + vector components_to_process; + + for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { + Symbol symbol(i); + map> cache_entry; + + components_to_process.clear(); + for (const Production &production : grammar.productions(symbol)) { + components_to_process.push_back(ParseItemSetComponent{ + ParseItem(symbol, production, 0), + LookaheadSet(), + true + }); + } + + while (!components_to_process.empty()) { + ParseItemSetComponent component = components_to_process.back(); + ParseItem &item = component.item; + LookaheadSet &lookaheads = component.lookaheads; + components_to_process.pop_back(); + + bool component_is_new; + if (component.propagates_lookaheads) { + component_is_new = !cache_entry[item].second; + cache_entry[item].second = true; + } else { + component_is_new = cache_entry[item].first.insert_all(lookaheads); + } + + if (component_is_new) { + Symbol next_symbol = item.next_symbol(); + if (next_symbol.is_built_in() || next_symbol.is_token) + continue; + + LookaheadSet next_lookaheads; + bool propagates_lookaheads; + size_t next_step = item.step_index + 1; + if (next_step == item.production->size()) { + next_lookaheads = lookaheads; + propagates_lookaheads = component.propagates_lookaheads; + } else { + Symbol symbol_after_next = item.production->at(next_step).symbol; + next_lookaheads = first_sets.find(symbol_after_next)->second; + propagates_lookaheads = false; + } + + for (const Production &production : grammar.productions(next_symbol)) { + components_to_process.push_back(ParseItemSetComponent{ + ParseItem(next_symbol, production, 0), + next_lookaheads, + propagates_lookaheads + }); + } + } + } + + for (auto &pair : cache_entry) { + component_cache[symbol.index].push_back(ParseItemSetComponent{ + pair.first, + pair.second.first, + pair.second.second + }); + } } } void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { - items_to_process.clear(); - for (const auto &entry : item_set->entries) { - items_to_process.push_back(make_tuple(entry.first, entry.second, true)); + item_set_buffer.clear(); + + for (const auto &pair : item_set->entries) { + const ParseItem &item = pair.first; + const LookaheadSet &lookaheads = pair.second; + + const Symbol &next_symbol = item.next_symbol(); + if (!next_symbol.is_token && !next_symbol.is_built_in()) { + LookaheadSet next_lookaheads; + size_t next_step = item.step_index + 1; + if (next_step == item.production->size()) { + next_lookaheads = lookaheads; + } else { + Symbol symbol_after_next = item.production->at(next_step).symbol; + next_lookaheads = first_sets.find(symbol_after_next)->second; + } + + for (const ParseItemSetComponent &component : component_cache[next_symbol.index]) { + item_set_buffer.push_back({component.item, component.lookaheads}); + if (component.propagates_lookaheads) { + item_set_buffer.push_back({component.item, next_lookaheads}); + } + } + } } - while (!items_to_process.empty()) { - ParseItem item = get<0>(items_to_process.back()); - LookaheadSet lookahead_symbols = get<1>(items_to_process.back()); - bool from_original_set = get<2>(items_to_process.back()); - items_to_process.pop_back(); - - // Add the parse-item and lookahead symbols to the item set. - // If they were already present, skip to the next item. - if (!from_original_set && !item_set->entries[item].insert_all(lookahead_symbols)) - continue; - - // If the next symbol in the production is not a non-terminal, skip to the - // next item. - Symbol next_symbol = item.next_symbol(); - if (next_symbol == NONE() || next_symbol.is_token || - next_symbol.is_built_in()) - continue; - - // If the next symbol is the last symbol in the item's production, then the - // lookahead symbols for the new items are the same as for the current item. - // Otherwise, they are the FOLLOW set of the symbol in this production. - LookaheadSet next_lookahead_symbols; - size_t next_step = item.step_index + 1; - if (next_step == item.production->size()) { - next_lookahead_symbols = lookahead_symbols; - } else { - Symbol symbol_after_next = item.production->at(next_step).symbol; - next_lookahead_symbols = first_sets.find(symbol_after_next)->second; - } - - // Add each of the next symbol's productions to be processed recursively. - for (const Production &production : grammar->productions(next_symbol)) - items_to_process.push_back(make_tuple( - ParseItem(next_symbol, production, 0), - next_lookahead_symbols, - false - )); + for (const auto &buffer_entry : item_set_buffer) { + item_set->entries[buffer_entry.first].insert_all(buffer_entry.second); } } diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index db3ca930..2a0de268 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -13,9 +13,15 @@ struct LexicalGrammar; namespace build_tables { class ParseItemSetBuilder { - const SyntaxGrammar *grammar; + struct ParseItemSetComponent { + ParseItem item; + LookaheadSet lookaheads; + bool propagates_lookaheads; + }; + std::map first_sets; - std::vector> items_to_process; + std::map> component_cache; + std::vector> item_set_buffer; public: ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);