Optimize ParseTableBuilder for non-terminals w/ many productions

This commit is contained in:
Max Brunsfeld 2017-07-11 12:54:29 -07:00
parent 68c3ba1b8b
commit a199b217f3
2 changed files with 66 additions and 66 deletions

View file

@ -1,6 +1,7 @@
#include "compiler/build_tables/parse_item_set_builder.h"
#include <cassert>
#include <set>
#include <unordered_map>
#include <vector>
#include <utility>
#include "compiler/syntax_grammar.h"
@ -12,10 +13,10 @@ namespace build_tables {
using std::vector;
using std::set;
using std::map;
using std::get;
using std::pair;
using std::tuple;
using std::unordered_map;
using std::make_tuple;
using rules::Symbol;
using rules::NONE;
@ -79,58 +80,62 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
}
}
}
last_sets.insert({symbol, last_set});
}
vector<ParseItemSetComponent> components_to_process;
// Work-list entry for the per-non-terminal traversal below: entries are pushed
// onto `non_terminal_queue_entry` and popped one at a time until it is empty.
// Field order matters -- entries are built with positional brace-initialisation.
struct NonTerminalQueueEntry {
// Index of the non-terminal to expand; used to index `grammar.variables`.
Symbol::Index non_terminal;
// Lookaheads merged into this non-terminal's cached lookahead set when
// `propagates_lookaheads` is false.
LookaheadSet lookaheads;
// When true, the cached entry's "propagates" flag is set instead of merging
// `lookaheads`.
bool propagates_lookaheads;
};
vector<NonTerminalQueueEntry> non_terminal_queue_entry;
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol symbol = Symbol::non_terminal(i);
map<ParseItem, pair<LookaheadSet, bool>> cache_entry;
components_to_process.clear();
for (const Production &production : grammar.variables[i].productions) {
components_to_process.push_back(ParseItemSetComponent{
ParseItem(symbol, production, 0),
LookaheadSet(),
true
});
}
unordered_map<Symbol::Index, pair<LookaheadSet, bool>> cached_lookaheads_by_non_terminal;
while (!components_to_process.empty()) {
ParseItemSetComponent component = components_to_process.back();
ParseItem &item = component.item;
LookaheadSet &lookaheads = component.lookaheads;
components_to_process.pop_back();
non_terminal_queue_entry.clear();
non_terminal_queue_entry.push_back({
symbol.index,
LookaheadSet(),
true
});
bool component_is_new;
if (component.propagates_lookaheads) {
component_is_new = !cache_entry[item].second;
cache_entry[item].second = true;
while (!non_terminal_queue_entry.empty()) {
NonTerminalQueueEntry queue_entry = non_terminal_queue_entry.back();
non_terminal_queue_entry.pop_back();
bool queue_entry_is_new;
auto &cache_entry = cached_lookaheads_by_non_terminal[queue_entry.non_terminal];
if (queue_entry.propagates_lookaheads) {
queue_entry_is_new = !cache_entry.second;
cache_entry.second = true;
} else {
component_is_new = cache_entry[item].first.insert_all(lookaheads);
queue_entry_is_new = cache_entry.first.insert_all(queue_entry.lookaheads);
}
if (component_is_new) {
Symbol next_symbol = item.next_symbol();
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in())
continue;
if (queue_entry_is_new) {
for (const Production &production : grammar.variables[queue_entry.non_terminal].productions) {
if (production.empty()) continue;
Symbol next_symbol = production.at(0).symbol;
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) continue;
LookaheadSet next_lookaheads;
bool propagates_lookaheads;
size_t next_step = item.step_index + 1;
if (next_step == item.production->size()) {
next_lookaheads = lookaheads;
propagates_lookaheads = component.propagates_lookaheads;
} else {
Symbol symbol_after_next = item.production->at(next_step).symbol;
next_lookaheads = first_sets.find(symbol_after_next)->second;
propagates_lookaheads = false;
}
LookaheadSet next_lookaheads;
bool propagates_lookaheads;
if (production.size() == 1) {
next_lookaheads = queue_entry.lookaheads;
propagates_lookaheads = queue_entry.propagates_lookaheads;
} else {
Symbol symbol_after_next = production.at(1).symbol;
next_lookaheads = first_sets.find(symbol_after_next)->second;
propagates_lookaheads = false;
}
for (const Production &production : grammar.variables[next_symbol.index].productions) {
components_to_process.push_back(ParseItemSetComponent{
ParseItem(next_symbol, production, 0),
non_terminal_queue_entry.push_back({
next_symbol.index,
next_lookaheads,
propagates_lookaheads
});
@ -138,45 +143,41 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
}
}
for (auto &pair : cache_entry) {
component_cache[symbol.index].push_back(ParseItemSetComponent{
pair.first,
pair.second.first,
pair.second.second
});
for (auto &pair : cached_lookaheads_by_non_terminal) {
for (const Production &production : grammar.variables[pair.first].productions) {
component_cache[i].push_back({
ParseItem(Symbol::non_terminal(pair.first), production, 0),
pair.second.first,
pair.second.second
});
}
}
}
}
// Expands `item_set` in place: for every entry whose next symbol is a
// non-built-in non-terminal, the components cached for that non-terminal in
// `component_cache` are merged into `item_set->entries`.
//
// NOTE(review): this listing appears to be a diff rendering with the +/-
// markers stripped, so the buffered (`item_set_buffer`) variant and the direct
// in-place variant of the body are interleaved below. It is not compilable as
// shown; read the comments as describing each variant separately.
void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
// Buffered variant: reset the scratch buffer before collecting additions.
item_set_buffer.clear();
for (const auto &pair : item_set->entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookaheads = pair.second;
// Skip items at step 0 unless their left-hand side is the START symbol.
if (item.lhs() != rules::START() && item.step_index == 0) continue;
const Symbol &next_symbol = item.next_symbol();
// Buffered variant: nested form of the non-terminal guard.
if (next_symbol.is_non_terminal() && !next_symbol.is_built_in()) {
// Lookaheads that may follow `next_symbol`: the item's own lookaheads when
// `next_symbol` is the last step of the production, otherwise the FIRST set
// of the symbol that comes after it.
LookaheadSet next_lookaheads;
size_t next_step = item.step_index + 1;
if (next_step == item.production->size()) {
next_lookaheads = lookaheads;
} else {
Symbol symbol_after_next = item.production->at(next_step).symbol;
next_lookaheads = first_sets.find(symbol_after_next)->second;
}
// Direct variant: the same guard written as an early `continue`.
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) continue;
// Buffered variant: queue each cached component with its own lookaheads and,
// if it propagates, once more with `next_lookaheads`.
for (const ParseItemSetComponent &component : component_cache[next_symbol.index]) {
item_set_buffer.push_back({component.item, component.lookaheads});
if (component.propagates_lookaheads) {
item_set_buffer.push_back({component.item, next_lookaheads});
}
}
// Direct variant: same lookahead computation as above.
LookaheadSet next_lookaheads;
size_t next_step = item.step_index + 1;
if (next_step == item.production->size()) {
next_lookaheads = lookaheads;
} else {
Symbol symbol_after_next = item.production->at(next_step).symbol;
next_lookaheads = first_sets.find(symbol_after_next)->second;
}
}
// Buffered variant: merge everything collected into the item set.
for (const auto &buffer_entry : item_set_buffer) {
item_set->entries[buffer_entry.first].insert_all(buffer_entry.second);
// Direct variant: merge each cached component straight into the item set.
// NOTE(review): this inserts into `item_set->entries` while the enclosing loop
// iterates over it -- safe only if insertion does not invalidate iterators for
// that container (true for std::map); confirm the type of `entries`.
for (const ParseItemSetComponent &component : component_cache[next_symbol.index]) {
LookaheadSet &current_lookaheads = item_set->entries[component.item];
current_lookaheads.insert_all(component.lookaheads);
if (component.propagates_lookaheads) current_lookaheads.insert_all(next_lookaheads);
}
}
}

View file

@ -22,7 +22,6 @@ class ParseItemSetBuilder {
std::map<rules::Symbol, LookaheadSet> first_sets;
std::map<rules::Symbol, LookaheadSet> last_sets;
std::map<rules::Symbol::Index, std::vector<ParseItemSetComponent>> component_cache;
std::vector<std::pair<ParseItem, LookaheadSet>> item_set_buffer;
public:
ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);