Optimize ParseTableBuilder for non-terminals w/ many productions
This commit is contained in:
parent
68c3ba1b8b
commit
a199b217f3
2 changed files with 66 additions and 66 deletions
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include <cassert>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
|
@ -12,10 +13,10 @@ namespace build_tables {
|
|||
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::map;
|
||||
using std::get;
|
||||
using std::pair;
|
||||
using std::tuple;
|
||||
using std::unordered_map;
|
||||
using std::make_tuple;
|
||||
using rules::Symbol;
|
||||
using rules::NONE;
|
||||
|
|
@ -79,58 +80,62 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_sets.insert({symbol, last_set});
|
||||
}
|
||||
|
||||
vector<ParseItemSetComponent> components_to_process;
|
||||
struct NonTerminalQueueEntry {
|
||||
Symbol::Index non_terminal;
|
||||
LookaheadSet lookaheads;
|
||||
bool propagates_lookaheads;
|
||||
};
|
||||
|
||||
vector<NonTerminalQueueEntry> non_terminal_queue_entry;
|
||||
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol = Symbol::non_terminal(i);
|
||||
map<ParseItem, pair<LookaheadSet, bool>> cache_entry;
|
||||
|
||||
components_to_process.clear();
|
||||
for (const Production &production : grammar.variables[i].productions) {
|
||||
components_to_process.push_back(ParseItemSetComponent{
|
||||
ParseItem(symbol, production, 0),
|
||||
LookaheadSet(),
|
||||
true
|
||||
});
|
||||
}
|
||||
unordered_map<Symbol::Index, pair<LookaheadSet, bool>> cached_lookaheads_by_non_terminal;
|
||||
|
||||
while (!components_to_process.empty()) {
|
||||
ParseItemSetComponent component = components_to_process.back();
|
||||
ParseItem &item = component.item;
|
||||
LookaheadSet &lookaheads = component.lookaheads;
|
||||
components_to_process.pop_back();
|
||||
non_terminal_queue_entry.clear();
|
||||
non_terminal_queue_entry.push_back({
|
||||
symbol.index,
|
||||
LookaheadSet(),
|
||||
true
|
||||
});
|
||||
|
||||
bool component_is_new;
|
||||
if (component.propagates_lookaheads) {
|
||||
component_is_new = !cache_entry[item].second;
|
||||
cache_entry[item].second = true;
|
||||
while (!non_terminal_queue_entry.empty()) {
|
||||
NonTerminalQueueEntry queue_entry = non_terminal_queue_entry.back();
|
||||
non_terminal_queue_entry.pop_back();
|
||||
|
||||
bool queue_entry_is_new;
|
||||
auto &cache_entry = cached_lookaheads_by_non_terminal[queue_entry.non_terminal];
|
||||
if (queue_entry.propagates_lookaheads) {
|
||||
queue_entry_is_new = !cache_entry.second;
|
||||
cache_entry.second = true;
|
||||
} else {
|
||||
component_is_new = cache_entry[item].first.insert_all(lookaheads);
|
||||
queue_entry_is_new = cache_entry.first.insert_all(queue_entry.lookaheads);
|
||||
}
|
||||
|
||||
if (component_is_new) {
|
||||
Symbol next_symbol = item.next_symbol();
|
||||
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in())
|
||||
continue;
|
||||
if (queue_entry_is_new) {
|
||||
for (const Production &production : grammar.variables[queue_entry.non_terminal].productions) {
|
||||
if (production.empty()) continue;
|
||||
Symbol next_symbol = production.at(0).symbol;
|
||||
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) continue;
|
||||
|
||||
LookaheadSet next_lookaheads;
|
||||
bool propagates_lookaheads;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookaheads = lookaheads;
|
||||
propagates_lookaheads = component.propagates_lookaheads;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
next_lookaheads = first_sets.find(symbol_after_next)->second;
|
||||
propagates_lookaheads = false;
|
||||
}
|
||||
LookaheadSet next_lookaheads;
|
||||
bool propagates_lookaheads;
|
||||
if (production.size() == 1) {
|
||||
next_lookaheads = queue_entry.lookaheads;
|
||||
propagates_lookaheads = queue_entry.propagates_lookaheads;
|
||||
} else {
|
||||
Symbol symbol_after_next = production.at(1).symbol;
|
||||
next_lookaheads = first_sets.find(symbol_after_next)->second;
|
||||
propagates_lookaheads = false;
|
||||
}
|
||||
|
||||
for (const Production &production : grammar.variables[next_symbol.index].productions) {
|
||||
components_to_process.push_back(ParseItemSetComponent{
|
||||
ParseItem(next_symbol, production, 0),
|
||||
non_terminal_queue_entry.push_back({
|
||||
next_symbol.index,
|
||||
next_lookaheads,
|
||||
propagates_lookaheads
|
||||
});
|
||||
|
|
@ -138,45 +143,41 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
}
|
||||
}
|
||||
|
||||
for (auto &pair : cache_entry) {
|
||||
component_cache[symbol.index].push_back(ParseItemSetComponent{
|
||||
pair.first,
|
||||
pair.second.first,
|
||||
pair.second.second
|
||||
});
|
||||
for (auto &pair : cached_lookaheads_by_non_terminal) {
|
||||
for (const Production &production : grammar.variables[pair.first].productions) {
|
||||
component_cache[i].push_back({
|
||||
ParseItem(Symbol::non_terminal(pair.first), production, 0),
|
||||
pair.second.first,
|
||||
pair.second.second
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
|
||||
item_set_buffer.clear();
|
||||
|
||||
for (const auto &pair : item_set->entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const LookaheadSet &lookaheads = pair.second;
|
||||
if (item.lhs() != rules::START() && item.step_index == 0) continue;
|
||||
|
||||
const Symbol &next_symbol = item.next_symbol();
|
||||
if (next_symbol.is_non_terminal() && !next_symbol.is_built_in()) {
|
||||
LookaheadSet next_lookaheads;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookaheads = lookaheads;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
next_lookaheads = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) continue;
|
||||
|
||||
for (const ParseItemSetComponent &component : component_cache[next_symbol.index]) {
|
||||
item_set_buffer.push_back({component.item, component.lookaheads});
|
||||
if (component.propagates_lookaheads) {
|
||||
item_set_buffer.push_back({component.item, next_lookaheads});
|
||||
}
|
||||
}
|
||||
LookaheadSet next_lookaheads;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookaheads = lookaheads;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
next_lookaheads = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &buffer_entry : item_set_buffer) {
|
||||
item_set->entries[buffer_entry.first].insert_all(buffer_entry.second);
|
||||
for (const ParseItemSetComponent &component : component_cache[next_symbol.index]) {
|
||||
LookaheadSet &current_lookaheads = item_set->entries[component.item];
|
||||
current_lookaheads.insert_all(component.lookaheads);
|
||||
if (component.propagates_lookaheads) current_lookaheads.insert_all(next_lookaheads);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ class ParseItemSetBuilder {
|
|||
std::map<rules::Symbol, LookaheadSet> first_sets;
|
||||
std::map<rules::Symbol, LookaheadSet> last_sets;
|
||||
std::map<rules::Symbol::Index, std::vector<ParseItemSetComponent>> component_cache;
|
||||
std::vector<std::pair<ParseItem, LookaheadSet>> item_set_buffer;
|
||||
|
||||
public:
|
||||
ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue