Precompute transitive closure contributions by grammar symbol
This commit is contained in:
parent
5332fd3418
commit
06215607d1
3 changed files with 105 additions and 60 deletions
|
|
@ -132,7 +132,11 @@ std::ostream &operator<<(std::ostream &stream, const ParseItemSet &item_set) {
|
|||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) {
|
||||
return stream << *set.entries;
|
||||
if (set.entries.get()) {
|
||||
return stream << *set.entries;
|
||||
} else {
|
||||
return stream << "{}";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ using std::vector;
|
|||
using std::set;
|
||||
using std::map;
|
||||
using std::get;
|
||||
using std::pair;
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
using std::shared_ptr;
|
||||
|
|
@ -20,28 +21,36 @@ using std::make_shared;
|
|||
using rules::Symbol;
|
||||
using rules::NONE;
|
||||
|
||||
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lexical_grammar) :
|
||||
grammar{&grammar} {
|
||||
vector<Symbol> symbol_stack;
|
||||
set<Symbol> processed_symbols;
|
||||
static Symbol::Index PROPAGATE = -5;
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lexical_grammar) {
|
||||
vector<pair<ParseItem, LookaheadSet>> items_to_process;
|
||||
vector<Symbol> symbols_to_process;
|
||||
set<Symbol::Index> processed_non_terminals;
|
||||
|
||||
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i, true);
|
||||
first_sets.insert({symbol, LookaheadSet({ static_cast<Symbol::Index>(i) })});
|
||||
}
|
||||
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i);
|
||||
LookaheadSet first_set;
|
||||
|
||||
processed_symbols.clear();
|
||||
symbol_stack.clear();
|
||||
symbol_stack.push_back(symbol);
|
||||
while (!symbol_stack.empty()) {
|
||||
Symbol current_symbol = symbol_stack.back();
|
||||
symbol_stack.pop_back();
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push_back(symbol);
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol current_symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
|
||||
if (current_symbol.is_token) {
|
||||
first_set.insert(current_symbol.index);
|
||||
} else if (processed_symbols.insert(current_symbol).second) {
|
||||
} else if (processed_non_terminals.insert(current_symbol.index).second) {
|
||||
for (const Production &production : grammar.productions(current_symbol)) {
|
||||
if (!production.empty()) {
|
||||
symbol_stack.push_back(production[0].symbol);
|
||||
symbols_to_process.push_back(production[0].symbol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -50,55 +59,87 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
first_sets.insert({symbol, first_set});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol symbol(i, true);
|
||||
first_sets.insert({symbol, LookaheadSet({ static_cast<Symbol::Index>(i) })});
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i);
|
||||
ParseItemSet item_set;
|
||||
|
||||
items_to_process.clear();
|
||||
for (const Production &production : grammar.productions(symbol)) {
|
||||
items_to_process.push_back({
|
||||
ParseItem(symbol, production, 0),
|
||||
LookaheadSet({ PROPAGATE }),
|
||||
});
|
||||
}
|
||||
|
||||
while (!items_to_process.empty()) {
|
||||
ParseItem item = items_to_process.back().first;
|
||||
LookaheadSet lookaheads = items_to_process.back().second;
|
||||
items_to_process.pop_back();
|
||||
|
||||
if (item_set.entries[item].insert_all(lookaheads)) {
|
||||
Symbol next_symbol = item.next_symbol();
|
||||
if (next_symbol.is_built_in() || next_symbol.is_token)
|
||||
continue;
|
||||
|
||||
LookaheadSet next_lookaheads;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookaheads = lookaheads;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
next_lookaheads = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
|
||||
for (const Production &production : grammar.productions(next_symbol)) {
|
||||
items_to_process.push_back({
|
||||
ParseItem(next_symbol, production, 0),
|
||||
next_lookaheads,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cached_item_sets.insert({symbol.index, item_set});
|
||||
}
|
||||
}
|
||||
|
||||
void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
|
||||
items_to_process.clear();
|
||||
for (const auto &entry : item_set->entries) {
|
||||
items_to_process.push_back(make_tuple(entry.first, entry.second, true));
|
||||
item_set_buffer.clear();
|
||||
for (const auto &pair : item_set->entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const LookaheadSet &lookaheads = pair.second;
|
||||
|
||||
const Symbol &next_symbol = item.next_symbol();
|
||||
if (!next_symbol.is_token && !next_symbol.is_built_in()) {
|
||||
LookaheadSet next_lookaheads;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookaheads = lookaheads;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
next_lookaheads = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
|
||||
for (const auto &cached_pair : cached_item_sets[next_symbol.index].entries) {
|
||||
const ParseItem &cached_item = cached_pair.first;
|
||||
const LookaheadSet &cached_lookaheads = cached_pair.second;
|
||||
|
||||
LookaheadSet new_lookaheads;
|
||||
for (auto entry : *cached_lookaheads.entries) {
|
||||
if (entry == PROPAGATE) {
|
||||
new_lookaheads.insert_all(next_lookaheads);
|
||||
} else {
|
||||
new_lookaheads.insert(entry);
|
||||
}
|
||||
}
|
||||
|
||||
item_set_buffer.push_back({cached_item, new_lookaheads});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (!items_to_process.empty()) {
|
||||
ParseItem item = get<0>(items_to_process.back());
|
||||
LookaheadSet lookahead_symbols = get<1>(items_to_process.back());
|
||||
bool from_original_set = get<2>(items_to_process.back());
|
||||
items_to_process.pop_back();
|
||||
|
||||
// Add the parse-item and lookahead symbols to the item set.
|
||||
// If they were already present, skip to the next item.
|
||||
if (!from_original_set && !item_set->entries[item].insert_all(lookahead_symbols))
|
||||
continue;
|
||||
|
||||
// If the next symbol in the production is not a non-terminal, skip to the
|
||||
// next item.
|
||||
Symbol next_symbol = item.next_symbol();
|
||||
if (next_symbol == NONE() || next_symbol.is_token ||
|
||||
next_symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
// If the next symbol is the last symbol in the item's production, then the
|
||||
// lookahead symbols for the new items are the same as for the current item.
|
||||
// Otherwise, they are the FIRST set of the symbol that follows it in this production.
|
||||
LookaheadSet next_lookahead_symbols;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookahead_symbols = lookahead_symbols;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
next_lookahead_symbols = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
|
||||
// Add each of the next symbol's productions to be processed recursively.
|
||||
for (const Production &production : grammar->productions(next_symbol))
|
||||
items_to_process.push_back(make_tuple(
|
||||
ParseItem(next_symbol, production, 0),
|
||||
next_lookahead_symbols,
|
||||
false
|
||||
));
|
||||
for (const auto &pair : item_set_buffer) {
|
||||
item_set->entries[pair.first].insert_all(pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@ struct LexicalGrammar;
|
|||
namespace build_tables {
|
||||
|
||||
class ParseItemSetBuilder {
|
||||
const SyntaxGrammar *grammar;
|
||||
std::map<rules::Symbol, LookaheadSet> first_sets;
|
||||
std::vector<std::tuple<ParseItem, LookaheadSet, bool>> items_to_process;
|
||||
std::map<rules::Symbol::Index, ParseItemSet> cached_item_sets;
|
||||
std::vector<std::pair<ParseItem, LookaheadSet>> item_set_buffer;
|
||||
|
||||
public:
|
||||
ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue