Compute closures of item sets lazily

This commit is contained in:
Max Brunsfeld 2015-10-03 22:36:04 -07:00
parent ebc52f109d
commit a0bf3d0bd8
6 changed files with 51 additions and 28 deletions

View file

@ -35,14 +35,12 @@ describe("item_set_closure", []() {
}, {}, {}};
it("adds items at the beginnings of referenced rules", [&]() {
ParseItemSet item_set({
ParseItemSet item_set = item_set_closure(ParseItemSet({
{
ParseItem(Symbol(0), 0, 0, 100),
set<Symbol>({ Symbol(10, true) }),
}
});
item_set_closure(&item_set, grammar);
}), grammar);
AssertThat(item_set, Equals(ParseItemSet({
{

View file

@ -44,39 +44,65 @@ describe("char_transitions(LexItemSet)", []() {
describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
it("computes the closure of the new item sets", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("A", VariableTypeNamed, {
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13, true), 0, AssociativityNone, 103},
{Symbol(1), 0, AssociativityNone, 104},
{Symbol(14, true), 0, AssociativityNone, 105},
{Symbol(1), 0, AssociativityNone, 103},
{Symbol(13, true), 0, AssociativityNone, 104},
})
}),
SyntaxVariable("B", VariableTypeNamed, {
SyntaxVariable("rule_1", VariableTypeNamed, {
Production({
{Symbol(15, true), 0, AssociativityNone, 106},
{Symbol(2), 0, AssociativityNone, 105},
{Symbol(14, true), 0, AssociativityNone, 106},
})
}),
SyntaxVariable("rule_2", VariableTypeNamed, {
Production({
{Symbol(15, true), 0, AssociativityNone, 105},
})
})
}, {}, {}};
ParseItemSet set1({
{
// Step 2 of rule_0's production: right before the reference to rule_1.
ParseItem(Symbol(0), 0, 2, 103),
set<Symbol>({ Symbol(16, true) })
}
});
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
// Consume symbol 1 -> step 3 of rule_0's production
{
Symbol(13, true),
Symbol(1),
ParseItemSet({
{
ParseItem(Symbol(0), 0, 3, 104),
set<Symbol>({ Symbol(16, true) })
},
}
})
},
// Consume symbol 2 -> step 1 of rule_1's production
{
Symbol(2),
ParseItemSet({
{
ParseItem(Symbol(1), 0, 0, 106),
ParseItem(Symbol(1), 0, 1, 106),
set<Symbol>({ Symbol(13, true) })
},
})
},
// Consume token 15 -> step 1 of rule_2's production
{
Symbol(15, true),
ParseItemSet({
{
ParseItem(Symbol(2), 0, 1, 0),
set<Symbol>({ Symbol(14, true) })
},
})

View file

@ -6,7 +6,6 @@
#include <unordered_map>
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/build_tables/parse_item.h"
@ -49,9 +48,9 @@ class ParseTableBuilder {
pair<ParseTable, const GrammarError *> build() {
ParseItem start_item(rules::START(), 0, 0, -2);
ParseItemSet start_item_set({ { start_item, { rules::END_OF_INPUT() } } });
item_set_closure(&start_item_set, grammar);
add_parse_state(start_item_set);
add_parse_state(ParseItemSet({
{ start_item, set<Symbol>({ rules::END_OF_INPUT() }) },
}));
while (!item_sets_to_process.empty()) {
auto pair = item_sets_to_process.back();

View file

@ -16,18 +16,17 @@ using std::vector;
using std::pair;
using rules::Symbol;
void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
vector<pair<ParseItem, set<Symbol>>> items_to_process;
items_to_process.insert(items_to_process.end(), item_set->begin(),
item_set->end());
item_set->clear();
ParseItemSet item_set_closure(const ParseItemSet &input_item_set, const SyntaxGrammar &grammar) {
ParseItemSet result;
vector<pair<ParseItem, set<Symbol>>> items_to_process(input_item_set.begin(),
input_item_set.end());
while (!items_to_process.empty()) {
ParseItem item = items_to_process.back().first;
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
items_to_process.pop_back();
set<Symbol> &lookahead_symbols = item_set->operator[](item);
set<Symbol> &lookahead_symbols = result[item];
size_t previous_size = lookahead_symbols.size();
lookahead_symbols.insert(new_lookahead_symbols.begin(),
new_lookahead_symbols.end());
@ -73,6 +72,8 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
i++;
}
}
return result;
}
} // namespace build_tables

View file

@ -10,7 +10,7 @@ struct SyntaxGrammar;
namespace build_tables {
void item_set_closure(ParseItemSet *, const SyntaxGrammar &);
ParseItemSet item_set_closure(const ParseItemSet &, const SyntaxGrammar &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -16,8 +16,10 @@ using std::vector;
using rules::CharacterSet;
using rules::Symbol;
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &input_item_set,
const SyntaxGrammar &grammar) {
ParseItemSet item_set(item_set_closure(input_item_set, grammar));
map<Symbol, ParseItemSet> result;
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
@ -36,9 +38,6 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
lookahead_symbols.end());
}
for (auto &pair : result)
item_set_closure(&pair.second, grammar);
return result;
}