Share common lookahead sets between parse item sets

This commit is contained in:
Max Brunsfeld 2015-10-04 21:33:54 -07:00
parent a0bf3d0bd8
commit c4ef228397
12 changed files with 132 additions and 49 deletions

View file

@ -20,6 +20,7 @@
'src/compiler/build_tables/item_set_transitions.cc',
'src/compiler/build_tables/lex_item.cc',
'src/compiler/build_tables/lex_conflict_manager.cc',
'src/compiler/build_tables/lookahead_set.cc',
'src/compiler/build_tables/parse_item.cc',
'src/compiler/build_tables/parse_conflict_manager.cc',
'src/compiler/build_tables/rule_can_be_blank.cc',

View file

@ -1,7 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/rules/built_in_symbols.h"
using namespace build_tables;
@ -45,19 +45,19 @@ describe("item_set_closure", []() {
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(0), 0, 0, 100),
set<Symbol>({ Symbol(10, true) })
LookaheadSet({ Symbol(10, true) })
},
{
ParseItem(Symbol(1), 0, 0, 102),
set<Symbol>({ Symbol(11, true) })
LookaheadSet({ Symbol(11, true) })
},
{
ParseItem(Symbol(1), 1, 0, 104),
set<Symbol>({ Symbol(11, true) })
LookaheadSet({ Symbol(11, true) })
},
{
ParseItem(Symbol(2), 0, 0, 105),
set<Symbol>({ Symbol(11, true) })
LookaheadSet({ Symbol(11, true) })
},
})));
});

View file

@ -1,5 +1,6 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/syntax_grammar.h"
#include "compiler/helpers/rule_helpers.h"
@ -69,7 +70,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
{
// Step 2 of rule_0's production: right before the reference to rule_1.
ParseItem(Symbol(0), 0, 2, 103),
set<Symbol>({ Symbol(16, true) })
LookaheadSet({ Symbol(16, true) })
}
});
@ -81,7 +82,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
ParseItemSet({
{
ParseItem(Symbol(0), 0, 3, 104),
set<Symbol>({ Symbol(16, true) })
LookaheadSet({ Symbol(16, true) })
}
})
},
@ -92,7 +93,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
ParseItemSet({
{
ParseItem(Symbol(1), 0, 1, 106),
set<Symbol>({ Symbol(13, true) })
LookaheadSet({ Symbol(13, true) })
},
})
},
@ -103,7 +104,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
ParseItemSet({
{
ParseItem(Symbol(2), 0, 1, 0),
set<Symbol>({ Symbol(14, true) })
LookaheadSet({ Symbol(14, true) })
},
})
},

View file

@ -129,6 +129,10 @@ std::ostream &operator<<(std::ostream &stream, const MetadataRange &range) {
<< to_string(range.max) << string("}");
}
std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) {
return stream << *set.entries;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -112,10 +112,12 @@ namespace build_tables {
struct MetadataRange;
class LexItem;
class ParseItem;
class LookaheadSet;
ostream &operator<<(ostream &, const MetadataRange &);
ostream &operator<<(ostream &, const LexItem &);
ostream &operator<<(ostream &, const ParseItem &);
ostream &operator<<(ostream &, const LookaheadSet &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -47,15 +47,17 @@ class ParseTableBuilder {
conflict_manager(grammar) {}
pair<ParseTable, const GrammarError *> build() {
ParseItem start_item(rules::START(), 0, 0, -2);
add_parse_state(ParseItemSet({
{ start_item, set<Symbol>({ rules::END_OF_INPUT() }) },
{
ParseItem(rules::START(), 0, 0, -2),
LookaheadSet({ rules::END_OF_INPUT() }),
},
}));
while (!item_sets_to_process.empty()) {
auto pair = item_sets_to_process.back();
ParseItemSet &item_set = pair.first;
ParseStateId &state_id = pair.second;
ParseItemSet item_set = std::move(pair.first);
ParseStateId state_id = pair.second;
item_sets_to_process.pop_back();
add_reduce_actions(item_set, state_id);
@ -127,7 +129,7 @@ class ParseTableBuilder {
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
const auto &lookahead_symbols = pair.second;
CompletionStatus completion_status = get_completion_status(item);
if (completion_status.is_done) {
@ -139,7 +141,7 @@ class ParseTableBuilder {
completion_status.associativity,
item.production_index);
for (const auto &lookahead_sym : lookahead_symbols)
for (const auto &lookahead_sym : *lookahead_symbols.entries)
add_action(state_id, lookahead_sym, action, item_set);
}
}

View file

@ -11,50 +11,58 @@
namespace tree_sitter {
namespace build_tables {
using std::set;
using std::vector;
using std::pair;
using std::shared_ptr;
using std::make_shared;
using rules::Symbol;
ParseItemSet item_set_closure(const ParseItemSet &input_item_set, const SyntaxGrammar &grammar) {
ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
const SyntaxGrammar &grammar) {
ParseItemSet result;
vector<pair<ParseItem, set<Symbol>>> items_to_process(input_item_set.begin(),
input_item_set.end());
// An item set's closure is defined recursively. Use an explicit stack to
// store the recursively-added items.
vector<pair<ParseItem, LookaheadSet>> items_to_process(input_item_set.begin(),
input_item_set.end());
while (!items_to_process.empty()) {
ParseItem item = items_to_process.back().first;
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
LookaheadSet lookahead_symbols = items_to_process.back().second;
items_to_process.pop_back();
set<Symbol> &lookahead_symbols = result[item];
size_t previous_size = lookahead_symbols.size();
lookahead_symbols.insert(new_lookahead_symbols.begin(),
new_lookahead_symbols.end());
if (lookahead_symbols.size() == previous_size)
// Add the parse-item and lookahead symbols to the item set.
// If they were already present, skip to the next item.
if (!result[item].insert_all(lookahead_symbols))
continue;
// If the item is at the end of its production, skip to the next item.
const Production &item_production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == item_production.size())
continue;
Symbol symbol = item_production[item.step_index].symbol;
if (symbol.is_token || symbol.is_built_in())
// If the next symbol in the production is not a non-terminal, skip to the
// next item.
Symbol next_symbol = item_production[item.step_index].symbol;
if (next_symbol.is_token || next_symbol.is_built_in())
continue;
set<Symbol> next_lookahead_symbols;
unsigned int next_step = item.step_index + 1;
// If the next symbol is the last symbol in the item's production, then the
// lookahead symbols for the new items are the same as for the current item.
// Otherwise, compute the FOLLOW-SET of the symbol in this production. This
// is defined recursively as well, so use another queue to store the
// recursively-added follow symbols.
LookaheadSet next_lookahead_symbols;
size_t next_step = item.step_index + 1;
if (next_step == item_production.size()) {
next_lookahead_symbols = lookahead_symbols;
} else {
vector<Symbol> symbols_to_process({ item_production[next_step].symbol });
while (!symbols_to_process.empty()) {
Symbol following_symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (!next_lookahead_symbols.insert(following_symbol).second)
if (!next_lookahead_symbols.insert(following_symbol))
continue;
for (const auto &production : grammar.productions(following_symbol))
@ -63,12 +71,14 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set, const SyntaxGr
}
}
// Add each of the next symbol's productions to be processed recursively.
size_t i = 0;
for (const Production &production : grammar.productions(symbol)) {
for (const Production &production : grammar.productions(next_symbol)) {
if (!production.empty())
items_to_process.push_back(
{ ParseItem(symbol, i, 0, production[0].rule_id),
next_lookahead_symbols });
items_to_process.push_back({
ParseItem(next_symbol, i, 0, production[0].rule_id),
next_lookahead_symbols,
});
i++;
}
}

View file

@ -11,19 +11,16 @@ namespace tree_sitter {
namespace build_tables {
using std::map;
using std::set;
using std::vector;
using rules::CharacterSet;
using rules::Symbol;
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &input_item_set,
const SyntaxGrammar &grammar) {
ParseItemSet item_set(item_set_closure(input_item_set, grammar));
map<Symbol, ParseItemSet> result;
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
const LookaheadSet &lookahead_symbols = pair.second;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size())
@ -34,8 +31,7 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &input_item_set,
int rule_id = step < production.size() ? production[step].rule_id : 0;
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
result[symbol][new_item].insert(lookahead_symbols.begin(),
lookahead_symbols.end());
result[symbol][new_item] = lookahead_symbols;
}
return result;

View file

@ -0,0 +1,41 @@
#include "compiler/build_tables/lookahead_set.h"
#include <set>
#include <memory>
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
using std::set;
using std::make_shared;
using rules::Symbol;
// Creates a lookahead set with no backing storage. The underlying std::set is
// only allocated by the first call to insert() or insert_all().
LookaheadSet::LookaheadSet() : entries() {}
// Creates a lookahead set holding a freshly allocated copy of `symbols`.
LookaheadSet::LookaheadSet(const set<Symbol> &symbols) {
  entries = make_shared<set<Symbol>>(symbols);
}
// Reports whether the set holds no symbols. A set whose storage has not been
// allocated yet counts as empty.
bool LookaheadSet::empty() const {
  if (entries == nullptr)
    return true;
  return entries->empty();
}
// Compares two lookahead sets by the symbols they contain.
//
// A set without allocated storage is treated as an empty set, matching
// empty(), so a default-constructed LookaheadSet equals any other set that
// holds no symbols.
bool LookaheadSet::operator==(const LookaheadSet &other) const {
  // Identical storage (or no storage on either side) is trivially equal and
  // avoids walking the elements.
  if (entries == other.entries)
    return true;

  // At most one side can be null here; never dereference it.
  if (!entries)
    return other.entries->empty();
  if (!other.entries)
    return entries->empty();

  return *entries == *other.entries;
}
// Merges every symbol of `other` into this set.
//
// Returns true if at least one symbol was newly added, and false if this set
// already contained all of them. This set's storage is allocated on first
// use, so `entries` is non-null afterwards even when nothing was added.
bool LookaheadSet::insert_all(const LookaheadSet &other) {
  if (!entries)
    entries = make_shared<set<Symbol>>();

  // A default-constructed `other` has no storage, and an `other` that shares
  // this set's storage cannot contribute anything new. Return before touching
  // its iterators: dereferencing a null pointer is undefined, and so is
  // range-inserting a std::set into itself.
  if (!other.entries || other.entries == entries)
    return false;

  size_t previous_size = entries->size();
  entries->insert(other.entries->begin(), other.entries->end());
  return entries->size() > previous_size;
}
// Adds a single symbol, allocating the backing set on first use.
// Returns true when the symbol was not already present.
bool LookaheadSet::insert(const Symbol &symbol) {
  if (entries == nullptr)
    entries = make_shared<set<Symbol>>();
  const auto outcome = entries->insert(symbol);
  return outcome.second;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -0,0 +1,27 @@
#ifndef COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_
#define COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_
#include <set>
#include <memory>
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
// A set of lookahead symbols whose storage is held through a shared_ptr.
// The class declares no copy operations of its own, so copying a LookaheadSet
// copies the pointer: the copy refers to the same underlying std::set.
class LookaheadSet {
public:
// Creates a set with no allocated storage (`entries` is null).
LookaheadSet();
// Creates a set holding a newly allocated copy of the given symbols.
LookaheadSet(const std::set<rules::Symbol> &);
// True when no storage has been allocated or the stored set has no elements.
bool empty() const;
// Compares the contents of the two underlying sets.
bool operator==(const LookaheadSet &) const;
// Inserts all symbols of the argument into this set, allocating storage if
// needed. Returns true if this set grew as a result.
bool insert_all(const LookaheadSet &);
// Inserts one symbol, allocating storage if needed. Returns true if the
// symbol was not already present.
bool insert(const rules::Symbol &);
// Backing storage. Null until the set is constructed from symbols or
// something is inserted.
std::shared_ptr<std::set<rules::Symbol>> entries;
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_

View file

@ -8,7 +8,6 @@ namespace build_tables {
using std::string;
using std::to_string;
using std::ostream;
using rules::Symbol;
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,

View file

@ -1,10 +1,9 @@
#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#include <set>
#include <map>
#include <vector>
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
@ -13,6 +12,7 @@ namespace build_tables {
class ParseItem {
public:
ParseItem(const rules::Symbol &, unsigned int, unsigned int, int);
bool operator==(const ParseItem &other) const;
bool operator<(const ParseItem &other) const;
rules::Symbol lhs() const;
@ -23,7 +23,7 @@ class ParseItem {
int rule_id;
};
typedef std::map<ParseItem, std::set<rules::Symbol>> ParseItemSet;
typedef std::map<ParseItem, LookaheadSet> ParseItemSet;
} // namespace build_tables
} // namespace tree_sitter
@ -44,8 +44,8 @@ struct hash<const tree_sitter::build_tables::ParseItemSet> {
size_t result = hash<size_t>()(set.size());
for (auto &pair : set) {
result ^= hash<tree_sitter::build_tables::ParseItem>()(pair.first);
result ^= hash<size_t>()(pair.second.size());
for (auto &symbol : pair.second)
result ^= hash<size_t>()(pair.second.entries->size());
for (auto &symbol : *pair.second.entries)
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
}
return result;