Share common lookahead sets between parse item sets
This commit is contained in:
parent
a0bf3d0bd8
commit
c4ef228397
12 changed files with 132 additions and 49 deletions
|
|
@ -20,6 +20,7 @@
|
|||
'src/compiler/build_tables/item_set_transitions.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
'src/compiler/build_tables/lookahead_set.cc',
|
||||
'src/compiler/build_tables/parse_item.cc',
|
||||
'src/compiler/build_tables/parse_conflict_manager.cc',
|
||||
'src/compiler/build_tables/rule_can_be_blank.cc',
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
using namespace build_tables;
|
||||
|
|
@ -45,19 +45,19 @@ describe("item_set_closure", []() {
|
|||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 0, 100),
|
||||
set<Symbol>({ Symbol(10, true) })
|
||||
LookaheadSet({ Symbol(10, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), 0, 0, 102),
|
||||
set<Symbol>({ Symbol(11, true) })
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), 1, 0, 104),
|
||||
set<Symbol>({ Symbol(11, true) })
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2), 0, 0, 105),
|
||||
set<Symbol>({ Symbol(11, true) })
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/helpers/rule_helpers.h"
|
||||
|
||||
|
|
@ -69,7 +70,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
|
|||
{
|
||||
// Step 2 of rule_0's production: right before the reference to rule_1.
|
||||
ParseItem(Symbol(0), 0, 2, 103),
|
||||
set<Symbol>({ Symbol(16, true) })
|
||||
LookaheadSet({ Symbol(16, true) })
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -81,7 +82,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
|
|||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 3, 104),
|
||||
set<Symbol>({ Symbol(16, true) })
|
||||
LookaheadSet({ Symbol(16, true) })
|
||||
}
|
||||
})
|
||||
},
|
||||
|
|
@ -92,7 +93,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
|
|||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(1), 0, 1, 106),
|
||||
set<Symbol>({ Symbol(13, true) })
|
||||
LookaheadSet({ Symbol(13, true) })
|
||||
},
|
||||
})
|
||||
},
|
||||
|
|
@ -103,7 +104,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
|
|||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(2), 0, 1, 0),
|
||||
set<Symbol>({ Symbol(14, true) })
|
||||
LookaheadSet({ Symbol(14, true) })
|
||||
},
|
||||
})
|
||||
},
|
||||
|
|
|
|||
|
|
@ -129,6 +129,10 @@ std::ostream &operator<<(std::ostream &stream, const MetadataRange &range) {
|
|||
<< to_string(range.max) << string("}");
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) {
|
||||
return stream << *set.entries;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -112,10 +112,12 @@ namespace build_tables {
|
|||
struct MetadataRange;
|
||||
class LexItem;
|
||||
class ParseItem;
|
||||
class LookaheadSet;
|
||||
|
||||
ostream &operator<<(ostream &, const MetadataRange &);
|
||||
ostream &operator<<(ostream &, const LexItem &);
|
||||
ostream &operator<<(ostream &, const ParseItem &);
|
||||
ostream &operator<<(ostream &, const LookaheadSet &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -47,15 +47,17 @@ class ParseTableBuilder {
|
|||
conflict_manager(grammar) {}
|
||||
|
||||
pair<ParseTable, const GrammarError *> build() {
|
||||
ParseItem start_item(rules::START(), 0, 0, -2);
|
||||
add_parse_state(ParseItemSet({
|
||||
{ start_item, set<Symbol>({ rules::END_OF_INPUT() }) },
|
||||
{
|
||||
ParseItem(rules::START(), 0, 0, -2),
|
||||
LookaheadSet({ rules::END_OF_INPUT() }),
|
||||
},
|
||||
}));
|
||||
|
||||
while (!item_sets_to_process.empty()) {
|
||||
auto pair = item_sets_to_process.back();
|
||||
ParseItemSet &item_set = pair.first;
|
||||
ParseStateId &state_id = pair.second;
|
||||
ParseItemSet item_set = std::move(pair.first);
|
||||
ParseStateId state_id = pair.second;
|
||||
item_sets_to_process.pop_back();
|
||||
|
||||
add_reduce_actions(item_set, state_id);
|
||||
|
|
@ -127,7 +129,7 @@ class ParseTableBuilder {
|
|||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
const auto &lookahead_symbols = pair.second;
|
||||
|
||||
CompletionStatus completion_status = get_completion_status(item);
|
||||
if (completion_status.is_done) {
|
||||
|
|
@ -139,7 +141,7 @@ class ParseTableBuilder {
|
|||
completion_status.associativity,
|
||||
item.production_index);
|
||||
|
||||
for (const auto &lookahead_sym : lookahead_symbols)
|
||||
for (const auto &lookahead_sym : *lookahead_symbols.entries)
|
||||
add_action(state_id, lookahead_sym, action, item_set);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,50 +11,58 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::shared_ptr;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseItemSet item_set_closure(const ParseItemSet &input_item_set, const SyntaxGrammar &grammar) {
|
||||
ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
|
||||
const SyntaxGrammar &grammar) {
|
||||
ParseItemSet result;
|
||||
vector<pair<ParseItem, set<Symbol>>> items_to_process(input_item_set.begin(),
|
||||
input_item_set.end());
|
||||
|
||||
// An item set's closure is defined recursively. Use an explicit stack to
|
||||
// store the recursively-added items.
|
||||
vector<pair<ParseItem, LookaheadSet>> items_to_process(input_item_set.begin(),
|
||||
input_item_set.end());
|
||||
while (!items_to_process.empty()) {
|
||||
ParseItem item = items_to_process.back().first;
|
||||
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
|
||||
LookaheadSet lookahead_symbols = items_to_process.back().second;
|
||||
items_to_process.pop_back();
|
||||
|
||||
set<Symbol> &lookahead_symbols = result[item];
|
||||
size_t previous_size = lookahead_symbols.size();
|
||||
lookahead_symbols.insert(new_lookahead_symbols.begin(),
|
||||
new_lookahead_symbols.end());
|
||||
if (lookahead_symbols.size() == previous_size)
|
||||
// Add the parse-item and lookahead symbols to the item set.
|
||||
// If they were already present, skip to the next item.
|
||||
if (!result[item].insert_all(lookahead_symbols))
|
||||
continue;
|
||||
|
||||
// If the item is at the end of its production, skip to the next item.
|
||||
const Production &item_production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
|
||||
if (item.step_index == item_production.size())
|
||||
continue;
|
||||
|
||||
Symbol symbol = item_production[item.step_index].symbol;
|
||||
|
||||
if (symbol.is_token || symbol.is_built_in())
|
||||
// If the next symbol in the production is not a non-terminal, skip to the
|
||||
// next item.
|
||||
Symbol next_symbol = item_production[item.step_index].symbol;
|
||||
if (next_symbol.is_token || next_symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
set<Symbol> next_lookahead_symbols;
|
||||
unsigned int next_step = item.step_index + 1;
|
||||
// If the next symbol is the last symbol in the item's production, then the
|
||||
// lookahead symbols for the new items are the same as for the current item.
|
||||
// Otherwise, compute the FOLLOW-SET of the symbol in this production. This
|
||||
// is defined recursively as well, so use another queue to store the
|
||||
// recursively-added follow symbols.
|
||||
LookaheadSet next_lookahead_symbols;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item_production.size()) {
|
||||
next_lookahead_symbols = lookahead_symbols;
|
||||
} else {
|
||||
vector<Symbol> symbols_to_process({ item_production[next_step].symbol });
|
||||
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol following_symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
if (!next_lookahead_symbols.insert(following_symbol).second)
|
||||
|
||||
if (!next_lookahead_symbols.insert(following_symbol))
|
||||
continue;
|
||||
|
||||
for (const auto &production : grammar.productions(following_symbol))
|
||||
|
|
@ -63,12 +71,14 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set, const SyntaxGr
|
|||
}
|
||||
}
|
||||
|
||||
// Add each of the next symbol's productions to be processed recursively.
|
||||
size_t i = 0;
|
||||
for (const Production &production : grammar.productions(symbol)) {
|
||||
for (const Production &production : grammar.productions(next_symbol)) {
|
||||
if (!production.empty())
|
||||
items_to_process.push_back(
|
||||
{ ParseItem(symbol, i, 0, production[0].rule_id),
|
||||
next_lookahead_symbols });
|
||||
items_to_process.push_back({
|
||||
ParseItem(next_symbol, i, 0, production[0].rule_id),
|
||||
next_lookahead_symbols,
|
||||
});
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,19 +11,16 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
|
||||
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &input_item_set,
|
||||
const SyntaxGrammar &grammar) {
|
||||
|
||||
ParseItemSet item_set(item_set_closure(input_item_set, grammar));
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
const LookaheadSet &lookahead_symbols = pair.second;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index == production.size())
|
||||
|
|
@ -34,8 +31,7 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &input_item_set,
|
|||
int rule_id = step < production.size() ? production[step].rule_id : 0;
|
||||
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
|
||||
|
||||
result[symbol][new_item].insert(lookahead_symbols.begin(),
|
||||
lookahead_symbols.end());
|
||||
result[symbol][new_item] = lookahead_symbols;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
|||
41
src/compiler/build_tables/lookahead_set.cc
Normal file
41
src/compiler/build_tables/lookahead_set.cc
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include <set>
|
||||
#include <memory>
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::set;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
|
||||
// Creates a lookahead set with no underlying symbol set; `entries` starts
// out null and is allocated on the first insertion.
LookaheadSet::LookaheadSet() : entries() {}
|
||||
|
||||
// Creates a lookahead set that owns a freshly allocated copy of `symbols`.
LookaheadSet::LookaheadSet(const set<Symbol> &symbols)
    : entries(std::make_shared<std::set<rules::Symbol>>(symbols)) {}
|
||||
|
||||
bool LookaheadSet::empty() const {
|
||||
return !entries.get() || entries->empty();
|
||||
}
|
||||
|
||||
// Compares two lookahead sets by the symbols they contain.
//
// A null `entries` pointer (default-constructed set) is treated the same as
// an allocated-but-empty set, matching `empty()`. Without this check the
// comparison would dereference a null pointer.
bool LookaheadSet::operator==(const LookaheadSet &other) const {
  // Sets sharing the same underlying storage (or both null) are equal.
  if (entries == other.entries)
    return true;

  const bool this_is_empty = !entries || entries->empty();
  const bool other_is_empty = !other.entries || other.entries->empty();
  if (this_is_empty || other_is_empty)
    return this_is_empty && other_is_empty;

  return *entries == *other.entries;
}
|
||||
|
||||
bool LookaheadSet::insert_all(const LookaheadSet &other) {
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol>>();
|
||||
size_t previous_size = entries->size();
|
||||
entries->insert(other.entries->begin(), other.entries->end());
|
||||
return entries->size() > previous_size;
|
||||
}
|
||||
|
||||
bool LookaheadSet::insert(const Symbol &symbol) {
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol>>();
|
||||
return entries->insert(symbol).second;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
27
src/compiler/build_tables/lookahead_set.h
Normal file
27
src/compiler/build_tables/lookahead_set.h
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_
|
||||
#define COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_
|
||||
|
||||
#include <set>
|
||||
#include <memory>
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
class LookaheadSet {
|
||||
public:
|
||||
LookaheadSet();
|
||||
LookaheadSet(const std::set<rules::Symbol> &);
|
||||
|
||||
bool empty() const;
|
||||
bool operator==(const LookaheadSet &) const;
|
||||
bool insert_all(const LookaheadSet &);
|
||||
bool insert(const rules::Symbol &);
|
||||
|
||||
std::shared_ptr<std::set<rules::Symbol>> entries;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_
|
||||
|
|
@ -8,7 +8,6 @@ namespace build_tables {
|
|||
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::ostream;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -13,6 +12,7 @@ namespace build_tables {
|
|||
class ParseItem {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &, unsigned int, unsigned int, int);
|
||||
|
||||
bool operator==(const ParseItem &other) const;
|
||||
bool operator<(const ParseItem &other) const;
|
||||
rules::Symbol lhs() const;
|
||||
|
|
@ -23,7 +23,7 @@ class ParseItem {
|
|||
int rule_id;
|
||||
};
|
||||
|
||||
typedef std::map<ParseItem, std::set<rules::Symbol>> ParseItemSet;
|
||||
typedef std::map<ParseItem, LookaheadSet> ParseItemSet;
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -44,8 +44,8 @@ struct hash<const tree_sitter::build_tables::ParseItemSet> {
|
|||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto &pair : set) {
|
||||
result ^= hash<tree_sitter::build_tables::ParseItem>()(pair.first);
|
||||
result ^= hash<size_t>()(pair.second.size());
|
||||
for (auto &symbol : pair.second)
|
||||
result ^= hash<size_t>()(pair.second.entries->size());
|
||||
for (auto &symbol : *pair.second.entries)
|
||||
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
|
||||
}
|
||||
return result;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue