Make ParseItemSet & LexItemSet classes
This commit is contained in:
parent
f01972c64e
commit
ef2acf9496
11 changed files with 88 additions and 44 deletions
|
|
@ -48,7 +48,7 @@ describe("lex_item_set_transitions", [&]() {
|
|||
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
|
||||
});
|
||||
|
||||
AssertThat(lex_item_set_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
|
||||
AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
LexItemSet({
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ describe("parse_item_set_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
|
|||
}
|
||||
});
|
||||
|
||||
AssertThat(parse_item_set_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
|
||||
AssertThat(set1.transitions(grammar), Equals(map<Symbol, ParseItemSet>({
|
||||
{
|
||||
Symbol(1),
|
||||
ParseItemSet({
|
||||
|
|
|
|||
|
|
@ -111,11 +111,15 @@ ostream &operator<<(ostream &stream, const ProductionStep &step) {
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
ostream &operator<<(ostream &stream, const build_tables::LexItem &item) {
|
||||
ostream &operator<<(ostream &stream, const LexItem &item) {
|
||||
return stream << string("(item ") << item.lhs << string(" ") << *item.rule
|
||||
<< string(")");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const LexItemSet &item_set) {
|
||||
return stream << item_set.entries;
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const ParseItem &item) {
|
||||
return stream << string("(item variable:") << to_string(item.variable_index)
|
||||
<< string(" production:") << to_string(item.production_index)
|
||||
|
|
@ -124,6 +128,10 @@ ostream &operator<<(ostream &stream, const ParseItem &item) {
|
|||
<< string(")");
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const ParseItemSet &item_set) {
|
||||
return stream << item_set.entries;
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const MetadataRange &range) {
|
||||
return stream << string("{") << to_string(range.min) << string(", ")
|
||||
<< to_string(range.max) << string("}");
|
||||
|
|
|
|||
|
|
@ -111,12 +111,16 @@ namespace build_tables {
|
|||
|
||||
struct MetadataRange;
|
||||
class LexItem;
|
||||
class LexItemSet;
|
||||
class ParseItem;
|
||||
class ParseItemSet;
|
||||
class LookaheadSet;
|
||||
|
||||
ostream &operator<<(ostream &, const MetadataRange &);
|
||||
ostream &operator<<(ostream &, const LexItem &);
|
||||
ostream &operator<<(ostream &, const LexItemSet &);
|
||||
ostream &operator<<(ostream &, const ParseItem &);
|
||||
ostream &operator<<(ostream &, const ParseItemSet &);
|
||||
ostream &operator<<(ostream &, const LookaheadSet &);
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ class LexTableBuilder {
|
|||
const LexicalGrammar lex_grammar;
|
||||
const LexConflictManager conflict_manager;
|
||||
ParseTable *parse_table;
|
||||
unordered_map<const LexItemSet, LexStateId, LexItemSetHash> lex_state_ids;
|
||||
unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
|
||||
LexTable lex_table;
|
||||
|
||||
public:
|
||||
|
|
@ -60,11 +60,11 @@ class LexTableBuilder {
|
|||
continue;
|
||||
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
result.insert(
|
||||
result.entries.insert(
|
||||
LexItem(symbol, after_separators(CharacterSet().include(0).copy())));
|
||||
|
||||
else if (symbol.is_token)
|
||||
result.insert(LexItem(
|
||||
result.entries.insert(LexItem(
|
||||
symbol, after_separators(lex_grammar.variables[symbol.index].rule)));
|
||||
}
|
||||
return result;
|
||||
|
|
@ -94,8 +94,7 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
auto transitions = lex_item_set_transitions(item_set);
|
||||
for (const auto &transition : transitions) {
|
||||
for (const auto &transition : item_set.transitions()) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet new_item_set = transition.second;
|
||||
LexStateId new_state_id = add_lex_state(new_item_set);
|
||||
|
|
@ -108,7 +107,7 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const LexItem &item : item_set) {
|
||||
for (const LexItem &item : item_set.entries) {
|
||||
CompletionStatus completion_status = get_completion_status(item.rule);
|
||||
if (completion_status.is_done) {
|
||||
auto current_action = lex_table.state(state_id).default_action;
|
||||
|
|
@ -121,7 +120,7 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &item : item_set)
|
||||
for (const auto &item : item_set.entries)
|
||||
if (item.is_token_start())
|
||||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
|
@ -145,7 +144,7 @@ class LexTableBuilder {
|
|||
|
||||
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
|
||||
set<int> result;
|
||||
for (const auto &item : item_set) {
|
||||
for (const auto &item : item_set.entries) {
|
||||
auto precedence_range = get_metadata(item.rule, rules::PRECEDENCE);
|
||||
result.insert(precedence_range.min);
|
||||
result.insert(precedence_range.max);
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ class ParseTableBuilder {
|
|||
const SyntaxGrammar grammar;
|
||||
const LexicalGrammar lexical_grammar;
|
||||
ParseConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId, ParseItemSetHash> parse_state_ids;
|
||||
unordered_map<const ParseItemSet, ParseStateId, ParseItemSet::Hash> parse_state_ids;
|
||||
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
|
||||
ParseTable parse_table;
|
||||
std::set<string> conflicts;
|
||||
|
|
@ -92,7 +92,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &transition : parse_item_set_transitions(item_set, grammar)) {
|
||||
for (const auto &transition : item_set.transitions(grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ParseItemSet &next_item_set = transition.second;
|
||||
|
||||
|
|
@ -127,7 +127,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &pair : item_set) {
|
||||
for (const auto &pair : item_set.entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const auto &lookahead_symbols = pair.second;
|
||||
|
||||
|
|
@ -235,7 +235,7 @@ class ParseTableBuilder {
|
|||
|
||||
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
|
||||
set<int> result;
|
||||
for (const auto &pair : item_set) {
|
||||
for (const auto &pair : item_set.entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
|
|
@ -255,7 +255,7 @@ class ParseTableBuilder {
|
|||
set<Symbol> result;
|
||||
switch (action.type) {
|
||||
case ParseActionTypeShift: {
|
||||
for (const auto &pair : item_set) {
|
||||
for (const auto &pair : item_set.entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
|
|
|
|||
|
|
@ -22,8 +22,9 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
|
|||
|
||||
// An item set's closure is defined recursively. Use an explicit stack to
|
||||
// store the recursively-added items.
|
||||
vector<pair<ParseItem, LookaheadSet>> items_to_process(input_item_set.begin(),
|
||||
input_item_set.end());
|
||||
vector<pair<ParseItem, LookaheadSet>> items_to_process(
|
||||
input_item_set.entries.begin(), input_item_set.entries.end());
|
||||
|
||||
while (!items_to_process.empty()) {
|
||||
ParseItem item = items_to_process.back().first;
|
||||
LookaheadSet lookahead_symbols = items_to_process.back().second;
|
||||
|
|
@ -31,7 +32,7 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
|
|||
|
||||
// Add the parse-item and lookahead symbols to the item set.
|
||||
// If they were already present, skip to the next item.
|
||||
if (!result[item].insert_all(lookahead_symbols))
|
||||
if (!result.entries[item].insert_all(lookahead_symbols))
|
||||
continue;
|
||||
|
||||
// If the item is at the end of its production, skip to the next item.
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -10,6 +11,7 @@ namespace build_tables {
|
|||
using std::hash;
|
||||
using std::map;
|
||||
using std::string;
|
||||
using std::unordered_set;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
|
||||
|
|
@ -28,22 +30,31 @@ size_t LexItem::Hash::operator()(const LexItem &item) const {
|
|||
return hash<Symbol>()(item.lhs) ^ hash<rule_ptr>()(item.rule);
|
||||
}
|
||||
|
||||
size_t LexItemSetHash::operator()(const LexItemSet &item_set) const {
|
||||
size_t result = hash<size_t>()(item_set.size());
|
||||
for (const auto &item : item_set)
|
||||
size_t LexItemSet::Hash::operator()(const LexItemSet &item_set) const {
|
||||
size_t result = hash<size_t>()(item_set.entries.size());
|
||||
for (const auto &item : item_set.entries)
|
||||
result ^= LexItem::Hash()(item);
|
||||
return result;
|
||||
}
|
||||
|
||||
map<CharacterSet, LexItemSet> lex_item_set_transitions(const LexItemSet &item_set) {
|
||||
LexItemSet::LexItemSet() {}
|
||||
|
||||
LexItemSet::LexItemSet(const unordered_set<LexItem, LexItem::Hash> &entries)
|
||||
: entries(entries) {}
|
||||
|
||||
bool LexItemSet::operator==(const LexItemSet &other) const {
|
||||
return entries == other.entries;
|
||||
}
|
||||
|
||||
map<CharacterSet, LexItemSet> LexItemSet::transitions() const {
|
||||
map<CharacterSet, LexItemSet> result;
|
||||
for (const LexItem &item : item_set) {
|
||||
for (const LexItem &item : entries) {
|
||||
for (auto &transition : rule_transitions(item.rule)) {
|
||||
LexItem next_item(item.lhs, transition.second);
|
||||
merge_transition<LexItemSet>(
|
||||
&result, { transition.first, LexItemSet({ next_item }) },
|
||||
[](LexItemSet *left, const LexItemSet *right) {
|
||||
left->insert(right->begin(), right->end());
|
||||
left->entries.insert(right->entries.begin(), right->entries.end());
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,15 +24,21 @@ class LexItem {
|
|||
};
|
||||
};
|
||||
|
||||
typedef std::unordered_set<LexItem, LexItem::Hash> LexItemSet;
|
||||
class LexItemSet {
|
||||
public:
|
||||
LexItemSet();
|
||||
LexItemSet(const std::unordered_set<LexItem, LexItem::Hash> &);
|
||||
|
||||
struct LexItemSetHash {
|
||||
size_t operator()(const LexItemSet &) const;
|
||||
bool operator==(const LexItemSet &) const;
|
||||
std::map<rules::CharacterSet, LexItemSet> transitions() const;
|
||||
|
||||
std::unordered_set<LexItem, LexItem::Hash> entries;
|
||||
|
||||
struct Hash {
|
||||
size_t operator()(const LexItemSet &) const;
|
||||
};
|
||||
};
|
||||
|
||||
std::map<rules::CharacterSet, LexItemSet> lex_item_set_transitions(
|
||||
const LexItemSet &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
|
|
|
|||
|
|
@ -40,9 +40,18 @@ Symbol ParseItem::lhs() const {
|
|||
return Symbol(variable_index);
|
||||
}
|
||||
|
||||
size_t ParseItemSetHash::operator()(const ParseItemSet &item_set) const {
|
||||
size_t result = hash<size_t>()(item_set.size());
|
||||
for (auto &pair : item_set) {
|
||||
ParseItemSet::ParseItemSet() {}
|
||||
|
||||
ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
|
||||
: entries(entries) {}
|
||||
|
||||
bool ParseItemSet::operator==(const ParseItemSet &other) const {
|
||||
return entries == other.entries;
|
||||
}
|
||||
|
||||
size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const {
|
||||
size_t result = hash<size_t>()(item_set.entries.size());
|
||||
for (auto &pair : item_set.entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
result ^= hash<unsigned int>()(item.variable_index) ^
|
||||
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
|
||||
|
|
@ -56,10 +65,10 @@ size_t ParseItemSetHash::operator()(const ParseItemSet &item_set) const {
|
|||
return result;
|
||||
}
|
||||
|
||||
map<Symbol, ParseItemSet> parse_item_set_transitions(
|
||||
const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> ParseItemSet::transitions(
|
||||
const SyntaxGrammar &grammar) const {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const auto &pair : item_set) {
|
||||
for (const auto &pair : entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const LookaheadSet &lookahead_symbols = pair.second;
|
||||
const Production &production =
|
||||
|
|
@ -72,7 +81,7 @@ map<Symbol, ParseItemSet> parse_item_set_transitions(
|
|||
int rule_id = step < production.size() ? production[step].rule_id : 0;
|
||||
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
|
||||
|
||||
result[symbol][new_item] = lookahead_symbols;
|
||||
result[symbol].entries[new_item] = lookahead_symbols;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -23,15 +23,21 @@ class ParseItem {
|
|||
int rule_id;
|
||||
};
|
||||
|
||||
typedef std::map<ParseItem, LookaheadSet> ParseItemSet;
|
||||
class ParseItemSet {
|
||||
public:
|
||||
ParseItemSet();
|
||||
ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
|
||||
|
||||
struct ParseItemSetHash {
|
||||
size_t operator()(const ParseItemSet &) const;
|
||||
std::map<rules::Symbol, ParseItemSet> transitions(const SyntaxGrammar &) const;
|
||||
bool operator==(const ParseItemSet &) const;
|
||||
|
||||
std::map<ParseItem, LookaheadSet> entries;
|
||||
|
||||
struct Hash {
|
||||
size_t operator()(const ParseItemSet &) const;
|
||||
};
|
||||
};
|
||||
|
||||
std::map<rules::Symbol, ParseItemSet> parse_item_set_transitions(
|
||||
const ParseItemSet &, const SyntaxGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue