Make ParseItemSet & LexItemSet classes

Max Brunsfeld 2015-10-05 15:13:43 -07:00
parent f01972c64e
commit ef2acf9496
11 changed files with 88 additions and 44 deletions
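
A condensed sketch of the two resulting classes, assembled from the header hunks in this commit (namespaces, includes, and forward declarations omitted, so this is illustrative rather than a drop-in header):

    class LexItemSet {
     public:
      LexItemSet();
      LexItemSet(const std::unordered_set<LexItem, LexItem::Hash> &);

      bool operator==(const LexItemSet &) const;
      std::map<rules::CharacterSet, LexItemSet> transitions() const;

      std::unordered_set<LexItem, LexItem::Hash> entries;

      struct Hash {
        size_t operator()(const LexItemSet &) const;
      };
    };

    class ParseItemSet {
     public:
      ParseItemSet();
      ParseItemSet(const std::map<ParseItem, LookaheadSet> &);

      std::map<rules::Symbol, ParseItemSet> transitions(const SyntaxGrammar &) const;
      bool operator==(const ParseItemSet &) const;

      std::map<ParseItem, LookaheadSet> entries;

      struct Hash {
        size_t operator()(const ParseItemSet &) const;
      };
    };

The free functions lex_item_set_transitions and parse_item_set_transitions become the transitions() members, and the standalone LexItemSetHash / ParseItemSetHash functors become the nested Hash structs; callers now reach the underlying containers through the public entries member.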

View file

@@ -48,7 +48,7 @@ describe("lex_item_set_transitions", [&]() {
 LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
 });
-AssertThat(lex_item_set_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
+AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
 {
 CharacterSet().include('a', 'd'),
 LexItemSet({

View file

@@ -47,7 +47,7 @@ describe("parse_item_set_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
 }
 });
-AssertThat(parse_item_set_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
+AssertThat(set1.transitions(grammar), Equals(map<Symbol, ParseItemSet>({
 {
 Symbol(1),
 ParseItemSet({

View file

@@ -111,11 +111,15 @@ ostream &operator<<(ostream &stream, const ProductionStep &step) {
 namespace build_tables {
-ostream &operator<<(ostream &stream, const build_tables::LexItem &item) {
+ostream &operator<<(ostream &stream, const LexItem &item) {
 return stream << string("(item ") << item.lhs << string(" ") << *item.rule
 << string(")");
 }
+ostream &operator<<(ostream &stream, const LexItemSet &item_set) {
+return stream << item_set.entries;
+}
 ostream &operator<<(ostream &stream, const ParseItem &item) {
 return stream << string("(item variable:") << to_string(item.variable_index)
 << string(" production:") << to_string(item.production_index)
@@ -124,6 +128,10 @@ ostream &operator<<(ostream &stream, const ParseItem &item) {
 << string(")");
 }
+std::ostream &operator<<(std::ostream &stream, const ParseItemSet &item_set) {
+return stream << item_set.entries;
+}
 std::ostream &operator<<(std::ostream &stream, const MetadataRange &range) {
 return stream << string("{") << to_string(range.min) << string(", ")
 << to_string(range.max) << string("}");

View file

@@ -111,12 +111,16 @@ namespace build_tables {
 struct MetadataRange;
 class LexItem;
+class LexItemSet;
 class ParseItem;
+class ParseItemSet;
 class LookaheadSet;
 ostream &operator<<(ostream &, const MetadataRange &);
 ostream &operator<<(ostream &, const LexItem &);
+ostream &operator<<(ostream &, const LexItemSet &);
 ostream &operator<<(ostream &, const ParseItem &);
+ostream &operator<<(ostream &, const ParseItemSet &);
 ostream &operator<<(ostream &, const LookaheadSet &);
 } // namespace build_tables

View file

@@ -34,7 +34,7 @@ class LexTableBuilder {
 const LexicalGrammar lex_grammar;
 const LexConflictManager conflict_manager;
 ParseTable *parse_table;
-unordered_map<const LexItemSet, LexStateId, LexItemSetHash> lex_state_ids;
+unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
 LexTable lex_table;
 public:
@@ -60,11 +60,11 @@ class LexTableBuilder {
 continue;
 if (symbol == rules::END_OF_INPUT())
-result.insert(
+result.entries.insert(
 LexItem(symbol, after_separators(CharacterSet().include(0).copy())));
 else if (symbol.is_token)
-result.insert(LexItem(
+result.entries.insert(LexItem(
 symbol, after_separators(lex_grammar.variables[symbol.index].rule)));
 }
 return result;
@@ -94,8 +94,7 @@ class LexTableBuilder {
 }
 void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
-auto transitions = lex_item_set_transitions(item_set);
-for (const auto &transition : transitions) {
+for (const auto &transition : item_set.transitions()) {
 CharacterSet rule = transition.first;
 LexItemSet new_item_set = transition.second;
 LexStateId new_state_id = add_lex_state(new_item_set);
@@ -108,7 +107,7 @@ class LexTableBuilder {
 }
 void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
-for (const LexItem &item : item_set) {
+for (const LexItem &item : item_set.entries) {
 CompletionStatus completion_status = get_completion_status(item.rule);
 if (completion_status.is_done) {
 auto current_action = lex_table.state(state_id).default_action;
@@ -121,7 +120,7 @@ class LexTableBuilder {
 }
 void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
-for (const auto &item : item_set)
+for (const auto &item : item_set.entries)
 if (item.is_token_start())
 lex_table.state(state_id).is_token_start = true;
 }
@@ -145,7 +144,7 @@ class LexTableBuilder {
 set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
 set<int> result;
-for (const auto &item : item_set) {
+for (const auto &item : item_set.entries) {
 auto precedence_range = get_metadata(item.rule, rules::PRECEDENCE);
 result.insert(precedence_range.min);
 result.insert(precedence_range.max);

View file

@@ -34,7 +34,7 @@ class ParseTableBuilder {
 const SyntaxGrammar grammar;
 const LexicalGrammar lexical_grammar;
 ParseConflictManager conflict_manager;
-unordered_map<const ParseItemSet, ParseStateId, ParseItemSetHash> parse_state_ids;
+unordered_map<const ParseItemSet, ParseStateId, ParseItemSet::Hash> parse_state_ids;
 vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
 ParseTable parse_table;
 std::set<string> conflicts;
@@ -92,7 +92,7 @@ class ParseTableBuilder {
 }
 void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
-for (const auto &transition : parse_item_set_transitions(item_set, grammar)) {
+for (const auto &transition : item_set.transitions(grammar)) {
 const Symbol &symbol = transition.first;
 const ParseItemSet &next_item_set = transition.second;
@@ -127,7 +127,7 @@ class ParseTableBuilder {
 }
 void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
-for (const auto &pair : item_set) {
+for (const auto &pair : item_set.entries) {
 const ParseItem &item = pair.first;
 const auto &lookahead_symbols = pair.second;
@@ -235,7 +235,7 @@ class ParseTableBuilder {
 set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
 set<int> result;
-for (const auto &pair : item_set) {
+for (const auto &pair : item_set.entries) {
 const ParseItem &item = pair.first;
 const Production &production =
 grammar.productions(item.lhs())[item.production_index];
@@ -255,7 +255,7 @@ class ParseTableBuilder {
 set<Symbol> result;
 switch (action.type) {
 case ParseActionTypeShift: {
-for (const auto &pair : item_set) {
+for (const auto &pair : item_set.entries) {
 const ParseItem &item = pair.first;
 const Production &production =
 grammar.productions(item.lhs())[item.production_index];

View file

@@ -22,8 +22,9 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
 // An item set's closure is defined recursively. Use an explicit stack to
 // store the recursively-added items.
-vector<pair<ParseItem, LookaheadSet>> items_to_process(input_item_set.begin(),
-input_item_set.end());
+vector<pair<ParseItem, LookaheadSet>> items_to_process(
+input_item_set.entries.begin(), input_item_set.entries.end());
 while (!items_to_process.empty()) {
 ParseItem item = items_to_process.back().first;
 LookaheadSet lookahead_symbols = items_to_process.back().second;
@@ -31,7 +32,7 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
 // Add the parse-item and lookahead symbols to the item set.
 // If they were already present, skip to the next item.
-if (!result[item].insert_all(lookahead_symbols))
+if (!result.entries[item].insert_all(lookahead_symbols))
 continue;
 // If the item is at the end of its production, skip to the next item.

View file

@@ -3,6 +3,7 @@
 #include "compiler/build_tables/rule_transitions.h"
 #include "compiler/build_tables/merge_transitions.h"
 #include "compiler/rules/symbol.h"
+#include <unordered_set>
 namespace tree_sitter {
 namespace build_tables {
@@ -10,6 +11,7 @@ namespace build_tables {
 using std::hash;
 using std::map;
 using std::string;
+using std::unordered_set;
 using rules::CharacterSet;
 using rules::Symbol;
@@ -28,22 +30,31 @@ size_t LexItem::Hash::operator()(const LexItem &item) const {
 return hash<Symbol>()(item.lhs) ^ hash<rule_ptr>()(item.rule);
 }
-size_t LexItemSetHash::operator()(const LexItemSet &item_set) const {
-size_t result = hash<size_t>()(item_set.size());
-for (const auto &item : item_set)
+size_t LexItemSet::Hash::operator()(const LexItemSet &item_set) const {
+size_t result = hash<size_t>()(item_set.entries.size());
+for (const auto &item : item_set.entries)
 result ^= LexItem::Hash()(item);
 return result;
 }
-map<CharacterSet, LexItemSet> lex_item_set_transitions(const LexItemSet &item_set) {
+LexItemSet::LexItemSet() {}
+LexItemSet::LexItemSet(const unordered_set<LexItem, LexItem::Hash> &entries)
+: entries(entries) {}
+bool LexItemSet::operator==(const LexItemSet &other) const {
+return entries == other.entries;
+}
+map<CharacterSet, LexItemSet> LexItemSet::transitions() const {
 map<CharacterSet, LexItemSet> result;
-for (const LexItem &item : item_set) {
+for (const LexItem &item : entries) {
 for (auto &transition : rule_transitions(item.rule)) {
 LexItem next_item(item.lhs, transition.second);
 merge_transition<LexItemSet>(
 &result, { transition.first, LexItemSet({ next_item }) },
 [](LexItemSet *left, const LexItemSet *right) {
-left->insert(right->begin(), right->end());
+left->entries.insert(right->entries.begin(), right->entries.end());
 });
 }
 }
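
A note on the hashers above: LexItemSet::Hash (and ParseItemSet::Hash below) seeds the result with the entry count and then XORs the per-item hashes, so the value is independent of iteration order, which is what hashing an unordered container requires. A minimal standalone illustration of the same pattern, using plain strings instead of this commit's item types so it compiles on its own:

    #include <cstddef>
    #include <functional>
    #include <string>
    #include <unordered_set>

    // Order-independent set hash: seed with the size, then XOR each element's hash.
    struct StringSetHash {
      size_t operator()(const std::unordered_set<std::string> &set) const {
        size_t result = std::hash<size_t>()(set.size());
        for (const auto &element : set)
          result ^= std::hash<std::string>()(element);
        return result;
      }
    };

    // Usage: std::unordered_map<std::unordered_set<std::string>, int, StringSetHash> ids;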

View file

@@ -24,15 +24,21 @@ class LexItem {
 };
 };
-typedef std::unordered_set<LexItem, LexItem::Hash> LexItemSet;
+class LexItemSet {
+public:
+LexItemSet();
+LexItemSet(const std::unordered_set<LexItem, LexItem::Hash> &);
-struct LexItemSetHash {
-size_t operator()(const LexItemSet &) const;
+bool operator==(const LexItemSet &) const;
+std::map<rules::CharacterSet, LexItemSet> transitions() const;
+std::unordered_set<LexItem, LexItem::Hash> entries;
+struct Hash {
+size_t operator()(const LexItemSet &) const;
+};
 };
-std::map<rules::CharacterSet, LexItemSet> lex_item_set_transitions(
-const LexItemSet &);
 } // namespace build_tables
 } // namespace tree_sitter

View file

@@ -40,9 +40,18 @@ Symbol ParseItem::lhs() const {
 return Symbol(variable_index);
 }
-size_t ParseItemSetHash::operator()(const ParseItemSet &item_set) const {
-size_t result = hash<size_t>()(item_set.size());
-for (auto &pair : item_set) {
+ParseItemSet::ParseItemSet() {}
+ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
+: entries(entries) {}
+bool ParseItemSet::operator==(const ParseItemSet &other) const {
+return entries == other.entries;
+}
+size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const {
+size_t result = hash<size_t>()(item_set.entries.size());
+for (auto &pair : item_set.entries) {
 const ParseItem &item = pair.first;
 result ^= hash<unsigned int>()(item.variable_index) ^
 hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
@@ -56,10 +65,10 @@ size_t ParseItemSetHash::operator()(const ParseItemSet &item_set) const {
 return result;
 }
-map<Symbol, ParseItemSet> parse_item_set_transitions(
-const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
+map<Symbol, ParseItemSet> ParseItemSet::transitions(
+const SyntaxGrammar &grammar) const {
 map<Symbol, ParseItemSet> result;
-for (const auto &pair : item_set) {
+for (const auto &pair : entries) {
 const ParseItem &item = pair.first;
 const LookaheadSet &lookahead_symbols = pair.second;
 const Production &production =
@@ -72,7 +81,7 @@ map<Symbol, ParseItemSet> parse_item_set_transitions(
 int rule_id = step < production.size() ? production[step].rule_id : 0;
 ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
-result[symbol][new_item] = lookahead_symbols;
+result[symbol].entries[new_item] = lookahead_symbols;
 }
 return result;

View file

@@ -23,15 +23,21 @@ class ParseItem {
 int rule_id;
 };
-typedef std::map<ParseItem, LookaheadSet> ParseItemSet;
+class ParseItemSet {
+public:
+ParseItemSet();
+ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
-struct ParseItemSetHash {
-size_t operator()(const ParseItemSet &) const;
+std::map<rules::Symbol, ParseItemSet> transitions(const SyntaxGrammar &) const;
+bool operator==(const ParseItemSet &) const;
+std::map<ParseItem, LookaheadSet> entries;
+struct Hash {
+size_t operator()(const ParseItemSet &) const;
+};
 };
-std::map<rules::Symbol, ParseItemSet> parse_item_set_transitions(
-const ParseItemSet &, const SyntaxGrammar &);
 } // namespace build_tables
 } // namespace tree_sitter
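
In short, call sites migrate as the builder hunks above show; the LexItemSet side looks like this, and the ParseItemSet side is analogous with transitions(grammar) and a map of entries (sketch only, the surrounding types come from the existing builders):

    // before: free function, standalone hash functor, set used directly as a container
    unordered_map<const LexItemSet, LexStateId, LexItemSetHash> lex_state_ids;
    for (const auto &transition : lex_item_set_transitions(item_set)) { /* ... */ }
    for (const LexItem &item : item_set) { /* ... */ }

    // after: member function, nested hasher, explicit access to .entries
    unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
    for (const auto &transition : item_set.transitions()) { /* ... */ }
    for (const LexItem &item : item_set.entries) { /* ... */ }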