Reorganize ParseItemSet and LexItemSet
This commit is contained in:
parent
39a0934088
commit
f01972c64e
16 changed files with 219 additions and 304 deletions
|
|
@ -15,9 +15,7 @@
|
|||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/get_completion_status.cc',
|
||||
'src/compiler/build_tables/get_metadata.cc',
|
||||
'src/compiler/build_tables/item.cc',
|
||||
'src/compiler/build_tables/item_set_closure.cc',
|
||||
'src/compiler/build_tables/item_set_transitions.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
'src/compiler/build_tables/lookahead_set.cc',
|
||||
|
|
|
|||
|
|
@ -1,115 +0,0 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/helpers/rule_helpers.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("char_transitions(LexItemSet)", []() {
|
||||
describe("when two items in the set have transitions on the same character", [&]() {
|
||||
it("merges the transitions by computing the union of the two item sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
|
||||
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
|
||||
});
|
||||
|
||||
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('g', 'x'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule_0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(11, true), 0, AssociativityNone, 101},
|
||||
{Symbol(12, true), 0, AssociativityNone, 102},
|
||||
{Symbol(1), 0, AssociativityNone, 103},
|
||||
{Symbol(13, true), 0, AssociativityNone, 104},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule_1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(2), 0, AssociativityNone, 105},
|
||||
{Symbol(14, true), 0, AssociativityNone, 106},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule_2", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(15, true), 0, AssociativityNone, 105},
|
||||
})
|
||||
})
|
||||
}, {}, {}};
|
||||
|
||||
ParseItemSet set1({
|
||||
{
|
||||
// Step 2 of rule_0's production: right before the reference to rule_1.
|
||||
ParseItem(Symbol(0), 0, 2, 103),
|
||||
LookaheadSet({ Symbol(16, true) })
|
||||
}
|
||||
});
|
||||
|
||||
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
|
||||
|
||||
// Consume symbol 1 -> step 3 of rule_0's production
|
||||
{
|
||||
Symbol(1),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 3, 104),
|
||||
LookaheadSet({ Symbol(16, true) })
|
||||
}
|
||||
})
|
||||
},
|
||||
|
||||
// Consume symbol 2 -> step 1 of rule_1's production
|
||||
{
|
||||
Symbol(2),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(1), 0, 1, 106),
|
||||
LookaheadSet({ Symbol(13, true) })
|
||||
},
|
||||
})
|
||||
},
|
||||
|
||||
// Consume token 15 -> step 1 of rule_2's production
|
||||
{
|
||||
Symbol(15, true),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(2), 0, 1, 0),
|
||||
LookaheadSet({ Symbol(14, true) })
|
||||
},
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
using namespace rules;
|
||||
|
|
@ -8,7 +8,7 @@ using namespace build_tables;
|
|||
START_TEST
|
||||
|
||||
describe("LexItem", []() {
|
||||
describe("determining if an item is the start of a token", [&]() {
|
||||
describe("is_token_start()", [&]() {
|
||||
Symbol sym(1);
|
||||
rule_ptr token_start = make_shared<Metadata>(str("a"), map<MetadataKey, int>({
|
||||
{ START_TOKEN, 1 }
|
||||
|
|
@ -40,4 +40,37 @@ describe("LexItem", []() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("lex_item_set_transitions", [&]() {
|
||||
describe("when two items in the set have transitions on the same character", [&]() {
|
||||
it("merges the transitions by computing the union of the two item sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
|
||||
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
|
||||
});
|
||||
|
||||
AssertThat(lex_item_set_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('g', 'x'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
74
spec/compiler/build_tables/parse_item_spec.cc
Normal file
74
spec/compiler/build_tables/parse_item_spec.cc
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/helpers/rule_helpers.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("parse_item_set_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule_0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(11, true), 0, AssociativityNone, 101},
|
||||
{Symbol(12, true), 0, AssociativityNone, 102},
|
||||
{Symbol(1), 0, AssociativityNone, 103},
|
||||
{Symbol(13, true), 0, AssociativityNone, 104},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule_1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(2), 0, AssociativityNone, 105},
|
||||
{Symbol(14, true), 0, AssociativityNone, 106},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule_2", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(15, true), 0, AssociativityNone, 105},
|
||||
})
|
||||
})
|
||||
}, {}, {}};
|
||||
|
||||
ParseItemSet set1({
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 2, 103),
|
||||
LookaheadSet({ Symbol(16, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), 0, 0, 106),
|
||||
LookaheadSet({ Symbol(17, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2), 0, 1, 106),
|
||||
LookaheadSet({ Symbol(17, true) })
|
||||
}
|
||||
});
|
||||
|
||||
AssertThat(parse_item_set_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
|
||||
{
|
||||
Symbol(1),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 3, 104),
|
||||
LookaheadSet({ Symbol(16, true) })
|
||||
}
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
Symbol(2),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(1), 0, 1, 106),
|
||||
LookaheadSet({ Symbol(17, true) })
|
||||
},
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -6,7 +6,6 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/get_completion_status.h"
|
||||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
|
@ -35,7 +34,7 @@ class LexTableBuilder {
|
|||
const LexicalGrammar lex_grammar;
|
||||
const LexConflictManager conflict_manager;
|
||||
ParseTable *parse_table;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
unordered_map<const LexItemSet, LexStateId, LexItemSetHash> lex_state_ids;
|
||||
LexTable lex_table;
|
||||
|
||||
public:
|
||||
|
|
@ -95,7 +94,7 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
auto transitions = char_transitions(item_set);
|
||||
auto transitions = lex_item_set_transitions(item_set);
|
||||
for (const auto &transition : transitions) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet new_item_set = transition.second;
|
||||
|
|
|
|||
|
|
@ -6,11 +6,11 @@
|
|||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/get_completion_status.h"
|
||||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
|
@ -34,7 +34,7 @@ class ParseTableBuilder {
|
|||
const SyntaxGrammar grammar;
|
||||
const LexicalGrammar lexical_grammar;
|
||||
ParseConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
unordered_map<const ParseItemSet, ParseStateId, ParseItemSetHash> parse_state_ids;
|
||||
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
|
||||
ParseTable parse_table;
|
||||
std::set<string> conflicts;
|
||||
|
|
@ -56,7 +56,7 @@ class ParseTableBuilder {
|
|||
|
||||
while (!item_sets_to_process.empty()) {
|
||||
auto pair = item_sets_to_process.back();
|
||||
ParseItemSet item_set = std::move(pair.first);
|
||||
ParseItemSet item_set = item_set_closure(pair.first, grammar);
|
||||
ParseStateId state_id = pair.second;
|
||||
item_sets_to_process.pop_back();
|
||||
|
||||
|
|
@ -92,7 +92,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &transition : sym_transitions(item_set, grammar)) {
|
||||
for (const auto &transition : parse_item_set_transitions(item_set, grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ParseItemSet &next_item_set = transition.second;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
#include "compiler/build_tables/item.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
Item::Item(const rules::Symbol &lhs, const rule_ptr rule)
|
||||
: lhs(lhs), rule(rule) {}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_ITEM_H_
|
||||
#define COMPILER_BUILD_TABLES_ITEM_H_
|
||||
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
class Item {
|
||||
public:
|
||||
Item(const rules::Symbol &lhs, rule_ptr rule);
|
||||
|
||||
rules::Symbol lhs;
|
||||
rule_ptr rule;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_ITEM_H_
|
||||
|
|
@ -5,7 +5,6 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -1,56 +0,0 @@
|
|||
#include <set>
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::map;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
|
||||
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &input_item_set,
|
||||
const SyntaxGrammar &grammar) {
|
||||
ParseItemSet item_set(item_set_closure(input_item_set, grammar));
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const LookaheadSet &lookahead_symbols = pair.second;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index == production.size())
|
||||
continue;
|
||||
|
||||
const Symbol &symbol = production[item.step_index].symbol;
|
||||
unsigned int step = item.step_index + 1;
|
||||
int rule_id = step < production.size() ? production[step].rule_id : 0;
|
||||
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
|
||||
|
||||
result[symbol][new_item] = lookahead_symbols;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set) {
|
||||
map<CharacterSet, LexItemSet> result;
|
||||
for (const LexItem &item : item_set) {
|
||||
for (auto &transition : rule_transitions(item.rule)) {
|
||||
LexItem next_item(item.lhs, transition.second);
|
||||
merge_transition<LexItemSet>(
|
||||
&result, { transition.first, LexItemSet({ next_item }) },
|
||||
[](LexItemSet *left, const LexItemSet *right) {
|
||||
left->insert(right->begin(), right->end());
|
||||
});
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
|
||||
#define COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
|
||||
|
||||
#include <map>
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct SyntaxGrammar;
|
||||
|
||||
namespace rules {
|
||||
class CharacterSet;
|
||||
class Symbol;
|
||||
}
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
std::map<rules::Symbol, ParseItemSet> sym_transitions(
|
||||
const ParseItemSet &item_set, const SyntaxGrammar &grammar);
|
||||
|
||||
std::map<rules::CharacterSet, LexItemSet> char_transitions(
|
||||
const LexItemSet &item_set);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
|
||||
|
|
@ -1,18 +1,20 @@
|
|||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::hash;
|
||||
using std::map;
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
|
||||
LexItem::LexItem(const rules::Symbol &lhs, const rule_ptr rule)
|
||||
: Item(lhs, rule) {}
|
||||
: lhs(lhs), rule(rule) {}
|
||||
|
||||
bool LexItem::operator==(const LexItem &other) const {
|
||||
return (other.lhs == lhs) && other.rule->operator==(*rule);
|
||||
|
|
@ -22,5 +24,31 @@ bool LexItem::is_token_start() const {
|
|||
return get_metadata(rule, rules::START_TOKEN).max > 0;
|
||||
}
|
||||
|
||||
size_t LexItem::Hash::operator()(const LexItem &item) const {
|
||||
return hash<Symbol>()(item.lhs) ^ hash<rule_ptr>()(item.rule);
|
||||
}
|
||||
|
||||
size_t LexItemSetHash::operator()(const LexItemSet &item_set) const {
|
||||
size_t result = hash<size_t>()(item_set.size());
|
||||
for (const auto &item : item_set)
|
||||
result ^= LexItem::Hash()(item);
|
||||
return result;
|
||||
}
|
||||
|
||||
map<CharacterSet, LexItemSet> lex_item_set_transitions(const LexItemSet &item_set) {
|
||||
map<CharacterSet, LexItemSet> result;
|
||||
for (const LexItem &item : item_set) {
|
||||
for (auto &transition : rule_transitions(item.rule)) {
|
||||
LexItem next_item(item.lhs, transition.second);
|
||||
merge_transition<LexItemSet>(
|
||||
&result, { transition.first, LexItemSet({ next_item }) },
|
||||
[](LexItemSet *left, const LexItemSet *right) {
|
||||
left->insert(right->begin(), right->end());
|
||||
});
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,44 +2,38 @@
|
|||
#define COMPILER_BUILD_TABLES_LEX_ITEM_H_
|
||||
|
||||
#include <unordered_set>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
class LexItem : public Item {
|
||||
class LexItem {
|
||||
public:
|
||||
LexItem(const rules::Symbol &lhs, rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
bool is_token_start() const;
|
||||
|
||||
rules::Symbol lhs;
|
||||
rule_ptr rule;
|
||||
|
||||
struct Hash {
|
||||
size_t operator()(const LexItem &) const;
|
||||
};
|
||||
};
|
||||
|
||||
typedef std::unordered_set<LexItem> LexItemSet;
|
||||
typedef std::unordered_set<LexItem, LexItem::Hash> LexItemSet;
|
||||
|
||||
struct LexItemSetHash {
|
||||
size_t operator()(const LexItemSet &) const;
|
||||
};
|
||||
|
||||
std::map<rules::CharacterSet, LexItemSet> lex_item_set_transitions(
|
||||
const LexItemSet &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::build_tables::LexItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::Item &item) const {
|
||||
return hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rule_ptr>()(item.rule);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<const tree_sitter::build_tables::LexItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto item : set)
|
||||
result ^= hash<tree_sitter::build_tables::LexItem>()(item);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_
|
||||
|
|
|
|||
|
|
@ -6,8 +6,10 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::map;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::hash;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,
|
||||
|
|
@ -38,5 +40,43 @@ Symbol ParseItem::lhs() const {
|
|||
return Symbol(variable_index);
|
||||
}
|
||||
|
||||
size_t ParseItemSetHash::operator()(const ParseItemSet &item_set) const {
|
||||
size_t result = hash<size_t>()(item_set.size());
|
||||
for (auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
result ^= hash<unsigned int>()(item.variable_index) ^
|
||||
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
|
||||
|
||||
const LookaheadSet &lookahead_set = pair.second;
|
||||
result ^= hash<size_t>()(lookahead_set.entries->size());
|
||||
for (auto &symbol : *pair.second.entries) {
|
||||
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
map<Symbol, ParseItemSet> parse_item_set_transitions(
|
||||
const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const LookaheadSet &lookahead_symbols = pair.second;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index == production.size())
|
||||
continue;
|
||||
|
||||
size_t step = item.step_index + 1;
|
||||
Symbol symbol = production[item.step_index].symbol;
|
||||
int rule_id = step < production.size() ? production[step].rule_id : 0;
|
||||
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
|
||||
|
||||
result[symbol][new_item] = lookahead_symbols;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@
|
|||
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
|
||||
#include <map>
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -25,33 +25,14 @@ class ParseItem {
|
|||
|
||||
typedef std::map<ParseItem, LookaheadSet> ParseItemSet;
|
||||
|
||||
struct ParseItemSetHash {
|
||||
size_t operator()(const ParseItemSet &) const;
|
||||
};
|
||||
|
||||
std::map<rules::Symbol, ParseItemSet> parse_item_set_transitions(
|
||||
const ParseItemSet &, const SyntaxGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::build_tables::ParseItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
|
||||
return hash<unsigned int>()(item.variable_index) ^
|
||||
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<const tree_sitter::build_tables::ParseItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto &pair : set) {
|
||||
result ^= hash<tree_sitter::build_tables::ParseItem>()(pair.first);
|
||||
result ^= hash<size_t>()(pair.second.entries->size());
|
||||
for (auto &symbol : *pair.second.entries)
|
||||
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@ class RuleTransitions : public rules::RuleFn<map<CharacterSet, rule_ptr>> {
|
|||
void merge_transitions(map<CharacterSet, rule_ptr> *left,
|
||||
const map<CharacterSet, rule_ptr> &right) {
|
||||
for (auto &pair : right)
|
||||
merge_transition<rule_ptr>(
|
||||
left, pair, [](rule_ptr *left, const rule_ptr *right) {
|
||||
*left = Choice::build({ *left, *right });
|
||||
});
|
||||
merge_transition<rule_ptr>(left, pair,
|
||||
[](rule_ptr *left, const rule_ptr *right) {
|
||||
*left = Choice::build({ *left, *right });
|
||||
});
|
||||
}
|
||||
|
||||
map<CharacterSet, rule_ptr> apply_to(const CharacterSet *rule) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue