Make separate Item classes for parsing and lexing
This commit is contained in:
parent
d015d57a53
commit
289992344e
10 changed files with 132 additions and 117 deletions
|
|
@ -7,35 +7,9 @@ using std::ostream;
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
const int NO_SYMBOLS = -1;
|
||||
|
||||
Item::Item(const string &rule_name, const rules::rule_ptr rule, int consumed_sym_count) :
|
||||
Item::Item(const string &rule_name, const rules::rule_ptr rule) :
|
||||
rule_name(rule_name),
|
||||
rule(rule),
|
||||
consumed_sym_count(consumed_sym_count) {};
|
||||
|
||||
Item Item::at_beginning_of_rule(const string &rule_name, const Grammar &grammar) {
|
||||
return Item(rule_name, grammar.rule(rule_name), 0);
|
||||
}
|
||||
|
||||
Item Item::at_beginning_of_token(const string &rule_name, const Grammar &grammar) {
|
||||
return Item(rule_name, grammar.rule(rule_name), NO_SYMBOLS);
|
||||
}
|
||||
|
||||
int Item::next_sym_count() const {
|
||||
return (consumed_sym_count == NO_SYMBOLS) ? NO_SYMBOLS : (consumed_sym_count + 1);
|
||||
}
|
||||
|
||||
bool Item::operator==(const Item &other) const {
|
||||
bool rule_names_eq = other.rule_name == rule_name;
|
||||
bool rules_eq = (*other.rule == *rule);
|
||||
bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count);
|
||||
return rule_names_eq && rules_eq && consumed_sym_counts_eq;
|
||||
}
|
||||
|
||||
bool Item::operator<(const Item &other) const {
|
||||
return rule_name < other.rule_name;
|
||||
}
|
||||
rule(rule) {};
|
||||
|
||||
bool Item::is_done() const {
|
||||
for (auto pair : rule_transitions(rule))
|
||||
|
|
@ -46,11 +20,35 @@ namespace tree_sitter {
|
|||
|
||||
ostream& operator<<(ostream &stream, const Item &item) {
|
||||
return stream <<
|
||||
string("#<item '") <<
|
||||
item.rule_name <<
|
||||
string("' ") <<
|
||||
*item.rule <<
|
||||
string(">");
|
||||
string("#<item '") <<
|
||||
item.rule_name <<
|
||||
string("' ") <<
|
||||
*item.rule <<
|
||||
string(">");
|
||||
}
|
||||
|
||||
bool Item::operator<(const Item &other) const {
|
||||
return rule_name < other.rule_name;
|
||||
}
|
||||
|
||||
LexItem::LexItem(const std::string &rule_name, const rules::rule_ptr rule) : Item(rule_name, rule) {}
|
||||
|
||||
bool LexItem::operator==(const LexItem &other) const {
|
||||
bool rule_names_eq = other.rule_name == rule_name;
|
||||
bool rules_eq = (*other.rule == *rule);
|
||||
return rule_names_eq && rules_eq;
|
||||
}
|
||||
|
||||
ParseItem::ParseItem(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count) :
|
||||
Item(rule_name, rule),
|
||||
consumed_sym_count(consumed_sym_count),
|
||||
lookahead_sym_name("") {}
|
||||
|
||||
bool ParseItem::operator==(const ParseItem &other) const {
|
||||
bool rule_names_eq = other.rule_name == rule_name;
|
||||
bool rules_eq = (*other.rule == *rule);
|
||||
bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count);
|
||||
return rule_names_eq && rules_eq && consumed_sym_counts_eq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,25 +10,33 @@ namespace tree_sitter {
|
|||
class Grammar;
|
||||
|
||||
namespace build_tables {
|
||||
class Item;
|
||||
|
||||
class Item {
|
||||
public:
|
||||
Item(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count);
|
||||
static Item at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar);
|
||||
static Item at_beginning_of_token(const std::string &rule_name, const Grammar &grammar);
|
||||
|
||||
bool operator==(const Item &other) const;
|
||||
Item(const std::string &rule_name, const rules::rule_ptr rule);
|
||||
bool operator<(const Item &other) const;
|
||||
bool is_done() const;
|
||||
int next_sym_count() const;
|
||||
|
||||
const std::string rule_name;
|
||||
const rules::rule_ptr rule;
|
||||
const int consumed_sym_count;
|
||||
};
|
||||
|
||||
typedef std::set<Item> ItemSet;
|
||||
|
||||
class LexItem : public Item {
|
||||
public:
|
||||
LexItem(const std::string &rule_name, const rules::rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
};
|
||||
|
||||
class ParseItem : public Item {
|
||||
public:
|
||||
ParseItem(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count);
|
||||
bool operator==(const ParseItem &other) const;
|
||||
|
||||
const int consumed_sym_count;
|
||||
const std::string lookahead_sym_name;
|
||||
};
|
||||
|
||||
typedef std::set<ParseItem> ParseItemSet;
|
||||
typedef std::set<LexItem> LexItemSet;
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Item &item);
|
||||
}
|
||||
|
|
@ -36,21 +44,30 @@ namespace tree_sitter {
|
|||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::build_tables::Item> {
|
||||
struct hash<tree_sitter::build_tables::LexItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::Item &item) const {
|
||||
return
|
||||
hash<std::string>()(item.rule_name) ^
|
||||
hash<tree_sitter::rules::Rule>()(*item.rule) ^
|
||||
hash<int>()(item.consumed_sym_count);
|
||||
hash<tree_sitter::rules::Rule>()(*item.rule);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct hash<const tree_sitter::build_tables::ItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::ItemSet &item_set) const {
|
||||
size_t result = hash<size_t>()(item_set.size());
|
||||
for (auto item : item_set)
|
||||
result ^= hash<tree_sitter::build_tables::Item>()(item);
|
||||
struct hash<tree_sitter::build_tables::ParseItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
|
||||
return
|
||||
hash<std::string>()(item.rule_name) ^
|
||||
hash<tree_sitter::rules::Rule>()(*item.rule) ^
|
||||
hash<size_t>()(item.consumed_sym_count);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct hash<const set<T>> {
|
||||
size_t operator()(const set<T> &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto item : set)
|
||||
result ^= hash<T>()(item);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -8,23 +8,27 @@ using std::vector;
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
static bool contains(ItemSet items, Item item) {
|
||||
static bool contains(ParseItemSet items, ParseItem item) {
|
||||
return (std::find(items.begin(), items.end(), item) != items.end());
|
||||
}
|
||||
|
||||
static void add_item(ItemSet &item_set, const Item &item, const Grammar &grammar) {
|
||||
ParseItem parse_item_at_beginning_of_rule(const rules::Symbol &symbol, const Grammar &grammar) {
|
||||
return ParseItem(symbol.name, grammar.rule(symbol.name), 0);
|
||||
}
|
||||
|
||||
static void add_item(ParseItemSet &item_set, const ParseItem &item, const Grammar &grammar) {
|
||||
if (!contains(item_set, item)) {
|
||||
item_set.insert(item);
|
||||
for (rules::Symbol rule : next_non_terminals(item, grammar)) {
|
||||
Item next_item = Item::at_beginning_of_rule(rule.name, grammar);
|
||||
auto next_item = parse_item_at_beginning_of_rule(rule, grammar);
|
||||
add_item(item_set, next_item, grammar);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ItemSet item_set_closure(const ItemSet &item_set, const Grammar &grammar) {
|
||||
ItemSet result;
|
||||
for (Item item : item_set)
|
||||
|
||||
const ParseItemSet item_set_closure(const ParseItemSet &item_set, const Grammar &grammar) {
|
||||
ParseItemSet result;
|
||||
for (ParseItem item : item_set)
|
||||
add_item(result, item, grammar);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ namespace tree_sitter {
|
|||
class Grammar;
|
||||
|
||||
namespace build_tables {
|
||||
const ItemSet item_set_closure(const ItemSet &item_set, const Grammar &grammar);
|
||||
const ParseItemSet item_set_closure(const ParseItemSet &item_set, const Grammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,31 +7,34 @@ using std::make_shared;
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
transition_map<rules::Rule, Item> item_transitions(const Item &item) {
|
||||
return rule_transitions(item.rule).map<Item>([&](rules::rule_ptr to_rule) {
|
||||
return make_shared<Item>(item.rule_name, to_rule, item.next_sym_count());
|
||||
});
|
||||
};
|
||||
|
||||
template<typename RuleClass>
|
||||
transition_map<RuleClass, ItemSet> transitions(const ItemSet &item_set, const Grammar &grammar) {
|
||||
transition_map<RuleClass, ItemSet> result;
|
||||
for (Item item : item_set) {
|
||||
for (auto transition : item_transitions(item)) {
|
||||
auto rule = dynamic_pointer_cast<const RuleClass>(transition.first);
|
||||
auto new_item_set = make_shared<ItemSet>(item_set_closure(ItemSet({ *transition.second }), grammar));
|
||||
if (rule.get()) result.add(rule, new_item_set);
|
||||
transition_map<rules::Character, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar) {
|
||||
transition_map<rules::Character, LexItemSet> result;
|
||||
for (LexItem item : item_set) {
|
||||
for (auto transition : rule_transitions(item.rule)) {
|
||||
auto new_item = LexItem(item.rule_name, transition.second);
|
||||
auto rule = dynamic_pointer_cast<const rules::Character>(transition.first);
|
||||
if (rule.get()) {
|
||||
auto new_item_set = make_shared<LexItemSet>(LexItemSet({ new_item }));
|
||||
result.add(rule, new_item_set);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
transition_map<rules::Character, ItemSet> char_transitions(const ItemSet &item_set, const Grammar &grammar) {
|
||||
return transitions<rules::Character>(item_set, grammar);
|
||||
}
|
||||
|
||||
transition_map<rules::Symbol, ItemSet> sym_transitions(const ItemSet &item_set, const Grammar &grammar) {
|
||||
return transitions<rules::Symbol>(item_set, grammar);
|
||||
transition_map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar) {
|
||||
transition_map<rules::Symbol, ParseItemSet> result;
|
||||
for (ParseItem item : item_set) {
|
||||
for (auto transition : rule_transitions(item.rule)) {
|
||||
auto new_item = ParseItem(item.rule_name, transition.second, item.consumed_sym_count + 1);
|
||||
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
|
||||
if (rule.get()) {
|
||||
auto new_item_set = make_shared<ParseItemSet>(item_set_closure(ParseItemSet({ new_item }), grammar));
|
||||
result.add(rule, new_item_set);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -8,8 +8,8 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
transition_map<rules::Character, ItemSet> char_transitions(const ItemSet &item_set, const Grammar &grammar);
|
||||
transition_map<rules::Symbol, ItemSet> sym_transitions(const ItemSet &item_set, const Grammar &grammar);
|
||||
transition_map<rules::Character, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar);
|
||||
transition_map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -65,17 +65,17 @@ namespace tree_sitter {
|
|||
return next_symbols<false>(rule, grammar);
|
||||
}
|
||||
|
||||
set<rules::Symbol> next_terminals(const Item &item, const Grammar &grammar) {
|
||||
set<rules::Symbol> next_terminals(const ParseItem &item, const Grammar &grammar) {
|
||||
return next_terminals(item.rule, grammar);
|
||||
}
|
||||
|
||||
set<rules::Symbol> next_non_terminals(const Item &item, const Grammar &grammar) {
|
||||
set<rules::Symbol> next_non_terminals(const ParseItem &item, const Grammar &grammar) {
|
||||
return next_non_terminals(item.rule, grammar);
|
||||
}
|
||||
|
||||
set<rules::Symbol> next_terminals(const ItemSet &item_set, const Grammar &grammar) {
|
||||
set<rules::Symbol> next_terminals(const ParseItemSet &item_set, const Grammar &grammar) {
|
||||
set<rules::Symbol> result;
|
||||
for (Item item : item_set)
|
||||
for (auto item : item_set)
|
||||
for (rules::Symbol symbol : next_terminals(item, grammar))
|
||||
result.insert(symbol);
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -10,9 +10,10 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
std::set<rules::Symbol> next_terminals(const rules::rule_ptr &rule, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_terminals(const ItemSet &item_set, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_terminals(const Item &item, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_non_terminals(const Item &item, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_terminals(const ParseItemSet &item_set, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_terminals(const ParseItem &item, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_non_terminals(const ParseItem &item, const Grammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -17,50 +17,50 @@ namespace tree_sitter {
|
|||
class TableBuilder {
|
||||
const Grammar grammar;
|
||||
const Grammar lex_grammar;
|
||||
unordered_map<const ItemSet, size_t> parse_state_indices;
|
||||
unordered_map<const ItemSet, size_t> lex_state_indices;
|
||||
unordered_map<const ParseItemSet, size_t> parse_state_indices;
|
||||
unordered_map<const LexItemSet, size_t> lex_state_indices;
|
||||
ParseTable parse_table;
|
||||
LexTable lex_table;
|
||||
|
||||
long parse_state_index_for_item_set(const ItemSet &item_set) const {
|
||||
long parse_state_index_for_item_set(const ParseItemSet &item_set) const {
|
||||
auto entry = parse_state_indices.find(item_set);
|
||||
return (entry == parse_state_indices.end()) ? NOT_FOUND : entry->second;
|
||||
}
|
||||
|
||||
long lex_state_index_for_item_set(const ItemSet &item_set) const {
|
||||
long lex_state_index_for_item_set(const LexItemSet &item_set) const {
|
||||
auto entry = lex_state_indices.find(item_set);
|
||||
return (entry == lex_state_indices.end()) ? NOT_FOUND : entry->second;
|
||||
}
|
||||
|
||||
void add_shift_actions(const ItemSet &item_set, size_t state_index) {
|
||||
void add_shift_actions(const ParseItemSet &item_set, size_t state_index) {
|
||||
auto x = sym_transitions(item_set, grammar);
|
||||
for (auto transition : x) {
|
||||
rules::Symbol symbol = *transition.first;
|
||||
ItemSet item_set = *transition.second;
|
||||
ParseItemSet item_set = *transition.second;
|
||||
size_t new_state_index = add_parse_state(item_set);
|
||||
parse_table.add_action(state_index, symbol.name, ParseAction::Shift(new_state_index));
|
||||
}
|
||||
}
|
||||
|
||||
void add_advance_actions(const ItemSet &item_set, size_t state_index) {
|
||||
void add_advance_actions(const LexItemSet &item_set, size_t state_index) {
|
||||
for (auto transition : char_transitions(item_set, grammar)) {
|
||||
rules::Character rule = *transition.first;
|
||||
ItemSet item_set = *transition.second;
|
||||
LexItemSet item_set = *transition.second;
|
||||
size_t new_state_index = add_lex_state(item_set);
|
||||
lex_table.add_action(state_index, rule.value, LexAction::Advance(new_state_index));
|
||||
}
|
||||
}
|
||||
|
||||
void add_accept_token_actions(const ItemSet &item_set, size_t state_index) {
|
||||
for (Item item : item_set) {
|
||||
void add_accept_token_actions(const LexItemSet &item_set, size_t state_index) {
|
||||
for (LexItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
lex_table.add_default_action(state_index, LexAction::Accept(item.rule_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_reduce_actions(const ItemSet &item_set, size_t state_index) {
|
||||
for (Item item : item_set) {
|
||||
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
|
||||
for (ParseItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
if (item.rule_name == ParseTable::START) {
|
||||
parse_table.add_action(state_index, ParseTable::END_OF_INPUT, ParseAction::Accept());
|
||||
|
|
@ -71,7 +71,7 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
size_t add_lex_state(const ItemSet &item_set) {
|
||||
size_t add_lex_state(const LexItemSet &item_set) {
|
||||
auto state_index = lex_state_index_for_item_set(item_set);
|
||||
if (state_index == NOT_FOUND) {
|
||||
state_index = lex_table.add_state();
|
||||
|
|
@ -82,20 +82,20 @@ namespace tree_sitter {
|
|||
return state_index;
|
||||
}
|
||||
|
||||
ItemSet lex_item_set_for_parse_item_set(const ItemSet &parse_item_set) {
|
||||
ItemSet result;
|
||||
LexItemSet lex_item_set_for_parse_item_set(const ParseItemSet &parse_item_set) {
|
||||
LexItemSet result;
|
||||
for (rules::Symbol symbol : next_terminals(parse_item_set, grammar))
|
||||
result.insert(Item::at_beginning_of_token(symbol.name, lex_grammar));
|
||||
result.insert(LexItem(symbol.name, lex_grammar.rule(symbol.name)));
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t add_parse_state(const ItemSet &item_set) {
|
||||
size_t add_parse_state(const ParseItemSet &item_set) {
|
||||
auto state_index = parse_state_index_for_item_set(item_set);
|
||||
if (state_index == NOT_FOUND) {
|
||||
state_index = parse_table.add_state();
|
||||
parse_state_indices[item_set] = state_index;
|
||||
|
||||
ItemSet lex_item_set = lex_item_set_for_parse_item_set(item_set);
|
||||
LexItemSet lex_item_set = lex_item_set_for_parse_item_set(item_set);
|
||||
parse_table.states[state_index].lex_state_index = add_lex_state(lex_item_set);
|
||||
add_shift_actions(item_set, state_index);
|
||||
add_reduce_actions(item_set, state_index);
|
||||
|
|
@ -110,8 +110,8 @@ namespace tree_sitter {
|
|||
lex_grammar(lex_grammar) {};
|
||||
|
||||
pair<ParseTable, LexTable> build() {
|
||||
auto item = Item(ParseTable::START, rules::sym(grammar.start_rule_name), 0);
|
||||
auto item_set = item_set_closure(ItemSet({ item }), grammar);
|
||||
auto item = ParseItem(ParseTable::START, rules::sym(grammar.start_rule_name), 0);
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
|
||||
add_parse_state(item_set);
|
||||
return pair<ParseTable, LexTable>(parse_table, lex_table);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,14 +42,6 @@ namespace tree_sitter {
|
|||
add(other_pair.first, other_pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
transition_map<TKey, TValue> where(std::function<bool(TKeyPtr)> filter_fn) {
|
||||
transition_map<TKey, TValue> result;
|
||||
for (pair_type pair : *this)
|
||||
if (filter_fn(pair.first))
|
||||
result.add(pair.first, pair.second);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename NewV>
|
||||
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<const NewV>(TValuePtr)> map_fn) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue