diff --git a/src/compiler/build_tables/item.cpp b/src/compiler/build_tables/item.cpp index e13a8651..c83f04df 100644 --- a/src/compiler/build_tables/item.cpp +++ b/src/compiler/build_tables/item.cpp @@ -7,35 +7,9 @@ using std::ostream; namespace tree_sitter { namespace build_tables { - const int NO_SYMBOLS = -1; - - Item::Item(const string &rule_name, const rules::rule_ptr rule, int consumed_sym_count) : + Item::Item(const string &rule_name, const rules::rule_ptr rule) : rule_name(rule_name), - rule(rule), - consumed_sym_count(consumed_sym_count) {}; - - Item Item::at_beginning_of_rule(const string &rule_name, const Grammar &grammar) { - return Item(rule_name, grammar.rule(rule_name), 0); - } - - Item Item::at_beginning_of_token(const string &rule_name, const Grammar &grammar) { - return Item(rule_name, grammar.rule(rule_name), NO_SYMBOLS); - } - - int Item::next_sym_count() const { - return (consumed_sym_count == NO_SYMBOLS) ? NO_SYMBOLS : (consumed_sym_count + 1); - } - - bool Item::operator==(const Item &other) const { - bool rule_names_eq = other.rule_name == rule_name; - bool rules_eq = (*other.rule == *rule); - bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count); - return rule_names_eq && rules_eq && consumed_sym_counts_eq; - } - - bool Item::operator<(const Item &other) const { - return rule_name < other.rule_name; - } + rule(rule) {}; bool Item::is_done() const { for (auto pair : rule_transitions(rule)) @@ -46,11 +20,35 @@ namespace tree_sitter { ostream& operator<<(ostream &stream, const Item &item) { return stream << - string("#"); + string("#"); + } + + bool Item::operator<(const Item &other) const { + return rule_name < other.rule_name; + } + + LexItem::LexItem(const std::string &rule_name, const rules::rule_ptr rule) : Item(rule_name, rule) {} + + bool LexItem::operator==(const LexItem &other) const { + bool rule_names_eq = other.rule_name == rule_name; + bool rules_eq = (*other.rule == *rule); + return rule_names_eq && rules_eq; + } + + ParseItem::ParseItem(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count) : + Item(rule_name, rule), + consumed_sym_count(consumed_sym_count), + lookahead_sym_name("") {} + + bool ParseItem::operator==(const ParseItem &other) const { + bool rule_names_eq = other.rule_name == rule_name; + bool rules_eq = (*other.rule == *rule); + bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count); + return rule_names_eq && rules_eq && consumed_sym_counts_eq; } } } diff --git a/src/compiler/build_tables/item.h b/src/compiler/build_tables/item.h index 6f45f293..841df11f 100644 --- a/src/compiler/build_tables/item.h +++ b/src/compiler/build_tables/item.h @@ -10,25 +10,33 @@ namespace tree_sitter { class Grammar; namespace build_tables { - class Item; - class Item { public: - Item(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count); - static Item at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar); - static Item at_beginning_of_token(const std::string &rule_name, const Grammar &grammar); - - bool operator==(const Item &other) const; + Item(const std::string &rule_name, const rules::rule_ptr rule); bool operator<(const Item &other) const; bool is_done() const; - int next_sym_count() const; const std::string rule_name; const rules::rule_ptr rule; - const int consumed_sym_count; }; - - typedef std::set ItemSet; + + class LexItem : public Item { + public: + LexItem(const std::string &rule_name, const rules::rule_ptr rule); + bool operator==(const LexItem &other) const; + }; + + class ParseItem : public Item { + public: + ParseItem(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count); + bool operator==(const ParseItem &other) const; + + const int consumed_sym_count; + const std::string lookahead_sym_name; + }; + + typedef std::set ParseItemSet; + typedef std::set LexItemSet; std::ostream& operator<<(std::ostream &stream, const Item &item); } @@ -36,21 +44,30 @@ namespace tree_sitter { namespace std { template<> - struct hash { + struct hash { size_t operator()(const tree_sitter::build_tables::Item &item) const { return hash()(item.rule_name) ^ - hash()(*item.rule) ^ - hash()(item.consumed_sym_count); + hash()(*item.rule); } }; template<> - struct hash { - size_t operator()(const tree_sitter::build_tables::ItemSet &item_set) const { - size_t result = hash()(item_set.size()); - for (auto item : item_set) - result ^= hash()(item); + struct hash { + size_t operator()(const tree_sitter::build_tables::ParseItem &item) const { + return + hash()(item.rule_name) ^ + hash()(*item.rule) ^ + hash()(item.consumed_sym_count); + } + }; + + template + struct hash> { + size_t operator()(const set &set) const { + size_t result = hash()(set.size()); + for (auto item : set) + result ^= hash()(item); return result; } }; diff --git a/src/compiler/build_tables/item_set_closure.cpp b/src/compiler/build_tables/item_set_closure.cpp index 0ccb1900..190246a1 100644 --- a/src/compiler/build_tables/item_set_closure.cpp +++ b/src/compiler/build_tables/item_set_closure.cpp @@ -8,23 +8,27 @@ using std::vector; namespace tree_sitter { namespace build_tables { - static bool contains(ItemSet items, Item item) { + static bool contains(ParseItemSet items, ParseItem item) { return (std::find(items.begin(), items.end(), item) != items.end()); } - static void add_item(ItemSet &item_set, const Item &item, const Grammar &grammar) { + ParseItem parse_item_at_beginning_of_rule(const rules::Symbol &symbol, const Grammar &grammar) { + return ParseItem(symbol.name, grammar.rule(symbol.name), 0); + } + + static void add_item(ParseItemSet &item_set, const ParseItem &item, const Grammar &grammar) { if (!contains(item_set, item)) { item_set.insert(item); for (rules::Symbol rule : next_non_terminals(item, grammar)) { - Item next_item = Item::at_beginning_of_rule(rule.name, grammar); + auto next_item = parse_item_at_beginning_of_rule(rule, grammar); add_item(item_set, next_item, grammar); } } } - - const ItemSet item_set_closure(const ItemSet &item_set, const Grammar &grammar) { - ItemSet result; - for (Item item : item_set) + + const ParseItemSet item_set_closure(const ParseItemSet &item_set, const Grammar &grammar) { + ParseItemSet result; + for (ParseItem item : item_set) add_item(result, item, grammar); return result; } diff --git a/src/compiler/build_tables/item_set_closure.h b/src/compiler/build_tables/item_set_closure.h index 9c03840c..aa0fc307 100644 --- a/src/compiler/build_tables/item_set_closure.h +++ b/src/compiler/build_tables/item_set_closure.h @@ -7,7 +7,7 @@ namespace tree_sitter { class Grammar; namespace build_tables { - const ItemSet item_set_closure(const ItemSet &item_set, const Grammar &grammar); + const ParseItemSet item_set_closure(const ParseItemSet &item_set, const Grammar &grammar); } } diff --git a/src/compiler/build_tables/item_set_transitions.cpp b/src/compiler/build_tables/item_set_transitions.cpp index 17a8a6a6..bd874042 100644 --- a/src/compiler/build_tables/item_set_transitions.cpp +++ b/src/compiler/build_tables/item_set_transitions.cpp @@ -7,31 +7,34 @@ using std::make_shared; namespace tree_sitter { namespace build_tables { - transition_map item_transitions(const Item &item) { - return rule_transitions(item.rule).map([&](rules::rule_ptr to_rule) { - return make_shared(item.rule_name, to_rule, item.next_sym_count()); - }); - }; - - template - transition_map transitions(const ItemSet &item_set, const Grammar &grammar) { - transition_map result; - for (Item item : item_set) { - for (auto transition : item_transitions(item)) { - auto rule = dynamic_pointer_cast(transition.first); - auto new_item_set = make_shared(item_set_closure(ItemSet({ *transition.second }), grammar)); - if (rule.get()) result.add(rule, new_item_set); + transition_map char_transitions(const LexItemSet &item_set, const Grammar &grammar) { + transition_map result; + for (LexItem item : item_set) { + for (auto transition : rule_transitions(item.rule)) { + auto new_item = LexItem(item.rule_name, transition.second); + auto rule = dynamic_pointer_cast(transition.first); + if (rule.get()) { + auto new_item_set = make_shared(LexItemSet({ new_item })); + result.add(rule, new_item_set); + } } } return result; } - transition_map char_transitions(const ItemSet &item_set, const Grammar &grammar) { - return transitions(item_set, grammar); - } - - transition_map sym_transitions(const ItemSet &item_set, const Grammar &grammar) { - return transitions(item_set, grammar); + transition_map sym_transitions(const ParseItemSet &item_set, const Grammar &grammar) { + transition_map result; + for (ParseItem item : item_set) { + for (auto transition : rule_transitions(item.rule)) { + auto new_item = ParseItem(item.rule_name, transition.second, item.consumed_sym_count + 1); + auto rule = dynamic_pointer_cast(transition.first); + if (rule.get()) { + auto new_item_set = make_shared(item_set_closure(ParseItemSet({ new_item }), grammar)); + result.add(rule, new_item_set); + } + } + } + return result; } } } \ No newline at end of file diff --git a/src/compiler/build_tables/item_set_transitions.h b/src/compiler/build_tables/item_set_transitions.h index e23404c5..b6e175a8 100644 --- a/src/compiler/build_tables/item_set_transitions.h +++ b/src/compiler/build_tables/item_set_transitions.h @@ -8,8 +8,8 @@ namespace tree_sitter { namespace build_tables { - transition_map char_transitions(const ItemSet &item_set, const Grammar &grammar); - transition_map sym_transitions(const ItemSet &item_set, const Grammar &grammar); + transition_map char_transitions(const LexItemSet &item_set, const Grammar &grammar); + transition_map sym_transitions(const ParseItemSet &item_set, const Grammar &grammar); } } diff --git a/src/compiler/build_tables/next_symbols.cpp b/src/compiler/build_tables/next_symbols.cpp index fabe6531..35f0c51f 100644 --- a/src/compiler/build_tables/next_symbols.cpp +++ b/src/compiler/build_tables/next_symbols.cpp @@ -65,17 +65,17 @@ namespace tree_sitter { return next_symbols(rule, grammar); } - set next_terminals(const Item &item, const Grammar &grammar) { + set next_terminals(const ParseItem &item, const Grammar &grammar) { return next_terminals(item.rule, grammar); } - set next_non_terminals(const Item &item, const Grammar &grammar) { + set next_non_terminals(const ParseItem &item, const Grammar &grammar) { return next_non_terminals(item.rule, grammar); } - set next_terminals(const ItemSet &item_set, const Grammar &grammar) { + set next_terminals(const ParseItemSet &item_set, const Grammar &grammar) { set result; - for (Item item : item_set) + for (auto item : item_set) for (rules::Symbol symbol : next_terminals(item, grammar)) result.insert(symbol); return result; diff --git a/src/compiler/build_tables/next_symbols.h b/src/compiler/build_tables/next_symbols.h index dbf51922..e94537f5 100644 --- a/src/compiler/build_tables/next_symbols.h +++ b/src/compiler/build_tables/next_symbols.h @@ -10,9 +10,10 @@ namespace tree_sitter { namespace build_tables { std::set next_terminals(const rules::rule_ptr &rule, const Grammar &grammar); - std::set next_terminals(const ItemSet &item_set, const Grammar &grammar); - std::set next_terminals(const Item &item, const Grammar &grammar); - std::set next_non_terminals(const Item &item, const Grammar &grammar); + std::set next_terminals(const ParseItemSet &item_set, const Grammar &grammar); + std::set next_terminals(const ParseItem &item, const Grammar &grammar); + std::set next_non_terminals(const ParseItem &item, const Grammar &grammar); } } + #endif diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index 32815c87..b02532a0 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -17,50 +17,50 @@ namespace tree_sitter { class TableBuilder { const Grammar grammar; const Grammar lex_grammar; - unordered_map parse_state_indices; - unordered_map lex_state_indices; + unordered_map parse_state_indices; + unordered_map lex_state_indices; ParseTable parse_table; LexTable lex_table; - long parse_state_index_for_item_set(const ItemSet &item_set) const { + long parse_state_index_for_item_set(const ParseItemSet &item_set) const { auto entry = parse_state_indices.find(item_set); return (entry == parse_state_indices.end()) ? NOT_FOUND : entry->second; } - long lex_state_index_for_item_set(const ItemSet &item_set) const { + long lex_state_index_for_item_set(const LexItemSet &item_set) const { auto entry = lex_state_indices.find(item_set); return (entry == lex_state_indices.end()) ? NOT_FOUND : entry->second; } - void add_shift_actions(const ItemSet &item_set, size_t state_index) { + void add_shift_actions(const ParseItemSet &item_set, size_t state_index) { auto x = sym_transitions(item_set, grammar); for (auto transition : x) { rules::Symbol symbol = *transition.first; - ItemSet item_set = *transition.second; + ParseItemSet item_set = *transition.second; size_t new_state_index = add_parse_state(item_set); parse_table.add_action(state_index, symbol.name, ParseAction::Shift(new_state_index)); } } - void add_advance_actions(const ItemSet &item_set, size_t state_index) { + void add_advance_actions(const LexItemSet &item_set, size_t state_index) { for (auto transition : char_transitions(item_set, grammar)) { rules::Character rule = *transition.first; - ItemSet item_set = *transition.second; + LexItemSet item_set = *transition.second; size_t new_state_index = add_lex_state(item_set); lex_table.add_action(state_index, rule.value, LexAction::Advance(new_state_index)); } } - void add_accept_token_actions(const ItemSet &item_set, size_t state_index) { - for (Item item : item_set) { + void add_accept_token_actions(const LexItemSet &item_set, size_t state_index) { + for (LexItem item : item_set) { if (item.is_done()) { lex_table.add_default_action(state_index, LexAction::Accept(item.rule_name)); } } } - void add_reduce_actions(const ItemSet &item_set, size_t state_index) { - for (Item item : item_set) { + void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) { + for (ParseItem item : item_set) { if (item.is_done()) { if (item.rule_name == ParseTable::START) { parse_table.add_action(state_index, ParseTable::END_OF_INPUT, ParseAction::Accept()); @@ -71,7 +71,7 @@ namespace tree_sitter { } } - size_t add_lex_state(const ItemSet &item_set) { + size_t add_lex_state(const LexItemSet &item_set) { auto state_index = lex_state_index_for_item_set(item_set); if (state_index == NOT_FOUND) { state_index = lex_table.add_state(); @@ -82,20 +82,20 @@ namespace tree_sitter { return state_index; } - ItemSet lex_item_set_for_parse_item_set(const ItemSet &parse_item_set) { - ItemSet result; + LexItemSet lex_item_set_for_parse_item_set(const ParseItemSet &parse_item_set) { + LexItemSet result; for (rules::Symbol symbol : next_terminals(parse_item_set, grammar)) - result.insert(Item::at_beginning_of_token(symbol.name, lex_grammar)); + result.insert(LexItem(symbol.name, lex_grammar.rule(symbol.name))); return result; } - size_t add_parse_state(const ItemSet &item_set) { + size_t add_parse_state(const ParseItemSet &item_set) { auto state_index = parse_state_index_for_item_set(item_set); if (state_index == NOT_FOUND) { state_index = parse_table.add_state(); parse_state_indices[item_set] = state_index; - ItemSet lex_item_set = lex_item_set_for_parse_item_set(item_set); + LexItemSet lex_item_set = lex_item_set_for_parse_item_set(item_set); parse_table.states[state_index].lex_state_index = add_lex_state(lex_item_set); add_shift_actions(item_set, state_index); add_reduce_actions(item_set, state_index); @@ -110,8 +110,8 @@ namespace tree_sitter { lex_grammar(lex_grammar) {}; pair build() { - auto item = Item(ParseTable::START, rules::sym(grammar.start_rule_name), 0); - auto item_set = item_set_closure(ItemSet({ item }), grammar); + auto item = ParseItem(ParseTable::START, rules::sym(grammar.start_rule_name), 0); + ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar); add_parse_state(item_set); return pair(parse_table, lex_table); } diff --git a/src/compiler/build_tables/transition_map.h b/src/compiler/build_tables/transition_map.h index ad63844d..de879a7a 100644 --- a/src/compiler/build_tables/transition_map.h +++ b/src/compiler/build_tables/transition_map.h @@ -42,14 +42,6 @@ namespace tree_sitter { add(other_pair.first, other_pair.second); } } - - transition_map where(std::function filter_fn) { - transition_map result; - for (pair_type pair : *this) - if (filter_fn(pair.first)) - result.add(pair.first, pair.second); - return result; - } template transition_map map(std::function(TValuePtr)> map_fn) {