diff --git a/spec/compiler/build_tables/follow_sets_spec.cc b/spec/compiler/build_tables/follow_sets_spec.cc index 02379cff..2c0d21e9 100644 --- a/spec/compiler/build_tables/follow_sets_spec.cc +++ b/spec/compiler/build_tables/follow_sets_spec.cc @@ -19,9 +19,9 @@ describe("computing FOLLOW sets", []() { ParseItem item(Symbol(2), choice({ seq({ i_sym(0), choice({ i_token(0), i_token(1) }) }), seq({ i_sym(1), i_token(2) }), - }), 0, Symbol(10, SymbolOptionToken)); + }), 0); - AssertThat(follow_sets(item, grammar), Equals(map>({ + AssertThat(follow_sets(item, { Symbol(10, SymbolOptionToken) }, grammar), Equals(map>({ { Symbol(0), set({ Symbol(0, SymbolOptionToken), Symbol(1, SymbolOptionToken) }) }, @@ -34,21 +34,21 @@ describe("computing FOLLOW sets", []() { ParseItem item(Symbol(2), choice({ seq({ i_sym(0), choice({ i_token(0), i_token(1) }) }), seq({ i_token(2), i_token(3) }), - }), 0, Symbol(10, SymbolOptionToken)); + }), 0); - AssertThat(follow_sets(item, grammar), Equals(map>({ + AssertThat(follow_sets(item, { Symbol(10, SymbolOptionToken) }, grammar), Equals(map>({ { Symbol(0), set({ Symbol(0, SymbolOptionToken), Symbol(1, SymbolOptionToken) }) }, }))); }); - it("includes the item's lookahead terminal if the rule after the non-terminal might be blank", [&]() { + it("includes the item's lookahead symbol if the rule after the non-terminal might be blank", [&]() { ParseItem item(Symbol(2), choice({ seq({ i_sym(0), choice({ i_token(0), blank() }) }), - }), 0, Symbol(10, SymbolOptionToken)); + }), 0); - AssertThat(follow_sets(item, grammar), Equals(map>({ + AssertThat(follow_sets(item, { Symbol(10, SymbolOptionToken) }, grammar), Equals(map>({ { Symbol(0), set({ Symbol(0, SymbolOptionToken), Symbol(10, SymbolOptionToken) }) }, diff --git a/spec/compiler/build_tables/item_set_closure_spec.cc b/spec/compiler/build_tables/item_set_closure_spec.cc index e8ef8734..de4ce5e0 100644 --- a/spec/compiler/build_tables/item_set_closure_spec.cc +++ b/spec/compiler/build_tables/item_set_closure_spec.cc @@ -19,13 +19,13 @@ describe("computing closures of item sets", []() { }, {}); it("adds items at the beginnings of referenced rules", [&]() { - ParseItemSet item_set = item_set_closure( - ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0, Symbol(10, SymbolOptionToken)), - grammar); + ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0), + { Symbol(10, SymbolOptionToken) }, + grammar); AssertThat(item_set, Equals(ParseItemSet({ - ParseItem(Symbol(1), grammar.rule(Symbol(1)), 0, Symbol(11, SymbolOptionToken)), - ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0, Symbol(10, SymbolOptionToken)), + { ParseItem(Symbol(1), grammar.rule(Symbol(1)), 0), { Symbol(11, SymbolOptionToken) } }, + { ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0), { Symbol(10, SymbolOptionToken) } }, }))); }); }); diff --git a/spec/compiler/build_tables/item_set_transitions_spec.cc b/spec/compiler/build_tables/item_set_transitions_spec.cc index 84c6c1bd..a6262ef1 100644 --- a/spec/compiler/build_tables/item_set_transitions_spec.cc +++ b/spec/compiler/build_tables/item_set_transitions_spec.cc @@ -37,15 +37,15 @@ describe("syntactic item set transitions", [&]() { it("computes the closure of the new item sets", [&]() { ParseItemSet set1({ - ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), 3, Symbol(23, SymbolOptionToken)), + { ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), 3), { Symbol(23, SymbolOptionToken) } }, }); SymTransitions sym_transitions; AssertThat(sym_transitions(set1, grammar), Equals(map({ { Symbol(22, SymbolOptionToken), ParseItemSet({ - ParseItem(Symbol(0), i_sym(1), 4, Symbol(23, SymbolOptionToken)), - ParseItem(Symbol(1), i_token(21), 0, Symbol(23, SymbolOptionToken)) + { ParseItem(Symbol(0), i_sym(1), 4), { Symbol(23, SymbolOptionToken) } }, + { ParseItem(Symbol(1), i_token(21), 0), { Symbol(23, SymbolOptionToken) } }, }) }, }))); }); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index fdca019e..90faf0a1 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -46,14 +46,14 @@ namespace tree_sitter { void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (const auto &transition : sym_transitions(item_set, grammar)) { const Symbol &symbol = transition.first; - const ParseItemSet &item_set = transition.second; + const ParseItemSet &next_item_set = transition.second; auto &actions = parse_table.states[state_id].actions; auto current_action = actions.find(symbol); - set precedence_values = precedence_values_for_item_set(item_set); + set precedence_values = precedence_values_for_item_set(next_item_set); if (current_action == actions.end() || conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) { - ParseStateId new_state_id = add_parse_state(item_set); + ParseStateId new_state_id = add_parse_state(next_item_set); parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values)); } } @@ -68,17 +68,22 @@ namespace tree_sitter { } void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { - for (const ParseItem &item : item_set) { + for (const auto &pair : item_set) { + const ParseItem &item = pair.first; + const set &lookahead_symbols = pair.second; + if (item.is_done()) { ParseAction action = (item.lhs == rules::START()) ? ParseAction::Accept() : ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence()); - auto current_actions = parse_table.states[state_id].actions; - auto current_action = current_actions.find(item.lookahead_sym); - - if (current_action == current_actions.end() || - conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) { - parse_table.add_action(state_id, item.lookahead_sym, action); + + for (auto &lookahead_sym : lookahead_symbols) { + auto current_actions = parse_table.states[state_id].actions; + auto current_action = current_actions.find(lookahead_sym); + if (current_action == current_actions.end() || + conflict_manager.resolve_parse_action(lookahead_sym, current_action->second, action)) { + parse_table.add_action(state_id, lookahead_sym, action); + } } } } @@ -86,9 +91,11 @@ namespace tree_sitter { set precedence_values_for_item_set(const ParseItemSet &item_set) { set result; - for (const auto &item : item_set) + for (const auto &pair : item_set) { + const ParseItem &item = pair.first; if (item.consumed_symbol_count > 0) result.insert(item.precedence()); + } return result; } @@ -98,8 +105,8 @@ namespace tree_sitter { conflict_manager(ParseConflictManager(grammar, lex_grammar)) {} pair> build() { - ParseItem start_item(rules::START(), make_shared(0), 0, rules::END_OF_INPUT()); - add_parse_state(item_set_closure(start_item, grammar)); + ParseItem start_item(rules::START(), make_shared(0), 0); + add_parse_state(item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar)); return { parse_table, conflict_manager.conflicts() }; } }; diff --git a/src/compiler/build_tables/first_set.cc b/src/compiler/build_tables/first_set.cc index b7c56609..39558197 100644 --- a/src/compiler/build_tables/first_set.cc +++ b/src/compiler/build_tables/first_set.cc @@ -60,11 +60,13 @@ namespace tree_sitter { set first_set(const ParseItemSet &item_set, const PreparedGrammar &grammar) { set result; - for (auto &item : item_set) { - auto &&rule_set = first_set(item.rule, grammar); + for (const auto &pair : item_set) { + const auto &item = pair.first; + const auto &lookahead_symbols = pair.second; + const auto &rule_set = first_set(item.rule, grammar); result.insert(rule_set.begin(), rule_set.end()); if (rule_can_be_blank(item.rule, grammar)) - result.insert(item.lookahead_sym); + result.insert(lookahead_symbols.begin(), lookahead_symbols.end()); } return result; } diff --git a/src/compiler/build_tables/follow_sets.cc b/src/compiler/build_tables/follow_sets.cc index 1384ab17..76a701b6 100644 --- a/src/compiler/build_tables/follow_sets.cc +++ b/src/compiler/build_tables/follow_sets.cc @@ -12,7 +12,8 @@ namespace tree_sitter { namespace build_tables { map> follow_sets(const ParseItem &item, - const PreparedGrammar &grammar) { + const set &lookahead_symbols, + const PreparedGrammar &grammar) { map> result; for (auto &pair : sym_transitions(item.rule)) { Symbol symbol = pair.first; @@ -20,7 +21,7 @@ namespace tree_sitter { if (!symbol.is_token() && !symbol.is_built_in()) { set following_terminals = first_set(next_rule, grammar); if (rule_can_be_blank(next_rule, grammar)) - following_terminals.insert(item.lookahead_sym); + following_terminals.insert(lookahead_symbols.begin(), lookahead_symbols.end()); result.insert({ symbol, following_terminals }); } } diff --git a/src/compiler/build_tables/follow_sets.h b/src/compiler/build_tables/follow_sets.h index 772b16f8..f55c24ba 100644 --- a/src/compiler/build_tables/follow_sets.h +++ b/src/compiler/build_tables/follow_sets.h @@ -18,7 +18,7 @@ namespace tree_sitter { * after the corresponding non-terminals. */ std::map> - follow_sets(const ParseItem &item, const PreparedGrammar &grammar); + follow_sets(const ParseItem &item, const std::set &lookahead_symbols, const PreparedGrammar &grammar); } } diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc index 0f0c5f77..141b2bb5 100644 --- a/src/compiler/build_tables/item_set_closure.cc +++ b/src/compiler/build_tables/item_set_closure.cc @@ -1,6 +1,7 @@ #include "compiler/build_tables/item_set_closure.h" #include #include +#include #include "tree_sitter/compiler.h" #include "compiler/build_tables/follow_sets.h" #include "compiler/build_tables/item.h" @@ -10,27 +11,39 @@ namespace tree_sitter { using std::set; using rules::Symbol; using std::vector; + using std::pair; namespace build_tables { - const ParseItemSet item_set_closure(const ParseItem &item, + const ParseItemSet item_set_closure(const ParseItem &starting_item, + const set &starting_lookahead_symbols, const PreparedGrammar &grammar) { ParseItemSet result; - vector items_to_add = { item }; - while (!items_to_add.empty()) { - ParseItem item = items_to_add.back(); - items_to_add.pop_back(); - auto insertion_result = result.insert(item); - if (insertion_result.second) { - for (const auto &pair : follow_sets(item, grammar)) { + vector>> pairs_to_add = { {starting_item, starting_lookahead_symbols} }; + while (!pairs_to_add.empty()) { + auto pair = pairs_to_add.back(); + pairs_to_add.pop_back(); + auto &item = pair.first; + auto &lookahead_symbols = pair.second; + + bool new_stuff_added = false; + auto &existing_lookahead_symbols = result[item]; + for (auto &sym : lookahead_symbols) { + auto insertion_result = existing_lookahead_symbols.insert(sym); + if (insertion_result.second) new_stuff_added = true; + } + + if (new_stuff_added) { + for (const auto &pair : follow_sets(item, lookahead_symbols, grammar)) { const Symbol &non_terminal = pair.first; const set &terminals = pair.second; - for (const auto &terminal : terminals) { - ParseItem next_item(non_terminal, grammar.rule(non_terminal), 0, terminal); - items_to_add.push_back(next_item); - } + pairs_to_add.push_back({ + ParseItem(non_terminal, grammar.rule(non_terminal), 0), + terminals + }); } } } + return result; } } diff --git a/src/compiler/build_tables/item_set_closure.h b/src/compiler/build_tables/item_set_closure.h index dde19ff5..b25a2869 100644 --- a/src/compiler/build_tables/item_set_closure.h +++ b/src/compiler/build_tables/item_set_closure.h @@ -1,6 +1,8 @@ #ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_ #define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_ +#include +#include "compiler/rules/symbol.h" #include "compiler/build_tables/parse_item.h" namespace tree_sitter { @@ -8,6 +10,7 @@ namespace tree_sitter { namespace build_tables { const ParseItemSet item_set_closure(const ParseItem &item, + const std::set &lookahead_symbols, const PreparedGrammar &grammar); } } diff --git a/src/compiler/build_tables/item_set_transitions.cc b/src/compiler/build_tables/item_set_transitions.cc index 215ee9d7..6c44a23a 100644 --- a/src/compiler/build_tables/item_set_transitions.cc +++ b/src/compiler/build_tables/item_set_transitions.cc @@ -1,6 +1,5 @@ #include "compiler/build_tables/item_set_transitions.h" -#include -#include +#include #include "compiler/build_tables/item_set_closure.h" #include "compiler/build_tables/rule_transitions.h" #include "compiler/build_tables/merge_transitions.h" @@ -8,76 +7,34 @@ namespace tree_sitter { using std::map; - using std::vector; - using std::unordered_set; + using std::set; using rules::CharacterSet; using rules::Symbol; namespace build_tables { - template - static void merge_sets(unordered_set *left, const unordered_set *right) { - left->insert(right->begin(), right->end()); - } - - const Symbol placeholder_lookahead = Symbol(-100); - const Symbol placeholder_lhs = Symbol(-101); - - static map sym_transitions_for_rule(SymTransitions *self, const rules::rule_ptr &rule, const PreparedGrammar &grammar) { - auto pair = self->transitions_cache.find(rule); - if (pair != self->transitions_cache.end()) return pair->second; - map result; - for (auto &transition : sym_transitions(rule)) { - ParseItem new_item(placeholder_lhs, transition.second, 1, placeholder_lookahead); - result.insert({ - transition.first, - item_set_closure(new_item, grammar) - }); - } - self->transitions_cache.insert({ rule, result }); - return result; - } - - static map sym_transitions_for_item(SymTransitions *self, const ParseItem &item, const PreparedGrammar &grammar) { - auto result = sym_transitions_for_rule(self, item.rule, grammar); - for (auto &pair : result) { - vector new_items; - auto &items = pair.second; - for (auto iter = items.begin(), end = items.end(); iter != end;) { - ParseItem new_item(*iter); - bool changed = false; - if (new_item.consumed_symbol_count > 0) { - new_item.consumed_symbol_count = item.consumed_symbol_count + 1; - changed = true; - } - if (new_item.lookahead_sym == placeholder_lookahead) { - new_item.lookahead_sym = item.lookahead_sym; - changed = true; - } - if (new_item.lhs == placeholder_lhs) { - new_item.lhs = item.lhs; - changed = true; - } - if (changed) { - iter = pair.second.erase(iter); - new_items.push_back(new_item); - } else { - ++iter; - } - } - pair.second.insert(new_items.begin(), new_items.end()); - } - return result; - } - map SymTransitions::operator()(const ParseItemSet &item_set, const PreparedGrammar &grammar) { map result; - for (const ParseItem &item : item_set) - merge_sym_transitions(&result, - sym_transitions_for_item(this, item, grammar), - [](ParseItemSet *l, const ParseItemSet *r) { - merge_sets(l, r); + + for (const auto &pair : item_set) { + const ParseItem &item = pair.first; + const set &lookahead_symbols = pair.second; + map result_for_item; + for (auto &transition : sym_transitions(item.rule)) { + ParseItem new_item(item.lhs, transition.second, item.consumed_symbol_count + 1); + result_for_item.insert({ + transition.first, + item_set_closure(new_item, lookahead_symbols, grammar) + }); + } + + merge_sym_transitions(&result, result_for_item, + [](ParseItemSet *left, const ParseItemSet *right) { + for (auto &pair : *right) + left->operator[](pair.first).insert(pair.second.begin(), pair.second.end()); }); + } + return result; } @@ -93,9 +50,10 @@ namespace tree_sitter { LexItemSet({ next_item }) }); } - merge_char_transitions(&result, item_transitions, [](LexItemSet *l, const LexItemSet *r) { - merge_sets(l, r); - }); + merge_char_transitions(&result, item_transitions, + [](LexItemSet *left, const LexItemSet *right) { + left->insert(right->begin(), right->end()); + }); } return result; } diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 2b92f2be..280bf620 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -1,7 +1,10 @@ #include "compiler/build_tables/parse_item.h" +#include #include "tree_sitter/compiler.h" namespace tree_sitter { + using std::pair; + using std::set; using std::string; using std::to_string; using std::ostream; @@ -9,31 +12,19 @@ namespace tree_sitter { namespace build_tables { ParseItem::ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, - size_t consumed_symbol_count, - const rules::Symbol &lookahead_sym) : + size_t consumed_symbol_count) : Item(lhs, rule), - consumed_symbol_count(consumed_symbol_count), - lookahead_sym(lookahead_sym) {} - + consumed_symbol_count(consumed_symbol_count) {} + bool ParseItem::operator==(const ParseItem &other) const { return - (other.lhs == lhs) && - (other.consumed_symbol_count == consumed_symbol_count) && - (other.lookahead_sym == lookahead_sym) && - (other.rule == rule || other.rule->operator==(*rule)); + (lhs == other.lhs) && + (consumed_symbol_count == other.consumed_symbol_count) && + (rule == other.rule || rule->operator==(*other.rule)); } ostream& operator<<(ostream &stream, const ParseItem &item) { - return stream << - string("#"); + return stream << string("#"); } } } diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index 4d0de966..2c638885 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -1,8 +1,9 @@ #ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_H_ #define COMPILER_BUILD_TABLES_PARSE_ITEM_H_ -#include +#include #include +#include #include "compiler/rules/symbol.h" #include "compiler/build_tables/item.h" @@ -10,19 +11,14 @@ namespace tree_sitter { namespace build_tables { class ParseItem : public Item { public: - ParseItem(const rules::Symbol &lhs, - rules::rule_ptr rule, - const size_t consumed_symbol_count, - const rules::Symbol &lookahead_sym); + ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule, const size_t consumed_symbol_count); bool operator==(const ParseItem &other) const; - size_t consumed_symbol_count; - rules::Symbol lookahead_sym; }; std::ostream& operator<<(std::ostream &stream, const ParseItem &item); - typedef std::unordered_set ParseItemSet; + typedef std::unordered_map> ParseItemSet; } } @@ -31,10 +27,9 @@ namespace std { struct hash { size_t operator()(const tree_sitter::build_tables::ParseItem &item) const { return - hash()(item.lhs) ^ - hash()(item.rule) ^ - hash()(item.consumed_symbol_count) ^ - hash()(item.lookahead_sym); + hash()(item.lhs) ^ + hash()(item.rule) ^ + hash()(item.consumed_symbol_count); } }; @@ -42,8 +37,12 @@ namespace std { struct hash { size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const { size_t result = hash()(set.size()); - for (auto item : set) - result ^= hash()(item); + for (auto &pair : set) { + result ^= hash()(pair.first); + result ^= hash()(pair.second.size()); + for (auto &symbol : pair.second) + result ^= hash()(symbol); + } return result; } }; diff --git a/src/compiler/rules/symbol.cc b/src/compiler/rules/symbol.cc index aa27a33b..3e0337ff 100644 --- a/src/compiler/rules/symbol.cc +++ b/src/compiler/rules/symbol.cc @@ -37,7 +37,7 @@ namespace tree_sitter { string Symbol::to_string() const { string name = (options & SymbolOptionAuxiliary) ? "aux_" : ""; name += (options & SymbolOptionToken) ? "token" : "sym"; - return "#<" + name + std::to_string(index) + ">"; + return "#<" + name + " " + std::to_string(index) + ">"; } bool Symbol::operator<(const Symbol &other) const {