From 1118a9142ab522179ab0261d33dbb8802e685421 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Nov 2016 10:25:26 -0800 Subject: [PATCH] Introduce Symbol::Index type alias --- .../build_tables/distinctive_tokens_spec.cc | 4 +- .../parse_item_set_builder_spec.cc | 35 +++++++++++------ spec/compiler/build_tables/parse_item_spec.cc | 14 +++---- src/compiler/build_tables/build_lex_table.cc | 4 +- .../build_tables/build_parse_table.cc | 38 +++++++++---------- .../build_tables/lex_conflict_manager.h | 4 +- src/compiler/build_tables/lookahead_set.cc | 12 +++--- src/compiler/build_tables/lookahead_set.h | 8 ++-- src/compiler/build_tables/parse_item.cc | 4 +- .../build_tables/parse_item_set_builder.cc | 26 ++++++++----- .../build_tables/parse_item_set_builder.h | 4 +- src/compiler/build_tables/recovery_tokens.cc | 6 +-- src/compiler/build_tables/recovery_tokens.h | 2 +- src/compiler/parse_table.cc | 31 ++++++++------- src/compiler/parse_table.h | 6 +-- src/compiler/rules/symbol.cc | 12 ++++-- src/compiler/rules/symbol.h | 10 +++-- src/runtime/parser.c | 7 ++-- 18 files changed, 130 insertions(+), 97 deletions(-) diff --git a/spec/compiler/build_tables/distinctive_tokens_spec.cc b/spec/compiler/build_tables/distinctive_tokens_spec.cc index c5d197b3..104cd721 100644 --- a/spec/compiler/build_tables/distinctive_tokens_spec.cc +++ b/spec/compiler/build_tables/distinctive_tokens_spec.cc @@ -27,9 +27,7 @@ describe("recovery_tokens(rule)", []() { })), }; - AssertThat(recovery_tokens(grammar), Equals>({ - Symbol(1, true), - })); + AssertThat(recovery_tokens(grammar), Equals>({ 1 })); }); }); diff --git a/spec/compiler/build_tables/parse_item_set_builder_spec.cc b/spec/compiler/build_tables/parse_item_set_builder_spec.cc index 5e387e51..a1dd2231 100644 --- a/spec/compiler/build_tables/parse_item_set_builder_spec.cc +++ b/spec/compiler/build_tables/parse_item_set_builder_spec.cc @@ -1,8 +1,10 @@ #include "spec_helper.h" #include "compiler/syntax_grammar.h" +#include 
"compiler/lexical_grammar.h" #include "compiler/build_tables/parse_item_set_builder.h" #include "compiler/build_tables/lookahead_set.h" #include "compiler/rules/built_in_symbols.h" +#include "helpers/rule_helpers.h" using namespace build_tables; using namespace rules; @@ -10,6 +12,17 @@ using namespace rules; START_TEST describe("ParseItemSetBuilder", []() { + vector lexical_variables; + for (size_t i = 0; i < 20; i++) { + lexical_variables.push_back(Variable{ + "token_" + to_string(i), + VariableTypeNamed, + blank(), + }); + } + + LexicalGrammar lexical_grammar{lexical_variables, {}}; + it("adds items at the beginnings of referenced rules", [&]() { SyntaxGrammar grammar{{ SyntaxVariable("rule0", VariableTypeNamed, { @@ -42,29 +55,29 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { ParseItem(Symbol(0), production(0, 0), 0), - LookaheadSet({ Symbol(10, true) }), + LookaheadSet({ 10 }), } }); - ParseItemSetBuilder item_set_builder(grammar); + ParseItemSetBuilder item_set_builder(grammar, lexical_grammar); item_set_builder.apply_transitive_closure(&item_set); AssertThat(item_set, Equals(ParseItemSet({ { ParseItem(Symbol(0), production(0, 0), 0), - LookaheadSet({ Symbol(10, true) }) + LookaheadSet({ 10 }) }, { ParseItem(Symbol(1), production(1, 0), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, { ParseItem(Symbol(1), production(1, 1), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, { ParseItem(Symbol(2), production(2, 0), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, }))); }); @@ -93,25 +106,25 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { ParseItem(Symbol(0), production(0, 0), 0), - LookaheadSet({ Symbol(10, true) }), + LookaheadSet({ 10 }), } }); - ParseItemSetBuilder item_set_builder(grammar); + ParseItemSetBuilder item_set_builder(grammar, lexical_grammar); item_set_builder.apply_transitive_closure(&item_set); AssertThat(item_set, Equals(ParseItemSet({ { 
ParseItem(Symbol(0), production(0, 0), 0), - LookaheadSet({ Symbol(10, true) }) + LookaheadSet({ 10 }) }, { ParseItem(Symbol(1), production(1, 0), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, { ParseItem(Symbol(1), production(1, 1), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, }))); }); diff --git a/spec/compiler/build_tables/parse_item_spec.cc b/spec/compiler/build_tables/parse_item_spec.cc index 51c3e231..83c9121a 100644 --- a/spec/compiler/build_tables/parse_item_spec.cc +++ b/spec/compiler/build_tables/parse_item_spec.cc @@ -91,25 +91,25 @@ describe("ParseItemSet::transitions())", [&]() { // Two symbols into the first production for rule_0 { ParseItem(Symbol(0), production(0, 0), 2), - LookaheadSet({ Symbol(21, true) }) + LookaheadSet({ 21 }) }, // Two symbols into the second production for rule_0 { ParseItem(Symbol(0), production(0, 1), 2), - LookaheadSet({ Symbol(21, true) }) + LookaheadSet({ 21 }) }, // At the beginning of the first production for rule_1 { ParseItem(Symbol(1), production(1, 0), 0), - LookaheadSet({ Symbol(22, true) }) + LookaheadSet({ 22 }) }, // At the end of the first production for rule_2 { ParseItem(Symbol(2), production(2, 0), 1), - LookaheadSet({ Symbol(22, true) }) + LookaheadSet({ 22 }) } }); @@ -122,7 +122,7 @@ describe("ParseItemSet::transitions())", [&]() { ParseItemSet({ { ParseItem(Symbol(0), production(0, 0), 3), - LookaheadSet({ Symbol(21, true) }) + LookaheadSet({ 21 }) } }), PrecedenceRange(5, 5) @@ -137,11 +137,11 @@ describe("ParseItemSet::transitions())", [&]() { ParseItemSet({ { ParseItem(Symbol(0), production(0, 1), 3), - LookaheadSet({ Symbol(21, true) }) + LookaheadSet({ 21 }) }, { ParseItem(Symbol(1), production(1, 0), 1), - LookaheadSet({ Symbol(22, true) }) + LookaheadSet({ 22 }) }, }), PrecedenceRange(6, 7) diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 0b75c368..94100349 100644 --- 
a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -117,7 +117,7 @@ class LexTableBuilder { for (auto &entry : state.terminal_entries) { auto homonyms = conflict_manager.possible_homonyms.find(entry.first); if (homonyms != conflict_manager.possible_homonyms.end()) - for (int homonym : homonyms->second) + for (Symbol::Index homonym : homonyms->second) if (state.terminal_entries.count(homonym)) { entry.second.reusable = false; break; @@ -128,7 +128,7 @@ class LexTableBuilder { auto extensions = conflict_manager.possible_extensions.find(entry.first); if (extensions != conflict_manager.possible_extensions.end()) - for (int extension : extensions->second) + for (Symbol::Index extension : extensions->second) if (state.terminal_entries.count(extension)) { entry.second.depends_on_lookahead = true; break; diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 8e69e228..a8d38973 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -1,4 +1,4 @@ -#include "compiler/build_tables/build_parse_table.h" + #include "compiler/build_tables/build_parse_table.h" #include #include #include @@ -49,7 +49,7 @@ class ParseTableBuilder { const LexicalGrammar &lex_grammar) : grammar(grammar), lexical_grammar(lex_grammar), - item_set_builder(grammar), + item_set_builder(grammar, lex_grammar), allow_any_conflict(false) {} pair build() { @@ -64,7 +64,7 @@ class ParseTableBuilder { add_parse_state(ParseItemSet({ { ParseItem(rules::START(), start_production, 0), - LookaheadSet({ END_OF_INPUT() }), + LookaheadSet({ END_OF_INPUT().index }), }, })); @@ -111,8 +111,8 @@ class ParseTableBuilder { void build_error_parse_state() { ParseState error_state; - for (const Symbol &symbol : parse_table.mergeable_symbols) { - add_out_of_context_parse_state(&error_state, symbol); + for (const Symbol::Index index : parse_table.mergeable_symbols) 
{ + add_out_of_context_parse_state(&error_state, Symbol(index, true)); } for (const Symbol &symbol : grammar.extra_tokens) { @@ -167,7 +167,7 @@ class ParseTableBuilder { if (symbol.is_token) { ParseAction *new_action = add_terminal_action( - state_id, symbol, ParseAction::Shift(0, precedence), item_set); + state_id, symbol.index, ParseAction::Shift(0, precedence), item_set); if (new_action) { new_action->state_index = add_parse_state(next_item_set); } @@ -193,7 +193,7 @@ class ParseTableBuilder { status.associativity, *item.production); } - for (const Symbol &lookahead : *lookahead_symbols.entries) { + for (const Symbol::Index lookahead : *lookahead_symbols.entries) { add_terminal_action(state_id, lookahead, action, item_set); } } @@ -253,15 +253,15 @@ class ParseTableBuilder { remove_duplicate_states(&parse_table); } - ParseAction *add_terminal_action(ParseStateId state_id, Symbol lookahead, + ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead, const ParseAction &new_action, const ParseItemSet &item_set) { const ParseState &state = parse_table.states[state_id]; - const auto &current_entry = state.terminal_entries.find(lookahead.index); + const auto &current_entry = state.terminal_entries.find(lookahead); if (current_entry == state.terminal_entries.end()) - return &parse_table.set_terminal_action(state_id, lookahead.index, new_action); + return &parse_table.set_terminal_action(state_id, lookahead, new_action); if (allow_any_conflict) - return &parse_table.add_terminal_action(state_id, lookahead.index, new_action); + return &parse_table.add_terminal_action(state_id, lookahead, new_action); const ParseAction old_action = current_entry->second.actions[0]; auto resolution = conflict_manager.resolve(new_action, old_action); @@ -269,7 +269,7 @@ class ParseTableBuilder { switch (resolution.second) { case ConflictTypeNone: if (resolution.first) { - return &parse_table.set_terminal_action(state_id, lookahead.index, new_action); + return
&parse_table.set_terminal_action(state_id, lookahead, new_action); } break; @@ -277,7 +277,7 @@ class ParseTableBuilder { if (resolution.first) { if (old_action.type == ParseActionTypeReduce) fragile_productions.insert(old_action.production); - return &parse_table.set_terminal_action(state_id, lookahead.index, new_action); + return &parse_table.set_terminal_action(state_id, lookahead, new_action); } else { if (new_action.type == ParseActionTypeReduce) fragile_productions.insert(new_action.production); @@ -291,7 +291,7 @@ class ParseTableBuilder { fragile_productions.insert(old_action.production); if (new_action.type == ParseActionTypeReduce) fragile_productions.insert(new_action.production); - return &parse_table.add_terminal_action(state_id, lookahead.index, new_action); + return &parse_table.add_terminal_action(state_id, lookahead, new_action); } break; } @@ -301,7 +301,7 @@ class ParseTableBuilder { } bool handle_unresolved_conflict(const ParseItemSet &item_set, - const Symbol &lookahead) { + const Symbol::Index lookahead) { set involved_symbols; set reduce_items; set core_shift_items; @@ -319,12 +319,12 @@ class ParseTableBuilder { } } else { if (item.step_index > 0) { - set first_set = get_first_set(next_symbol); - if (first_set.count(lookahead)) { + LookaheadSet first_set = item_set_builder.get_first_set(next_symbol); + if (first_set.contains(lookahead)) { involved_symbols.insert(item.lhs()); core_shift_items.insert(item); } - } else if (next_symbol == lookahead) { + } else if (next_symbol.is_token && next_symbol.index == lookahead) { other_shift_items.insert(item); } } @@ -334,7 +334,7 @@ class ParseTableBuilder { if (involved_symbols == conflict_set) return true; - string description = "Lookahead symbol: " + symbol_name(lookahead) + "\n"; + string description = "Lookahead symbol: " + symbol_name(Symbol(lookahead, true)) + "\n"; if (!reduce_items.empty()) { description += "Reduce items:\n"; diff --git a/src/compiler/build_tables/lex_conflict_manager.h 
b/src/compiler/build_tables/lex_conflict_manager.h index 9777dc36..0d3177dd 100644 --- a/src/compiler/build_tables/lex_conflict_manager.h +++ b/src/compiler/build_tables/lex_conflict_manager.h @@ -21,8 +21,8 @@ class LexConflictManager { const AcceptTokenAction &); bool resolve(const AcceptTokenAction &, const AcceptTokenAction &); - std::map> possible_homonyms; - std::map> possible_extensions; + std::map> possible_homonyms; + std::map> possible_extensions; }; } // namespace build_tables diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index 239bc029..1ecb0baf 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -12,8 +12,8 @@ using rules::Symbol; LookaheadSet::LookaheadSet() : entries(nullptr) {} -LookaheadSet::LookaheadSet(const set &symbols) - : entries(make_shared>(symbols)) {} +LookaheadSet::LookaheadSet(const set &symbols) + : entries(make_shared>(symbols)) {} bool LookaheadSet::empty() const { return !entries.get() || entries->empty(); @@ -23,7 +23,7 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const { return *entries == *other.entries; } -bool LookaheadSet::contains(const Symbol &symbol) const { +bool LookaheadSet::contains(const Symbol::Index &symbol) const { return entries->find(symbol) != entries->end(); } @@ -31,15 +31,15 @@ bool LookaheadSet::insert_all(const LookaheadSet &other) { if (!other.entries.get()) return false; if (!entries.get()) - entries = make_shared>(); + entries = make_shared>(); size_t previous_size = entries->size(); entries->insert(other.entries->begin(), other.entries->end()); return entries->size() > previous_size; } -bool LookaheadSet::insert(const Symbol &symbol) { +bool LookaheadSet::insert(const Symbol::Index &symbol) { if (!entries.get()) - entries = make_shared>(); + entries = make_shared>(); return entries->insert(symbol).second; } diff --git a/src/compiler/build_tables/lookahead_set.h 
b/src/compiler/build_tables/lookahead_set.h index e62ee34d..fe99b4d5 100644 --- a/src/compiler/build_tables/lookahead_set.h +++ b/src/compiler/build_tables/lookahead_set.h @@ -11,15 +11,15 @@ namespace build_tables { class LookaheadSet { public: LookaheadSet(); - explicit LookaheadSet(const std::set &); + explicit LookaheadSet(const std::set &); bool empty() const; bool operator==(const LookaheadSet &) const; - bool contains(const rules::Symbol &) const; + bool contains(const rules::Symbol::Index &) const; bool insert_all(const LookaheadSet &); - bool insert(const rules::Symbol &); + bool insert(const rules::Symbol::Index &); - std::shared_ptr> entries; + std::shared_ptr> entries; }; } // namespace build_tables diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 470200a6..5054e578 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -102,8 +102,8 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const { const LookaheadSet &lookahead_set = pair.second; result ^= hash()(lookahead_set.entries->size()); - for (auto &symbol : *pair.second.entries) - result ^= hash()(symbol); + for (Symbol::Index index : *pair.second.entries) + result ^= hash()(index); } return result; } diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index 93ffbf78..8259662f 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -3,6 +3,7 @@ #include #include #include "compiler/syntax_grammar.h" +#include "compiler/lexical_grammar.h" #include "compiler/rules/built_in_symbols.h" namespace tree_sitter { @@ -19,7 +20,8 @@ using std::make_shared; using rules::Symbol; using rules::NONE; -static map build_first_sets(const SyntaxGrammar &grammar) { +static map build_first_sets(const SyntaxGrammar &grammar, + const LexicalGrammar &lexical_grammar) { map 
result; vector symbol_stack; set processed_symbols; @@ -35,7 +37,7 @@ static map build_first_sets(const SyntaxGrammar &grammar) Symbol current_symbol = symbol_stack.back(); symbol_stack.pop_back(); if (current_symbol.is_token) { - first_set.insert(current_symbol); + first_set.insert(current_symbol.index); } else if (processed_symbols.insert(current_symbol).second) { for (const Production &production : grammar.productions(current_symbol)) { if (!production.empty()) { @@ -48,11 +50,17 @@ static map build_first_sets(const SyntaxGrammar &grammar) result.insert({symbol, first_set}); } + for (int i = 0; i < lexical_grammar.variables.size(); i++) { + Symbol symbol(i, true); + result.insert({symbol, LookaheadSet({ i })}); + } + return result; } -ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar) : - grammar{&grammar}, first_sets{build_first_sets(grammar)} { +ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, + const LexicalGrammar &lexical_grammar) : + grammar{&grammar}, first_sets{build_first_sets(grammar, lexical_grammar)} { } void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { @@ -88,11 +96,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { next_lookahead_symbols = lookahead_symbols; } else { Symbol symbol_after_next = item.production->at(next_step).symbol; - if (symbol_after_next.is_token) { - next_lookahead_symbols.insert(symbol_after_next); - } else { - next_lookahead_symbols = first_sets.find(symbol_after_next)->second; - } + next_lookahead_symbols = first_sets.find(symbol_after_next)->second; } // Add each of the next symbol's productions to be processed recursively. 
@@ -105,5 +109,9 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { } } +LookaheadSet ParseItemSetBuilder::get_first_set(rules::Symbol &symbol) const { + return first_sets.find(symbol)->second; +} + } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index 3d8eb5c6..8043437e 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -8,6 +8,7 @@ namespace tree_sitter { struct SyntaxGrammar; +struct LexicalGrammar; namespace build_tables { @@ -17,8 +18,9 @@ class ParseItemSetBuilder { std::vector> items_to_process; public: - ParseItemSetBuilder(const SyntaxGrammar &); + ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &); void apply_transitive_closure(ParseItemSet *); + LookaheadSet get_first_set(rules::Symbol &) const; }; } // namespace build_tables diff --git a/src/compiler/build_tables/recovery_tokens.cc b/src/compiler/build_tables/recovery_tokens.cc index e8d96aad..479de6b8 100644 --- a/src/compiler/build_tables/recovery_tokens.cc +++ b/src/compiler/build_tables/recovery_tokens.cc @@ -47,8 +47,8 @@ class FirstCharacters : public CharacterAggregator {}; class LastCharacters : public CharacterAggregator {}; class AllCharacters : public CharacterAggregator {}; -set recovery_tokens(const LexicalGrammar &grammar) { - set result; +set recovery_tokens(const LexicalGrammar &grammar) { + set result; AllCharacters all_separator_characters; for (const rule_ptr &separator : grammar.separators) @@ -79,7 +79,7 @@ set recovery_tokens(const LexicalGrammar &grammar) { !all_characters.result.intersects(all_separator_characters.result); if ((has_distinct_start && has_distinct_end) || has_no_separators) - result.insert(Symbol(i, true)); + result.insert(i); } return result; diff --git a/src/compiler/build_tables/recovery_tokens.h 
b/src/compiler/build_tables/recovery_tokens.h index c97a8cfd..4873b5a9 100644 --- a/src/compiler/build_tables/recovery_tokens.h +++ b/src/compiler/build_tables/recovery_tokens.h @@ -11,7 +11,7 @@ struct LexicalGrammar; namespace build_tables { -std::set recovery_tokens(const LexicalGrammar &); +std::set recovery_tokens(const LexicalGrammar &); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 47218d36..944036a6 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -167,13 +167,15 @@ ParseStateId ParseTable::add_state() { return states.size() - 1; } -ParseAction &ParseTable::set_terminal_action(ParseStateId state_id, int index, - ParseAction action) { +ParseAction &ParseTable::set_terminal_action(ParseStateId state_id, + Symbol::Index index, + ParseAction action) { states[state_id].terminal_entries[index].actions.clear(); return add_terminal_action(state_id, index, action); } -ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, int index, +ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, + Symbol::Index index, ParseAction action) { Symbol symbol(index, true); if (action.type == ParseActionTypeShift && action.extra) @@ -186,7 +188,8 @@ ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, int index, return *entry.actions.rbegin(); } -void ParseTable::set_nonterminal_action(ParseStateId state_id, int index, +void ParseTable::set_nonterminal_action(ParseStateId state_id, + Symbol::Index index, ParseStateId next_state_id) { Symbol symbol(index, false); symbols[symbol].structural = true; @@ -208,12 +211,12 @@ bool ParseTable::merge_state(size_t i, size_t j) { return false; for (auto &entry : state.terminal_entries) { - Symbol symbol(entry.first, true); + Symbol::Index index = entry.first; const vector &actions = entry.second.actions; - const auto &other_entry = other.terminal_entries.find(symbol.index); + const auto 
&other_entry = other.terminal_entries.find(index); if (other_entry == other.terminal_entries.end()) { - if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in()) + if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index)) return false; if (actions.back().type != ParseActionTypeReduce) return false; @@ -224,25 +227,25 @@ bool ParseTable::merge_state(size_t i, size_t j) { } } - set symbols_to_merge; + set symbols_to_merge; for (auto &entry : other.terminal_entries) { - Symbol symbol(entry.first, true); + Symbol::Index index = entry.first; const vector &actions = entry.second.actions; - if (!state.terminal_entries.count(symbol.index)) { - if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in()) + if (!state.terminal_entries.count(index)) { + if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index)) return false; if (actions.back().type != ParseActionTypeReduce) return false; if (!has_entry(state, entry.second)) return false; - symbols_to_merge.insert(symbol); + symbols_to_merge.insert(index); } } - for (const Symbol &symbol : symbols_to_merge) - state.terminal_entries[symbol.index] = other.terminal_entries.find(symbol.index)->second; + for (const Symbol::Index &index : symbols_to_merge) + state.terminal_entries[index] = other.terminal_entries.find(index)->second; return true; } diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index ec4f5271..5f660ecd 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -75,8 +75,8 @@ class ParseState { void each_referenced_state(std::function); bool has_shift_action() const; - std::map terminal_entries; - std::map nonterminal_entries; + std::map terminal_entries; + std::map nonterminal_entries; LexStateId lex_state_id; }; @@ -97,7 +97,7 @@ class ParseTable { std::vector states; std::map symbols; - std::set mergeable_symbols; + std::set mergeable_symbols; }; } // namespace tree_sitter diff --git a/src/compiler/rules/symbol.cc 
b/src/compiler/rules/symbol.cc index 697a3465..96c4bd60 100644 --- a/src/compiler/rules/symbol.cc +++ b/src/compiler/rules/symbol.cc @@ -10,9 +10,9 @@ using std::string; using std::to_string; using std::hash; -Symbol::Symbol(int index) : index(index), is_token(false) {} +Symbol::Symbol(Symbol::Index index) : index(index), is_token(false) {} -Symbol::Symbol(int index, bool is_token) : index(index), is_token(is_token) {} +Symbol::Symbol(Symbol::Index index, bool is_token) : index(index), is_token(is_token) {} bool Symbol::operator==(const Symbol &other) const { return (other.index == index) && (other.is_token == is_token); @@ -24,7 +24,7 @@ bool Symbol::operator==(const Rule &rule) const { } size_t Symbol::hash_code() const { - return hash()(index) ^ hash()(is_token); + return hash()(index) ^ hash()(is_token); } rule_ptr Symbol::copy() const { @@ -44,10 +44,14 @@ bool Symbol::operator<(const Symbol &other) const { return (index < other.index); } -bool Symbol::is_built_in() const { +bool Symbol::is_built_in(Symbol::Index index) { return index < 0; } +bool Symbol::is_built_in() const { + return is_built_in(index); +} + void Symbol::accept(Visitor *visitor) const { visitor->visit(this); } diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index 81d74d85..4ae9ece3 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -9,8 +9,11 @@ namespace rules { class Symbol : public Rule { public: - explicit Symbol(int index); - Symbol(int index, bool is_token); + typedef int Index; + + + explicit Symbol(Index index); + Symbol(Index index, bool is_token); bool operator==(const Symbol &other) const; bool operator==(const Rule &other) const; @@ -21,9 +24,10 @@ class Symbol : public Rule { void accept(Visitor *visitor) const; bool operator<(const Symbol &other) const; + static bool is_built_in(Index); bool is_built_in() const; - int index; + Index index; bool is_token; }; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 
8ee9be7b..a9c9e5bb 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -840,9 +840,10 @@ static StackIterateAction parser__repair_consumed_error_callback( SkipPrecedingTokensSession *session = payload; Parser *self = session->parser; TSSymbol lookahead_symbol = session->lookahead_symbol; - const TSParseAction *action = - ts_language_last_action(self->language, state, lookahead_symbol); - if (action && action->type == TSParseActionTypeReduce) { + size_t action_count; + const TSParseAction *actions = + ts_language_actions(self->language, state, lookahead_symbol, &action_count); + if (action_count > 0 && actions[0].type == TSParseActionTypeReduce) { return StackIteratePop | StackIterateStop; } }