From 8d9c261e3ab59f6acc348ace53ce87e829c0b787 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Nov 2016 10:25:32 -0800 Subject: [PATCH 1/6] Don't include reduce actions for nonterminal lookaheads --- project.gyp | 2 +- ...spec.cc => parse_item_set_builder_spec.cc} | 18 ++- .../build_tables/build_parse_table.cc | 10 +- src/compiler/build_tables/item_set_closure.cc | 80 ------------- src/compiler/build_tables/item_set_closure.h | 18 --- src/compiler/build_tables/lookahead_set.cc | 2 + .../build_tables/parse_item_set_builder.cc | 109 ++++++++++++++++++ .../build_tables/parse_item_set_builder.h | 27 +++++ src/runtime/parser.c | 2 +- 9 files changed, 158 insertions(+), 110 deletions(-) rename spec/compiler/build_tables/{item_set_closure_spec.cc => parse_item_set_builder_spec.cc} (88%) delete mode 100644 src/compiler/build_tables/item_set_closure.cc delete mode 100644 src/compiler/build_tables/item_set_closure.h create mode 100644 src/compiler/build_tables/parse_item_set_builder.cc create mode 100644 src/compiler/build_tables/parse_item_set_builder.h diff --git a/project.gyp b/project.gyp index 74124b15..91001727 100644 --- a/project.gyp +++ b/project.gyp @@ -15,12 +15,12 @@ 'src/compiler/build_tables/build_parse_table.cc', 'src/compiler/build_tables/build_tables.cc', 'src/compiler/build_tables/recovery_tokens.cc', - 'src/compiler/build_tables/item_set_closure.cc', 'src/compiler/build_tables/lex_item.cc', 'src/compiler/build_tables/lex_item_transitions.cc', 'src/compiler/build_tables/lex_conflict_manager.cc', 'src/compiler/build_tables/lookahead_set.cc', 'src/compiler/build_tables/parse_item.cc', + 'src/compiler/build_tables/parse_item_set_builder.cc', 'src/compiler/build_tables/parse_conflict_manager.cc', 'src/compiler/build_tables/rule_can_be_blank.cc', 'src/compiler/compile.cc', diff --git a/spec/compiler/build_tables/item_set_closure_spec.cc b/spec/compiler/build_tables/parse_item_set_builder_spec.cc similarity index 88% rename from 
spec/compiler/build_tables/item_set_closure_spec.cc rename to spec/compiler/build_tables/parse_item_set_builder_spec.cc index c8b30b71..5e387e51 100644 --- a/spec/compiler/build_tables/item_set_closure_spec.cc +++ b/spec/compiler/build_tables/parse_item_set_builder_spec.cc @@ -1,6 +1,6 @@ #include "spec_helper.h" #include "compiler/syntax_grammar.h" -#include "compiler/build_tables/item_set_closure.h" +#include "compiler/build_tables/parse_item_set_builder.h" #include "compiler/build_tables/lookahead_set.h" #include "compiler/rules/built_in_symbols.h" @@ -9,7 +9,7 @@ using namespace rules; START_TEST -describe("item_set_closure", []() { +describe("ParseItemSetBuilder", []() { it("adds items at the beginnings of referenced rules", [&]() { SyntaxGrammar grammar{{ SyntaxVariable("rule0", VariableTypeNamed, { @@ -39,12 +39,15 @@ describe("item_set_closure", []() { return grammar.variables[variable_index].productions[production_index]; }; - ParseItemSet item_set = item_set_closure(ParseItemSet({ + ParseItemSet item_set({ { ParseItem(Symbol(0), production(0, 0), 0), LookaheadSet({ Symbol(10, true) }), } - }), grammar); + }); + + ParseItemSetBuilder item_set_builder(grammar); + item_set_builder.apply_transitive_closure(&item_set); AssertThat(item_set, Equals(ParseItemSet({ { @@ -87,12 +90,15 @@ describe("item_set_closure", []() { return grammar.variables[variable_index].productions[production_index]; }; - ParseItemSet item_set = item_set_closure(ParseItemSet({ + ParseItemSet item_set({ { ParseItem(Symbol(0), production(0, 0), 0), LookaheadSet({ Symbol(10, true) }), } - }), grammar); + }); + + ParseItemSetBuilder item_set_builder(grammar); + item_set_builder.apply_transitive_closure(&item_set); AssertThat(item_set, Equals(ParseItemSet({ { diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index c5079848..829d8e93 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ 
b/src/compiler/build_tables/build_parse_table.cc @@ -9,7 +9,7 @@ #include "compiler/build_tables/parse_conflict_manager.h" #include "compiler/build_tables/remove_duplicate_states.h" #include "compiler/build_tables/parse_item.h" -#include "compiler/build_tables/item_set_closure.h" +#include "compiler/build_tables/parse_item_set_builder.h" #include "compiler/lexical_grammar.h" #include "compiler/syntax_grammar.h" #include "compiler/rules/symbol.h" @@ -40,6 +40,7 @@ class ParseTableBuilder { vector> item_sets_to_process; ParseTable parse_table; set conflicts; + ParseItemSetBuilder item_set_builder; set fragile_productions; bool allow_any_conflict; @@ -48,6 +49,7 @@ class ParseTableBuilder { const LexicalGrammar &lex_grammar) : grammar(grammar), lexical_grammar(lex_grammar), + item_set_builder(grammar), allow_any_conflict(false) {} pair build() { @@ -88,11 +90,11 @@ class ParseTableBuilder { CompileError process_part_state_queue() { while (!item_sets_to_process.empty()) { auto pair = item_sets_to_process.back(); - ParseItemSet item_set = item_set_closure(pair.first, grammar); - + ParseItemSet &item_set = pair.first; ParseStateId state_id = pair.second; item_sets_to_process.pop_back(); + item_set_builder.apply_transitive_closure(&item_set); add_reduce_actions(item_set, state_id); add_shift_actions(item_set, state_id); add_shift_extra_actions(state_id); @@ -143,7 +145,7 @@ class ParseTableBuilder { ParseStateId state_id = parse_table.add_state(); parse_state_ids[item_set] = state_id; - item_sets_to_process.push_back({ item_set, state_id }); + item_sets_to_process.push_back({ std::move(item_set), state_id }); return state_id; } else { return pair->second; diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc deleted file mode 100644 index b74431ba..00000000 --- a/src/compiler/build_tables/item_set_closure.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include "compiler/build_tables/item_set_closure.h" -#include -#include -#include 
-#include "compiler/syntax_grammar.h" -#include "compiler/rules/built_in_symbols.h" - -namespace tree_sitter { -namespace build_tables { - -using std::vector; -using std::pair; -using std::shared_ptr; -using std::make_shared; -using rules::Symbol; -using rules::NONE; - -ParseItemSet item_set_closure(const ParseItemSet &input_item_set, - const SyntaxGrammar &grammar) { - ParseItemSet result; - - // An item set's closure is defined recursively. Use an explicit stack to - // store the recursively-added items. - vector> items_to_process( - input_item_set.entries.begin(), input_item_set.entries.end()); - - while (!items_to_process.empty()) { - ParseItem item = items_to_process.back().first; - LookaheadSet lookahead_symbols = items_to_process.back().second; - items_to_process.pop_back(); - - // Add the parse-item and lookahead symbols to the item set. - // If they were already present, skip to the next item. - if (!result.entries[item].insert_all(lookahead_symbols)) - continue; - - // If the next symbol in the production is not a non-terminal, skip to the - // next item. - Symbol next_symbol = item.next_symbol(); - if (next_symbol == NONE() || next_symbol.is_token || - next_symbol.is_built_in()) - continue; - - // If the next symbol is the last symbol in the item's production, then the - // lookahead symbols for the new items are the same as for the current item. - // Otherwise, compute the FOLLOW-SET of the symbol in this production. This - // is defined recursively as well, so use another queue to store the - // recursively-added follow symbols. 
- LookaheadSet next_lookahead_symbols; - size_t next_step = item.step_index + 1; - if (next_step == item.production->size()) { - next_lookahead_symbols = lookahead_symbols; - } else { - vector symbols_to_process( - { item.production->at(next_step).symbol }); - while (!symbols_to_process.empty()) { - Symbol symbol = symbols_to_process.back(); - symbols_to_process.pop_back(); - - if (!next_lookahead_symbols.insert(symbol)) - continue; - - for (const Production &production : grammar.productions(symbol)) - if (!production.empty()) - symbols_to_process.push_back(production[0].symbol); - } - } - - // Add each of the next symbol's productions to be processed recursively. - for (const Production &production : grammar.productions(next_symbol)) - items_to_process.push_back({ - ParseItem(next_symbol, production, 0), next_lookahead_symbols, - }); - } - - return result; -} - -} // namespace build_tables -} // namespace tree_sitter diff --git a/src/compiler/build_tables/item_set_closure.h b/src/compiler/build_tables/item_set_closure.h deleted file mode 100644 index 093d19d7..00000000 --- a/src/compiler/build_tables/item_set_closure.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_ -#define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_ - -#include "compiler/build_tables/parse_item.h" -#include "compiler/rules/symbol.h" - -namespace tree_sitter { - -struct SyntaxGrammar; - -namespace build_tables { - -ParseItemSet item_set_closure(const ParseItemSet &, const SyntaxGrammar &); - -} // namespace build_tables -} // namespace tree_sitter - -#endif // COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_ diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index 55a1f78e..239bc029 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -28,6 +28,8 @@ bool LookaheadSet::contains(const Symbol &symbol) const { } bool LookaheadSet::insert_all(const LookaheadSet &other) { + 
if (!other.entries.get()) + return false; if (!entries.get()) entries = make_shared>(); size_t previous_size = entries->size(); diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc new file mode 100644 index 00000000..93ffbf78 --- /dev/null +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -0,0 +1,109 @@ +#include "compiler/build_tables/parse_item_set_builder.h" +#include +#include +#include +#include "compiler/syntax_grammar.h" +#include "compiler/rules/built_in_symbols.h" + +namespace tree_sitter { +namespace build_tables { + +using std::vector; +using std::set; +using std::map; +using std::get; +using std::tuple; +using std::make_tuple; +using std::shared_ptr; +using std::make_shared; +using rules::Symbol; +using rules::NONE; + +static map build_first_sets(const SyntaxGrammar &grammar) { + map result; + vector symbol_stack; + set processed_symbols; + + for (size_t i = 0; i < grammar.variables.size(); i++) { + Symbol symbol(i); + LookaheadSet first_set; + + processed_symbols.clear(); + symbol_stack.clear(); + symbol_stack.push_back(symbol); + while (!symbol_stack.empty()) { + Symbol current_symbol = symbol_stack.back(); + symbol_stack.pop_back(); + if (current_symbol.is_token) { + first_set.insert(current_symbol); + } else if (processed_symbols.insert(current_symbol).second) { + for (const Production &production : grammar.productions(current_symbol)) { + if (!production.empty()) { + symbol_stack.push_back(production[0].symbol); + } + } + } + } + + result.insert({symbol, first_set}); + } + + return result; +} + +ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar) : + grammar{&grammar}, first_sets{build_first_sets(grammar)} { +} + +void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { + items_to_process.clear(); + for (const auto &entry : item_set->entries) { + items_to_process.push_back(make_tuple(entry.first, entry.second, true)); + } + + while 
(!items_to_process.empty()) { + ParseItem item = get<0>(items_to_process.back()); + LookaheadSet lookahead_symbols = get<1>(items_to_process.back()); + bool from_original_set = get<2>(items_to_process.back()); + items_to_process.pop_back(); + + // Add the parse-item and lookahead symbols to the item set. + // If they were already present, skip to the next item. + if (!from_original_set && !item_set->entries[item].insert_all(lookahead_symbols)) + continue; + + // If the next symbol in the production is not a non-terminal, skip to the + // next item. + Symbol next_symbol = item.next_symbol(); + if (next_symbol == NONE() || next_symbol.is_token || + next_symbol.is_built_in()) + continue; + + // If the next symbol is the last symbol in the item's production, then the + // lookahead symbols for the new items are the same as for the current item. + // Otherwise, they are the FOLLOW set of the symbol in this production. + LookaheadSet next_lookahead_symbols; + size_t next_step = item.step_index + 1; + if (next_step == item.production->size()) { + next_lookahead_symbols = lookahead_symbols; + } else { + Symbol symbol_after_next = item.production->at(next_step).symbol; + if (symbol_after_next.is_token) { + next_lookahead_symbols.insert(symbol_after_next); + } else { + next_lookahead_symbols = first_sets.find(symbol_after_next)->second; + } + } + + // Add each of the next symbol's productions to be processed recursively. 
+ for (const Production &production : grammar->productions(next_symbol)) + items_to_process.push_back(make_tuple( + ParseItem(next_symbol, production, 0), + next_lookahead_symbols, + false + )); + } +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h new file mode 100644 index 00000000..3d8eb5c6 --- /dev/null +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -0,0 +1,27 @@ +#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_ +#define COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_ + +#include "compiler/build_tables/parse_item.h" +#include "compiler/rules/symbol.h" +#include + +namespace tree_sitter { + +struct SyntaxGrammar; + +namespace build_tables { + +class ParseItemSetBuilder { + const SyntaxGrammar *grammar; + std::map first_sets; + std::vector> items_to_process; + + public: + ParseItemSetBuilder(const SyntaxGrammar &); + void apply_transitive_closure(ParseItemSet *); +}; + +} // namespace build_tables +} // namespace tree_sitter + +#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_ diff --git a/src/runtime/parser.c b/src/runtime/parser.c index fa7a7a26..04556ebb 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1086,7 +1086,7 @@ static void parser__advance(Parser *self, StackVersion version, return; } - parser__handle_error(self, version, lookahead->symbol); + parser__handle_error(self, version, lookahead->first_leaf.symbol); if (ts_stack_is_halted(self->stack, version)) { ts_tree_release(lookahead); From fad7294ba4a4893e62d480e73bf1421184fcbbde Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Nov 2016 08:36:06 -0800 Subject: [PATCH 2/6] Store shift states for non-terminals directly in the main parse table --- include/tree_sitter/parser.h | 6 +- .../build_tables/lex_conflict_manager_spec.cc | 5 +- spec/fixtures/error_corpus/c_errors.txt | 3 +- .../error_corpus/javascript_errors.txt | 9 +- 
spec/helpers/stream_methods.cc | 22 ++--- spec/runtime/tree_spec.cc | 2 +- src/compiler/build_tables/build_lex_table.cc | 15 ++- .../build_tables/build_parse_table.cc | 92 +++++++++---------- .../build_tables/lex_conflict_manager.cc | 6 +- .../build_tables/lex_conflict_manager.h | 4 +- .../build_tables/remove_duplicate_states.h | 8 +- src/compiler/generate_code/c_code.cc | 12 ++- src/compiler/lex_table.cc | 4 +- src/compiler/lex_table.h | 8 +- src/compiler/parse_table.cc | 79 ++++++++-------- src/compiler/parse_table.h | 15 +-- src/compiler/rules/symbol.cc | 4 +- src/runtime/language.c | 1 + src/runtime/language.h | 17 ++++ src/runtime/parser.c | 87 ++++++++---------- 20 files changed, 204 insertions(+), 195 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 9219032b..10d17582 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -14,7 +14,6 @@ typedef unsigned short TSStateId; #define ts_builtin_sym_error ((TSSymbol)-1) #define ts_builtin_sym_end 0 -#define ts_builtin_sym_start 1 typedef struct { bool visible : 1; @@ -60,6 +59,7 @@ typedef union { typedef struct TSLanguage { size_t symbol_count; + size_t token_count; const char **symbol_names; const TSSymbolMetadata *symbol_metadata; const unsigned short *parse_table; @@ -103,6 +103,9 @@ typedef struct TSLanguage { * Parse Table Macros */ +#define STATE(id) id +#define ACTIONS(id) id + #define SHIFT(to_state_value) \ { \ { \ @@ -146,6 +149,7 @@ typedef struct TSLanguage { #define EXPORT_LANGUAGE(language_name) \ static TSLanguage language = { \ .symbol_count = SYMBOL_COUNT, \ + .token_count = TOKEN_COUNT, \ .symbol_metadata = ts_symbol_metadata, \ .parse_table = (const unsigned short *)ts_parse_table, \ .parse_actions = ts_parse_actions, \ diff --git a/spec/compiler/build_tables/lex_conflict_manager_spec.cc b/spec/compiler/build_tables/lex_conflict_manager_spec.cc index b62b9137..7f43e175 100644 --- 
a/spec/compiler/build_tables/lex_conflict_manager_spec.cc +++ b/spec/compiler/build_tables/lex_conflict_manager_spec.cc @@ -1,5 +1,6 @@ #include "spec_helper.h" #include "helpers/rule_helpers.h" +#include "helpers/stream_methods.h" #include "compiler/rules/built_in_symbols.h" #include "compiler/parse_table.h" #include "compiler/build_tables/lex_conflict_manager.h" @@ -36,7 +37,7 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() { it("adds the preferred token as a possible homonym for the discarded one", [&]() { conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false)); - AssertThat(conflict_manager.possible_homonyms[sym2], Contains(sym1)); + AssertThat(conflict_manager.possible_homonyms[sym2.index], Contains(sym1.index)); }); }); @@ -78,7 +79,7 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() { it("adds the in-progress tokens as possible extensions of the discarded token", [&]() { conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true)); - AssertThat(conflict_manager.possible_extensions[sym3], Contains(sym4)); + AssertThat(conflict_manager.possible_extensions[sym3.index], Contains(sym4.index)); }); }); }); diff --git a/spec/fixtures/error_corpus/c_errors.txt b/spec/fixtures/error_corpus/c_errors.txt index 4dfb0894..05bc7ec2 100644 --- a/spec/fixtures/error_corpus/c_errors.txt +++ b/spec/fixtures/error_corpus/c_errors.txt @@ -127,6 +127,5 @@ int b() { (ERROR (identifier) (identifier)) (identifier) (number_literal))) (declaration - (ERROR (identifier) (identifier)) (identifier) - (init_declarator (identifier) (number_literal)))))) + (init_declarator (ERROR (identifier) (identifier)) (identifier) (number_literal)))))) diff --git a/spec/fixtures/error_corpus/javascript_errors.txt b/spec/fixtures/error_corpus/javascript_errors.txt index 0c5e976e..c308e443 100644 --- a/spec/fixtures/error_corpus/javascript_errors.txt +++ 
b/spec/fixtures/error_corpus/javascript_errors.txt @@ -14,10 +14,8 @@ e f; (ERROR (identifier)) (identifier) (statement_block - (ERROR (identifier)) - (expression_statement (identifier)))) - (ERROR (identifier)) - (expression_statement (identifier))) + (expression_statement (ERROR (identifier)) (identifier)))) + (expression_statement (ERROR (identifier)) (identifier))) ======================================================= multiple invalid tokens right after the viable prefix @@ -35,8 +33,7 @@ h i j k; (ERROR (identifier) (identifier)) (identifier) (statement_block - (ERROR (identifier) (identifier) (identifier)) - (expression_statement (identifier)))) + (expression_statement (ERROR (identifier) (identifier) (identifier)) (identifier)))) (expression_statement (ERROR (identifier) (identifier) (identifier)) (identifier))) diff --git a/spec/helpers/stream_methods.cc b/spec/helpers/stream_methods.cc index 514d6181..69483ed3 100644 --- a/spec/helpers/stream_methods.cc +++ b/spec/helpers/stream_methods.cc @@ -75,21 +75,15 @@ ostream &operator<<(ostream &stream, const ParseAction &action) { } } +ostream &operator<<(ostream &stream, const ParseTableEntry &entry) { + return stream << entry.actions; +} + ostream &operator<<(ostream &stream, const ParseState &state) { - stream << string("# {"); - for (auto &action : entry.second.actions) { - stream << string(" ") << action; - } - stream << string("}"); - started = true; - } - stream << string(">"); - return stream; + stream << string("#"); } ostream &operator<<(ostream &stream, const ProductionStep &step) { diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 83bb67a5..9f451829 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -23,7 +23,7 @@ void assert_consistent(const Tree *tree) { START_TEST enum { - cat = ts_builtin_sym_start, + cat = 1, dog, eel, fox, diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 
56de23cf..0b75c368 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -114,14 +114,11 @@ class LexTableBuilder { void mark_fragile_tokens() { for (ParseState &state : parse_table->states) { - for (auto &entry : state.entries) { - if (!entry.first.is_token) - continue; - + for (auto &entry : state.terminal_entries) { auto homonyms = conflict_manager.possible_homonyms.find(entry.first); if (homonyms != conflict_manager.possible_homonyms.end()) - for (const Symbol &homonym : homonyms->second) - if (state.entries.count(homonym)) { + for (int homonym : homonyms->second) + if (state.terminal_entries.count(homonym)) { entry.second.reusable = false; break; } @@ -131,8 +128,8 @@ class LexTableBuilder { auto extensions = conflict_manager.possible_extensions.find(entry.first); if (extensions != conflict_manager.possible_extensions.end()) - for (const Symbol &extension : extensions->second) - if (state.entries.count(extension)) { + for (int extension : extensions->second) + if (state.terminal_entries.count(extension)) { entry.second.depends_on_lookahead = true; break; } @@ -147,7 +144,7 @@ class LexTableBuilder { } auto replacements = - remove_duplicate_states(&lex_table); + remove_duplicate_states(&lex_table); for (ParseState &parse_state : parse_table->states) { auto replacement = replacements.find(parse_state.lex_state_id); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 829d8e93..8e69e228 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -116,17 +116,16 @@ class ParseTableBuilder { } for (const Symbol &symbol : grammar.extra_tokens) { - if (!error_state.entries.count(symbol)) { - error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra()); + if (!error_state.terminal_entries.count(symbol.index)) { + 
error_state.terminal_entries[symbol.index].actions.push_back(ParseAction::ShiftExtra()); } } for (size_t i = 0; i < grammar.variables.size(); i++) { - Symbol symbol(i, false); - add_out_of_context_parse_state(&error_state, symbol); + add_out_of_context_parse_state(&error_state, Symbol(i, false)); } - error_state.entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0)); + error_state.terminal_entries[END_OF_INPUT().index].actions.push_back(ParseAction::Recover(0)); parse_table.states[0] = error_state; } @@ -135,7 +134,11 @@ class ParseTableBuilder { const ParseItemSet &item_set = recovery_states[symbol]; if (!item_set.entries.empty()) { ParseStateId state = add_parse_state(item_set); - error_state->entries[symbol].actions.push_back(ParseAction::Recover(state)); + if (symbol.is_token) { + error_state->terminal_entries[symbol.index].actions.assign({ ParseAction::Recover(state) }); + } else { + error_state->nonterminal_entries[symbol.index] = state; + } } } @@ -158,14 +161,19 @@ class ParseTableBuilder { const ParseItemSet &next_item_set = transition.second.first; const PrecedenceRange &precedence = transition.second.second; - ParseAction *new_action = add_action( - state_id, symbol, ParseAction::Shift(0, precedence), item_set); - - if (!allow_any_conflict) + if (!allow_any_conflict) { recovery_states[symbol].add(next_item_set); + } - if (new_action) - new_action->state_index = add_parse_state(next_item_set); + if (symbol.is_token) { + ParseAction *new_action = add_terminal_action( + state_id, symbol, ParseAction::Shift(0, precedence), item_set); + if (new_action) { + new_action->state_index = add_parse_state(next_item_set); + } + } else { + parse_table.set_nonterminal_action(state_id, symbol.index, add_parse_state(next_item_set)); + } } } @@ -185,8 +193,9 @@ class ParseTableBuilder { status.associativity, *item.production); } - for (const auto &lookahead_sym : *lookahead_symbols.entries) - add_action(state_id, lookahead_sym, action, item_set); + for (const 
Symbol &lookahead : *lookahead_symbols.entries) { + add_terminal_action(state_id, lookahead, action, item_set); + } } } } @@ -195,24 +204,25 @@ class ParseTableBuilder { ParseAction action = ParseAction::ShiftExtra(); ParseState &state = parse_table.states[state_id]; for (const Symbol &extra_symbol : grammar.extra_tokens) - if (!state.entries.count(extra_symbol) || state.has_shift_action() || - allow_any_conflict) - parse_table.add_action(state_id, extra_symbol, action); + if (!state.terminal_entries.count(extra_symbol.index) || + state.has_shift_action() || allow_any_conflict) + parse_table.add_terminal_action(state_id, extra_symbol.index, action); } void mark_fragile_actions() { for (ParseState &state : parse_table.states) { set symbols_with_multiple_actions; - for (auto &entry : state.entries) { - const Symbol &symbol = entry.first; + for (auto &entry : state.terminal_entries) { + const Symbol symbol(entry.first, true); auto &actions = entry.second.actions; - if (actions.size() > 1) + if (actions.size() > 1) { symbols_with_multiple_actions.insert(symbol); + } for (ParseAction &action : actions) { - if (action.type == ParseActionTypeReduce && !action.extra) { + if (action.type == ParseActionTypeReduce) { if (has_fragile_production(action.production)) action.fragile = true; @@ -231,21 +241,8 @@ class ParseTableBuilder { break; } } - if (!erased) + if (!erased) { ++i; - } - } - - if (!symbols_with_multiple_actions.empty()) { - for (auto &entry : state.entries) { - if (!entry.first.is_token) { - set first_set = get_first_set(entry.first); - for (const Symbol &symbol : symbols_with_multiple_actions) { - if (first_set.count(symbol)) { - entry.second.reusable = false; - break; - } - } } } } @@ -253,33 +250,34 @@ class ParseTableBuilder { } void remove_duplicate_parse_states() { - remove_duplicate_states(&parse_table); + remove_duplicate_states(&parse_table); } - ParseAction *add_action(ParseStateId state_id, Symbol lookahead, - const ParseAction &new_action, - const 
ParseItemSet &item_set) { + ParseAction *add_terminal_action(ParseStateId state_id, Symbol lookahead, + const ParseAction &new_action, + const ParseItemSet &item_set) { const ParseState &state = parse_table.states[state_id]; - const auto &current_entry = state.entries.find(lookahead); - if (current_entry == state.entries.end()) - return &parse_table.set_action(state_id, lookahead, new_action); + const auto &current_entry = state.terminal_entries.find(lookahead.index); + if (current_entry == state.terminal_entries.end()) + return &parse_table.set_terminal_action(state_id, lookahead.index, new_action); if (allow_any_conflict) - return &parse_table.add_action(state_id, lookahead, new_action); + return &parse_table.add_terminal_action(state_id, lookahead.index, new_action); const ParseAction old_action = current_entry->second.actions[0]; auto resolution = conflict_manager.resolve(new_action, old_action); switch (resolution.second) { case ConflictTypeNone: - if (resolution.first) - return &parse_table.set_action(state_id, lookahead, new_action); + if (resolution.first) { + return &parse_table.set_terminal_action(state_id, lookahead.index, new_action); + } break; case ConflictTypeResolved: { if (resolution.first) { if (old_action.type == ParseActionTypeReduce) fragile_productions.insert(old_action.production); - return &parse_table.set_action(state_id, lookahead, new_action); + return &parse_table.set_terminal_action(state_id, lookahead.index, new_action); } else { if (new_action.type == ParseActionTypeReduce) fragile_productions.insert(new_action.production); @@ -293,7 +291,7 @@ class ParseTableBuilder { fragile_productions.insert(old_action.production); if (new_action.type == ParseActionTypeReduce) fragile_productions.insert(new_action.production); - return &parse_table.add_action(state_id, lookahead, new_action); + return &parse_table.add_terminal_action(state_id, lookahead.index, new_action); } break; } diff --git a/src/compiler/build_tables/lex_conflict_manager.cc 
b/src/compiler/build_tables/lex_conflict_manager.cc index b89228d4..3fc22ed2 100644 --- a/src/compiler/build_tables/lex_conflict_manager.cc +++ b/src/compiler/build_tables/lex_conflict_manager.cc @@ -14,7 +14,7 @@ bool LexConflictManager::resolve(const LexItemSet &item_set, return true; if (new_action.precedence_range.max >= old_action.precedence) { for (const LexItem &item : item_set.entries) - possible_extensions[old_action.symbol].insert(item.lhs); + possible_extensions[old_action.symbol.index].insert(item.lhs.index); return true; } else { return false; @@ -44,9 +44,9 @@ bool LexConflictManager::resolve(const AcceptTokenAction &new_action, result = false; if (result) - possible_homonyms[old_action.symbol].insert(new_action.symbol); + possible_homonyms[old_action.symbol.index].insert(new_action.symbol.index); else - possible_homonyms[new_action.symbol].insert(old_action.symbol); + possible_homonyms[new_action.symbol.index].insert(old_action.symbol.index); return result; } diff --git a/src/compiler/build_tables/lex_conflict_manager.h b/src/compiler/build_tables/lex_conflict_manager.h index 8fb0f075..9777dc36 100644 --- a/src/compiler/build_tables/lex_conflict_manager.h +++ b/src/compiler/build_tables/lex_conflict_manager.h @@ -21,8 +21,8 @@ class LexConflictManager { const AcceptTokenAction &); bool resolve(const AcceptTokenAction &, const AcceptTokenAction &); - std::map> possible_homonyms; - std::map> possible_extensions; + std::map> possible_homonyms; + std::map> possible_extensions; }; } // namespace build_tables diff --git a/src/compiler/build_tables/remove_duplicate_states.h b/src/compiler/build_tables/remove_duplicate_states.h index 601737a5..a154c05a 100644 --- a/src/compiler/build_tables/remove_duplicate_states.h +++ b/src/compiler/build_tables/remove_duplicate_states.h @@ -7,7 +7,7 @@ namespace tree_sitter { namespace build_tables { -template +template std::map remove_duplicate_states(TableType *table) { std::map replacements; @@ -46,10 +46,10 @@ 
std::map remove_duplicate_states(TableType *table) { } for (auto &state : table->states) - state.each_advance_action([&new_replacements](ActionType *action) { - auto new_replacement = new_replacements.find(action->state_index); + state.each_referenced_state([&new_replacements](int64_t *state_index) { + auto new_replacement = new_replacements.find(*state_index); if (new_replacement != new_replacements.end()) - action->state_index = new_replacement->second; + *state_index = new_replacement->second; }); for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 65244fdf..78a8c707 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -115,6 +115,7 @@ class CCodeGenerator { void add_state_and_symbol_counts() { line("#define STATE_COUNT " + to_string(parse_table.states.size())); line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size())); + line("#define TOKEN_COUNT " + to_string(lexical_grammar.variables.size() + 1)); line(); } @@ -222,10 +223,15 @@ class CCodeGenerator { for (const auto &state : parse_table.states) { line("[" + to_string(state_id++) + "] = {"); indent([&]() { - for (const auto &entry : state.entries) { - line("[" + symbol_id(entry.first) + "] = "); + for (const auto &entry : state.nonterminal_entries) { + line("[" + symbol_id(rules::Symbol(entry.first)) + "] = STATE("); + add(to_string(entry.second)); + add("),"); + } + for (const auto &entry : state.terminal_entries) { + line("[" + symbol_id(rules::Symbol(entry.first, true)) + "] = ACTIONS("); add(to_string(add_parse_action_list_id(entry.second))); - add(","); + add("),"); } }); line("},"); diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index 852586e5..8f8d2ded 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -57,9 +57,9 @@ bool LexState::operator==(const LexState &other) const { is_token_start == 
other.is_token_start; } -void LexState::each_advance_action(function fn) { +void LexState::each_referenced_state(function fn) { for (auto &entry : advance_actions) - fn(&entry.second); + fn(&entry.second.state_index); } LexStateId LexTable::add_state() { diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index d508e9da..ac7357a1 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -11,6 +11,8 @@ namespace tree_sitter { +typedef int64_t LexStateId; + typedef enum { LexActionTypeError, LexActionTypeAccept, @@ -24,7 +26,7 @@ struct AdvanceAction { bool operator==(const AdvanceAction &other) const; - size_t state_index; + LexStateId state_index; PrecedenceRange precedence_range; bool in_main_token; }; @@ -52,15 +54,13 @@ class LexState { LexState(); std::set expected_inputs() const; bool operator==(const LexState &) const; - void each_advance_action(std::function); + void each_referenced_state(std::function); std::map advance_actions; AcceptTokenAction accept_action; bool is_token_start; }; -typedef int64_t LexStateId; - class LexTable { public: LexStateId add_state(); diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index ef0e235d..47218d36 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -125,29 +125,34 @@ bool ParseTableEntry::operator==(const ParseTableEntry &other) const { ParseState::ParseState() : lex_state_id(-1) {} bool ParseState::has_shift_action() const { - for (const auto &pair : entries) + for (const auto &pair : terminal_entries) if (pair.second.actions.size() > 0 && pair.second.actions.back().type == ParseActionTypeShift) return true; - return false; + return (!nonterminal_entries.empty()); } set ParseState::expected_inputs() const { set result; - for (auto &entry : entries) - result.insert(entry.first); + for (auto &entry : terminal_entries) + result.insert(Symbol(entry.first, true)); + for (auto &entry : nonterminal_entries) + result.insert(Symbol(entry.first, false)); 
return result; } -void ParseState::each_advance_action(function fn) { - for (auto &entry : entries) +void ParseState::each_referenced_state(function fn) { + for (auto &entry : terminal_entries) for (ParseAction &action : entry.second.actions) if (action.type == ParseActionTypeShift || ParseActionTypeRecover) - fn(&action); + fn(&action.state_index); + for (auto &entry : nonterminal_entries) + fn(&entry.second); } bool ParseState::operator==(const ParseState &other) const { - return entries == other.entries; + return terminal_entries == other.terminal_entries && + nonterminal_entries == other.nonterminal_entries; } set ParseTable::all_symbols() const { @@ -162,35 +167,34 @@ ParseStateId ParseTable::add_state() { return states.size() - 1; } -ParseAction &ParseTable::set_action(ParseStateId id, Symbol symbol, - ParseAction action) { - if (action.type == ParseActionTypeShift && action.extra) - symbols[symbol].extra = true; - else - symbols[symbol].structural = true; - - states[id].entries[symbol].actions = { action }; - return *states[id].entries[symbol].actions.begin(); +ParseAction &ParseTable::set_terminal_action(ParseStateId state_id, int index, + ParseAction action) { + states[state_id].terminal_entries[index].actions.clear(); + return add_terminal_action(state_id, index, action); } -ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol, - ParseAction action) { +ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, int index, + ParseAction action) { + Symbol symbol(index, true); if (action.type == ParseActionTypeShift && action.extra) symbols[symbol].extra = true; else symbols[symbol].structural = true; - ParseState &state = states[id]; - for (ParseAction &existing_action : state.entries[symbol].actions) - if (existing_action == action) - return existing_action; + ParseTableEntry &entry = states[state_id].terminal_entries[index]; + entry.actions.push_back(action); + return *entry.actions.rbegin(); +} - 
state.entries[symbol].actions.push_back(action); - return *state.entries[symbol].actions.rbegin(); +void ParseTable::set_nonterminal_action(ParseStateId state_id, int index, + ParseStateId next_state_id) { + Symbol symbol(index, false); + symbols[symbol].structural = true; + states[state_id].nonterminal_entries[index] = next_state_id; } static bool has_entry(const ParseState &state, const ParseTableEntry &entry) { - for (const auto &pair : state.entries) + for (const auto &pair : state.terminal_entries) if (pair.second == entry) return true; return false; @@ -200,13 +204,16 @@ bool ParseTable::merge_state(size_t i, size_t j) { ParseState &state = states[i]; ParseState &other = states[j]; - for (auto &entry : state.entries) { - const Symbol &symbol = entry.first; + if (state.nonterminal_entries != other.nonterminal_entries) + return false; + + for (auto &entry : state.terminal_entries) { + Symbol symbol(entry.first, true); const vector &actions = entry.second.actions; - const auto &other_entry = other.entries.find(symbol); - if (other_entry == other.entries.end()) { - if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in() && symbol.is_token) + const auto &other_entry = other.terminal_entries.find(symbol.index); + if (other_entry == other.terminal_entries.end()) { + if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in()) return false; if (actions.back().type != ParseActionTypeReduce) return false; @@ -219,12 +226,12 @@ bool ParseTable::merge_state(size_t i, size_t j) { set symbols_to_merge; - for (auto &entry : other.entries) { - const Symbol &symbol = entry.first; + for (auto &entry : other.terminal_entries) { + Symbol symbol(entry.first, true); const vector &actions = entry.second.actions; - if (!state.entries.count(symbol)) { - if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in() && symbol.is_token) + if (!state.terminal_entries.count(symbol.index)) { + if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in()) return 
false; if (actions.back().type != ParseActionTypeReduce) return false; @@ -235,7 +242,7 @@ bool ParseTable::merge_state(size_t i, size_t j) { } for (const Symbol &symbol : symbols_to_merge) - state.entries[symbol] = other.entries.find(symbol)->second; + state.terminal_entries[symbol.index] = other.terminal_entries.find(symbol.index)->second; return true; } diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index cf1b2a1b..6c883c26 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -1,6 +1,7 @@ #ifndef COMPILER_PARSE_TABLE_H_ #define COMPILER_PARSE_TABLE_H_ +#include #include #include #include @@ -13,7 +14,7 @@ namespace tree_sitter { -typedef uint64_t ParseStateId; +typedef int64_t ParseStateId; enum ParseActionType { ParseActionTypeError, @@ -72,10 +73,11 @@ class ParseState { std::set expected_inputs() const; bool operator==(const ParseState &) const; bool merge(const ParseState &); - void each_advance_action(std::function); + void each_referenced_state(std::function); bool has_shift_action() const; - std::map entries; + std::map terminal_entries; + std::map nonterminal_entries; LexStateId lex_state_id; }; @@ -88,10 +90,9 @@ class ParseTable { public: std::set all_symbols() const; ParseStateId add_state(); - ParseAction &set_action(ParseStateId state_id, rules::Symbol symbol, - ParseAction action); - ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol, - ParseAction action); + ParseAction &add_terminal_action(ParseStateId state_id, int, ParseAction); + ParseAction &set_terminal_action(ParseStateId state_id, int index, ParseAction); + void set_nonterminal_action(ParseStateId state_id, int index, ParseStateId); bool merge_state(size_t i, size_t j); std::vector states; diff --git a/src/compiler/rules/symbol.cc b/src/compiler/rules/symbol.cc index cdfb78cf..697a3465 100644 --- a/src/compiler/rules/symbol.cc +++ b/src/compiler/rules/symbol.cc @@ -37,9 +37,9 @@ string Symbol::to_string() const { } bool 
Symbol::operator<(const Symbol &other) const { - if (!is_token && other.is_token) - return true; if (is_token && !other.is_token) + return true; + if (!is_token && other.is_token) return false; return (index < other.index); } diff --git a/src/runtime/language.c b/src/runtime/language.c index 0bc4ae7e..78ce0a7f 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -19,6 +19,7 @@ void ts_language_table_entry(const TSLanguage *self, TSStateId state, } action_index = 0; } else { + assert(symbol < self->token_count); action_index = self->parse_table[state * self->symbol_count + symbol]; } diff --git a/src/runtime/language.h b/src/runtime/language.h index 3941d875..7aefeed9 100644 --- a/src/runtime/language.h +++ b/src/runtime/language.h @@ -40,6 +40,23 @@ static inline const TSParseAction *ts_language_last_action( return NULL; } +static inline TSStateId ts_language_next_state(const TSLanguage *self, + TSStateId state, + TSSymbol symbol) { + if (symbol == ts_builtin_sym_error) { + return 0; + } else if (symbol < self->token_count) { + const TSParseAction *action = ts_language_last_action(self, state, symbol); + if (action && (action->type == TSParseActionTypeShift || action->type == TSParseActionTypeRecover)) { + return action->params.to_state; + } else { + return 0; + } + } else { + return self->parse_table[state * self->symbol_count + symbol]; + } +} + static inline bool ts_language_is_reusable(const TSLanguage *self, TSStateId state, TSSymbol symbol) { TableEntry entry; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 04556ebb..e5b6f517 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -87,11 +87,7 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { if (child->symbol == ts_builtin_sym_error) { state = ERROR_STATE; } else if (!child->extra) { - const TSParseAction *action = - ts_language_last_action(self->language, state, child->symbol); - assert(action && (action->type == TSParseActionTypeShift 
|| - action->type == TSParseActionTypeRecover)); - state = action->params.to_state; + state = ts_language_next_state(self->language, state, child->symbol); } ts_stack_push(self->stack, slice.version, child, pending, state); @@ -486,13 +482,8 @@ static Reduction parser__reduce(Parser *self, StackVersion version, parent->parse_state = state; } - const TSParseAction *action = - ts_language_last_action(language, state, symbol); - assert(action->type == TSParseActionTypeShift || - action->type == TSParseActionTypeRecover); - - if (action->type == TSParseActionTypeRecover && child_count > 1 && - allow_skipping) { + TSStateId next_state = ts_language_next_state(language, state, symbol); + if (state == ERROR_STATE && allow_skipping) { StackVersion other_version = ts_stack_duplicate_version(self->stack, slice.version); @@ -508,10 +499,10 @@ static Reduction parser__reduce(Parser *self, StackVersion version, ts_stack_remove_version(self->stack, other_version); } - parser__push(self, slice.version, parent, action->params.to_state); + parser__push(self, slice.version, parent, next_state); for (size_t j = parent->child_count; j < slice.trees.size; j++) { Tree *tree = slice.trees.contents[j]; - parser__push(self, slice.version, tree, action->params.to_state); + parser__push(self, slice.version, tree, next_state); } } @@ -540,26 +531,24 @@ static inline const TSParseAction *parser__reductions_after_sequence( if (child_count == tree_count_below) break; Tree *tree = trees_below->contents[trees_below->size - 1 - i]; - const TSParseAction *action = - ts_language_last_action(self->language, state, tree->symbol); - if (!action || action->type != TSParseActionTypeShift) + TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol); + if (next_state == ERROR_STATE) return NULL; - if (action->extra || tree->extra) - continue; - child_count++; - state = action->params.to_state; + if (next_state != state) { + child_count++; + state = next_state; + } } for (size_t i = 
0; i < trees_above->size; i++) { Tree *tree = trees_above->contents[i]; - const TSParseAction *action = - ts_language_last_action(self->language, state, tree->symbol); - if (!action || action->type != TSParseActionTypeShift) + TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol); + if (next_state == ERROR_STATE) return NULL; - if (action->extra || tree->extra) - continue; - child_count++; - state = action->params.to_state; + if (next_state != state) { + child_count++; + state = next_state; + } } const TSParseAction *actions = @@ -610,15 +599,9 @@ static StackIterateAction parser__error_repair_callback( continue; } - const TSParseAction *repair_symbol_action = - ts_language_last_action(self->language, state, repair->symbol); - if (!repair_symbol_action || - repair_symbol_action->type != TSParseActionTypeShift) - continue; - - TSStateId state_after_repair = repair_symbol_action->params.to_state; - if (!ts_language_last_action(self->language, state_after_repair, - lookahead_symbol)) + TSStateId state_after_repair = ts_language_next_state(self->language, state, repair->symbol); + if (state == ERROR_STATE || state_after_repair == ERROR_STATE || + !ts_language_last_action(self->language, state_after_repair, lookahead_symbol)) continue; if (count_needed_below_error != last_repair_count) { @@ -795,7 +778,7 @@ static bool parser__do_potential_reductions( size_t previous_version_count = ts_stack_version_count(self->stack); array_clear(&self->reduce_actions); - for (TSSymbol symbol = 0; symbol < self->language->symbol_count; symbol++) { + for (TSSymbol symbol = 0; symbol < self->language->token_count; symbol++) { TableEntry entry; ts_language_table_entry(self->language, state, symbol, &entry); for (size_t i = 0; i < entry.action_count; i++) { @@ -915,6 +898,9 @@ static void parser__handle_error(Parser *self, StackVersion version, ts_stack_push(self->stack, version, NULL, false, ERROR_STATE); while (ts_stack_version_count(self->stack) > 
previous_version_count) { ts_stack_push(self->stack, previous_version_count, NULL, false, ERROR_STATE); + + LOG_STACK(); + assert(ts_stack_merge(self->stack, version, previous_version_count)); } } @@ -982,6 +968,17 @@ static void parser__advance(Parser *self, StackVersion version, switch (action.type) { case TSParseActionTypeShift: { + bool extra = action.extra; + TSStateId next_state; + + if (action.extra) { + next_state = state; + LOG("shift_extra"); + } else { + next_state = action.params.to_state; + LOG("shift state:%u", next_state); + } + if (lookahead->child_count > 0) { if (parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) { @@ -992,20 +989,10 @@ static void parser__advance(Parser *self, StackVersion version, } } - action = *ts_language_last_action(self->language, state, - lookahead->symbol); + next_state = ts_language_next_state(self->language, state, lookahead->symbol); } - TSStateId next_state; - if (action.extra) { - next_state = state; - LOG("shift_extra"); - } else { - next_state = action.params.to_state; - LOG("shift state:%u", next_state); - } - - parser__shift(self, version, next_state, lookahead, action.extra); + parser__shift(self, version, next_state, lookahead, extra); if (lookahead == reusable_node->tree) parser__pop_reusable_node(reusable_node); From a89f8c086b61ad08dd7bbb8122b443ad11fcddc0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Nov 2016 09:31:32 -0800 Subject: [PATCH 3/6] Remove stray #include --- src/compiler/parse_table.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 6c883c26..ec4f5271 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -1,7 +1,6 @@ #ifndef COMPILER_PARSE_TABLE_H_ #define COMPILER_PARSE_TABLE_H_ -#include #include #include #include From 1fddb124b31421a03b45b38487f720cf3dfb2c29 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Nov 2016 09:32:05 -0800 Subject: [PATCH 4/6] Remove stray 
LOG_STACK() call --- src/runtime/parser.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index e5b6f517..8ee9be7b 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -898,9 +898,6 @@ static void parser__handle_error(Parser *self, StackVersion version, ts_stack_push(self->stack, version, NULL, false, ERROR_STATE); while (ts_stack_version_count(self->stack) > previous_version_count) { ts_stack_push(self->stack, previous_version_count, NULL, false, ERROR_STATE); - - LOG_STACK(); - assert(ts_stack_merge(self->stack, version, previous_version_count)); } } From 1118a9142ab522179ab0261d33dbb8802e685421 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Nov 2016 10:25:26 -0800 Subject: [PATCH 5/6] Introduce Symbol::Index type alias --- .../build_tables/distinctive_tokens_spec.cc | 4 +- .../parse_item_set_builder_spec.cc | 35 +++++++++++------ spec/compiler/build_tables/parse_item_spec.cc | 14 +++---- src/compiler/build_tables/build_lex_table.cc | 4 +- .../build_tables/build_parse_table.cc | 38 +++++++++---------- .../build_tables/lex_conflict_manager.h | 4 +- src/compiler/build_tables/lookahead_set.cc | 12 +++--- src/compiler/build_tables/lookahead_set.h | 8 ++-- src/compiler/build_tables/parse_item.cc | 4 +- .../build_tables/parse_item_set_builder.cc | 26 ++++++++----- .../build_tables/parse_item_set_builder.h | 4 +- src/compiler/build_tables/recovery_tokens.cc | 6 +-- src/compiler/build_tables/recovery_tokens.h | 2 +- src/compiler/parse_table.cc | 31 ++++++++------- src/compiler/parse_table.h | 6 +-- src/compiler/rules/symbol.cc | 12 ++++-- src/compiler/rules/symbol.h | 10 +++-- src/runtime/parser.c | 7 ++-- 18 files changed, 130 insertions(+), 97 deletions(-) diff --git a/spec/compiler/build_tables/distinctive_tokens_spec.cc b/spec/compiler/build_tables/distinctive_tokens_spec.cc index c5d197b3..104cd721 100644 --- a/spec/compiler/build_tables/distinctive_tokens_spec.cc +++ 
b/spec/compiler/build_tables/distinctive_tokens_spec.cc @@ -27,9 +27,7 @@ describe("recovery_tokens(rule)", []() { })), }; - AssertThat(recovery_tokens(grammar), Equals>({ - Symbol(1, true), - })); + AssertThat(recovery_tokens(grammar), Equals>({ 1 })); }); }); diff --git a/spec/compiler/build_tables/parse_item_set_builder_spec.cc b/spec/compiler/build_tables/parse_item_set_builder_spec.cc index 5e387e51..a1dd2231 100644 --- a/spec/compiler/build_tables/parse_item_set_builder_spec.cc +++ b/spec/compiler/build_tables/parse_item_set_builder_spec.cc @@ -1,8 +1,10 @@ #include "spec_helper.h" #include "compiler/syntax_grammar.h" +#include "compiler/lexical_grammar.h" #include "compiler/build_tables/parse_item_set_builder.h" #include "compiler/build_tables/lookahead_set.h" #include "compiler/rules/built_in_symbols.h" +#include "helpers/rule_helpers.h" using namespace build_tables; using namespace rules; @@ -10,6 +12,17 @@ using namespace rules; START_TEST describe("ParseItemSetBuilder", []() { + vector lexical_variables; + for (size_t i = 0; i < 20; i++) { + lexical_variables.push_back(Variable{ + "token_" + to_string(i), + VariableTypeNamed, + blank(), + }); + } + + LexicalGrammar lexical_grammar{lexical_variables, {}}; + it("adds items at the beginnings of referenced rules", [&]() { SyntaxGrammar grammar{{ SyntaxVariable("rule0", VariableTypeNamed, { @@ -42,29 +55,29 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { ParseItem(Symbol(0), production(0, 0), 0), - LookaheadSet({ Symbol(10, true) }), + LookaheadSet({ 10 }), } }); - ParseItemSetBuilder item_set_builder(grammar); + ParseItemSetBuilder item_set_builder(grammar, lexical_grammar); item_set_builder.apply_transitive_closure(&item_set); AssertThat(item_set, Equals(ParseItemSet({ { ParseItem(Symbol(0), production(0, 0), 0), - LookaheadSet({ Symbol(10, true) }) + LookaheadSet({ 10 }) }, { ParseItem(Symbol(1), production(1, 0), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, { 
ParseItem(Symbol(1), production(1, 1), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, { ParseItem(Symbol(2), production(2, 0), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, }))); }); @@ -93,25 +106,25 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { ParseItem(Symbol(0), production(0, 0), 0), - LookaheadSet({ Symbol(10, true) }), + LookaheadSet({ 10 }), } }); - ParseItemSetBuilder item_set_builder(grammar); + ParseItemSetBuilder item_set_builder(grammar, lexical_grammar); item_set_builder.apply_transitive_closure(&item_set); AssertThat(item_set, Equals(ParseItemSet({ { ParseItem(Symbol(0), production(0, 0), 0), - LookaheadSet({ Symbol(10, true) }) + LookaheadSet({ 10 }) }, { ParseItem(Symbol(1), production(1, 0), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, { ParseItem(Symbol(1), production(1, 1), 0), - LookaheadSet({ Symbol(11, true) }) + LookaheadSet({ 11 }) }, }))); }); diff --git a/spec/compiler/build_tables/parse_item_spec.cc b/spec/compiler/build_tables/parse_item_spec.cc index 51c3e231..83c9121a 100644 --- a/spec/compiler/build_tables/parse_item_spec.cc +++ b/spec/compiler/build_tables/parse_item_spec.cc @@ -91,25 +91,25 @@ describe("ParseItemSet::transitions())", [&]() { // Two symbols into the first production for rule_0 { ParseItem(Symbol(0), production(0, 0), 2), - LookaheadSet({ Symbol(21, true) }) + LookaheadSet({ 21 }) }, // Two symbols into the second production for rule_0 { ParseItem(Symbol(0), production(0, 1), 2), - LookaheadSet({ Symbol(21, true) }) + LookaheadSet({ 21 }) }, // At the beginning of the first production for rule_1 { ParseItem(Symbol(1), production(1, 0), 0), - LookaheadSet({ Symbol(22, true) }) + LookaheadSet({ 22 }) }, // At the end of the first production for rule_2 { ParseItem(Symbol(2), production(2, 0), 1), - LookaheadSet({ Symbol(22, true) }) + LookaheadSet({ 22 }) } }); @@ -122,7 +122,7 @@ describe("ParseItemSet::transitions())", [&]() { 
ParseItemSet({ { ParseItem(Symbol(0), production(0, 0), 3), - LookaheadSet({ Symbol(21, true) }) + LookaheadSet({ 21 }) } }), PrecedenceRange(5, 5) @@ -137,11 +137,11 @@ describe("ParseItemSet::transitions())", [&]() { ParseItemSet({ { ParseItem(Symbol(0), production(0, 1), 3), - LookaheadSet({ Symbol(21, true) }) + LookaheadSet({ 21 }) }, { ParseItem(Symbol(1), production(1, 0), 1), - LookaheadSet({ Symbol(22, true) }) + LookaheadSet({ 22 }) }, }), PrecedenceRange(6, 7) diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 0b75c368..94100349 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -117,7 +117,7 @@ class LexTableBuilder { for (auto &entry : state.terminal_entries) { auto homonyms = conflict_manager.possible_homonyms.find(entry.first); if (homonyms != conflict_manager.possible_homonyms.end()) - for (int homonym : homonyms->second) + for (Symbol::Index homonym : homonyms->second) if (state.terminal_entries.count(homonym)) { entry.second.reusable = false; break; @@ -128,7 +128,7 @@ class LexTableBuilder { auto extensions = conflict_manager.possible_extensions.find(entry.first); if (extensions != conflict_manager.possible_extensions.end()) - for (int extension : extensions->second) + for (Symbol::Index extension : extensions->second) if (state.terminal_entries.count(extension)) { entry.second.depends_on_lookahead = true; break; diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 8e69e228..a8d38973 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -1,4 +1,4 @@ -#include "compiler/build_tables/build_parse_table.h" + #include "compiler/build_tables/build_parse_table.h" #include #include #include @@ -49,7 +49,7 @@ class ParseTableBuilder { const LexicalGrammar &lex_grammar) : grammar(grammar), 
lexical_grammar(lex_grammar), - item_set_builder(grammar), + item_set_builder(grammar, lex_grammar), allow_any_conflict(false) {} pair build() { @@ -64,7 +64,7 @@ class ParseTableBuilder { add_parse_state(ParseItemSet({ { ParseItem(rules::START(), start_production, 0), - LookaheadSet({ END_OF_INPUT() }), + LookaheadSet({ END_OF_INPUT().index }), }, })); @@ -111,8 +111,8 @@ class ParseTableBuilder { void build_error_parse_state() { ParseState error_state; - for (const Symbol &symbol : parse_table.mergeable_symbols) { - add_out_of_context_parse_state(&error_state, symbol); + for (const Symbol::Index index : parse_table.mergeable_symbols) { + add_out_of_context_parse_state(&error_state, Symbol(index, true)); } for (const Symbol &symbol : grammar.extra_tokens) { @@ -167,7 +167,7 @@ class ParseTableBuilder { if (symbol.is_token) { ParseAction *new_action = add_terminal_action( - state_id, symbol, ParseAction::Shift(0, precedence), item_set); + state_id, symbol.index, ParseAction::Shift(0, precedence), item_set); if (new_action) { new_action->state_index = add_parse_state(next_item_set); } @@ -193,7 +193,7 @@ class ParseTableBuilder { status.associativity, *item.production); } - for (const Symbol &lookahead : *lookahead_symbols.entries) { + for (const Symbol::Index lookahead : *lookahead_symbols.entries) { add_terminal_action(state_id, lookahead, action, item_set); } } @@ -253,15 +253,15 @@ class ParseTableBuilder { remove_duplicate_states(&parse_table); } - ParseAction *add_terminal_action(ParseStateId state_id, Symbol lookahead, + ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead, const ParseAction &new_action, const ParseItemSet &item_set) { const ParseState &state = parse_table.states[state_id]; - const auto ¤t_entry = state.terminal_entries.find(lookahead.index); + const auto ¤t_entry = state.terminal_entries.find(lookahead); if (current_entry == state.terminal_entries.end()) - return &parse_table.set_terminal_action(state_id, 
lookahead.index, new_action); + return &parse_table.set_terminal_action(state_id, lookahead, new_action); if (allow_any_conflict) - return &parse_table.add_terminal_action(state_id, lookahead.index, new_action); + return &parse_table.add_terminal_action(state_id, lookahead, new_action); const ParseAction old_action = current_entry->second.actions[0]; auto resolution = conflict_manager.resolve(new_action, old_action); @@ -269,7 +269,7 @@ class ParseTableBuilder { switch (resolution.second) { case ConflictTypeNone: if (resolution.first) { - return &parse_table.set_terminal_action(state_id, lookahead.index, new_action); + return &parse_table.set_terminal_action(state_id, lookahead, new_action); } break; @@ -277,7 +277,7 @@ class ParseTableBuilder { if (resolution.first) { if (old_action.type == ParseActionTypeReduce) fragile_productions.insert(old_action.production); - return &parse_table.set_terminal_action(state_id, lookahead.index, new_action); + return &parse_table.set_terminal_action(state_id, lookahead, new_action); } else { if (new_action.type == ParseActionTypeReduce) fragile_productions.insert(new_action.production); @@ -291,7 +291,7 @@ class ParseTableBuilder { fragile_productions.insert(old_action.production); if (new_action.type == ParseActionTypeReduce) fragile_productions.insert(new_action.production); - return &parse_table.add_terminal_action(state_id, lookahead.index, new_action); + return &parse_table.add_terminal_action(state_id, lookahead, new_action); } break; } @@ -301,7 +301,7 @@ class ParseTableBuilder { } bool handle_unresolved_conflict(const ParseItemSet &item_set, - const Symbol &lookahead) { + const Symbol::Index lookahead) { set involved_symbols; set reduce_items; set core_shift_items; @@ -319,12 +319,12 @@ class ParseTableBuilder { } } else { if (item.step_index > 0) { - set first_set = get_first_set(next_symbol); - if (first_set.count(lookahead)) { + LookaheadSet first_set = item_set_builder.get_first_set(next_symbol); + if 
(first_set.contains(lookahead)) { involved_symbols.insert(item.lhs()); core_shift_items.insert(item); } - } else if (next_symbol == lookahead) { + } else if (next_symbol.is_token && next_symbol.index == lookahead) { other_shift_items.insert(item); } } @@ -334,7 +334,7 @@ class ParseTableBuilder { if (involved_symbols == conflict_set) return true; - string description = "Lookahead symbol: " + symbol_name(lookahead) + "\n"; + string description = "Lookahead symbol: " + symbol_name(Symbol(lookahead, true)) + "\n"; if (!reduce_items.empty()) { description += "Reduce items:\n"; diff --git a/src/compiler/build_tables/lex_conflict_manager.h b/src/compiler/build_tables/lex_conflict_manager.h index 9777dc36..0d3177dd 100644 --- a/src/compiler/build_tables/lex_conflict_manager.h +++ b/src/compiler/build_tables/lex_conflict_manager.h @@ -21,8 +21,8 @@ class LexConflictManager { const AcceptTokenAction &); bool resolve(const AcceptTokenAction &, const AcceptTokenAction &); - std::map> possible_homonyms; - std::map> possible_extensions; + std::map> possible_homonyms; + std::map> possible_extensions; }; } // namespace build_tables diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index 239bc029..1ecb0baf 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -12,8 +12,8 @@ using rules::Symbol; LookaheadSet::LookaheadSet() : entries(nullptr) {} -LookaheadSet::LookaheadSet(const set &symbols) - : entries(make_shared>(symbols)) {} +LookaheadSet::LookaheadSet(const set &symbols) + : entries(make_shared>(symbols)) {} bool LookaheadSet::empty() const { return !entries.get() || entries->empty(); @@ -23,7 +23,7 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const { return *entries == *other.entries; } -bool LookaheadSet::contains(const Symbol &symbol) const { +bool LookaheadSet::contains(const Symbol::Index &symbol) const { return entries->find(symbol) != entries->end(); 
} @@ -31,15 +31,15 @@ bool LookaheadSet::insert_all(const LookaheadSet &other) { if (!other.entries.get()) return false; if (!entries.get()) - entries = make_shared>(); + entries = make_shared>(); size_t previous_size = entries->size(); entries->insert(other.entries->begin(), other.entries->end()); return entries->size() > previous_size; } -bool LookaheadSet::insert(const Symbol &symbol) { +bool LookaheadSet::insert(const Symbol::Index &symbol) { if (!entries.get()) - entries = make_shared>(); + entries = make_shared>(); return entries->insert(symbol).second; } diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h index e62ee34d..fe99b4d5 100644 --- a/src/compiler/build_tables/lookahead_set.h +++ b/src/compiler/build_tables/lookahead_set.h @@ -11,15 +11,15 @@ namespace build_tables { class LookaheadSet { public: LookaheadSet(); - explicit LookaheadSet(const std::set &); + explicit LookaheadSet(const std::set &); bool empty() const; bool operator==(const LookaheadSet &) const; - bool contains(const rules::Symbol &) const; + bool contains(const rules::Symbol::Index &) const; bool insert_all(const LookaheadSet &); - bool insert(const rules::Symbol &); + bool insert(const rules::Symbol::Index &); - std::shared_ptr> entries; + std::shared_ptr> entries; }; } // namespace build_tables diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 470200a6..5054e578 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -102,8 +102,8 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const { const LookaheadSet &lookahead_set = pair.second; result ^= hash()(lookahead_set.entries->size()); - for (auto &symbol : *pair.second.entries) - result ^= hash()(symbol); + for (Symbol::Index index : *pair.second.entries) + result ^= hash()(index); } return result; } diff --git a/src/compiler/build_tables/parse_item_set_builder.cc 
b/src/compiler/build_tables/parse_item_set_builder.cc index 93ffbf78..8259662f 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -3,6 +3,7 @@ #include #include #include "compiler/syntax_grammar.h" +#include "compiler/lexical_grammar.h" #include "compiler/rules/built_in_symbols.h" namespace tree_sitter { @@ -19,7 +20,8 @@ using std::make_shared; using rules::Symbol; using rules::NONE; -static map build_first_sets(const SyntaxGrammar &grammar) { +static map build_first_sets(const SyntaxGrammar &grammar, + const LexicalGrammar &lexical_grammar) { map result; vector symbol_stack; set processed_symbols; @@ -35,7 +37,7 @@ static map build_first_sets(const SyntaxGrammar &grammar) Symbol current_symbol = symbol_stack.back(); symbol_stack.pop_back(); if (current_symbol.is_token) { - first_set.insert(current_symbol); + first_set.insert(current_symbol.index); } else if (processed_symbols.insert(current_symbol).second) { for (const Production &production : grammar.productions(current_symbol)) { if (!production.empty()) { @@ -48,11 +50,17 @@ static map build_first_sets(const SyntaxGrammar &grammar) result.insert({symbol, first_set}); } + for (int i = 0; i < lexical_grammar.variables.size(); i++) { + Symbol symbol(i, true); + result.insert({symbol, LookaheadSet({ i })}); + } + return result; } -ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar) : - grammar{&grammar}, first_sets{build_first_sets(grammar)} { +ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, + const LexicalGrammar &lexical_grammar) : + grammar{&grammar}, first_sets{build_first_sets(grammar, lexical_grammar)} { } void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { @@ -88,11 +96,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { next_lookahead_symbols = lookahead_symbols; } else { Symbol symbol_after_next = item.production->at(next_step).symbol; 
- if (symbol_after_next.is_token) { - next_lookahead_symbols.insert(symbol_after_next); - } else { - next_lookahead_symbols = first_sets.find(symbol_after_next)->second; - } + next_lookahead_symbols = first_sets.find(symbol_after_next)->second; } // Add each of the next symbol's productions to be processed recursively. @@ -105,5 +109,9 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { } } +LookaheadSet ParseItemSetBuilder::get_first_set(rules::Symbol &symbol) const { + return first_sets.find(symbol)->second; +} + } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index 3d8eb5c6..8043437e 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -8,6 +8,7 @@ namespace tree_sitter { struct SyntaxGrammar; +struct LexicalGrammar; namespace build_tables { @@ -17,8 +18,9 @@ class ParseItemSetBuilder { std::vector> items_to_process; public: - ParseItemSetBuilder(const SyntaxGrammar &); + ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &); void apply_transitive_closure(ParseItemSet *); + LookaheadSet get_first_set(rules::Symbol &) const; }; } // namespace build_tables diff --git a/src/compiler/build_tables/recovery_tokens.cc b/src/compiler/build_tables/recovery_tokens.cc index e8d96aad..479de6b8 100644 --- a/src/compiler/build_tables/recovery_tokens.cc +++ b/src/compiler/build_tables/recovery_tokens.cc @@ -47,8 +47,8 @@ class FirstCharacters : public CharacterAggregator {}; class LastCharacters : public CharacterAggregator {}; class AllCharacters : public CharacterAggregator {}; -set recovery_tokens(const LexicalGrammar &grammar) { - set result; +set recovery_tokens(const LexicalGrammar &grammar) { + set result; AllCharacters all_separator_characters; for (const rule_ptr &separator : grammar.separators) @@ -79,7 +79,7 @@ set 
recovery_tokens(const LexicalGrammar &grammar) { !all_characters.result.intersects(all_separator_characters.result); if ((has_distinct_start && has_distinct_end) || has_no_separators) - result.insert(Symbol(i, true)); + result.insert(i); } return result; diff --git a/src/compiler/build_tables/recovery_tokens.h b/src/compiler/build_tables/recovery_tokens.h index c97a8cfd..4873b5a9 100644 --- a/src/compiler/build_tables/recovery_tokens.h +++ b/src/compiler/build_tables/recovery_tokens.h @@ -11,7 +11,7 @@ struct LexicalGrammar; namespace build_tables { -std::set recovery_tokens(const LexicalGrammar &); +std::set recovery_tokens(const LexicalGrammar &); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 47218d36..944036a6 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -167,13 +167,15 @@ ParseStateId ParseTable::add_state() { return states.size() - 1; } -ParseAction &ParseTable::set_terminal_action(ParseStateId state_id, int index, - ParseAction action) { +ParseAction &ParseTable::set_terminal_action(ParseStateId state_id, + Symbol::Index index, + ParseAction action) { states[state_id].terminal_entries[index].actions.clear(); return add_terminal_action(state_id, index, action); } -ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, int index, +ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, + Symbol::Index index, ParseAction action) { Symbol symbol(index, true); if (action.type == ParseActionTypeShift && action.extra) @@ -186,7 +188,8 @@ ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, int index, return *entry.actions.rbegin(); } -void ParseTable::set_nonterminal_action(ParseStateId state_id, int index, +void ParseTable::set_nonterminal_action(ParseStateId state_id, + Symbol::Index index, ParseStateId next_state_id) { Symbol symbol(index, false); symbols[symbol].structural = true; @@ -208,12 +211,12 @@ 
bool ParseTable::merge_state(size_t i, size_t j) { return false; for (auto &entry : state.terminal_entries) { - Symbol symbol(entry.first, true); + Symbol::Index index = entry.first; const vector &actions = entry.second.actions; - const auto &other_entry = other.terminal_entries.find(symbol.index); + const auto &other_entry = other.terminal_entries.find(index); if (other_entry == other.terminal_entries.end()) { - if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in()) + if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index)) return false; if (actions.back().type != ParseActionTypeReduce) return false; @@ -224,25 +227,25 @@ bool ParseTable::merge_state(size_t i, size_t j) { } } - set symbols_to_merge; + set symbols_to_merge; for (auto &entry : other.terminal_entries) { - Symbol symbol(entry.first, true); + Symbol::Index index = entry.first; const vector &actions = entry.second.actions; - if (!state.terminal_entries.count(symbol.index)) { - if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in()) + if (!state.terminal_entries.count(index)) { + if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index)) return false; if (actions.back().type != ParseActionTypeReduce) return false; if (!has_entry(state, entry.second)) return false; - symbols_to_merge.insert(symbol); + symbols_to_merge.insert(index); } } - for (const Symbol &symbol : symbols_to_merge) - state.terminal_entries[symbol.index] = other.terminal_entries.find(symbol.index)->second; + for (const Symbol::Index &index : symbols_to_merge) + state.terminal_entries[index] = other.terminal_entries.find(index)->second; return true; } diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index ec4f5271..5f660ecd 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -75,8 +75,8 @@ class ParseState { void each_referenced_state(std::function); bool has_shift_action() const; - std::map terminal_entries; - std::map nonterminal_entries; + 
std::map terminal_entries; + std::map nonterminal_entries; LexStateId lex_state_id; }; @@ -97,7 +97,7 @@ class ParseTable { std::vector states; std::map symbols; - std::set mergeable_symbols; + std::set mergeable_symbols; }; } // namespace tree_sitter diff --git a/src/compiler/rules/symbol.cc b/src/compiler/rules/symbol.cc index 697a3465..96c4bd60 100644 --- a/src/compiler/rules/symbol.cc +++ b/src/compiler/rules/symbol.cc @@ -10,9 +10,9 @@ using std::string; using std::to_string; using std::hash; -Symbol::Symbol(int index) : index(index), is_token(false) {} +Symbol::Symbol(Symbol::Index index) : index(index), is_token(false) {} -Symbol::Symbol(int index, bool is_token) : index(index), is_token(is_token) {} +Symbol::Symbol(Symbol::Index index, bool is_token) : index(index), is_token(is_token) {} bool Symbol::operator==(const Symbol &other) const { return (other.index == index) && (other.is_token == is_token); @@ -24,7 +24,7 @@ bool Symbol::operator==(const Rule &rule) const { } size_t Symbol::hash_code() const { - return hash()(index) ^ hash()(is_token); + return hash()(index) ^ hash()(is_token); } rule_ptr Symbol::copy() const { @@ -44,10 +44,14 @@ bool Symbol::operator<(const Symbol &other) const { return (index < other.index); } -bool Symbol::is_built_in() const { +bool Symbol::is_built_in(Symbol::Index index) { return index < 0; } +bool Symbol::is_built_in() const { + return is_built_in(index); +} + void Symbol::accept(Visitor *visitor) const { visitor->visit(this); } diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index 81d74d85..4ae9ece3 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -9,8 +9,11 @@ namespace rules { class Symbol : public Rule { public: - explicit Symbol(int index); - Symbol(int index, bool is_token); + typedef int Index; + + + explicit Symbol(Index index); + Symbol(Index index, bool is_token); bool operator==(const Symbol &other) const; bool operator==(const Rule &other) const; @@ -21,9 
+24,10 @@ class Symbol : public Rule { void accept(Visitor *visitor) const; bool operator<(const Symbol &other) const; + static bool is_built_in(Index); bool is_built_in() const; - int index; + Index index; bool is_token; }; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 8ee9be7b..a9c9e5bb 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -840,9 +840,10 @@ static StackIterateAction parser__repair_consumed_error_callback( SkipPrecedingTokensSession *session = payload; Parser *self = session->parser; TSSymbol lookahead_symbol = session->lookahead_symbol; - const TSParseAction *action = - ts_language_last_action(self->language, state, lookahead_symbol); - if (action && action->type == TSParseActionTypeReduce) { + size_t action_count; + const TSParseAction *actions = + ts_language_actions(self->language, state, lookahead_symbol, &action_count); + if (action_count > 0 && actions[0].type == TSParseActionTypeReduce) { return StackIteratePop | StackIterateStop; } } From 8edb8df5300b5d67c1e8a100c5dbc580499fdbed Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Nov 2016 10:35:33 -0800 Subject: [PATCH 6/6] Remove extraneous Language methods --- src/runtime/language.h | 37 ++++++++++--------------------------- src/runtime/parser.c | 13 ++++++------- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/src/runtime/language.h b/src/runtime/language.h index 7aefeed9..2ecf83fb 100644 --- a/src/runtime/language.h +++ b/src/runtime/language.h @@ -15,10 +15,9 @@ typedef struct { bool depends_on_lookahead; } TableEntry; -void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, - TableEntry *); +void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *); -bool ts_language_symbol_is_in_progress(const TSLanguage *, TSStateId, TSSymbol); +TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); static inline const TSParseAction *ts_language_actions(const TSLanguage *self, TSStateId 
state, @@ -30,42 +29,26 @@ static inline const TSParseAction *ts_language_actions(const TSLanguage *self, return entry.actions; } -static inline const TSParseAction *ts_language_last_action( - const TSLanguage *self, TSStateId state, TSSymbol symbol) { - TableEntry entry; - ts_language_table_entry(self, state, symbol, &entry); - if (entry.action_count) - return &entry.actions[entry.action_count - 1]; - else - return NULL; -} - static inline TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol) { if (symbol == ts_builtin_sym_error) { return 0; } else if (symbol < self->token_count) { - const TSParseAction *action = ts_language_last_action(self, state, symbol); - if (action && (action->type == TSParseActionTypeShift || action->type == TSParseActionTypeRecover)) { - return action->params.to_state; - } else { - return 0; + size_t count; + const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); + if (count > 0) { + TSParseAction action = actions[count - 1]; + if (action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover) { + return action.params.to_state; + } } + return 0; } else { return self->parse_table[state * self->symbol_count + symbol]; } } -static inline bool ts_language_is_reusable(const TSLanguage *self, - TSStateId state, TSSymbol symbol) { - TableEntry entry; - ts_language_table_entry(self, state, symbol, &entry); - return entry.is_reusable; -} - -TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); - #ifdef __cplusplus } #endif diff --git a/src/runtime/parser.c b/src/runtime/parser.c index a9c9e5bb..7aee210f 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -600,8 +600,12 @@ static StackIterateAction parser__error_repair_callback( } TSStateId state_after_repair = ts_language_next_state(self->language, state, repair->symbol); - if (state == ERROR_STATE || state_after_repair == ERROR_STATE || - !ts_language_last_action(self->language, 
state_after_repair, lookahead_symbol)) + if (state == ERROR_STATE || state_after_repair == ERROR_STATE) + continue; + + size_t action_count; + ts_language_actions(self->language, state_after_repair, lookahead_symbol, &action_count); + if (action_count == 0) continue; if (count_needed_below_error != last_repair_count) { @@ -1030,7 +1034,6 @@ static void parser__advance(Parser *self, StackVersion version, LOG("accept"); parser__accept(self, version, lookahead); - ts_tree_release(lookahead); return; } @@ -1042,14 +1045,10 @@ static void parser__advance(Parser *self, StackVersion version, lookahead = reusable_node->tree; ts_tree_retain(lookahead); } - action = - *ts_language_last_action(self->language, state, lookahead->symbol); parser__recover(self, version, action.params.to_state, lookahead); - if (lookahead == reusable_node->tree) parser__pop_reusable_node(reusable_node); - ts_tree_release(lookahead); return; }