From 0e2bbbd7ee300c0357fd4d2b03958f4ddafc971f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jun 2016 09:54:08 -0700 Subject: [PATCH 1/9] Compress parse table by allowing reductions w/ unexpected lookaheads --- src/compiler/build_tables/build_lex_table.cc | 2 +- .../build_tables/build_parse_table.cc | 10 ++-- .../build_tables/remove_duplicate_states.h | 14 +++--- src/compiler/lex_table.cc | 4 ++ src/compiler/lex_table.h | 4 +- src/compiler/parse_table.cc | 47 +++++++++++++++++++ src/compiler/parse_table.h | 7 +++ 7 files changed, 76 insertions(+), 12 deletions(-) diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 93de59f0..56de23cf 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -147,7 +147,7 @@ class LexTableBuilder { } auto replacements = - remove_duplicate_states(&lex_table.states); + remove_duplicate_states(&lex_table); for (ParseState &parse_state : parse_table->states) { auto replacement = replacements.find(parse_state.lex_state_id); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 3ef8e6ea..d027e1e8 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -70,6 +70,11 @@ class ParseTableBuilder { if (error.type != TSCompileErrorTypeNone) return { parse_table, error }; + for (const ParseState &state : parse_table.states) + for (const auto &pair1 : state.entries) + for (const auto &pair2 : state.entries) + parse_table.symbols[pair1.first].compatible_symbols.insert(pair2.first); + build_error_parse_state(); allow_any_conflict = true; @@ -134,8 +139,7 @@ class ParseTableBuilder { const ParseItemSet &item_set = recovery_states[symbol]; if (!item_set.entries.empty()) { ParseStateId state = add_parse_state(item_set); - error_state->entries[symbol].actions.push_back( - ParseAction::Recover(state)); + error_state->entries[symbol].actions.push_back(ParseAction::Recover(state)); } } @@ -268,7 +272,7 @@ class ParseTableBuilder { } void remove_duplicate_parse_states() { - remove_duplicate_states(&parse_table.states); + remove_duplicate_states(&parse_table); } ParseAction *add_action(ParseStateId state_id, Symbol lookahead, diff --git a/src/compiler/build_tables/remove_duplicate_states.h b/src/compiler/build_tables/remove_duplicate_states.h index b70bb351..601737a5 100644 --- a/src/compiler/build_tables/remove_duplicate_states.h +++ b/src/compiler/build_tables/remove_duplicate_states.h @@ -7,15 +7,15 @@ namespace tree_sitter { namespace build_tables { -template -std::map remove_duplicate_states(std::vector *states) { +template +std::map remove_duplicate_states(TableType *table) { std::map replacements; while (true) { std::map duplicates; - for (size_t i = 0, size = states->size(); i < size; i++) + for (size_t i = 0, size = table->states.size(); i < size; i++) for (size_t j = 0; j < i; j++) - if (states->at(i) == states->at(j)) { + if (!duplicates.count(j) && table->merge_state(j, i)) { duplicates.insert({ i, j }); break; } @@ -24,7 +24,7 @@ std::map remove_duplicate_states(std::vector *states) break; std::map new_replacements; - for (size_t i = 0, size = states->size(); i < size; i++) { + for (size_t i = 0, size = table->states.size(); i < size; i++) { size_t new_state_index = i; auto duplicate = duplicates.find(i); if (duplicate != duplicates.end()) @@ -45,7 +45,7 @@ std::map remove_duplicate_states(std::vector *states) replacement.second = new_state_index; } - for (StateType &state : *states) + for (auto &state : table->states) state.each_advance_action([&new_replacements](ActionType *action) { auto new_replacement = new_replacements.find(action->state_index); if (new_replacement != new_replacements.end()) @@ -53,7 +53,7 @@ std::map remove_duplicate_states(std::vector *states) }); for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) - states->erase(states->begin() + i->first); + table->states.erase(table->states.begin() + i->first); } return replacements; diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index 946fd712..852586e5 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -71,4 +71,8 @@ LexState &LexTable::state(LexStateId id) { return states[id]; } +bool LexTable::merge_state(size_t i, size_t j) { + return states[i] == states[j]; +} + } // namespace tree_sitter diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index f5f8b4ce..d508e9da 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -22,7 +22,7 @@ struct AdvanceAction { AdvanceAction(); AdvanceAction(size_t, PrecedenceRange, bool); - bool operator==(const AdvanceAction &action) const; + bool operator==(const AdvanceAction &other) const; size_t state_index; PrecedenceRange precedence_range; @@ -66,6 +66,8 @@ class LexTable { LexStateId add_state(); LexState &state(LexStateId state_id); std::vector states; + + bool merge_state(size_t i, size_t j); }; } // namespace tree_sitter diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index cd2a549a..6efdac28 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -190,4 +190,51 @@ ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol, return *state.entries[symbol].actions.rbegin(); } +static bool has_entry(const ParseState &state, const ParseTableEntry &entry) { + for (const auto &pair : state.entries) + if (pair.second == entry) + return true; + return false; +} + +bool ParseTable::merge_state(size_t i, size_t j) { + ParseState &state = states[i]; + ParseState &other = states[j]; + + for (auto &entry : state.entries) { + const Symbol &symbol = entry.first; + const vector &actions = entry.second.actions; + + const auto &other_entry = other.entries.find(symbol); + if (other_entry == other.entries.end()) { + if (actions.back().type != ParseActionTypeReduce) + return false; + if (!has_entry(other, entry.second)) + return false; + } else if (entry.second != other_entry->second) { + return false; + } + } + + set symbols_to_merge; + + for (auto &entry : other.entries) { + const Symbol &symbol = entry.first; + const vector &actions = entry.second.actions; + + if (!state.entries.count(symbol)) { + if (actions.back().type != ParseActionTypeReduce) + return false; + if (!has_entry(state, entry.second)) + return false; + symbols_to_merge.insert(symbol); + } + } + + for (const Symbol &symbol : symbols_to_merge) + state.entries[symbol] = other.entries.find(symbol)->second; + + return true; +} + } // namespace tree_sitter diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 4ce13bd5..4ffcb273 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -61,6 +61,10 @@ struct ParseTableEntry { ParseTableEntry(); ParseTableEntry(const std::vector &, bool, bool); bool operator==(const ParseTableEntry &other) const; + + inline bool operator!=(const ParseTableEntry &other) const { + return !operator==(other); + } }; class ParseState { @@ -68,6 +72,7 @@ class ParseState { ParseState(); std::set expected_inputs() const; bool operator==(const ParseState &) const; + bool merge(const ParseState &); void each_advance_action(std::function); std::map entries; @@ -77,6 +82,7 @@ class ParseState { struct ParseTableSymbolMetadata { bool extra; bool structural; + std::set compatible_symbols; }; class ParseTable { @@ -87,6 +93,7 @@ class ParseTable { ParseAction action); ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action); + bool merge_state(size_t i, size_t j); std::vector states; std::map symbols; From c3a242740bb3d91ac3e17a5fb5f0d7abad92ee72 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 1 Jul 2016 15:08:19 -0700 Subject: [PATCH 2/9] Allow lookahead to be broken down further after performing reductions --- spec/runtime/parser_spec.cc | 2 +- src/runtime/parser.c | 252 ++++++++++++++++++++++-------------- src/runtime/parser.h | 8 ++ 3 files changed, 164 insertions(+), 98 deletions(-) diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index ff6b05fe..888a229c 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -224,7 +224,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (identifier)))))"); - insert_text(strlen("x ^ (100 + abc"), ".d"); + insert_text(strlen("x * (100 + abc"), ".d"); assert_root_node( "(program (expression_statement (math_op " diff --git a/src/runtime/parser.c b/src/runtime/parser.c index d102c682..1dba1876 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -49,11 +49,6 @@ static const unsigned ERROR_COST_THRESHOLD = 3; -typedef struct { - TSTree *tree; - size_t char_index; -} ReusableNode; - typedef struct { TSParser *parser; TSSymbol lookahead_symbol; @@ -112,8 +107,6 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self, TSStateId state = ts_stack_top_state(self->stack, slice.version); TSTree *parent = *array_front(&slice.trees); - LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); - for (size_t j = 0; j < parent->child_count; j++) { TSTree *child = parent->children[j]; pending = child->child_count > 0; @@ -136,6 +129,9 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self, CHECK(ts_parser__push(self, slice.version, tree, state)); } + LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); + LOG_STACK(); + ts_tree_release(parent); array_delete(&slice.trees); } @@ -197,41 +193,31 @@ static bool ts_parser__condense_stack(TSParser *self) { return result; } -static bool ts_parser__can_reuse(TSParser *self, StackVersion version, - TSTree *tree) { - if (tree->symbol == ts_builtin_sym_error) { - LOG("cant_reuse_error tree:%s", SYM_NAME(tree->symbol)); - return false; - } - - if (tree->has_changes) { - LOG("cant_reuse_changed tree:%s", SYM_NAME(tree->symbol)); - return false; - } - - TSStateId state = ts_stack_top_state(self->stack, version); +static bool ts_parser__can_reuse(TSParser *self, TSStateId state, + TableEntry *table_entry, TSTree *tree) { if (tree->parse_state != state) { if (ts_tree_is_fragile(tree)) { - LOG("cant_reuse_fragile sym:%s", SYM_NAME(tree->symbol)); + LOG("cant_reuse_fragile sym:%s, size:%lu", SYM_NAME(tree->symbol), + tree->size.chars); return false; } - TableEntry entry; - ts_language_table_entry(self->language, state, tree->symbol, &entry); - - if (!entry.is_reusable) { - LOG("cant_reuse_ambiguous sym:%s", SYM_NAME(tree->symbol)); + if (!table_entry->is_reusable) { + LOG("cant_reuse_ambiguous sym:%s, size:%lu", SYM_NAME(tree->symbol), + tree->size.chars); return false; } - if (entry.action_count == 0) { - LOG("cant_reuse_unexpected sym:%s", SYM_NAME(tree->symbol)); + if (table_entry->action_count == 0) { + LOG("cant_reuse_unexpected sym:%s, size:%lu", SYM_NAME(tree->symbol), + tree->size.chars); return false; } - TSParseAction action = entry.actions[entry.action_count - 1]; + TSParseAction action = table_entry->actions[table_entry->action_count - 1]; if (tree->extra != action.extra) { - LOG("cant_reuse_extra sym:%s", SYM_NAME(tree->symbol)); + LOG("cant_reuse_extra sym:%s, size:%lu", SYM_NAME(tree->symbol), + tree->size.chars); return false; } @@ -243,24 +229,28 @@ static bool ts_parser__can_reuse(TSParser *self, StackVersion version, &leaf_entry); if (!leaf_entry.is_reusable) { - LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol)); + LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s, size:%lu", + SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), + tree->size.chars); return false; } if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) { - LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol)); + LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu", + SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), + tree->size.chars); return false; } - } else if (entry.depends_on_lookahead) { - LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol)); + } else if (table_entry->depends_on_lookahead) { + LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu", + SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), + tree->size.chars); return false; } } } + LOG("reuse sym:%s size:%lu", SYM_NAME(tree->symbol), tree->size.chars); return true; } @@ -298,22 +288,45 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state, return result; } +static void ts_parser__clear_cached_token(TSParser *self) { + ts_tree_release(self->cached_token); + self->cached_token = NULL; +} + static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, - ReusableNode *reusable_node) { + ReusableNode *reusable_node, + bool *is_fresh) { TSLength position = ts_stack_top_position(self->stack, version); while (reusable_node->tree) { if (reusable_node->char_index > position.chars) { + LOG("before_reusable sym:%s, pos:%lu", + SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); break; } if (reusable_node->char_index < position.chars) { - LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol)); + LOG("past_reusable sym:%s, pos:%lu", + SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); ts_parser__pop_reusable_node(reusable_node); continue; } - if (!ts_parser__can_reuse(self, version, reusable_node->tree)) { + if (reusable_node->tree->symbol == ts_builtin_sym_error) { + LOG("cant_reuse_error sym:%s, size:%lu", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.chars); + if (!ts_parser__breakdown_reusable_node(reusable_node)) { + ts_parser__pop_reusable_node(reusable_node); + CHECK(ts_parser__breakdown_top_of_stack(self, version)); + } + continue; + } + + if (reusable_node->tree->has_changes) { + LOG("cant_reuse_changed tree:%s, size:%lu", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.chars); if (!ts_parser__breakdown_reusable_node(reusable_node)) { ts_parser__pop_reusable_node(reusable_node); CHECK(ts_parser__breakdown_top_of_stack(self, version)); @@ -322,17 +335,20 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, } TSTree *result = reusable_node->tree; - TSLength size = ts_tree_total_size(result); - LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars, - result->extra); - ts_parser__pop_reusable_node(reusable_node); ts_tree_retain(result); + *is_fresh = false; return result; } + if (self->cached_token && position.chars == self->cached_token_char_index) { + ts_tree_retain(self->cached_token); + return self->cached_token; + } + ts_lexer_reset(&self->lexer, position); TSStateId parse_state = ts_stack_top_state(self->stack, version); bool error_mode = parse_state == TS_STATE_ERROR; + *is_fresh = true; return ts_parser__lex(self, parse_state, error_mode); error: @@ -784,7 +800,8 @@ static void ts_parser__start(TSParser *self, TSInput input, ts_lexer_set_input(&self->lexer, input); ts_stack_clear(self->stack); - + self->reusable_node = (ReusableNode){ previous_tree, 0 }; + self->cached_token = NULL; self->finished_tree = NULL; } @@ -835,8 +852,9 @@ error: } static bool ts_parser__handle_error(TSParser *self, StackVersion version, - TSStateId state, TSTree *lookahead) { + TSTree *lookahead) { size_t previous_version_count = ts_stack_version_count(self->stack); + TSStateId state = ts_stack_top_state(self->stack, version); unsigned error_cost = ts_stack_error_cost(self->stack, version); unsigned error_depth = ts_stack_error_depth(self->stack, version) + 1; @@ -944,19 +962,34 @@ error: return false; } -static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, - TSTree *lookahead) { +typedef enum { + ConsumeFailed, + ConsumeInvalid, + ConsumeSucceeded, +} ConsumeResult; + +static ConsumeResult ts_parser__consume_lookahead(TSParser *self, + StackVersion version, + TSTree *lookahead, + bool lookahead_is_fresh) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, version); + + TableEntry entry; + ts_language_table_entry(self->language, state, lookahead->symbol, &entry); + + if (!lookahead_is_fresh && + !ts_parser__can_reuse(self, state, &entry, lookahead)) + return ConsumeInvalid; + + LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), + lookahead->size.chars); + bool reduction_stopped_at_error = false; StackVersion last_reduction_version = STACK_VERSION_NONE; - size_t action_count; - const TSParseAction *actions = ts_language_actions( - self->language, state, lookahead->symbol, &action_count); - - for (size_t i = 0; i < action_count; i++) { - TSParseAction action = actions[i]; + for (size_t i = 0; i < entry.action_count; i++) { + TSParseAction action = entry.actions[i]; switch (action.type) { case TSParseActionTypeShift: { @@ -971,7 +1004,7 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, CHECK(ts_parser__shift(self, version, next_state, lookahead, action.extra)); - return true; + return ConsumeSucceeded; } case TSParseActionTypeReduce: { @@ -999,7 +1032,8 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, case ReduceStoppedAtError: { reduction_stopped_at_error = true; switch (ts_parser__repair_error(self, reduction.slice, lookahead, - actions, action_count)) { + entry.actions, + entry.action_count)) { case RepairFailed: goto error; case RepairNoneFound: @@ -1020,20 +1054,58 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, LOG("accept"); CHECK(ts_parser__accept(self, version)); - return true; + return ConsumeSucceeded; } case TSParseActionTypeRecover: { CHECK(ts_parser__recover(self, version, action.to_state, lookahead)); - return true; + return ConsumeSucceeded; } } - - LOG_STACK(); } - if (last_reduction_version != STACK_VERSION_NONE) { - ts_stack_renumber_version(self->stack, last_reduction_version, version); + if (last_reduction_version == STACK_VERSION_NONE) + return ConsumeInvalid; + + ts_stack_renumber_version(self->stack, last_reduction_version, version); + LOG_STACK(); + } + +error: + return ConsumeFailed; +} + +static bool ts_parser__advance(TSParser *self, StackVersion version, + ReusableNode *reusable_node) { + TSTree *lookahead = NULL; + + for (;;) { + bool lookahead_is_fresh; + CHECK(lookahead = ts_parser__get_lookahead(self, version, reusable_node, + &lookahead_is_fresh)); + + switch (ts_parser__consume_lookahead(self, version, lookahead, + lookahead_is_fresh)) { + case ConsumeFailed: + goto error; + case ConsumeSucceeded: + if (lookahead == reusable_node->tree) + ts_parser__pop_reusable_node(reusable_node); + ts_tree_release(lookahead); + return true; + case ConsumeInvalid: + break; + } + + if (!lookahead_is_fresh) { + if (lookahead == reusable_node->tree) { + if (!ts_parser__breakdown_reusable_node(reusable_node)) + ts_parser__pop_reusable_node(reusable_node); + } else { + ts_parser__clear_cached_token(self); + } + + ts_tree_release(lookahead); continue; } @@ -1041,15 +1113,22 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, case BreakdownFailed: goto error; case BreakdownPerformed: - break; + ts_tree_release(lookahead); + continue; case BreakdownAborted: - CHECK(ts_parser__handle_error(self, version, state, lookahead)); - if (ts_stack_is_halted(self->stack, version)) - return true; + break; } + + CHECK(ts_parser__handle_error(self, version, lookahead)); + ts_tree_release(lookahead); + + if (ts_stack_is_halted(self->stack, version)) + return true; } error: + if (lookahead) + ts_tree_release(lookahead); return false; } @@ -1087,16 +1166,14 @@ void ts_parser_destroy(TSParser *self) { TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) { ts_parser__start(self, input, old_tree); - StackVersion version = 0; - size_t last_position = 0, position = 0; - ReusableNode reusable_node, current_reusable_node = { old_tree, 0 }; - for (;;) { - TSTree *lookahead = NULL; - size_t lookahead_position = 0; + StackVersion version = STACK_VERSION_NONE; + size_t position = 0, last_position = 0; + ReusableNode reusable_node; + do { for (version = 0; version < ts_stack_version_count(self->stack); version++) { - reusable_node = current_reusable_node; + reusable_node = self->reusable_node; last_position = position; while (!ts_stack_is_halted(self->stack, version)) { @@ -1111,44 +1188,25 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) { ts_stack_top_position(self->stack, version).rows + 1, ts_stack_top_position(self->stack, version).columns + 1); - if (!lookahead || (position != lookahead_position) || - !ts_parser__can_reuse(self, version, lookahead)) { - ts_tree_release(lookahead); - lookahead = ts_parser__get_lookahead(self, version, &reusable_node); - lookahead_position = position; - CHECK(lookahead); - } - - LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), - ts_tree_total_chars(lookahead)); - - if (!ts_parser__consume_lookahead(self, version, lookahead)) { - ts_tree_release(lookahead); - goto error; - } - + CHECK(ts_parser__advance(self, version, &reusable_node)); LOG_STACK(); } } - current_reusable_node = reusable_node; + self->reusable_node = reusable_node; if (ts_parser__condense_stack(self)) { LOG("condense"); LOG_STACK(); } - ts_tree_release(lookahead); - - if (version == 0) - break; - else - self->is_split = (version > 1); - } + self->is_split = (version > 1); + } while (version != 0); LOG("done"); LOG_TREE(); ts_stack_clear(self->stack); + ts_parser__clear_cached_token(self); ts_tree_assign_parents(self->finished_tree); return self->finished_tree; diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 9a2f4015..9d827d39 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -9,6 +9,11 @@ extern "C" { #include "runtime/array.h" #include "runtime/reduce_action.h" +typedef struct { + TSTree *tree; + size_t char_index; +} ReusableNode; + typedef struct { TSLexer lexer; Stack *stack; @@ -18,6 +23,9 @@ typedef struct { bool is_split; bool print_debugging_graphs; TSTree scratch_tree; + TSTree *cached_token; + size_t cached_token_char_index; + ReusableNode reusable_node; } TSParser; bool ts_parser_init(TSParser *); From 285f2272fd43be01cfefd597c4190a6a3129b921 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 17 Jul 2016 06:22:05 -0700 Subject: [PATCH 3/9] Move random string helpers into a separate file --- spec/helpers/random_helpers.cc | 35 ++++++++++++++++++++++++++++++++ spec/helpers/random_helpers.h | 8 ++++++++ spec/integration/corpus_specs.cc | 32 +---------------------------- 3 files changed, 44 insertions(+), 31 deletions(-) create mode 100644 spec/helpers/random_helpers.cc create mode 100644 spec/helpers/random_helpers.h diff --git a/spec/helpers/random_helpers.cc b/spec/helpers/random_helpers.cc new file mode 100644 index 00000000..1ce26400 --- /dev/null +++ b/spec/helpers/random_helpers.cc @@ -0,0 +1,35 @@ +#include +#include + +using std::string; + +static string random_string(char min, char max) { + string result; + size_t length = random() % 12; + for (size_t i = 0; i < length; i++) { + char inserted_char = min + (random() % (max - min)); + result += inserted_char; + } + return result; +} + +static string random_char(string characters) { + size_t index = random() % characters.size(); + return string() + characters[index]; +} + +string random_words(size_t count) { + string result; + bool just_inserted_word = false; + for (size_t i = 0; i < count; i++) { + if (random() % 10 < 6) { + result += random_char("!(){}[]<>+-="); + } else { + if (just_inserted_word) + result += " "; + result += random_string('a', 'z'); + just_inserted_word = true; + } + } + return result; +} diff --git a/spec/helpers/random_helpers.h b/spec/helpers/random_helpers.h new file mode 100644 index 00000000..84aa1a23 --- /dev/null +++ b/spec/helpers/random_helpers.h @@ -0,0 +1,8 @@ +#ifndef HELPERS_RANDOM_HELPERS_H_ +#define HELPERS_RANDOM_HELPERS_H_ + +#include + +std::string random_words(size_t count); + +#endif // HELPERS_RANDOM_HELPERS_H_ diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index 96f399b8..c701e2e0 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -7,6 +7,7 @@ #include "helpers/point_helpers.h" #include "helpers/encoding_helpers.h" #include "helpers/record_alloc.h" +#include "helpers/random_helpers.h" #include static void expect_the_correct_tree(TSNode node, TSDocument *document, string tree_string) { @@ -60,37 +61,6 @@ static void expect_a_consistent_tree(TSNode node, TSDocument *document) { AssertThat(has_changes, Equals(some_child_has_changes)); } -static string random_string(char min, char max) { - string result; - size_t length = random() % 12; - for (size_t i = 0; i < length; i++) { - char inserted_char = min + (random() % (max - min)); - result += inserted_char; - } - return result; -} - -static string random_char(string characters) { - size_t index = random() % characters.size(); - return string() + characters[index]; -} - -static string random_words(size_t count) { - string result; - bool just_inserted_word = false; - for (size_t i = 0; i < count; i++) { - if (random() % 10 < 6) { - result += random_char("!(){}[]<>+-="); - } else { - if (just_inserted_word) - result += " "; - result += random_string('a', 'z'); - just_inserted_word = true; - } - } - return result; -} - START_TEST describe("The Corpus", []() { From fa8993460ececbe433d8a652f32b24bea9828320 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 17 Jul 2016 07:25:13 -0700 Subject: [PATCH 4/9] Don't reuse unexpected tokens for now --- src/compiler/generate_code/c_code.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index af664fa4..79e164b1 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -211,7 +211,7 @@ class CCodeGenerator { } void add_parse_table() { - add_parse_action_list_id(ParseTableEntry{ {}, true, false }); + add_parse_action_list_id(ParseTableEntry{ {}, false, false }); size_t state_id = 0; line("#pragma GCC diagnostic push"); From 1c66d90203b7eb1471dcb3825ce3d42dea82d021 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 17 Jul 2016 08:16:03 -0700 Subject: [PATCH 5/9] Mark repeat symbols as anonymous --- src/compiler/generate_code/c_code.cc | 3 +++ src/runtime/node.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 79e164b1..f747332e 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -165,6 +165,9 @@ class CCodeGenerator { line(".named = false,"); break; case VariableTypeHidden: + line(".visible = false,"); + line(".named = true,"); + break; case VariableTypeAuxiliary: line(".visible = false,"); line(".named = false,"); diff --git a/src/runtime/node.c b/src/runtime/node.c index f0e59d36..ad3264d8 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -33,7 +33,7 @@ static inline size_t ts_node__offset_row(TSNode self) { static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { const TSTree *tree = ts_node__tree(self); - return include_anonymous ? tree->visible : tree->named; + return include_anonymous ? tree->visible : tree->visible && tree->named; } static inline size_t ts_node__relevant_child_count(TSNode self, From 1faa7c60f93d665b3e094a85d560a4a8ad51f7c6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 17 Jul 2016 08:17:01 -0700 Subject: [PATCH 6/9] Log stack after pruning halted versions --- src/runtime/parser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 1dba1876..92466648 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -170,6 +170,7 @@ static bool ts_parser__condense_stack(TSParser *self) { bool result = false; for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { if (ts_stack_is_halted(self->stack, i)) { + result = true; ts_stack_remove_version(self->stack, i); i--; continue; From 0ee1994078c8a5e5cbb15454b4e2024a629d6166 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 17 Jul 2016 07:25:57 -0700 Subject: [PATCH 7/9] Don't have both shift and shift-extra actions in recovery states --- src/compiler/build_tables/build_parse_table.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index d027e1e8..def80f55 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -115,13 +115,12 @@ class ParseTableBuilder { void build_error_parse_state() { ParseState error_state; - for (const Symbol &symbol : recovery_tokens(lexical_grammar)) { + for (const Symbol &symbol : recovery_tokens(lexical_grammar)) add_out_of_context_parse_state(&error_state, symbol); - } - for (const Symbol &symbol : grammar.extra_tokens) { - error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra()); - } + for (const Symbol &symbol : grammar.extra_tokens) + if (!error_state.entries.count(symbol)) + error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra()); for (size_t i = 0; i < grammar.variables.size(); i++) { Symbol symbol(i, false); @@ -197,8 +196,13 @@ class ParseTableBuilder { void add_shift_extra_actions(ParseStateId state_id) { ParseAction action = ParseAction::ShiftExtra(); + ParseState &state = parse_table.states[state_id]; for (const Symbol &extra_symbol : grammar.extra_tokens) - add_action(state_id, extra_symbol, action, null_item_set); + if (!state.entries.count(extra_symbol) || + (allow_any_conflict && + state.entries[extra_symbol].actions.back().type == + ParseActionTypeReduce)) + parse_table.add_action(state_id, extra_symbol, action); } void add_reduce_extra_actions(ParseStateId state_id) { From 87ca3cb09948736d8b2f816f8d8a4e97b084de45 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 17 Jul 2016 13:35:43 -0700 Subject: [PATCH 8/9] Reuse nodes based on state matching, not sentential form validity I think that state matching is the only correct strategy for incremental node reuse that is compatible with the new error recovery algorithm. It's also simpler than the sentential-form algorithm. With the compressed parse tables, state matching shouldn't be too conservative of a test. --- spec/runtime/document_spec.cc | 2 +- spec/runtime/parser_spec.cc | 2 +- src/runtime/parser.c | 293 ++++++++++++++++------------------ 3 files changed, 137 insertions(+), 160 deletions(-) diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 584eeb87..05ded944 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -173,7 +173,7 @@ describe("Document", [&]() { AssertThat(debugger->messages, Contains("new_parse")); AssertThat(debugger->messages, Contains("lookahead char:'['")); - AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4, fragile:false")); + AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4")); AssertThat(debugger->messages, Contains("accept")); }); diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 888a229c..70b94674 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -231,7 +231,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (member_access (identifier) (identifier))))))"); - AssertThat(input->strings_read, Equals(vector({ " abc.d);", "" }))); + AssertThat(input->strings_read, Equals(vector({ " + abc.d)", "" }))); }); }); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 92466648..d11082c2 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -157,8 +157,7 @@ static void ts_parser__pop_reusable_node(ReusableNode *reusable_node) { } static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) { - if (reusable_node->tree->symbol == ts_builtin_sym_error || - reusable_node->tree->child_count == 0) { + if (reusable_node->tree->child_count == 0) { return false; } else { reusable_node->tree = reusable_node->tree->children[0]; @@ -166,6 +165,43 @@ static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) { } } +static bool ts_parser__breakdown_lookahead(TSParser *self, TSTree **lookahead, TSStateId state, ReusableNode *reusable_node) { + bool result = false; + while (reusable_node->tree->child_count > 0 && + (reusable_node->tree->parse_state != state || + reusable_node->tree->fragile_left || + reusable_node->tree->fragile_right)) { + LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol)); + ts_parser__breakdown_reusable_node(reusable_node); + result = true; + } + + if (result) { + LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol)); + ts_tree_release(*lookahead); + ts_tree_retain(*lookahead = reusable_node->tree); + } + + return result; +} + +static void ts_parser__pop_reusable_node_leaf(ReusableNode *reusable_node) { + while (reusable_node->tree->child_count > 0) + reusable_node->tree = reusable_node->tree->children[0]; + ts_parser__pop_reusable_node(reusable_node); +} + +static bool ts_parser__can_reuse(TSParser *self, TSStateId state, TSTree *tree, + TableEntry *table_entry) { + if (tree->first_leaf.lex_state == self->language->lex_states[state]) + return true; + if (!table_entry->is_reusable) + return false; + if (!table_entry->depends_on_lookahead) + return true; + return tree->child_count > 1 && tree->error_size == 0; +} + static bool ts_parser__condense_stack(TSParser *self) { bool result = false; for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { @@ -194,67 +230,6 @@ static bool ts_parser__condense_stack(TSParser *self) { return result; } -static bool ts_parser__can_reuse(TSParser *self, TSStateId state, - TableEntry *table_entry, TSTree *tree) { - if (tree->parse_state != state) { - if (ts_tree_is_fragile(tree)) { - LOG("cant_reuse_fragile sym:%s, size:%lu", SYM_NAME(tree->symbol), - tree->size.chars); - return false; - } - - if (!table_entry->is_reusable) { - LOG("cant_reuse_ambiguous sym:%s, size:%lu", SYM_NAME(tree->symbol), - tree->size.chars); - return false; - } - - if (table_entry->action_count == 0) { - LOG("cant_reuse_unexpected sym:%s, size:%lu", SYM_NAME(tree->symbol), - tree->size.chars); - return false; - } - - TSParseAction action = table_entry->actions[table_entry->action_count - 1]; - if (tree->extra != action.extra) { - LOG("cant_reuse_extra sym:%s, size:%lu", SYM_NAME(tree->symbol), - tree->size.chars); - return false; - } - - TSStateId lex_state = self->language->lex_states[state]; - if (tree->first_leaf.lex_state != lex_state) { - if (tree->child_count > 0) { - TableEntry leaf_entry; - ts_language_table_entry(self->language, state, tree->first_leaf.symbol, - &leaf_entry); - - if (!leaf_entry.is_reusable) { - LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s, size:%lu", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), - tree->size.chars); - return false; - } - - if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) { - LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), - tree->size.chars); - return false; - } - } else if (table_entry->depends_on_lookahead) { - LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), - tree->size.chars); - return false; - } - } - } - - LOG("reuse sym:%s size:%lu", SYM_NAME(tree->symbol), tree->size.chars); - return true; -} - static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state, bool error_mode) { TSStateId state = self->language->lex_states[parse_state]; @@ -283,9 +258,10 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state, if (!result) return NULL; result->parse_state = parse_state; - result->first_leaf.lex_state = state; } + result->first_leaf.lex_state = state; + return result; } @@ -295,8 +271,7 @@ static void ts_parser__clear_cached_token(TSParser *self) { } static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, - ReusableNode *reusable_node, - bool *is_fresh) { + ReusableNode *reusable_node) { TSLength position = ts_stack_top_position(self->stack, version); while (reusable_node->tree) { @@ -313,17 +288,6 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, continue; } - if (reusable_node->tree->symbol == ts_builtin_sym_error) { - LOG("cant_reuse_error sym:%s, size:%lu", - SYM_NAME(reusable_node->tree->symbol), - reusable_node->tree->size.chars); - if (!ts_parser__breakdown_reusable_node(reusable_node)) { - ts_parser__pop_reusable_node(reusable_node); - CHECK(ts_parser__breakdown_top_of_stack(self, version)); - } - continue; - } - if (reusable_node->tree->has_changes) { LOG("cant_reuse_changed tree:%s, size:%lu", SYM_NAME(reusable_node->tree->symbol), @@ -335,9 +299,19 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, continue; } + if (reusable_node->tree->symbol == ts_builtin_sym_error) { + LOG("cant_reuse_error tree:%s, size:%lu", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.chars); + if (!ts_parser__breakdown_reusable_node(reusable_node)) { + ts_parser__pop_reusable_node(reusable_node); + CHECK(ts_parser__breakdown_top_of_stack(self, version)); + } + continue; + } + TSTree *result = reusable_node->tree; ts_tree_retain(result); - *is_fresh = false; return result; } @@ -349,7 +323,6 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, ts_lexer_reset(&self->lexer, position); TSStateId parse_state = ts_stack_top_state(self->stack, version); bool error_mode = parse_state == TS_STATE_ERROR; - *is_fresh = true; return ts_parser__lex(self, parse_state, error_mode); error: @@ -514,7 +487,7 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version, } TSStateId state = ts_stack_top_state(self->stack, slice.version); - if (fragile || self->is_split || initial_version_count > 1) { + if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) { parent->fragile_left = true; parent->fragile_right = true; parent->parse_state = TS_TREE_STATE_NONE; @@ -699,13 +672,13 @@ static StackIterateAction ts_parser__error_repair_callback( } static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice, - TSTree *lookahead, + TSSymbol lookahead_symbol, const TSParseAction *actions, size_t action_count) { LOG("repair_error"); ErrorRepairSession session = { .parser = self, - .lookahead_symbol = lookahead->symbol, + .lookahead_symbol = lookahead_symbol, .found_repair = false, .trees_above_error = &slice.trees, .tree_count_above_error = ts_tree_array_essential_count(&slice.trees), @@ -837,6 +810,7 @@ static bool ts_parser__accept(TSParser *self, StackVersion version) { if (ts_parser__select_tree(self, self->finished_tree, root)) { ts_tree_release(self->finished_tree); + assert(root->ref_count > 0); self->finished_tree = root; } else { ts_tree_release(root); @@ -852,8 +826,7 @@ error: return false; } -static bool ts_parser__handle_error(TSParser *self, StackVersion version, - TSTree *lookahead) { +static bool ts_parser__handle_error(TSParser *self, StackVersion version) { size_t previous_version_count = ts_stack_version_count(self->stack); TSStateId state = ts_stack_top_state(self->stack, version); @@ -963,37 +936,59 @@ error: return false; } -typedef enum { - ConsumeFailed, - ConsumeInvalid, - ConsumeSucceeded, -} ConsumeResult; +static bool ts_parser__advance(TSParser *self, StackVersion version, + ReusableNode *reusable_node) { + bool validated_lookahead = false; + TSTree *lookahead = ts_parser__get_lookahead(self, version, reusable_node); + CHECK(lookahead); -static ConsumeResult ts_parser__consume_lookahead(TSParser *self, - StackVersion version, - TSTree *lookahead, - bool lookahead_is_fresh) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, version); - TableEntry entry; - ts_language_table_entry(self->language, state, lookahead->symbol, &entry); + TableEntry table_entry; + ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, + &table_entry); - if (!lookahead_is_fresh && - !ts_parser__can_reuse(self, state, &entry, lookahead)) - return ConsumeInvalid; + if (!validated_lookahead) { + if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) { + if (lookahead == reusable_node->tree) + ts_parser__pop_reusable_node_leaf(reusable_node); + else + ts_parser__clear_cached_token(self); - LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), - lookahead->size.chars); + ts_tree_release(lookahead); + lookahead = ts_parser__get_lookahead(self, version, reusable_node); + CHECK(lookahead); + continue; + } + + validated_lookahead = true; + LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), + lookahead->size.chars); + } bool reduction_stopped_at_error = false; StackVersion last_reduction_version = STACK_VERSION_NONE; - for (size_t i = 0; i < entry.action_count; i++) { - TSParseAction action = entry.actions[i]; + for (size_t i = 0; i < table_entry.action_count; i++) { + TSParseAction action = table_entry.actions[i]; switch (action.type) { case TSParseActionTypeShift: { + if (lookahead->child_count > 0) { + if (ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) { + if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) { + ts_parser__pop_reusable_node(reusable_node); + ts_tree_release(lookahead); + lookahead = ts_parser__get_lookahead(self, version, reusable_node); + CHECK(lookahead); + } + } + + action = *ts_language_last_action(self->language, state, + lookahead->symbol); + } + TSStateId next_state; if (action.extra) { next_state = state; @@ -1005,7 +1000,12 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, CHECK(ts_parser__shift(self, version, next_state, lookahead, action.extra)); - return ConsumeSucceeded; + + if (lookahead == reusable_node->tree) + ts_parser__pop_reusable_node(reusable_node); + + ts_tree_release(lookahead); + return true; } case TSParseActionTypeReduce: { @@ -1015,14 +1015,14 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, if (action.extra) { LOG("reduce_extra"); } else { - LOG("reduce sym:%s, child_count:%u, fragile:%s", - SYM_NAME(action.symbol), action.child_count, - BOOL_STRING(action.fragile)); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), + action.child_count); } Reduction reduction = ts_parser__reduce(self, version, action.symbol, action.child_count, - action.extra, action.fragile, true); + action.extra, (i < table_entry.action_count - 1), + true); switch (reduction.status) { case ReduceFailed: @@ -1032,9 +1032,9 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, break; case ReduceStoppedAtError: { reduction_stopped_at_error = true; - switch (ts_parser__repair_error(self, reduction.slice, lookahead, - entry.actions, - entry.action_count)) { + switch (ts_parser__repair_error( + self, reduction.slice, lookahead->first_leaf.symbol, + table_entry.actions, table_entry.action_count)) { case RepairFailed: goto error; case RepairNoneFound: @@ -1055,58 +1055,35 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, LOG("accept"); CHECK(ts_parser__accept(self, version)); - return ConsumeSucceeded; + + ts_tree_release(lookahead); + return true; } case TSParseActionTypeRecover: { + while (lookahead->child_count > 0) { + ts_parser__breakdown_reusable_node(reusable_node); + ts_tree_release(lookahead); + lookahead = reusable_node->tree; + ts_tree_retain(lookahead); + } + action = + *ts_language_last_action(self->language, state, lookahead->symbol); + CHECK(ts_parser__recover(self, version, action.to_state, lookahead)); - return ConsumeSucceeded; + + if (lookahead == reusable_node->tree) + ts_parser__pop_reusable_node(reusable_node); + + ts_tree_release(lookahead); + return true; } } } - if (last_reduction_version == STACK_VERSION_NONE) - return ConsumeInvalid; - - ts_stack_renumber_version(self->stack, last_reduction_version, version); - LOG_STACK(); - } - -error: - return ConsumeFailed; -} - -static bool ts_parser__advance(TSParser *self, StackVersion version, - ReusableNode *reusable_node) { - TSTree *lookahead = NULL; - - for (;;) { - bool lookahead_is_fresh; - CHECK(lookahead = ts_parser__get_lookahead(self, version, reusable_node, - &lookahead_is_fresh)); - - switch (ts_parser__consume_lookahead(self, version, lookahead, - lookahead_is_fresh)) { - case ConsumeFailed: - goto error; - case ConsumeSucceeded: - if (lookahead == reusable_node->tree) - ts_parser__pop_reusable_node(reusable_node); - ts_tree_release(lookahead); - return true; - case ConsumeInvalid: - break; - } - - if (!lookahead_is_fresh) { - if (lookahead == reusable_node->tree) { - if (!ts_parser__breakdown_reusable_node(reusable_node)) - ts_parser__pop_reusable_node(reusable_node); - } else { - ts_parser__clear_cached_token(self); - } - - ts_tree_release(lookahead); + if (last_reduction_version != STACK_VERSION_NONE) { + ts_stack_renumber_version(self->stack, last_reduction_version, version); + LOG_STACK(); continue; } @@ -1114,17 +1091,17 @@ static bool ts_parser__advance(TSParser *self, StackVersion version, case BreakdownFailed: goto error; case BreakdownPerformed: - ts_tree_release(lookahead); continue; case BreakdownAborted: break; } - CHECK(ts_parser__handle_error(self, version, lookahead)); - ts_tree_release(lookahead); + CHECK(ts_parser__handle_error(self, version)); - if (ts_stack_is_halted(self->stack, version)) + if (ts_stack_is_halted(self->stack, version)) { + ts_tree_release(lookahead); return true; + } } error: From f8c2e9d908873be69da8f365f92faa4c0e528f1a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 31 Jul 2016 21:23:40 -0700 Subject: [PATCH 9/9] Include tree's original parse state in debug graphs --- src/runtime/tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 82f14baa..93ea81a5 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -436,8 +436,8 @@ void ts_tree__print_dot_graph(const TSTree *self, size_t offset, if (self->extra) fprintf(f, ", fontcolor=gray"); - fprintf(f, ", tooltip=\"%lu - %lu\"]\n", offset, - offset + ts_tree_total_chars(self)); + fprintf(f, ", tooltip=\"range:%lu - %lu\nstate:%d\"]\n", offset, + offset + ts_tree_total_chars(self), self->parse_state); for (size_t i = 0; i < self->child_count; i++) { const TSTree *child = self->children[i]; ts_tree__print_dot_graph(child, offset, language, f);