From 87ca3cb09948736d8b2f816f8d8a4e97b084de45 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 17 Jul 2016 13:35:43 -0700 Subject: [PATCH] Reuse nodes based on state matching, not sentential form validity I think that state matching is the only correct strategy for incremental node reuse that is compatible with the new error recovery algorithm. It's also simpler than the sentential-form algorithm. With the compressed parse tables, state matching shouldn't be too conservative of a test. --- spec/runtime/document_spec.cc | 2 +- spec/runtime/parser_spec.cc | 2 +- src/runtime/parser.c | 293 ++++++++++++++++------------------ 3 files changed, 137 insertions(+), 160 deletions(-) diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 584eeb87..05ded944 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -173,7 +173,7 @@ describe("Document", [&]() { AssertThat(debugger->messages, Contains("new_parse")); AssertThat(debugger->messages, Contains("lookahead char:'['")); - AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4, fragile:false")); + AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4")); AssertThat(debugger->messages, Contains("accept")); }); diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 888a229c..70b94674 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -231,7 +231,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (member_access (identifier) (identifier))))))"); - AssertThat(input->strings_read, Equals(vector({ " abc.d);", "" }))); + AssertThat(input->strings_read, Equals(vector({ " + abc.d)", "" }))); }); }); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 92466648..d11082c2 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -157,8 +157,7 @@ static void ts_parser__pop_reusable_node(ReusableNode *reusable_node) { } static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) { - if (reusable_node->tree->symbol == ts_builtin_sym_error || - reusable_node->tree->child_count == 0) { + if (reusable_node->tree->child_count == 0) { return false; } else { reusable_node->tree = reusable_node->tree->children[0]; @@ -166,6 +165,43 @@ static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) { } } +static bool ts_parser__breakdown_lookahead(TSParser *self, TSTree **lookahead, TSStateId state, ReusableNode *reusable_node) { + bool result = false; + while (reusable_node->tree->child_count > 0 && + (reusable_node->tree->parse_state != state || + reusable_node->tree->fragile_left || + reusable_node->tree->fragile_right)) { + LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol)); + ts_parser__breakdown_reusable_node(reusable_node); + result = true; + } + + if (result) { + LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol)); + ts_tree_release(*lookahead); + ts_tree_retain(*lookahead = reusable_node->tree); + } + + return result; +} + +static void ts_parser__pop_reusable_node_leaf(ReusableNode *reusable_node) { + while (reusable_node->tree->child_count > 0) + reusable_node->tree = reusable_node->tree->children[0]; + ts_parser__pop_reusable_node(reusable_node); +} + +static bool ts_parser__can_reuse(TSParser *self, TSStateId state, TSTree *tree, + TableEntry *table_entry) { + if (tree->first_leaf.lex_state == self->language->lex_states[state]) + return true; + if (!table_entry->is_reusable) + return false; + if (!table_entry->depends_on_lookahead) + return true; + return tree->child_count > 1 && tree->error_size == 0; +} + static bool ts_parser__condense_stack(TSParser *self) { bool result = false; for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { @@ -194,67 +230,6 @@ static bool ts_parser__condense_stack(TSParser *self) { return result; } -static bool ts_parser__can_reuse(TSParser *self, TSStateId state, - TableEntry *table_entry, TSTree *tree) { - if (tree->parse_state != state) { - if (ts_tree_is_fragile(tree)) { - LOG("cant_reuse_fragile sym:%s, size:%lu", SYM_NAME(tree->symbol), - tree->size.chars); - return false; - } - - if (!table_entry->is_reusable) { - LOG("cant_reuse_ambiguous sym:%s, size:%lu", SYM_NAME(tree->symbol), - tree->size.chars); - return false; - } - - if (table_entry->action_count == 0) { - LOG("cant_reuse_unexpected sym:%s, size:%lu", SYM_NAME(tree->symbol), - tree->size.chars); - return false; - } - - TSParseAction action = table_entry->actions[table_entry->action_count - 1]; - if (tree->extra != action.extra) { - LOG("cant_reuse_extra sym:%s, size:%lu", SYM_NAME(tree->symbol), - tree->size.chars); - return false; - } - - TSStateId lex_state = self->language->lex_states[state]; - if (tree->first_leaf.lex_state != lex_state) { - if (tree->child_count > 0) { - TableEntry leaf_entry; - ts_language_table_entry(self->language, state, tree->first_leaf.symbol, - &leaf_entry); - - if (!leaf_entry.is_reusable) { - LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s, size:%lu", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), - tree->size.chars); - return false; - } - - if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) { - LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), - tree->size.chars); - return false; - } - } else if (table_entry->depends_on_lookahead) { - LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), - tree->size.chars); - return false; - } - } - } - - LOG("reuse sym:%s size:%lu", SYM_NAME(tree->symbol), tree->size.chars); - return true; -} - static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state, bool error_mode) { TSStateId state = self->language->lex_states[parse_state]; @@ -283,9 +258,10 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state, if (!result) return NULL; result->parse_state = parse_state; - result->first_leaf.lex_state = state; } + result->first_leaf.lex_state = state; + return result; } @@ -295,8 +271,7 @@ static void ts_parser__clear_cached_token(TSParser *self) { } static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, - ReusableNode *reusable_node, - bool *is_fresh) { + ReusableNode *reusable_node) { TSLength position = ts_stack_top_position(self->stack, version); while (reusable_node->tree) { @@ -313,17 +288,6 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, continue; } - if (reusable_node->tree->symbol == ts_builtin_sym_error) { - LOG("cant_reuse_error sym:%s, size:%lu", - SYM_NAME(reusable_node->tree->symbol), - reusable_node->tree->size.chars); - if (!ts_parser__breakdown_reusable_node(reusable_node)) { - ts_parser__pop_reusable_node(reusable_node); - CHECK(ts_parser__breakdown_top_of_stack(self, version)); - } - continue; - } - if (reusable_node->tree->has_changes) { LOG("cant_reuse_changed tree:%s, size:%lu", SYM_NAME(reusable_node->tree->symbol), @@ -335,9 +299,19 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, continue; } + if (reusable_node->tree->symbol == ts_builtin_sym_error) { + LOG("cant_reuse_error tree:%s, size:%lu", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.chars); + if (!ts_parser__breakdown_reusable_node(reusable_node)) { + ts_parser__pop_reusable_node(reusable_node); + CHECK(ts_parser__breakdown_top_of_stack(self, version)); + } + continue; + } + TSTree *result = reusable_node->tree; ts_tree_retain(result); - *is_fresh = false; return result; } @@ -349,7 +323,6 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, ts_lexer_reset(&self->lexer, position); TSStateId parse_state = ts_stack_top_state(self->stack, version); bool error_mode = parse_state == TS_STATE_ERROR; - *is_fresh = true; return ts_parser__lex(self, parse_state, error_mode); error: @@ -514,7 +487,7 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version, } TSStateId state = ts_stack_top_state(self->stack, slice.version); - if (fragile || self->is_split || initial_version_count > 1) { + if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) { parent->fragile_left = true; parent->fragile_right = true; parent->parse_state = TS_TREE_STATE_NONE; @@ -699,13 +672,13 @@ static StackIterateAction ts_parser__error_repair_callback( } static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice, - TSTree *lookahead, + TSSymbol lookahead_symbol, const TSParseAction *actions, size_t action_count) { LOG("repair_error"); ErrorRepairSession session = { .parser = self, - .lookahead_symbol = lookahead->symbol, + .lookahead_symbol = lookahead_symbol, .found_repair = false, .trees_above_error = &slice.trees, .tree_count_above_error = ts_tree_array_essential_count(&slice.trees), @@ -837,6 +810,7 @@ static bool ts_parser__accept(TSParser *self, StackVersion version) { if (ts_parser__select_tree(self, self->finished_tree, root)) { ts_tree_release(self->finished_tree); + assert(root->ref_count > 0); self->finished_tree = root; } else { ts_tree_release(root); @@ -852,8 +826,7 @@ error: return false; } -static bool ts_parser__handle_error(TSParser *self, StackVersion version, - TSTree *lookahead) { +static bool ts_parser__handle_error(TSParser *self, StackVersion version) { size_t previous_version_count = ts_stack_version_count(self->stack); TSStateId state = ts_stack_top_state(self->stack, version); @@ -963,37 +936,59 @@ error: return false; } -typedef enum { - ConsumeFailed, - ConsumeInvalid, - ConsumeSucceeded, -} ConsumeResult; +static bool ts_parser__advance(TSParser *self, StackVersion version, + ReusableNode *reusable_node) { + bool validated_lookahead = false; + TSTree *lookahead = ts_parser__get_lookahead(self, version, reusable_node); + CHECK(lookahead); -static ConsumeResult ts_parser__consume_lookahead(TSParser *self, - StackVersion version, - TSTree *lookahead, - bool lookahead_is_fresh) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, version); - TableEntry entry; - ts_language_table_entry(self->language, state, lookahead->symbol, &entry); + TableEntry table_entry; + ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, + &table_entry); - if (!lookahead_is_fresh && - !ts_parser__can_reuse(self, state, &entry, lookahead)) - return ConsumeInvalid; + if (!validated_lookahead) { + if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) { + if (lookahead == reusable_node->tree) + ts_parser__pop_reusable_node_leaf(reusable_node); + else + ts_parser__clear_cached_token(self); - LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), - lookahead->size.chars); + ts_tree_release(lookahead); + lookahead = ts_parser__get_lookahead(self, version, reusable_node); + CHECK(lookahead); + continue; + } + + validated_lookahead = true; + LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), + lookahead->size.chars); + } bool reduction_stopped_at_error = false; StackVersion last_reduction_version = STACK_VERSION_NONE; - for (size_t i = 0; i < entry.action_count; i++) { - TSParseAction action = entry.actions[i]; + for (size_t i = 0; i < table_entry.action_count; i++) { + TSParseAction action = table_entry.actions[i]; switch (action.type) { case TSParseActionTypeShift: { + if (lookahead->child_count > 0) { + if (ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) { + if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) { + ts_parser__pop_reusable_node(reusable_node); + ts_tree_release(lookahead); + lookahead = ts_parser__get_lookahead(self, version, reusable_node); + CHECK(lookahead); + } + } + + action = *ts_language_last_action(self->language, state, + lookahead->symbol); + } + TSStateId next_state; if (action.extra) { next_state = state; @@ -1005,7 +1000,12 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, CHECK(ts_parser__shift(self, version, next_state, lookahead, action.extra)); - return ConsumeSucceeded; + + if (lookahead == reusable_node->tree) + ts_parser__pop_reusable_node(reusable_node); + + ts_tree_release(lookahead); + return true; } case TSParseActionTypeReduce: { @@ -1015,14 +1015,14 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, if (action.extra) { LOG("reduce_extra"); } else { - LOG("reduce sym:%s, child_count:%u, fragile:%s", - SYM_NAME(action.symbol), action.child_count, - BOOL_STRING(action.fragile)); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), + action.child_count); } Reduction reduction = ts_parser__reduce(self, version, action.symbol, action.child_count, - action.extra, action.fragile, true); + action.extra, (i < table_entry.action_count - 1), + true); switch (reduction.status) { case ReduceFailed: @@ -1032,9 +1032,9 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, break; case ReduceStoppedAtError: { reduction_stopped_at_error = true; - switch (ts_parser__repair_error(self, reduction.slice, lookahead, - entry.actions, - entry.action_count)) { + switch (ts_parser__repair_error( + self, reduction.slice, lookahead->first_leaf.symbol, + table_entry.actions, table_entry.action_count)) { case RepairFailed: goto error; case RepairNoneFound: @@ -1055,58 +1055,35 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, LOG("accept"); CHECK(ts_parser__accept(self, version)); - return ConsumeSucceeded; + + ts_tree_release(lookahead); + return true; } case TSParseActionTypeRecover: { + while (lookahead->child_count > 0) { + ts_parser__breakdown_reusable_node(reusable_node); + ts_tree_release(lookahead); + lookahead = reusable_node->tree; + ts_tree_retain(lookahead); + } + action = + *ts_language_last_action(self->language, state, lookahead->symbol); + CHECK(ts_parser__recover(self, version, action.to_state, lookahead)); - return ConsumeSucceeded; + + if (lookahead == reusable_node->tree) + ts_parser__pop_reusable_node(reusable_node); + + ts_tree_release(lookahead); + return true; } } } - if (last_reduction_version == STACK_VERSION_NONE) - return ConsumeInvalid; - - ts_stack_renumber_version(self->stack, last_reduction_version, version); - LOG_STACK(); - } - -error: - return ConsumeFailed; -} - -static bool ts_parser__advance(TSParser *self, StackVersion version, - ReusableNode *reusable_node) { - TSTree *lookahead = NULL; - - for (;;) { - bool lookahead_is_fresh; - CHECK(lookahead = ts_parser__get_lookahead(self, version, reusable_node, - &lookahead_is_fresh)); - - switch (ts_parser__consume_lookahead(self, version, lookahead, - lookahead_is_fresh)) { - case ConsumeFailed: - goto error; - case ConsumeSucceeded: - if (lookahead == reusable_node->tree) - ts_parser__pop_reusable_node(reusable_node); - ts_tree_release(lookahead); - return true; - case ConsumeInvalid: - break; - } - - if (!lookahead_is_fresh) { - if (lookahead == reusable_node->tree) { - if (!ts_parser__breakdown_reusable_node(reusable_node)) - ts_parser__pop_reusable_node(reusable_node); - } else { - ts_parser__clear_cached_token(self); - } - - ts_tree_release(lookahead); + if (last_reduction_version != STACK_VERSION_NONE) { + ts_stack_renumber_version(self->stack, last_reduction_version, version); + LOG_STACK(); continue; } @@ -1114,17 +1091,17 @@ static bool ts_parser__advance(TSParser *self, StackVersion version, case BreakdownFailed: goto error; case BreakdownPerformed: - ts_tree_release(lookahead); continue; case BreakdownAborted: break; } - CHECK(ts_parser__handle_error(self, version, lookahead)); - ts_tree_release(lookahead); + CHECK(ts_parser__handle_error(self, version)); - if (ts_stack_is_halted(self->stack, version)) + if (ts_stack_is_halted(self->stack, version)) { + ts_tree_release(lookahead); return true; + } } error: