From 00a09395045b76d3b1c4e17d275139736cd6c4c6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 2 Jun 2016 14:04:48 -0700 Subject: [PATCH] Abort erroneous parse versions more eagerly --- spec/integration/corpus_specs.cc | 3 - spec/runtime/stack_spec.cc | 16 ++-- src/runtime/parser.c | 138 ++++++++++++++++++++++--------- src/runtime/stack.c | 88 ++++---------------- src/runtime/stack.h | 10 +-- 5 files changed, 125 insertions(+), 130 deletions(-) diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index 6a70da6f..c24340b5 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -156,9 +156,6 @@ describe("The Corpus", []() { string description = "\"" + inserted_text + "\" at " + to_string(edit_position); it_handles_edit_sequence("repairing an insertion of " + description, [&]() { - // ts_document_set_debugger(document, log_debugger_make(true)); - // ts_document_print_debugging_graphs(document, true); - ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); ts_document_parse(document); diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index e12d19ff..80826312 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -121,13 +121,13 @@ describe("Stack", [&]() { }); }); - describe("merge_all()", [&]() { + describe("merge()", [&]() { before_each([&]() { // . <──0── A <──1── B* // ↑ // └───2─── C* ts_stack_push(stack, 0, trees[0], false, stateA); - ts_stack_split(stack, 0); + ts_stack_duplicate_version(stack, 0); ts_stack_push(stack, 0, trees[1], false, stateB); ts_stack_push(stack, 1, trees[2], false, stateC); }); @@ -142,7 +142,7 @@ describe("Stack", [&]() { // . <──0── A <──1── B <──3── D* // ↑ | // └───2─── C <──4───┘ - ts_stack_merge_all(stack); + AssertThat(ts_stack_merge(stack, 0, 1), IsTrue()); AssertThat(ts_stack_version_count(stack), Equals(1)); AssertThat(get_stack_entries(stack, 0), Equals(vector({ {stateD, 0}, @@ -154,7 +154,7 @@ describe("Stack", [&]() { }); it("does not combine versions that have different states", [&]() { - ts_stack_merge_all(stack); + AssertThat(ts_stack_merge(stack, 0, 1), IsFalse()); AssertThat(ts_stack_version_count(stack), Equals(2)); }); @@ -166,7 +166,7 @@ describe("Stack", [&]() { ts_stack_push(stack, 0, trees[3], false, stateD); ts_stack_push(stack, 1, trees[4], false, stateD); - ts_stack_merge_all(stack); + AssertThat(ts_stack_merge(stack, 0, 1), IsFalse()); AssertThat(ts_stack_version_count(stack), Equals(2)); }); @@ -183,7 +183,7 @@ describe("Stack", [&]() { // . <──0── A <──1── B <──3── D <──5── E* // ↑ | // └───2─── C <──4───┘ - ts_stack_merge_all(stack); + AssertThat(ts_stack_merge(stack, 0, 1), IsTrue()); AssertThat(ts_stack_version_count(stack), Equals(1)); AssertThat(get_stack_entries(stack, 0), Equals(vector({ {stateE, 0}, @@ -272,7 +272,7 @@ describe("Stack", [&]() { ts_stack_push(stack, 1, trees[4], false, stateE); ts_stack_push(stack, 1, trees[5], false, stateF); ts_stack_push(stack, 1, trees[6], false, stateD); - ts_stack_merge_all(stack); + ts_stack_merge(stack, 0, 1); ts_stack_push(stack, 0, trees[10], false, stateI); AssertThat(ts_stack_version_count(stack), Equals(1)); @@ -393,7 +393,7 @@ describe("Stack", [&]() { ts_stack_push(stack, 1, trees[8], false, stateH); ts_stack_push(stack, 1, trees[9], false, stateD); ts_stack_push(stack, 1, trees[10], false, stateI); - ts_stack_merge_all(stack); + ts_stack_merge(stack, 0, 1); AssertThat(ts_stack_version_count(stack), Equals(1)); AssertThat(get_stack_entries(stack, 0), Equals(vector({ diff --git a/src/runtime/parser.c b/src/runtime/parser.c index ed203fd2..b0748090 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1,6 +1,7 @@ #include "runtime/parser.h" #include #include +#include #include #include "tree_sitter/runtime.h" #include "tree_sitter/parser.h" @@ -43,6 +44,8 @@ goto error; \ } +static const unsigned ERROR_COST_THRESHOLD = 5; + static const TSParseAction ERROR_ACTION = {.type = TSParseActionTypeError }; static const size_t NO_ERROR_DEPTH = (size_t)(-1); @@ -62,12 +65,6 @@ typedef struct { size_t best_repair_skip_count; } ErrorRepairSession; -typedef enum { - ParseActionFailed, - ParseActionUpdated, - ParseActionRemoved, -} ParseActionResult; - typedef struct { enum { ReduceFailed, @@ -122,7 +119,8 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self, } else if (!last_child->extra) { TSParseAction action = ts_language_last_action(self->language, state, last_child->symbol); - assert(action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover); + assert(action.type == TSParseActionTypeShift || + action.type == TSParseActionTypeRecover); state = action.data.to_state; } @@ -180,6 +178,33 @@ static void ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) { } while (ts_tree_is_fragile(reusable_node->tree)); } +static bool ts_parser__condense_stack(TSParser *self) { + bool result = false; + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { + if (ts_stack_is_halted(self->stack, i)) { + ts_stack_remove_version(self->stack, i); + i--; + continue; + } + + bool did_merge = false; + for (size_t j = 0; j < i; j++) { + + if (ts_stack_merge(self->stack, j, i)) { + did_merge = true; + break; + } + } + + if (did_merge) { + result = true; + i--; + continue; + } + } + return result; +} + static bool ts_parser__can_reuse(TSParser *self, StackVersion version, TSTree *tree) { if (tree->symbol == ts_builtin_sym_error) @@ -441,7 +466,15 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version, } } - ts_stack_merge_from(self->stack, initial_version_count); + for (StackVersion i = initial_version_count; + i < ts_stack_version_count(self->stack); i++) { + for (StackVersion j = initial_version_count; j < i; j++) { + if (ts_stack_merge(self->stack, j, i)) { + i--; + break; + } + } + } return (Reduction){ ReduceSucceeded, pop.slices.contents[0] }; @@ -617,6 +650,20 @@ static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice, SYM_NAME(symbol), repair.count + count_above_error, parent->error_size); + unsigned my_error_cost = ts_stack_error_cost(self->stack, slice.version); + unsigned my_error_depth = ts_stack_error_depth(self->stack, slice.version); + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { + if (i != slice.version) { + unsigned error_cost = ts_stack_error_cost(self->stack, i); + unsigned error_depth = ts_stack_error_depth(self->stack, i); + if (error_depth > my_error_depth + 1 || + (error_depth == my_error_depth + 1 && error_cost >= my_error_cost)) { + LOG_ACTION("halt_other version:%u", i); + ts_stack_halt(self->stack, i); + } + } + } + return RepairSucceeded; error: @@ -673,7 +720,7 @@ static bool ts_parser__accept(TSParser *self, StackVersion version) { } ts_stack_remove_version(self->stack, pop.slices.contents[0].version); - ts_stack_remove_version(self->stack, version); + ts_stack_halt(self->stack, version); return true; @@ -750,6 +797,30 @@ error: static bool ts_parser__recover(TSParser *self, StackVersion version, TSStateId state, TSTree *lookahead) { + if (lookahead->symbol == ts_builtin_sym_end) { + LOG_ACTION("recover_eof"); + TreeArray children = array_new(); + TSTree *parent = ts_tree_make_error_node(&children); + return ts_parser__push(self, version, parent, 1); + } + + unsigned my_error_cost = ts_stack_error_cost(self->stack, version); + unsigned my_error_depth = ts_stack_error_depth(self->stack, version); + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { + if (i != version) { + unsigned error_cost = ts_stack_error_cost(self->stack, i); + unsigned error_depth = ts_stack_error_depth(self->stack, i); + if (error_depth < my_error_depth - 1 || + (error_depth == my_error_depth - 1 && error_cost <= my_error_cost) || + (error_depth == my_error_depth && + error_cost + ERROR_COST_THRESHOLD <= my_error_cost)) { + ts_stack_halt(self->stack, version); + LOG_ACTION("bail_on_error"); + return true; + } + } + } + LOG_ACTION("recover state:%u", state); StackVersion new_version = ts_stack_duplicate_version(self->stack, version); @@ -765,15 +836,8 @@ error: return false; } -static bool ts_parser__recover_eof(TSParser *self, StackVersion version) { - TreeArray children = array_new(); - TSTree *parent = ts_tree_make_error_node(&children); - return ts_parser__push(self, version, parent, 1); -} - -static ParseActionResult ts_parser__consume_lookahead(TSParser *self, - StackVersion version, - TSTree *lookahead) { +static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, + TSTree *lookahead) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, version); @@ -830,7 +894,7 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self, CHECK(ts_parser__shift(self, version, next_state, lookahead, action.extra)); - return ParseActionUpdated; + return true; } case TSParseActionTypeReduce: { @@ -879,18 +943,13 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self, case TSParseActionTypeAccept: { LOG_ACTION("accept"); CHECK(ts_parser__accept(self, version)); - return ParseActionRemoved; + return true; } case TSParseActionTypeRecover: { - if (lookahead->symbol == ts_builtin_sym_end) { - LOG_ACTION("recover_eof"); - CHECK(ts_parser__recover_eof(self, version)); - } else { - CHECK(ts_parser__recover(self, version, action.data.to_state, - lookahead)); - } - return ParseActionUpdated; + CHECK(ts_parser__recover(self, version, action.data.to_state, + lookahead)); + return true; } } } @@ -900,7 +959,7 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self, } error: - return ParseActionFailed; + return false; } bool ts_parser_init(TSParser *self) { @@ -958,7 +1017,12 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { version < ts_stack_version_count(self->stack);) { reusable_node = current_reusable_node; - for (bool removed = false; !removed;) { + for (;;) { + if (ts_stack_is_halted(self->stack, version)) { + version++; + break; + } + last_position = position; size_t new_position = ts_stack_top_position(self->stack, version).chars; if (new_position > max_position) { @@ -987,22 +1051,16 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { LOG_ACTION("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), ts_tree_total_chars(lookahead)); - switch (ts_parser__consume_lookahead(self, version, lookahead)) { - case ParseActionFailed: - ts_tree_release(lookahead); - goto error; - case ParseActionRemoved: - removed = true; - break; - case ParseActionUpdated: - break; + if (!ts_parser__consume_lookahead(self, version, lookahead)) { + ts_tree_release(lookahead); + goto error; } } } current_reusable_node = reusable_node; - if (ts_stack_condense(self->stack)) { + if (ts_parser__condense_stack(self)) { LOG_ACTION("condense"); LOG_STACK(); } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index ee4bf0f1..859ed8c7 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -6,7 +6,6 @@ #include "runtime/length.h" #include #include -#include #define MAX_LINK_COUNT 8 #define MAX_NODE_POOL_SIZE 50 @@ -339,6 +338,14 @@ TSLength ts_stack_top_position(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->position; } +unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->node->min_error_cost; +} + +unsigned ts_stack_error_depth(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->node->error_depth; +} + size_t ts_stack_last_repaired_error_size(const Stack *self, StackVersion version) { StackNode *node = array_get(&self->heads, version)->node; @@ -364,7 +371,7 @@ bool ts_stack_push(Stack *self, StackVersion version, TSTree *tree, if (!new_node) return false; stack_node_release(node, &self->node_pool); - self->heads.contents[version] = (StackHead){ new_node, false }; + self->heads.contents[version].node = new_node; return true; } @@ -473,13 +480,6 @@ StackVersion ts_stack_duplicate_version(Stack *self, StackVersion version) { return self->heads.size - 1; } -StackVersion ts_stack_split(Stack *self, StackVersion version) { - if (!array_push(&self->heads, self->heads.contents[version])) - return STACK_VERSION_NONE; - stack_node_retain(self->heads.contents[version].node); - return self->heads.size - 1; -} - bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version) { StackNode *node = self->heads.contents[version].node; StackNode *new_node = self->heads.contents[new_version].node; @@ -496,21 +496,6 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version) } } -void ts_stack_merge_from(Stack *self, StackVersion start_version) { - for (size_t i = start_version; i < self->heads.size; i++) { - for (size_t j = start_version; j < i; j++) { - if (ts_stack_merge(self, j, i)) { - i--; - break; - } - } - } -} - -void ts_stack_merge_all(Stack *self) { - ts_stack_merge_from(self, 0); -} - void stack_node_remove_link(StackNode *self, size_t i, StackNodeArray *node_pool) { self->link_count--; @@ -536,57 +521,12 @@ void stack_node_prune_paths_with_error_cost(StackNode *self, size_t cost, } } -bool ts_stack_condense(Stack *self) { - bool did_condense = false; - unsigned min_error_cost = UINT_MAX; - unsigned min_error_depth = UINT_MAX; - for (size_t i = 0; i < self->heads.size; i++) { - StackNode *node = self->heads.contents[i].node; +void ts_stack_halt(Stack *self, StackVersion version) { + array_get(&self->heads, version)->is_halted = true; +} - bool did_remove = false; - for (size_t j = 0; j < i; j++) { - if (ts_stack_merge(self, j, i)) { - did_condense = true; - did_remove = true; - break; - } - } - - if (did_remove) { - i--; - continue; - } - - if (node->error_depth < min_error_depth || - (node->error_depth == min_error_depth && - node->min_error_cost < min_error_cost)) { - min_error_depth = node->error_depth; - min_error_cost = node->min_error_cost; - } - } - - for (size_t i = 0; i < self->heads.size; i++) { - StackNode *node = self->heads.contents[i].node; - if (node->error_depth > min_error_depth + 1) { - did_condense = true; - ts_stack_remove_version(self, i); - i--; - continue; - } else if (node->error_depth == min_error_depth + 1) { - if (node->min_error_cost >= min_error_cost) { - did_condense = true; - ts_stack_remove_version(self, i); - i--; - continue; - } else if (node->max_error_cost >= min_error_cost) { - did_condense = true; - stack_node_prune_paths_with_error_cost(node, min_error_cost, - &self->node_pool); - } - } - } - - return did_condense; +bool ts_stack_is_halted(Stack *self, StackVersion version) { + return array_get(&self->heads, version)->is_halted; } void ts_stack_clear(Stack *self) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 8b556d6e..532a4420 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -95,15 +95,15 @@ StackPopResult ts_stack_pop_pending(Stack *, StackVersion); StackPopResult ts_stack_pop_all(Stack *, StackVersion); -StackVersion ts_stack_split(Stack *, StackVersion); +unsigned ts_stack_error_depth(const Stack *, StackVersion); + +unsigned ts_stack_error_cost(const Stack *, StackVersion); bool ts_stack_merge(Stack *, StackVersion, StackVersion); -void ts_stack_merge_from(Stack *, StackVersion); +void ts_stack_halt(Stack *, StackVersion); -void ts_stack_merge_all(Stack *); - -bool ts_stack_condense(Stack *); +bool ts_stack_is_halted(Stack *, StackVersion); void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);