diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index c24340b5..6a70da6f 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -156,6 +156,9 @@ describe("The Corpus", []() { string description = "\"" + inserted_text + "\" at " + to_string(edit_position); it_handles_edit_sequence("repairing an insertion of " + description, [&]() { + // ts_document_set_debugger(document, log_debugger_make(true)); + // ts_document_print_debugging_graphs(document, true); + ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); ts_document_parse(document); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index f5631e46..14fa2af3 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -583,7 +583,7 @@ static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice, TSSymbol symbol = repair.symbol; StackSlice new_slice = array_pop(&pop.slices); - TreeArray children_below = new_slice.trees; + TreeArray children = new_slice.trees; ts_stack_renumber_version(self->stack, new_slice.version, slice.version); for (size_t i = pop.slices.size - 1; i + 1 > 0; i--) { @@ -593,38 +593,29 @@ static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice, ts_stack_remove_version(self->stack, other_slice.version); } - LOG_ACTION("repair_found sym:%s, child_count:%lu, skipped:%lu", - SYM_NAME(symbol), repair.count + count_above_error, skip_count); + TreeArray skipped_children = array_new(); + CHECK(array_grow(&skipped_children, skip_count)); + for (size_t i = repair.count; i < children.size; i++) + array_push(&skipped_children, children.contents[i]); - if (skip_count > 0) { - TreeArray skipped_children = array_new(); - CHECK(array_grow(&skipped_children, skip_count)); - for (size_t i = repair.count; i < children_below.size; i++) - array_push(&skipped_children, children_below.contents[i]); - - TSTree *error = ts_tree_make_error_node(&skipped_children); - CHECK(error); - children_below.size = repair.count; - array_push(&children_below, error); - } + TSTree *error = ts_tree_make_error_node(&skipped_children); + CHECK(error); + children.size = repair.count; + array_push(&children, error); for (size_t i = 0; i < slice.trees.size; i++) - array_push(&children_below, slice.trees.contents[i]); + array_push(&children, slice.trees.contents[i]); array_delete(&slice.trees); TSTree *parent = - ts_tree_make_node(symbol, children_below.size, children_below.contents, + ts_tree_make_node(symbol, children.size, children.contents, ts_language_symbol_metadata(self->language, symbol)); CHECK(parent); CHECK(ts_parser__push(self, slice.version, parent, next_state)); - for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { - size_t error_length = ts_stack_error_length(self->stack, i); - if ((error_length >= parent->error_size) || - (error_length == 0 && - ts_stack_last_repaired_error_size(self->stack, i) > parent->error_size)) - ts_stack_halt(self->stack, i); - } + LOG_ACTION("repair_found sym:%s, child_count:%lu, skipped:%lu", + SYM_NAME(symbol), repair.count + count_above_error, + parent->error_size); return RepairSucceeded; @@ -692,41 +683,64 @@ error: static bool ts_parser__handle_error(TSParser *self, StackVersion version, TSStateId state, TSTree *lookahead) { + size_t previous_version_count = ts_stack_version_count(self->stack); + + bool has_shift_action = false; array_clear(&self->reduce_actions); for (TSSymbol symbol = 0; symbol < self->language->symbol_count; symbol++) { size_t action_count; const TSParseAction *actions = ts_language_actions(self->language, state, symbol, &action_count); + for (size_t i = 0; i < action_count; i++) { TSParseAction action = actions[i]; - if (action.type == TSParseActionTypeReduce && !action.extra) - CHECK(ts_reduce_action_set_add( - &self->reduce_actions, - (ReduceAction){ - .symbol = action.data.symbol, .count = action.data.child_count, - })); + if (action.extra) + continue; + switch (action.type) { + case TSParseActionTypeShift: + case TSParseActionTypeRecover: + has_shift_action = true; + break; + case TSParseActionTypeReduce: + if (action.data.child_count > 0) + CHECK(ts_reduce_action_set_add( + &self->reduce_actions, + (ReduceAction){ + .symbol = action.data.symbol, .count = action.data.child_count, + })); + default: + break; + } } } - StackVersion scratch_version = ts_stack_split(self->stack, version); - CHECK(scratch_version != STACK_VERSION_NONE); - CHECK(ts_stack_push(self->stack, version, NULL, false, ts_parse_state_error)); - - size_t previous_version_count = ts_stack_version_count(self->stack); + bool did_reduce = false; for (size_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; - Reduction reduction = ts_parser__reduce(self, scratch_version, action.symbol, + Reduction reduction = ts_parser__reduce(self, version, action.symbol, action.count, false, true); - CHECK(reduction.status != ReduceFailed); - assert(reduction.status == ReduceSucceeded); - while (ts_stack_version_count(self->stack) > previous_version_count) { - CHECK(ts_stack_push(self->stack, previous_version_count, NULL, false, - ts_parse_state_error)); - assert(ts_stack_merge(self->stack, version, previous_version_count)); + switch (reduction.status) { + case ReduceFailed: + goto error; + case ReduceStoppedAtError: + ts_tree_array_delete(&reduction.slice.trees); + ts_stack_remove_version(self->stack, reduction.slice.version); + continue; + default: + did_reduce = true; + break; } } - ts_stack_remove_version(self->stack, scratch_version); + if (did_reduce && !has_shift_action) + ts_stack_renumber_version(self->stack, previous_version_count, version); + + CHECK(ts_stack_push(self->stack, version, NULL, false, ts_parse_state_error)); + while (ts_stack_version_count(self->stack) > previous_version_count) { + CHECK(ts_stack_push(self->stack, previous_version_count, NULL, false, + ts_parse_state_error)); + assert(ts_stack_merge(self->stack, version, previous_version_count)); + } return true; @@ -736,27 +750,13 @@ error: static bool ts_parser__recover(TSParser *self, StackVersion version, TSStateId state, TSTree *lookahead) { - size_t error_length = ts_stack_error_length(self->stack, version); + LOG_ACTION("recover state:%u", state); - bool has_repaired = false; - for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) - if (i != version && ts_stack_error_length(self->stack, i) == 0 && - ts_stack_last_repaired_error_size(self->stack, i) <= error_length) { - has_repaired = true; - break; - } - - if (has_repaired) { - LOG_ACTION("final_recover state:%u, error_length:%lu ", state, error_length); - } else { - StackVersion new_version = ts_stack_duplicate_version(self->stack, version); - CHECK(new_version != STACK_VERSION_NONE); - CHECK(ts_parser__shift( - self, new_version, ts_parse_state_error, lookahead, - ts_language_symbol_metadata(self->language, lookahead->symbol).extra)); - LOG_ACTION("recover_and_discard state:%u, error_length:%lu", state, - error_length); - } + StackVersion new_version = ts_stack_duplicate_version(self->stack, version); + CHECK(new_version != STACK_VERSION_NONE); + CHECK(ts_parser__shift( + self, new_version, ts_parse_state_error, lookahead, + ts_language_symbol_metadata(self->language, lookahead->symbol).extra)); CHECK(ts_parser__shift(self, version, state, lookahead, false)); return true; @@ -812,15 +812,10 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self, break; } - if (ts_stack_version_count(self->stack) == 1 && !self->finished_tree) { - LOG_ACTION("handle_error"); - CHECK(ts_parser__handle_error(self, version, state, lookahead)); - break; - } else { - LOG_ACTION("bail version:%d", version); - ts_stack_remove_version(self->stack, version); - return ParseActionRemoved; - } + LOG_ACTION("handle_error"); + CHECK(ts_parser__handle_error(self, version, state, lookahead)); + error_repair_failed = false; + break; } case TSParseActionTypeShift: { @@ -867,7 +862,9 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self, case RepairFailed: goto error; case RepairNoneFound: - error_repair_failed = true; + if (last_reduction_version == STACK_VERSION_NONE) { + error_repair_failed = true; + } break; case RepairSucceeded: last_reduction_version = reduction.slice.version; @@ -949,10 +946,9 @@ void ts_parser_set_debugger(TSParser *self, TSDebugger debugger) { TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { ts_parser__start(self, input, previous_tree); size_t max_position = 0; - ReusableNode current_reusable_node, next_reusable_node = { previous_tree, 0 }; + ReusableNode reusable_node, current_reusable_node = { previous_tree, 0 }; for (;;) { - current_reusable_node = next_reusable_node; TSTree *lookahead = NULL; size_t last_position, position = 0; @@ -960,19 +956,13 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { for (StackVersion version = 0; version < ts_stack_version_count(self->stack);) { - if (ts_stack_is_halted(self->stack, version)) { - version++; - continue; - } - - ReusableNode reusable_node = current_reusable_node; + reusable_node = current_reusable_node; for (bool removed = false; !removed;) { last_position = position; size_t new_position = ts_stack_top_position(self->stack, version).chars; if (new_position > max_position) { max_position = new_position; - next_reusable_node = reusable_node; version++; break; } else if (new_position == max_position && version > 0) { @@ -1010,7 +1000,13 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { } } - ts_stack_merge_all(self->stack); + current_reusable_node = reusable_node; + + if (ts_stack_condense(self->stack)) { + LOG_ACTION("condense"); + LOG_STACK(); + } + ts_tree_release(lookahead); if (ts_stack_version_count(self->stack) == 0) { diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 7b3d8fc6..c8458dac 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -6,6 +6,7 @@ #include "runtime/length.h" #include #include +#include #define MAX_LINK_COUNT 8 #define MAX_NODE_POOL_SIZE 50 @@ -26,7 +27,9 @@ struct StackNode { StackLink links[MAX_LINK_COUNT]; short unsigned int link_count; short unsigned int ref_count; - size_t error_length; + unsigned min_error_cost; + unsigned max_error_cost; + unsigned error_depth; }; typedef struct { @@ -90,22 +93,35 @@ static StackNode *stack_node_new(StackNode *next, TSTree *tree, bool is_pending, else if (!(node = ts_malloc(sizeof(StackNode)))) return NULL; + bool is_error = (state == ts_parse_state_error); + *node = (StackNode){ .ref_count = 1, .link_count = 0, .links = {}, .state = state, .position = position, - .error_length = (state == ts_parse_state_error) ? 1 : 0, + .error_depth = 0, + .min_error_cost = is_error ? 1 : 0, + .max_error_cost = is_error ? 1 : 0, }; if (next) { - if (tree) - ts_tree_retain(tree); stack_node_retain(next); - node->link_count = 1; node->links[0] = (StackLink){ next, tree, is_pending }; - node->error_length += next->error_length; + + node->link_count = 1; + node->min_error_cost += next->min_error_cost; + node->max_error_cost += next->max_error_cost; + node->error_depth = next->error_depth; + + if (tree) { + ts_tree_retain(tree); + node->min_error_cost += tree->error_size; + node->max_error_cost += tree->error_size; + } else { + node->error_depth++; + } } return node; @@ -127,8 +143,16 @@ static void stack_node_add_link(StackNode *self, StackLink link) { if (self->link_count < MAX_LINK_COUNT) { stack_node_retain(link.node); - if (link.tree) + if (link.tree) { ts_tree_retain(link.tree); + size_t min_error_cost = link.tree->error_size + link.node->min_error_cost; + size_t max_error_cost = link.tree->error_size + link.node->max_error_cost; + if (min_error_cost < self->min_error_cost) + self->min_error_cost = min_error_cost; + if (max_error_cost < self->max_error_cost) + self->max_error_cost = max_error_cost; + } + self->links[self->link_count++] = (StackLink){ link.node, link.tree, link.is_pending, }; @@ -257,7 +281,7 @@ Stack *ts_stack_new() { if (!array_grow(&self->pop_paths, 4)) goto error; - if (!array_grow(&self->node_pool, 20)) + if (!array_grow(&self->node_pool, MAX_NODE_POOL_SIZE)) goto error; self->base_node = @@ -315,10 +339,6 @@ TSLength ts_stack_top_position(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->position; } -size_t ts_stack_error_length(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->error_length; -} - size_t ts_stack_last_repaired_error_size(const Stack *self, StackVersion version) { StackNode *node = array_get(&self->heads, version)->node; @@ -333,14 +353,6 @@ size_t ts_stack_last_repaired_error_size(const Stack *self, return 0; } -void ts_stack_halt(Stack *self, StackVersion version) { - array_get(&self->heads, version)->is_halted = true; -} - -bool ts_stack_is_halted(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->is_halted; -} - bool ts_stack_push(Stack *self, StackVersion version, TSTree *tree, bool is_pending, TSStateId state) { StackNode *node = array_get(&self->heads, version)->node; @@ -394,7 +406,8 @@ StackPopResult ts_stack_pop_count(Stack *self, StackVersion version, ts_stack_remove_version(self, error_slice.version); ts_tree_array_delete(&error_slice.trees); array_erase(&pop.slices, 0); - pop.slices.contents[0].version--; + for (StackVersion i = 0; i < pop.slices.size; i++) + pop.slices.contents[i].version--; } else { pop.status = StackPopStoppedAtError; } @@ -470,7 +483,8 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version) StackNode *new_node = self->heads.contents[new_version].node; if (new_node->state == node->state && - new_node->position.chars == node->position.chars) { + new_node->position.chars == node->position.chars && + new_node->error_depth == node->error_depth) { for (size_t j = 0; j < new_node->link_count; j++) stack_node_add_link(node, new_node->links[j]); ts_stack_remove_version(self, new_version); @@ -482,26 +496,8 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version) void ts_stack_merge_from(Stack *self, StackVersion start_version) { for (size_t i = start_version; i < self->heads.size; i++) { - if (self->heads.contents[i].is_halted) { - ts_stack_remove_version(self, i); - i--; - continue; - } - - StackNode *node = self->heads.contents[i].node; for (size_t j = start_version; j < i; j++) { - StackNode *prior_node = self->heads.contents[j].node; - if (prior_node->state == node->state && - prior_node->position.chars == node->position.chars) { - if (prior_node->error_length < node->error_length) { - ts_stack_remove_version(self, i); - } else if (node->error_length < prior_node->error_length) { - ts_stack_remove_version(self, j); - } else { - for (size_t k = 0; k < node->link_count; k++) - stack_node_add_link(prior_node, node->links[k]); - ts_stack_remove_version(self, i); - } + if (ts_stack_merge(self, j, i)) { i--; break; } @@ -513,6 +509,84 @@ void ts_stack_merge_all(Stack *self) { ts_stack_merge_from(self, 0); } +void stack_node_remove_link(StackNode *self, size_t i, + StackNodeArray *node_pool) { + self->link_count--; + ts_tree_release(self->links[i].tree); + stack_node_release(self->links[i].node, node_pool); + memmove(&self->links[i], &self->links[i + 1], + (self->link_count - i) * sizeof(StackLink)); +} + +void stack_node_prune_paths_with_error_cost(StackNode *self, size_t cost, + StackNodeArray *node_pool) { + for (size_t i = 0; i < self->link_count; i++) { + StackLink link = self->links[i]; + size_t link_cost = cost; + if (link.tree) + link_cost -= link.tree->error_size; + if (link.node->min_error_cost >= link_cost) { + stack_node_remove_link(self, i, node_pool); + i--; + } else if (link.node->max_error_cost >= link_cost) { + stack_node_prune_paths_with_error_cost(link.node, link_cost, node_pool); + } + } +} + +bool ts_stack_condense(Stack *self) { + bool did_condense = false; + unsigned min_error_cost = UINT_MAX; + unsigned min_error_depth = UINT_MAX; + for (size_t i = 0; i < self->heads.size; i++) { + StackNode *node = self->heads.contents[i].node; + + bool did_remove = false; + for (size_t j = 0; j < i; j++) { + if (ts_stack_merge(self, j, i)) { + did_condense = true; + did_remove = true; + break; + } + } + + if (did_remove) { + i--; + continue; + } + + if (node->error_depth < min_error_depth || + (node->error_depth == min_error_depth && + node->min_error_cost < min_error_cost)) { + min_error_depth = node->error_depth; + min_error_cost = node->min_error_cost; + } + } + + for (size_t i = 0; i < self->heads.size; i++) { + StackNode *node = self->heads.contents[i].node; + if (node->error_depth > min_error_depth + 1) { + did_condense = true; + ts_stack_remove_version(self, i); + i--; + continue; + } else if (node->error_depth == min_error_depth + 1) { + if (node->min_error_cost >= min_error_cost) { + did_condense = true; + ts_stack_remove_version(self, i); + i--; + continue; + } else if (node->max_error_cost >= min_error_cost) { + did_condense = true; + stack_node_prune_paths_with_error_cost(node, min_error_cost, + &self->node_pool); + } + } + } + + return did_condense; +} + void ts_stack_clear(Stack *self) { stack_node_retain(self->base_node); for (size_t i = 0; i < self->heads.size; i++) @@ -571,8 +645,13 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { node->links[0].tree->extra) fprintf(f, "shape=point margin=0 label=\"\""); else - fprintf(f, "label=%d", node->state); - fprintf(f, "];\n"); + fprintf(f, "label=\"%d\"", node->state); + fprintf(f, " tooltip=\"error-count:%u, error-cost:", node->error_depth); + if (node->min_error_cost == node->max_error_cost) + fprintf(f, "%u", node->min_error_cost); + else + fprintf(f, "%u-%u", node->min_error_cost, node->max_error_cost); + fprintf(f, "\"];\n"); for (int j = 0; j < node->link_count; j++) { StackLink link = node->links[j]; diff --git a/src/runtime/stack.h b/src/runtime/stack.h index a36c5dae..8b556d6e 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -73,14 +73,6 @@ TSStateId ts_stack_top_state(const Stack *, StackVersion); */ TSLength ts_stack_top_position(const Stack *, StackVersion); -size_t ts_stack_error_length(const Stack *, StackVersion); - -size_t ts_stack_last_repaired_error_size(const Stack *, StackVersion); - -void ts_stack_halt(Stack *, StackVersion); - -bool ts_stack_is_halted(const Stack *, StackVersion); - /* * Push a tree and state onto the given head of the stack. This could cause * the version to merge with an existing version. @@ -111,6 +103,8 @@ void ts_stack_merge_from(Stack *, StackVersion); void ts_stack_merge_all(Stack *); +bool ts_stack_condense(Stack *); + void ts_stack_renumber_version(Stack *, StackVersion, StackVersion); StackVersion ts_stack_duplicate_version(Stack *, StackVersion); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index f691866d..b73f64ad 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -92,7 +92,7 @@ recur: offset = ts_length_zero(); for (size_t i = 0; i < self->child_count; i++) { TSTree *child = self->children[i]; - if (child->context.parent != self) { + if (child->context.parent != self || child->context.index != i) { child->context.parent = self; child->context.index = i; child->context.offset = offset;