diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index 5ba1fc62..f543b3ff 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -2,7 +2,7 @@ #define RUNTIME_ERROR_COSTS_H_ #define ERROR_STATE 0 -#define ERROR_COST_PER_MISSING_TREE 150 +#define ERROR_COST_PER_MISSING_TREE 110 #define ERROR_COST_PER_SKIPPED_TREE 100 #define ERROR_COST_PER_SKIPPED_LINE 30 #define ERROR_COST_PER_SKIPPED_CHAR 1 diff --git a/src/runtime/parser.c b/src/runtime/parser.c index a6dfa53a..76bcb4b9 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -39,7 +39,7 @@ static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; typedef struct { unsigned cost; - unsigned push_count; + unsigned node_count; int dynamic_precedence; bool is_in_error; } ErrorStatus; @@ -105,7 +105,6 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { ts_stack_push(self->stack, slice.version, tree, false, state); } - ts_stack_decrease_push_count(self->stack, slice.version, parent->child_count + 1); ts_tree_release(&self->tree_pool, parent); array_delete(&slice.trees); @@ -151,7 +150,7 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err } if (a.cost < b.cost) { - if ((b.cost - a.cost) * (1 + a.push_count) > MAX_COST_DIFFERENCE) { + if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { return ErrorComparisonTakeLeft; } else { return ErrorComparisonPreferLeft; @@ -159,7 +158,7 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err } if (b.cost < a.cost) { - if ((a.cost - b.cost) * (1 + b.push_count) > MAX_COST_DIFFERENCE) { + if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { return ErrorComparisonTakeRight; } else { return ErrorComparisonPreferRight; @@ -177,7 +176,7 @@ static ErrorStatus parser__version_status(Parser *self, StackVersion version) { if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; return (ErrorStatus) { .cost = cost, - .push_count = ts_stack_push_count(self->stack, version), + .node_count = ts_stack_node_count_since_error(self->stack, version), .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE }; @@ -192,7 +191,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, .cost = cost, .is_in_error = is_in_error, .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .push_count = 0, + .node_count = ts_stack_node_count_since_error(self->stack, version), }; for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { @@ -933,7 +932,7 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) unsigned previous_version_count = ts_stack_version_count(self->stack); Length position = ts_stack_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned depth_since_error = ts_stack_depth_since_error(self->stack, version); + unsigned depth_since_error = ts_stack_node_count_since_error(self->stack, version); for (unsigned i = 0; i < summary->size; i++) { StackSummaryEntry entry = summary->contents[i]; @@ -1150,6 +1149,7 @@ static unsigned parser__condense_stack(Parser *self) { TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i); parser__handle_error(self, i, lookahead_symbol); has_unpaused_version = true; + min_error_cost = ts_stack_error_cost(self->stack, i); } else { ts_stack_remove_version(self->stack, i); i--; diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 60ed6575..f993f59f 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -31,7 +31,7 @@ struct StackNode { short unsigned int link_count; uint32_t ref_count; unsigned error_cost; - unsigned depth; + unsigned node_count; int dynamic_precedence; }; @@ -59,7 +59,7 @@ typedef struct { StackNode *node; Tree *last_external_token; StackSummary *summary; - uint32_t push_count; + unsigned node_count_at_last_error; TSSymbol lookahead_when_paused; StackStatus status; } StackHead; @@ -123,7 +123,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p StackNode *node = pool->size > 0 ? array_pop(pool) : ts_malloc(sizeof(StackNode)); - *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0}; + *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; if (previous_node) { node->link_count = 1; @@ -136,22 +136,25 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p node->position = previous_node->position; node->error_cost = previous_node->error_cost; node->dynamic_precedence = previous_node->dynamic_precedence; + node->node_count = previous_node->node_count; if (tree) { - node->depth = previous_node->depth; - if (!tree->extra) node->depth++; node->error_cost += tree->error_cost; node->position = length_add(node->position, ts_tree_total_size(tree)); node->dynamic_precedence += tree->dynamic_precedence; - if (state == ERROR_STATE && !tree->extra) { - node->error_cost += - ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + - ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + - ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; - if (previous_node->links[0].tree) { + if (!tree->extra) { + node->node_count += tree->node_count; + + if (state == ERROR_STATE) { node->error_cost += - ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes + - ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; + ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + + ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + + ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; + if (previous_node->links[0].tree) { + node->error_cost += + ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes + + ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; + } } } } @@ -198,6 +201,10 @@ static void stack_node_add_link(StackNode *self, StackLink link) { stack_node_retain(link.node); if (link.tree) ts_tree_retain(link.tree); self->links[self->link_count++] = link; + + unsigned node_count = link.node->node_count; + if (link.tree) node_count += link.tree->node_count; + if (node_count > self->node_count) self->node_count = node_count; } } @@ -215,22 +222,22 @@ static void stack_head_delete(StackHead *self, StackNodeArray *pool, TreePool *t } static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, - StackNode *node, Tree *last_external_token) { + StackNode *node) { StackHead head = { .node = node, - .push_count = self->heads.contents[original_version].push_count, - .last_external_token = last_external_token, + .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, + .last_external_token = self->heads.contents[original_version].last_external_token, .status = StackStatusActive, .lookahead_when_paused = 0, }; array_push(&self->heads, head); stack_node_retain(node); - if (last_external_token) ts_tree_retain(last_external_token); + if (head.last_external_token) ts_tree_retain(head.last_external_token); return (StackVersion)(self->heads.size - 1); } -static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node, - TreeArray *trees, Tree *last_external_token) { +static void ts_stack__add_slice(Stack *self, StackVersion original_version, + StackNode *node, TreeArray *trees) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == node) { @@ -240,7 +247,7 @@ static void ts_stack__add_slice(Stack *self, StackVersion original_version, Stac } } - StackVersion version = ts_stack__add_version(self, original_version, node, last_external_token); + StackVersion version = ts_stack__add_version(self, original_version, node); StackSlice slice = { *trees, version }; array_push(&self->slices, slice); } @@ -252,7 +259,6 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, array_clear(&self->iterators); StackHead *head = array_get(&self->heads, version); - Tree *last_external_token = head->last_external_token; Iterator iterator = { .node = head->node, .trees = array_new(), @@ -279,8 +285,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, self, version, node, - &trees, - last_external_token + &trees ); } @@ -381,14 +386,6 @@ Length ts_stack_position(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->position; } -unsigned ts_stack_push_count(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->push_count; -} - -void ts_stack_decrease_push_count(Stack *self, StackVersion version, unsigned decrement) { - array_get(&self->heads, version)->push_count -= decrement; -} - Tree *ts_stack_last_external_token(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->last_external_token; } @@ -405,14 +402,15 @@ unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { return head->node->error_cost; } +unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { + StackHead *head = array_get(&self->heads, version); + return head->node->node_count - head->node_count_at_last_error; +} + void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) { StackHead *head = array_get(&self->heads, version); StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool); - if (state == ERROR_STATE) { - head->push_count = 0; - } else if (!tree->extra) { - head->push_count++; - } + if (!tree) head->node_count_at_last_error = new_node->node_count; head->node = new_node; } @@ -536,10 +534,6 @@ StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { return array_get(&self->heads, version)->summary; } -unsigned ts_stack_depth_since_error(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->depth; -} - int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->dynamic_precedence; } @@ -590,7 +584,6 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version head2->status == StackStatusActive && head1->node->state == head2->node->state && head1->node->position.bytes == head2->node->position.bytes && - head1->node->depth == head2->node->depth && ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); } @@ -600,6 +593,9 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi for (uint32_t i = 0; i < head2->node->link_count; i++) { stack_node_add_link(head1->node, head2->node->links[i]); } + if (head2->node_count_at_last_error > head1->node_count_at_last_error) { + head1->node_count_at_last_error = head2->node_count_at_last_error; + } ts_stack_remove_version(self, version2); } @@ -611,6 +607,7 @@ void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) { StackHead *head = array_get(&self->heads, version); head->status = StackStatusPaused; head->lookahead_when_paused = lookahead; + head->node_count_at_last_error = head->node->node_count; } bool ts_stack_is_active(const Stack *self, StackVersion version) { @@ -671,8 +668,8 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf(f, "color=red "); } fprintf(f, - "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"push_count: %u\ndepth: %u", - i, head->push_count, head->node->depth + "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u", + i, head->node->node_count - head->node_count_at_last_error ); if (head->last_external_token) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 0be0a4dd..4704d90e 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -41,15 +41,6 @@ uint32_t ts_stack_version_count(const Stack *); // empty, this returns the initial state, 0. TSStateId ts_stack_state(const Stack *, StackVersion); -// Get the number of trees that have been pushed to a given version of -// the stack. -unsigned ts_stack_push_count(const Stack *, StackVersion); - -// In the event that trees were permanently removed from some version -// of the stack, decrease the version's push count to account for the -// removal. -void ts_stack_decrease_push_count(Stack *, StackVersion, unsigned); - // Get the last external token associated with a given version of the stack. Tree *ts_stack_last_external_token(const Stack *, StackVersion); @@ -82,7 +73,8 @@ StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); // Remove any all trees from the given version of the stack. StackSliceArray ts_stack_pop_all(Stack *, StackVersion); -unsigned ts_stack_depth_since_error(Stack *, StackVersion); +// Get the number of tree nodes on the stack since the most recent error. +unsigned ts_stack_node_count_since_error(const Stack *, StackVersion); int ts_stack_dynamic_precedence(Stack *, StackVersion); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index ae35e1f8..c58c987a 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -177,6 +177,7 @@ Tree *ts_tree_make_leaf(TreePool *pool, TSSymbol symbol, Length padding, Length .padding = padding, .visible = metadata.visible, .named = metadata.named, + .node_count = 1, .has_changes = false, .first_leaf = { .symbol = symbol, @@ -305,6 +306,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, self->visible_child_count = 0; self->error_cost = 0; self->repeat_depth = 0; + self->node_count = 1; self->has_external_tokens = false; self->dynamic_precedence = 0; @@ -326,6 +328,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, self->error_cost += child->error_cost; self->dynamic_precedence += child->dynamic_precedence; + self->node_count += child->node_count; if (alias_sequence && alias_sequence[non_extra_index] != 0 && !child->extra) { self->visible_child_count++; diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 0e3c2880..ad104894 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -50,6 +50,7 @@ typedef struct Tree { TSSymbol symbol; TSStateId parse_state; unsigned error_cost; + unsigned node_count; unsigned repeat_depth; struct { diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 250f13b6..d435ba86 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -79,12 +79,13 @@ if ({a: 'b'} {c: 'd'}) { (statement_block (expression_statement (assignment_expression (identifier) - (ERROR (function - (formal_parameters (identifier)) - (statement_block (expression_statement (identifier))))) - (function - (formal_parameters (identifier)) - (statement_block (expression_statement (identifier))))))))) + (call_expression + (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))) + (ERROR) + (arguments (identifier)))) + (MISSING)) + (statement_block + (expression_statement (identifier)))))) =================================================== Extra tokens at the end of the file diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index 6d5ac27e..6cee7870 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -32,6 +32,8 @@ vector test_languages({ for (auto &language_name : test_languages) { describe(("the " + language_name + " language").c_str(), [&]() { TSDocument *document; + const bool debug_graphs_enabled = getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS"); + before_each([&]() { record_alloc::start(); @@ -39,7 +41,7 @@ for (auto &language_name : test_languages) { ts_document_set_language(document, load_real_language(language_name)); // ts_document_set_logger(document, stderr_logger_new(true)); - if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { + if (debug_graphs_enabled) { ts_document_print_debugging_graphs(document, true); } }); @@ -55,6 +57,7 @@ for (auto &language_name : test_languages) { auto it_handles_edit_sequence = [&](string name, std::function edit_sequence){ it(("parses " + entry.description + ": " + name).c_str(), [&]() { input = new SpyInput(entry.input, 3); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_set_input(document, input->input()); edit_sequence(); @@ -88,9 +91,11 @@ for (auto &language_name : test_languages) { ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); ts_document_parse(document); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); TSRange *ranges; uint32_t range_count; @@ -112,9 +117,11 @@ for (auto &language_name : test_languages) { ts_document_edit(document, input->replace(edit_position, deletion_size, "")); ts_document_parse(document); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); TSRange *ranges; uint32_t range_count;