From 80f856cef56f5c0a350625a0483211b8559a8892 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 10:57:44 -0700 Subject: [PATCH] Maintain a total node count on every tree This simplifies (and fixes bugs in) the parse stack's tracking of its total node count since the last error, which is needed for error recovery. --- src/runtime/error_costs.h | 2 +- src/runtime/parser.c | 14 ++-- src/runtime/stack.c | 83 +++++++++---------- src/runtime/stack.h | 12 +-- src/runtime/tree.c | 3 + src/runtime/tree.h | 1 + .../error_corpus/javascript_errors.txt | 13 +-- test/integration/real_grammars.cc | 9 +- 8 files changed, 69 insertions(+), 68 deletions(-) diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index 5ba1fc62..f543b3ff 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -2,7 +2,7 @@ #define RUNTIME_ERROR_COSTS_H_ #define ERROR_STATE 0 -#define ERROR_COST_PER_MISSING_TREE 150 +#define ERROR_COST_PER_MISSING_TREE 110 #define ERROR_COST_PER_SKIPPED_TREE 100 #define ERROR_COST_PER_SKIPPED_LINE 30 #define ERROR_COST_PER_SKIPPED_CHAR 1 diff --git a/src/runtime/parser.c b/src/runtime/parser.c index a6dfa53a..76bcb4b9 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -39,7 +39,7 @@ static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; typedef struct { unsigned cost; - unsigned push_count; + unsigned node_count; int dynamic_precedence; bool is_in_error; } ErrorStatus; @@ -105,7 +105,6 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { ts_stack_push(self->stack, slice.version, tree, false, state); } - ts_stack_decrease_push_count(self->stack, slice.version, parent->child_count + 1); ts_tree_release(&self->tree_pool, parent); array_delete(&slice.trees); @@ -151,7 +150,7 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err } if (a.cost < b.cost) { - if ((b.cost - a.cost) * (1 + a.push_count) > MAX_COST_DIFFERENCE) { + if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { return ErrorComparisonTakeLeft; } else { return ErrorComparisonPreferLeft; @@ -159,7 +158,7 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err } if (b.cost < a.cost) { - if ((a.cost - b.cost) * (1 + b.push_count) > MAX_COST_DIFFERENCE) { + if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { return ErrorComparisonTakeRight; } else { return ErrorComparisonPreferRight; @@ -177,7 +176,7 @@ static ErrorStatus parser__version_status(Parser *self, StackVersion version) { if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; return (ErrorStatus) { .cost = cost, - .push_count = ts_stack_push_count(self->stack, version), + .node_count = ts_stack_node_count_since_error(self->stack, version), .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE }; @@ -192,7 +191,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, .cost = cost, .is_in_error = is_in_error, .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .push_count = 0, + .node_count = ts_stack_node_count_since_error(self->stack, version), }; for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { @@ -933,7 +932,7 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) unsigned previous_version_count = ts_stack_version_count(self->stack); Length position = ts_stack_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned depth_since_error = ts_stack_depth_since_error(self->stack, version); + unsigned depth_since_error = ts_stack_node_count_since_error(self->stack, version); for (unsigned i = 0; i < summary->size; i++) { StackSummaryEntry entry = summary->contents[i]; @@ -1150,6 +1149,7 @@ static unsigned parser__condense_stack(Parser *self) { TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i); parser__handle_error(self, i, lookahead_symbol); has_unpaused_version = true; + min_error_cost = ts_stack_error_cost(self->stack, i); } else { ts_stack_remove_version(self->stack, i); i--; diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 60ed6575..f993f59f 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -31,7 +31,7 @@ struct StackNode { short unsigned int link_count; uint32_t ref_count; unsigned error_cost; - unsigned depth; + unsigned node_count; int dynamic_precedence; }; @@ -59,7 +59,7 @@ typedef struct { StackNode *node; Tree *last_external_token; StackSummary *summary; - uint32_t push_count; + unsigned node_count_at_last_error; TSSymbol lookahead_when_paused; StackStatus status; } StackHead; @@ -123,7 +123,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p StackNode *node = pool->size > 0 ? array_pop(pool) : ts_malloc(sizeof(StackNode)); - *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0}; + *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; if (previous_node) { node->link_count = 1; @@ -136,22 +136,25 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p node->position = previous_node->position; node->error_cost = previous_node->error_cost; node->dynamic_precedence = previous_node->dynamic_precedence; + node->node_count = previous_node->node_count; if (tree) { - node->depth = previous_node->depth; - if (!tree->extra) node->depth++; node->error_cost += tree->error_cost; node->position = length_add(node->position, ts_tree_total_size(tree)); node->dynamic_precedence += tree->dynamic_precedence; - if (state == ERROR_STATE && !tree->extra) { - node->error_cost += - ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + - ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + - ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; - if (previous_node->links[0].tree) { + if (!tree->extra) { + node->node_count += tree->node_count; + + if (state == ERROR_STATE) { node->error_cost += - ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes + - ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; + ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + + ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + + ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; + if (previous_node->links[0].tree) { + node->error_cost += + ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes + + ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; + } } } } @@ -198,6 +201,10 @@ static void stack_node_add_link(StackNode *self, StackLink link) { stack_node_retain(link.node); if (link.tree) ts_tree_retain(link.tree); self->links[self->link_count++] = link; + + unsigned node_count = link.node->node_count; + if (link.tree) node_count += link.tree->node_count; + if (node_count > self->node_count) self->node_count = node_count; } } @@ -215,22 +222,22 @@ static void stack_head_delete(StackHead *self, StackNodeArray *pool, TreePool *t } static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, - StackNode *node, Tree *last_external_token) { + StackNode *node) { StackHead head = { .node = node, - .push_count = self->heads.contents[original_version].push_count, - .last_external_token = last_external_token, + .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, + .last_external_token = self->heads.contents[original_version].last_external_token, .status = StackStatusActive, .lookahead_when_paused = 0, }; array_push(&self->heads, head); stack_node_retain(node); - if (last_external_token) ts_tree_retain(last_external_token); + if (head.last_external_token) ts_tree_retain(head.last_external_token); return (StackVersion)(self->heads.size - 1); } -static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node, - TreeArray *trees, Tree *last_external_token) { +static void ts_stack__add_slice(Stack *self, StackVersion original_version, + StackNode *node, TreeArray *trees) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == node) { @@ -240,7 +247,7 @@ static void ts_stack__add_slice(Stack *self, StackVersion original_version, Stac } } - StackVersion version = ts_stack__add_version(self, original_version, node, last_external_token); + StackVersion version = ts_stack__add_version(self, original_version, node); StackSlice slice = { *trees, version }; array_push(&self->slices, slice); } @@ -252,7 +259,6 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, array_clear(&self->iterators); StackHead *head = array_get(&self->heads, version); - Tree *last_external_token = head->last_external_token; Iterator iterator = { .node = head->node, .trees = array_new(), @@ -279,8 +285,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, self, version, node, - &trees, - last_external_token + &trees ); } @@ -381,14 +386,6 @@ Length ts_stack_position(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->position; } -unsigned ts_stack_push_count(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->push_count; -} - -void ts_stack_decrease_push_count(Stack *self, StackVersion version, unsigned decrement) { - array_get(&self->heads, version)->push_count -= decrement; -} - Tree *ts_stack_last_external_token(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->last_external_token; } @@ -405,14 +402,15 @@ unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { return head->node->error_cost; } +unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { + StackHead *head = array_get(&self->heads, version); + return head->node->node_count - head->node_count_at_last_error; +} + void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) { StackHead *head = array_get(&self->heads, version); StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool); - if (state == ERROR_STATE) { - head->push_count = 0; - } else if (!tree->extra) { - head->push_count++; - } + if (!tree) head->node_count_at_last_error = new_node->node_count; head->node = new_node; } @@ -536,10 +534,6 @@ StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { return array_get(&self->heads, version)->summary; } -unsigned ts_stack_depth_since_error(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->depth; -} - int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->dynamic_precedence; } @@ -590,7 +584,6 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version head2->status == StackStatusActive && head1->node->state == head2->node->state && head1->node->position.bytes == head2->node->position.bytes && - head1->node->depth == head2->node->depth && ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); } @@ -600,6 +593,9 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi for (uint32_t i = 0; i < head2->node->link_count; i++) { stack_node_add_link(head1->node, head2->node->links[i]); } + if (head2->node_count_at_last_error > head1->node_count_at_last_error) { + head1->node_count_at_last_error = head2->node_count_at_last_error; + } ts_stack_remove_version(self, version2); } @@ -611,6 +607,7 @@ void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) { StackHead *head = array_get(&self->heads, version); head->status = StackStatusPaused; head->lookahead_when_paused = lookahead; + head->node_count_at_last_error = head->node->node_count; } bool ts_stack_is_active(const Stack *self, StackVersion version) { @@ -671,8 +668,8 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf(f, "color=red "); } fprintf(f, - "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"push_count: %u\ndepth: %u", - i, head->push_count, head->node->depth + "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u", + i, head->node->node_count - head->node_count_at_last_error ); if (head->last_external_token) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 0be0a4dd..4704d90e 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -41,15 +41,6 @@ uint32_t ts_stack_version_count(const Stack *); // empty, this returns the initial state, 0. TSStateId ts_stack_state(const Stack *, StackVersion); -// Get the number of trees that have been pushed to a given version of -// the stack. -unsigned ts_stack_push_count(const Stack *, StackVersion); - -// In the event that trees were permanently removed from some version -// of the stack, decrease the version's push count to account for the -// removal. -void ts_stack_decrease_push_count(Stack *, StackVersion, unsigned); - // Get the last external token associated with a given version of the stack. Tree *ts_stack_last_external_token(const Stack *, StackVersion); @@ -82,7 +73,8 @@ StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); // Remove any all trees from the given version of the stack. StackSliceArray ts_stack_pop_all(Stack *, StackVersion); -unsigned ts_stack_depth_since_error(Stack *, StackVersion); +// Get the number of tree nodes on the stack since the most recent error. +unsigned ts_stack_node_count_since_error(const Stack *, StackVersion); int ts_stack_dynamic_precedence(Stack *, StackVersion); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index ae35e1f8..c58c987a 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -177,6 +177,7 @@ Tree *ts_tree_make_leaf(TreePool *pool, TSSymbol symbol, Length padding, Length .padding = padding, .visible = metadata.visible, .named = metadata.named, + .node_count = 1, .has_changes = false, .first_leaf = { .symbol = symbol, @@ -305,6 +306,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, self->visible_child_count = 0; self->error_cost = 0; self->repeat_depth = 0; + self->node_count = 1; self->has_external_tokens = false; self->dynamic_precedence = 0; @@ -326,6 +328,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, self->error_cost += child->error_cost; self->dynamic_precedence += child->dynamic_precedence; + self->node_count += child->node_count; if (alias_sequence && alias_sequence[non_extra_index] != 0 && !child->extra) { self->visible_child_count++; diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 0e3c2880..ad104894 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -50,6 +50,7 @@ typedef struct Tree { TSSymbol symbol; TSStateId parse_state; unsigned error_cost; + unsigned node_count; unsigned repeat_depth; struct { diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 250f13b6..d435ba86 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -79,12 +79,13 @@ if ({a: 'b'} {c: 'd'}) { (statement_block (expression_statement (assignment_expression (identifier) - (ERROR (function - (formal_parameters (identifier)) - (statement_block (expression_statement (identifier))))) - (function - (formal_parameters (identifier)) - (statement_block (expression_statement (identifier))))))))) + (call_expression + (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))) + (ERROR) + (arguments (identifier)))) + (MISSING)) + (statement_block + (expression_statement (identifier)))))) =================================================== Extra tokens at the end of the file diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index 6d5ac27e..6cee7870 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -32,6 +32,8 @@ vector test_languages({ for (auto &language_name : test_languages) { describe(("the " + language_name + " language").c_str(), [&]() { TSDocument *document; + const bool debug_graphs_enabled = getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS"); + before_each([&]() { record_alloc::start(); @@ -39,7 +41,7 @@ for (auto &language_name : test_languages) { ts_document_set_language(document, load_real_language(language_name)); // ts_document_set_logger(document, stderr_logger_new(true)); - if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { + if (debug_graphs_enabled) { ts_document_print_debugging_graphs(document, true); } }); @@ -55,6 +57,7 @@ for (auto &language_name : test_languages) { auto it_handles_edit_sequence = [&](string name, std::function edit_sequence){ it(("parses " + entry.description + ": " + name).c_str(), [&]() { input = new SpyInput(entry.input, 3); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_set_input(document, input->input()); edit_sequence(); @@ -88,9 +91,11 @@ for (auto &language_name : test_languages) { ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); ts_document_parse(document); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); TSRange *ranges; uint32_t range_count; @@ -112,9 +117,11 @@ for (auto &language_name : test_languages) { ts_document_edit(document, input->replace(edit_position, deletion_size, "")); ts_document_parse(document); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); TSRange *ranges; uint32_t range_count;