From e59558c83b24ce07f142506add1763413d1fcecc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 09:47:01 -0700 Subject: [PATCH 01/16] Allow stack versions to be temporarily paused This way, when detecting an error, we can defer the decision about whether to bail or recover until all stack versions are processed. --- src/runtime/parser.c | 257 ++++++++++++------------ src/runtime/stack.c | 64 ++++-- src/runtime/stack.h | 10 +- test/fixtures/error_corpus/c_errors.txt | 21 ++ 4 files changed, 206 insertions(+), 146 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 34b4506b..a6dfa53a 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -171,6 +171,18 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err return ErrorComparisonNone; } +static ErrorStatus parser__version_status(Parser *self, StackVersion version) { + unsigned cost = ts_stack_error_cost(self->stack, version); + bool is_paused = ts_stack_is_paused(self->stack, version); + if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; + return (ErrorStatus) { + .cost = cost, + .push_count = ts_stack_push_count(self->stack, version), + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE + }; +} + static bool parser__better_version_exists(Parser *self, StackVersion version, bool is_in_error, unsigned cost) { if (self->finished_tree && self->finished_tree->error_cost <= cost) return true; @@ -185,14 +197,9 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { if (i == version || - ts_stack_is_halted(self->stack, i) || + !ts_stack_is_active(self->stack, i) || ts_stack_position(self->stack, i).bytes < position.bytes) continue; - ErrorStatus status_i = { - .cost = ts_stack_error_cost(self->stack, i), - .is_in_error = ts_stack_state(self->stack, i) == ERROR_STATE, - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, i), - .push_count = ts_stack_push_count(self->stack, i) - }; + ErrorStatus status_i = parser__version_status(self, i); switch (parser__compare_versions(self, status, status_i)) { case ErrorComparisonTakeRight: return true; @@ -206,83 +213,6 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, return false; } -static unsigned parser__condense_stack(Parser *self) { - bool made_changes = false; - unsigned min_error_cost = UINT_MAX; - for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { - if (ts_stack_is_halted(self->stack, i)) { - ts_stack_remove_version(self->stack, i); - i--; - continue; - } - - ErrorStatus status_i = { - .cost = ts_stack_error_cost(self->stack, i), - .push_count = ts_stack_push_count(self->stack, i), - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, i), - .is_in_error = ts_stack_state(self->stack, i) == ERROR_STATE, - }; - if (!status_i.is_in_error && status_i.cost < min_error_cost) { - min_error_cost = status_i.cost; - } - - for (StackVersion j = 0; j < i; j++) { - ErrorStatus status_j = { - .cost = ts_stack_error_cost(self->stack, j), - .push_count = ts_stack_push_count(self->stack, j), - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, j), - .is_in_error = ts_stack_state(self->stack, j) == ERROR_STATE, - }; - - bool can_merge = ts_stack_can_merge(self->stack, j, i); - switch (parser__compare_versions(self, status_j, status_i)) { - case ErrorComparisonTakeLeft: - made_changes = true; - ts_stack_remove_version(self->stack, i); - i--; - j = i; - break; - case ErrorComparisonPreferLeft: - case ErrorComparisonNone: - if (can_merge) { - made_changes = true; - ts_stack_force_merge(self->stack, j, i); - i--; - j = i; - } - break; - case ErrorComparisonPreferRight: - made_changes = true; - ts_stack_swap_versions(self->stack, i, j); - if (can_merge) { - ts_stack_force_merge(self->stack, j, i); - i--; - j = i; - } - break; - case ErrorComparisonTakeRight: - made_changes = true; - ts_stack_remove_version(self->stack, j); - i--; - j--; - break; - } - } - } - - while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { - ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); - made_changes = true; - } - - if (made_changes) { - LOG("condense"); - LOG_STACK(); - } - - return min_error_cost; -} - static void parser__restore_external_scanner(Parser *self, Tree *external_token) { if (external_token) { self->language->external_scanner.deserialize( @@ -334,9 +264,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta self->lexer.token_end_position = self->lexer.current_position; } - if (error_mode && self->lexer.token_end_position.bytes <= current_position.bytes) { - LOG("disregard_empty_token"); - } else { + if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) { found_external_token = true; break; } @@ -360,7 +288,6 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta } if (!error_mode) { - LOG("retry_in_error_mode"); error_mode = true; lex_mode = self->language->lex_modes[ERROR_STATE]; valid_external_tokens = ts_language_enabled_external_tokens( @@ -797,7 +724,8 @@ static void parser__accept(Parser *self, StackVersion version, static bool parser__do_all_potential_reductions(Parser *self, StackVersion starting_version, TSSymbol lookahead_symbol) { bool result = false; - for (StackVersion version = starting_version;;) { + for (StackVersion version = starting_version; + ts_stack_version_count(self->stack) < MAX_VERSION_COUNT;) { uint32_t version_count = ts_stack_version_count(self->stack); if (version >= version_count) break; @@ -869,24 +797,7 @@ static bool parser__do_all_potential_reductions(Parser *self, StackVersion start } static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) { - // If enough parse versions have already completed, just halt this version. - if (self->accept_count > MAX_VERSION_COUNT) { - ts_stack_halt(self->stack, version); - LOG("bail_after_too_many_tries"); - return; - } - - // If there are other in-progress versions that are clearly better than this one, - // just halt this version. - unsigned new_cost = ts_stack_error_cost(self->stack, version) + ERROR_COST_PER_SKIPPED_TREE; - if (parser__better_version_exists(self, version, true, new_cost)) { - ts_stack_halt(self->stack, version); - LOG("bail_on_error"); - return; - } - // Perform any reductions that could have happened in this state, regardless of the lookahead. - LOG("handle_error"); uint32_t previous_version_count = ts_stack_version_count(self->stack); parser__do_all_potential_reductions(self, version, 0); uint32_t version_count = ts_stack_version_count(self->stack); @@ -923,7 +834,6 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo lookahead_symbol )) { LOG("recover_with_missing symbol:%s, state:%u", SYM_NAME(missing_symbol), state_after_missing_symbol); - LOG_STACK(); did_insert_missing_token = true; break; } @@ -1023,13 +933,15 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) unsigned previous_version_count = ts_stack_version_count(self->stack); Length position = ts_stack_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); + unsigned depth_since_error = ts_stack_depth_since_error(self->stack, version); for (unsigned i = 0; i < summary->size; i++) { StackSummaryEntry entry = summary->contents[i]; if (entry.state == ERROR_STATE) continue; if (entry.position.bytes == position.bytes) continue; - unsigned depth = entry.depth + ts_stack_depth_since_error(self->stack, version); + unsigned depth = entry.depth + depth_since_error; + if (depth > MAX_SUMMARY_DEPTH) break; unsigned new_cost = depth * ERROR_COST_PER_SKIPPED_TREE + @@ -1041,21 +953,22 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) if (parser__recover_to_state(self, version, depth, entry.state)) { did_recover = true; LOG("recover state:%u, depth:%u", entry.state, depth); + LOG_STACK(); break; } } } for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { - if (ts_stack_is_halted(self->stack, i)) { - ts_stack_remove_version(self->stack, i--); - } else { + if (ts_stack_is_active(self->stack, i)) { for (unsigned j = 0; j < i; j++) { if (ts_stack_can_merge(self->stack, j, i)) { ts_stack_remove_version(self->stack, i--); break; } } + } else { + ts_stack_remove_version(self->stack, i--); } } @@ -1154,21 +1067,14 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re if (last_reduction_version != STACK_VERSION_NONE) { ts_stack_renumber_version(self->stack, last_reduction_version, version); LOG_STACK(); + } else if (state == ERROR_STATE) { + ts_stack_push(self->stack, version, lookahead, false, ERROR_STATE); + return; } else if (!parser__breakdown_top_of_stack(self, version)) { - if (state == ERROR_STATE) { - ts_stack_push(self->stack, version, lookahead, false, ERROR_STATE); - return; - } - - parser__handle_error(self, version, lookahead->first_leaf.symbol); - if (ts_stack_is_halted(self->stack, version)) { - ts_tree_release(&self->tree_pool, lookahead); - return; - } else if (lookahead->size.bytes == 0) { - ts_tree_release(&self->tree_pool, lookahead); - state = ts_stack_state(self->stack, version); - lookahead = parser__get_lookahead(self, version, &state, reusable_node, &table_entry); - } + LOG("detect_error"); + ts_stack_pause(self->stack, version, lookahead->first_leaf.symbol); + ts_tree_release(&self->tree_pool, lookahead); + return; } state = ts_stack_state(self->stack, version); @@ -1176,6 +1082,93 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re } } +static unsigned parser__condense_stack(Parser *self) { + bool made_changes = false; + unsigned min_error_cost = UINT_MAX; + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { + if (ts_stack_is_halted(self->stack, i)) { + ts_stack_remove_version(self->stack, i); + i--; + continue; + } + + ErrorStatus status_i = parser__version_status(self, i); + if (!status_i.is_in_error && status_i.cost < min_error_cost) { + min_error_cost = status_i.cost; + } + + for (StackVersion j = 0; j < i; j++) { + ErrorStatus status_j = parser__version_status(self, j); + + bool can_merge = ts_stack_can_merge(self->stack, j, i); + switch (parser__compare_versions(self, status_j, status_i)) { + case ErrorComparisonTakeLeft: + made_changes = true; + ts_stack_remove_version(self->stack, i); + i--; + j = i; + break; + case ErrorComparisonPreferLeft: + case ErrorComparisonNone: + if (can_merge) { + made_changes = true; + ts_stack_force_merge(self->stack, j, i); + i--; + j = i; + } + break; + case ErrorComparisonPreferRight: + made_changes = true; + ts_stack_swap_versions(self->stack, i, j); + if (can_merge) { + ts_stack_force_merge(self->stack, j, i); + i--; + j = i; + } + break; + case ErrorComparisonTakeRight: + made_changes = true; + ts_stack_remove_version(self->stack, j); + i--; + j--; + break; + } + } + } + + while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); + made_changes = true; + } + + if (ts_stack_version_count(self->stack) > 0) { + bool has_unpaused_version = false; + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { + if (ts_stack_is_paused(self->stack, i)) { + if (!has_unpaused_version) { + LOG("resume version:%u", i); + TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i); + parser__handle_error(self, i, lookahead_symbol); + has_unpaused_version = true; + } else { + ts_stack_remove_version(self->stack, i); + i--; + n--; + } + } else { + has_unpaused_version = true; + } + } + } + + if (made_changes) { + LOG("condense"); + LOG_STACK(); + } + + return min_error_cost; +} + bool parser_init(Parser *self) { ts_lexer_init(&self->lexer); array_init(&self->reduce_actions); @@ -1219,13 +1212,7 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err for (version = 0; version < ts_stack_version_count(self->stack); version++) { reusable_node = self->reusable_node; - while (!ts_stack_is_halted(self->stack, version)) { - position = ts_stack_position(self->stack, version).bytes; - if (position > last_position || (version > 0 && position == last_position)) { - last_position = position; - break; - } - + while (ts_stack_is_active(self->stack, version)) { LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", version, ts_stack_version_count(self->stack), ts_stack_state(self->stack, version), @@ -1234,6 +1221,12 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err parser__advance(self, version, &reusable_node); LOG_STACK(); + + position = ts_stack_position(self->stack, version).bytes; + if (position > last_position || (version > 0 && position == last_position)) { + last_position = position; + break; + } } } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index bd60a9ec..1ea8a181 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -49,12 +49,19 @@ typedef struct { typedef Array(StackNode *) StackNodeArray; +typedef enum { + StackStatusActive, + StackStatusPaused, + StackStatusHalted, +} StackStatus; + typedef struct { StackNode *node; Tree *last_external_token; - uint32_t push_count; - bool is_halted; StackSummary *summary; + uint32_t push_count; + TSSymbol lookahead_when_paused; + StackStatus status; } StackHead; struct Stack { @@ -215,7 +222,8 @@ static StackVersion ts_stack__add_version(Stack *self, StackVersion original_ver .node = node, .push_count = self->heads.contents[original_version].push_count, .last_external_token = last_external_token, - .is_halted = false, + .status = StackStatusActive, + .lookahead_when_paused = 0, }; array_push(&self->heads, head); stack_node_retain(node); @@ -581,7 +589,8 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version StackHead *head1 = &self->heads.contents[version1]; StackHead *head2 = &self->heads.contents[version2]; return - !head1->is_halted && !head2->is_halted && + head1->status == StackStatusActive && + head2->status == StackStatusActive && head1->node->state == head2->node->state && head1->node->position.bytes == head2->node->position.bytes && head1->node->depth == head2->node->depth && @@ -598,11 +607,34 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi } void ts_stack_halt(Stack *self, StackVersion version) { - array_get(&self->heads, version)->is_halted = true; + array_get(&self->heads, version)->status = StackStatusHalted; } -bool ts_stack_is_halted(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->is_halted; +void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) { + StackHead *head = array_get(&self->heads, version); + head->status = StackStatusPaused; + head->lookahead_when_paused = lookahead; +} + +bool ts_stack_is_active(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusActive; +} + +bool ts_stack_is_halted(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusHalted; +} + +bool ts_stack_is_paused(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusPaused; +} + +TSSymbol ts_stack_resume(Stack *self, StackVersion version) { + StackHead *head = array_get(&self->heads, version); + assert(head->status == StackStatusPaused); + TSSymbol result = head->lookahead_when_paused; + head->status = StackStatusActive; + head->lookahead_when_paused = 0; + return result; } void ts_stack_clear(Stack *self) { @@ -614,7 +646,8 @@ void ts_stack_clear(Stack *self) { array_push(&self->heads, ((StackHead){ .node = self->base_node, .last_external_token = NULL, - .is_halted = false, + .status = StackStatusActive, + .lookahead_when_paused = 0, })); } @@ -631,13 +664,18 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { array_clear(&self->iterators); for (uint32_t i = 0; i < self->heads.size; i++) { - if (ts_stack_is_halted(self, i)) continue; StackHead *head = &self->heads.contents[i]; + if (head->status == StackStatusHalted) continue; + fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); - fprintf( - f, - "node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, " - "labeltooltip=\"push_count: %u\ndepth: %u", i, head->node, i, head->push_count, head->node->depth + fprintf(f, "node_head_%u -> node_%p [", i, head->node); + + if (head->status == StackStatusPaused) { + fprintf(f, "color=red "); + } + fprintf(f, + "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"push_count: %u\ndepth: %u", + i, head->push_count, head->node->depth ); if (head->last_external_token) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 14f0f2a7..0be0a4dd 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -102,9 +102,17 @@ bool ts_stack_can_merge(Stack *, StackVersion, StackVersion); void ts_stack_force_merge(Stack *, StackVersion, StackVersion); +TSSymbol ts_stack_resume(Stack *, StackVersion); + +void ts_stack_pause(Stack *, StackVersion, TSSymbol); + void ts_stack_halt(Stack *, StackVersion); -bool ts_stack_is_halted(Stack *, StackVersion); +bool ts_stack_is_active(const Stack *, StackVersion); + +bool ts_stack_is_paused(const Stack *, StackVersion); + +bool ts_stack_is_halted(const Stack *, StackVersion); void ts_stack_renumber_version(Stack *, StackVersion, StackVersion); diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index d1d76d78..c5833156 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -141,3 +141,24 @@ int y = 5; (translation_unit (declaration (primitive_type) (ERROR (identifier)) (identifier)) (declaration (primitive_type) (init_declarator (identifier) (number_literal)))) + +========================================== +Declarations with missing variable names +========================================== + +int a() { + struct x = 1; + int = 2; +} + +--- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator (identifier) (parameter_list)) + (compound_statement + (struct_specifier (type_identifier)) + (ERROR (number_literal)) + (primitive_type) + (ERROR (number_literal))))) From f3bbf045b704a3b250ea60210b89ce9db759a8ad Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 10:05:12 -0700 Subject: [PATCH 02/16] Avoid unnecessary stack node retain and release on every push --- src/runtime/stack.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 1ea8a181..60ed6575 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -126,8 +126,6 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0}; if (previous_node) { - stack_node_retain(previous_node); - node->link_count = 1; node->links[0] = (StackLink){ .node = previous_node, @@ -415,7 +413,6 @@ void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, } else if (!tree->extra) { head->push_count++; } - stack_node_release(head->node, &self->node_pool, self->tree_pool); head->node = new_node; } From 80f856cef56f5c0a350625a0483211b8559a8892 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 10:57:44 -0700 Subject: [PATCH 03/16] Maintain a total node count on every tree This simplifies (and fixes bugs in) the parse stack's tracking of its total node count since the last error, which is needed for error recovery. --- src/runtime/error_costs.h | 2 +- src/runtime/parser.c | 14 ++-- src/runtime/stack.c | 83 +++++++++---------- src/runtime/stack.h | 12 +-- src/runtime/tree.c | 3 + src/runtime/tree.h | 1 + .../error_corpus/javascript_errors.txt | 13 +-- test/integration/real_grammars.cc | 9 +- 8 files changed, 69 insertions(+), 68 deletions(-) diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index 5ba1fc62..f543b3ff 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -2,7 +2,7 @@ #define RUNTIME_ERROR_COSTS_H_ #define ERROR_STATE 0 -#define ERROR_COST_PER_MISSING_TREE 150 +#define ERROR_COST_PER_MISSING_TREE 110 #define ERROR_COST_PER_SKIPPED_TREE 100 #define ERROR_COST_PER_SKIPPED_LINE 30 #define ERROR_COST_PER_SKIPPED_CHAR 1 diff --git a/src/runtime/parser.c b/src/runtime/parser.c index a6dfa53a..76bcb4b9 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -39,7 +39,7 @@ static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; typedef struct { unsigned cost; - unsigned push_count; + unsigned node_count; int dynamic_precedence; bool is_in_error; } ErrorStatus; @@ -105,7 +105,6 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { ts_stack_push(self->stack, slice.version, tree, false, state); } - ts_stack_decrease_push_count(self->stack, slice.version, parent->child_count + 1); ts_tree_release(&self->tree_pool, parent); array_delete(&slice.trees); @@ -151,7 +150,7 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err } if (a.cost < b.cost) { - if ((b.cost - a.cost) * (1 + a.push_count) > MAX_COST_DIFFERENCE) { + if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { return ErrorComparisonTakeLeft; } else { return ErrorComparisonPreferLeft; @@ -159,7 +158,7 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err } if (b.cost < a.cost) { - if ((a.cost - b.cost) * (1 + b.push_count) > MAX_COST_DIFFERENCE) { + if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { return ErrorComparisonTakeRight; } else { return ErrorComparisonPreferRight; @@ -177,7 +176,7 @@ static ErrorStatus parser__version_status(Parser *self, StackVersion version) { if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; return (ErrorStatus) { .cost = cost, - .push_count = ts_stack_push_count(self->stack, version), + .node_count = ts_stack_node_count_since_error(self->stack, version), .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE }; @@ -192,7 +191,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, .cost = cost, .is_in_error = is_in_error, .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .push_count = 0, + .node_count = ts_stack_node_count_since_error(self->stack, version), }; for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { @@ -933,7 +932,7 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) unsigned previous_version_count = ts_stack_version_count(self->stack); Length position = ts_stack_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned depth_since_error = ts_stack_depth_since_error(self->stack, version); + unsigned depth_since_error = ts_stack_node_count_since_error(self->stack, version); for (unsigned i = 0; i < summary->size; i++) { StackSummaryEntry entry = summary->contents[i]; @@ -1150,6 +1149,7 @@ static unsigned parser__condense_stack(Parser *self) { TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i); parser__handle_error(self, i, lookahead_symbol); has_unpaused_version = true; + min_error_cost = ts_stack_error_cost(self->stack, i); } else { ts_stack_remove_version(self->stack, i); i--; diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 60ed6575..f993f59f 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -31,7 +31,7 @@ struct StackNode { short unsigned int link_count; uint32_t ref_count; unsigned error_cost; - unsigned depth; + unsigned node_count; int dynamic_precedence; }; @@ -59,7 +59,7 @@ typedef struct { StackNode *node; Tree *last_external_token; StackSummary *summary; - uint32_t push_count; + unsigned node_count_at_last_error; TSSymbol lookahead_when_paused; StackStatus status; } StackHead; @@ -123,7 +123,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p StackNode *node = pool->size > 0 ? array_pop(pool) : ts_malloc(sizeof(StackNode)); - *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0}; + *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; if (previous_node) { node->link_count = 1; @@ -136,22 +136,25 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p node->position = previous_node->position; node->error_cost = previous_node->error_cost; node->dynamic_precedence = previous_node->dynamic_precedence; + node->node_count = previous_node->node_count; if (tree) { - node->depth = previous_node->depth; - if (!tree->extra) node->depth++; node->error_cost += tree->error_cost; node->position = length_add(node->position, ts_tree_total_size(tree)); node->dynamic_precedence += tree->dynamic_precedence; - if (state == ERROR_STATE && !tree->extra) { - node->error_cost += - ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + - ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + - ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; - if (previous_node->links[0].tree) { + if (!tree->extra) { + node->node_count += tree->node_count; + + if (state == ERROR_STATE) { node->error_cost += - ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes + - ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; + ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + + ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + + ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; + if (previous_node->links[0].tree) { + node->error_cost += + ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes + + ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; + } } } } @@ -198,6 +201,10 @@ static void stack_node_add_link(StackNode *self, StackLink link) { stack_node_retain(link.node); if (link.tree) ts_tree_retain(link.tree); self->links[self->link_count++] = link; + + unsigned node_count = link.node->node_count; + if (link.tree) node_count += link.tree->node_count; + if (node_count > self->node_count) self->node_count = node_count; } } @@ -215,22 +222,22 @@ static void stack_head_delete(StackHead *self, StackNodeArray *pool, TreePool *t } static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, - StackNode *node, Tree *last_external_token) { + StackNode *node) { StackHead head = { .node = node, - .push_count = self->heads.contents[original_version].push_count, - .last_external_token = last_external_token, + .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, + .last_external_token = self->heads.contents[original_version].last_external_token, .status = StackStatusActive, .lookahead_when_paused = 0, }; array_push(&self->heads, head); stack_node_retain(node); - if (last_external_token) ts_tree_retain(last_external_token); + if (head.last_external_token) ts_tree_retain(head.last_external_token); return (StackVersion)(self->heads.size - 1); } -static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node, - TreeArray *trees, Tree *last_external_token) { +static void ts_stack__add_slice(Stack *self, StackVersion original_version, + StackNode *node, TreeArray *trees) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == node) { @@ -240,7 +247,7 @@ static void ts_stack__add_slice(Stack *self, StackVersion original_version, Stac } } - StackVersion version = ts_stack__add_version(self, original_version, node, last_external_token); + StackVersion version = ts_stack__add_version(self, original_version, node); StackSlice slice = { *trees, version }; array_push(&self->slices, slice); } @@ -252,7 +259,6 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, array_clear(&self->iterators); StackHead *head = array_get(&self->heads, version); - Tree *last_external_token = head->last_external_token; Iterator iterator = { .node = head->node, .trees = array_new(), @@ -279,8 +285,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, self, version, node, - &trees, - last_external_token + &trees ); } @@ -381,14 +386,6 @@ Length ts_stack_position(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->position; } -unsigned ts_stack_push_count(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->push_count; -} - -void ts_stack_decrease_push_count(Stack *self, StackVersion version, unsigned decrement) { - array_get(&self->heads, version)->push_count -= decrement; -} - Tree *ts_stack_last_external_token(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->last_external_token; } @@ -405,14 +402,15 @@ unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { return head->node->error_cost; } +unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { + StackHead *head = array_get(&self->heads, version); + return head->node->node_count - head->node_count_at_last_error; +} + void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) { StackHead *head = array_get(&self->heads, version); StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool); - if (state == ERROR_STATE) { - head->push_count = 0; - } else if (!tree->extra) { - head->push_count++; - } + if (!tree) head->node_count_at_last_error = new_node->node_count; head->node = new_node; } @@ -536,10 +534,6 @@ StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { return array_get(&self->heads, version)->summary; } -unsigned ts_stack_depth_since_error(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->depth; -} - int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->dynamic_precedence; } @@ -590,7 +584,6 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version head2->status == StackStatusActive && head1->node->state == head2->node->state && head1->node->position.bytes == head2->node->position.bytes && - head1->node->depth == head2->node->depth && ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); } @@ -600,6 +593,9 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi for (uint32_t i = 0; i < head2->node->link_count; i++) { stack_node_add_link(head1->node, head2->node->links[i]); } + if (head2->node_count_at_last_error > head1->node_count_at_last_error) { + head1->node_count_at_last_error = head2->node_count_at_last_error; + } ts_stack_remove_version(self, version2); } @@ -611,6 +607,7 @@ void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) { StackHead *head = array_get(&self->heads, version); head->status = StackStatusPaused; head->lookahead_when_paused = lookahead; + head->node_count_at_last_error = head->node->node_count; } bool ts_stack_is_active(const Stack *self, StackVersion version) { @@ -671,8 +668,8 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf(f, "color=red "); } fprintf(f, - "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"push_count: %u\ndepth: %u", - i, head->push_count, head->node->depth + "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u", + i, head->node->node_count - head->node_count_at_last_error ); if (head->last_external_token) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 0be0a4dd..4704d90e 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -41,15 +41,6 @@ uint32_t ts_stack_version_count(const Stack *); // empty, this returns the initial state, 0. TSStateId ts_stack_state(const Stack *, StackVersion); -// Get the number of trees that have been pushed to a given version of -// the stack. -unsigned ts_stack_push_count(const Stack *, StackVersion); - -// In the event that trees were permanently removed from some version -// of the stack, decrease the version's push count to account for the -// removal. -void ts_stack_decrease_push_count(Stack *, StackVersion, unsigned); - // Get the last external token associated with a given version of the stack. Tree *ts_stack_last_external_token(const Stack *, StackVersion); @@ -82,7 +73,8 @@ StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); // Remove any all trees from the given version of the stack. StackSliceArray ts_stack_pop_all(Stack *, StackVersion); -unsigned ts_stack_depth_since_error(Stack *, StackVersion); +// Get the number of tree nodes on the stack since the most recent error. +unsigned ts_stack_node_count_since_error(const Stack *, StackVersion); int ts_stack_dynamic_precedence(Stack *, StackVersion); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index ae35e1f8..c58c987a 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -177,6 +177,7 @@ Tree *ts_tree_make_leaf(TreePool *pool, TSSymbol symbol, Length padding, Length .padding = padding, .visible = metadata.visible, .named = metadata.named, + .node_count = 1, .has_changes = false, .first_leaf = { .symbol = symbol, @@ -305,6 +306,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, self->visible_child_count = 0; self->error_cost = 0; self->repeat_depth = 0; + self->node_count = 1; self->has_external_tokens = false; self->dynamic_precedence = 0; @@ -326,6 +328,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, self->error_cost += child->error_cost; self->dynamic_precedence += child->dynamic_precedence; + self->node_count += child->node_count; if (alias_sequence && alias_sequence[non_extra_index] != 0 && !child->extra) { self->visible_child_count++; diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 0e3c2880..ad104894 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -50,6 +50,7 @@ typedef struct Tree { TSSymbol symbol; TSStateId parse_state; unsigned error_cost; + unsigned node_count; unsigned repeat_depth; struct { diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 250f13b6..d435ba86 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -79,12 +79,13 @@ if ({a: 'b'} {c: 'd'}) { (statement_block (expression_statement (assignment_expression (identifier) - (ERROR (function - (formal_parameters (identifier)) - (statement_block (expression_statement (identifier))))) - (function - (formal_parameters (identifier)) - (statement_block (expression_statement (identifier))))))))) + (call_expression + (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))) + (ERROR) + (arguments (identifier)))) + (MISSING)) + (statement_block + (expression_statement (identifier)))))) =================================================== Extra tokens at the end of the file diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index 6d5ac27e..6cee7870 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -32,6 +32,8 @@ vector test_languages({ for (auto &language_name : test_languages) { describe(("the " + language_name + " language").c_str(), [&]() { TSDocument *document; + const bool debug_graphs_enabled = getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS"); + before_each([&]() { record_alloc::start(); @@ -39,7 +41,7 @@ for (auto &language_name : test_languages) { ts_document_set_language(document, load_real_language(language_name)); // ts_document_set_logger(document, stderr_logger_new(true)); - if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { + if (debug_graphs_enabled) { ts_document_print_debugging_graphs(document, true); } }); @@ -55,6 +57,7 @@ for (auto &language_name : test_languages) { auto it_handles_edit_sequence = [&](string name, std::function edit_sequence){ it(("parses " + entry.description + ": " + name).c_str(), [&]() { input = new SpyInput(entry.input, 3); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_set_input(document, input->input()); edit_sequence(); @@ -88,9 +91,11 @@ for (auto &language_name : test_languages) { ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); ts_document_parse(document); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); TSRange *ranges; uint32_t range_count; @@ -112,9 +117,11 @@ for (auto &language_name : test_languages) { ts_document_edit(document, input->replace(edit_position, deletion_size, "")); ts_document_parse(document); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); TSRange *ranges; uint32_t range_count; From 1d9d6f37ad770df8eed4c8ff80dc8863616f6ef3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 11:52:34 -0700 Subject: [PATCH 04/16] Introduce an error cost per error instance to favor fewer errors --- src/runtime/error_costs.h | 1 + src/runtime/stack.c | 10 +++++++--- src/runtime/tree.c | 11 +++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index f543b3ff..d6420488 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -2,6 +2,7 @@ #define RUNTIME_ERROR_COSTS_H_ #define ERROR_STATE 0 +#define ERROR_COST_PER_RECOVERY 500 #define ERROR_COST_PER_MISSING_TREE 110 #define ERROR_COST_PER_SKIPPED_TREE 100 #define ERROR_COST_PER_SKIPPED_LINE 30 diff --git a/src/runtime/stack.c b/src/runtime/stack.c index f993f59f..17a301fe 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -399,7 +399,9 @@ void ts_stack_set_last_external_token(Stack *self, StackVersion version, Tree *t unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { StackHead *head = array_get(&self->heads, version); - return head->node->error_cost; + unsigned result = head->node->error_cost; + if (head->node->state == ERROR_STATE) result += ERROR_COST_PER_RECOVERY; + return result; } unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { @@ -668,8 +670,10 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf(f, "color=red "); } fprintf(f, - "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u", - i, head->node->node_count - head->node_count_at_last_error + "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u", + i, + ts_stack_node_count_since_error(self, i), + ts_stack_error_cost(self, i) ); if (head->last_external_token) { diff --git a/src/runtime/tree.c b/src/runtime/tree.c index c58c987a..f88895d2 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -354,11 +354,14 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, } if (self->symbol == ts_builtin_sym_error) { - self->error_cost += ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes + + self->error_cost += ERROR_COST_PER_RECOVERY + + ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes + ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row; - for (uint32_t i = 0; i < child_count; i++) - if (!self->children[i]->extra) + for (uint32_t i = 0; i < child_count; i++) { + if (!self->children[i]->extra) { self->error_cost += ERROR_COST_PER_SKIPPED_TREE; + } + } } if (child_count > 0) { @@ -418,7 +421,7 @@ Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLangu Tree *ts_tree_make_missing_leaf(TreePool *pool, TSSymbol symbol, const TSLanguage *language) { Tree *result = ts_tree_make_leaf(pool, symbol, length_zero(), length_zero(), language); result->is_missing = true; - result->error_cost = ERROR_COST_PER_MISSING_TREE; + result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; return result; } From 0ec7e5ce4261da16b68c0405aa30997a9be33d9e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 11:57:26 -0700 Subject: [PATCH 05/16] Remove ts_stack_force_merge function --- src/runtime/parser.c | 12 +++++------- src/runtime/stack.c | 43 ++++++++++++++++++------------------------- src/runtime/stack.h | 9 ++++++--- 3 files changed, 29 insertions(+), 35 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 76bcb4b9..96f98cdf 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -845,7 +845,7 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo } for (unsigned i = previous_version_count; i < version_count; i++) { - ts_stack_force_merge(self->stack, version, previous_version_count); + assert(ts_stack_merge(self->stack, version, previous_version_count)); } ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); @@ -1099,7 +1099,6 @@ static unsigned parser__condense_stack(Parser *self) { for (StackVersion j = 0; j < i; j++) { ErrorStatus status_j = parser__version_status(self, j); - bool can_merge = ts_stack_can_merge(self->stack, j, i); switch (parser__compare_versions(self, status_j, status_i)) { case ErrorComparisonTakeLeft: made_changes = true; @@ -1109,20 +1108,19 @@ static unsigned parser__condense_stack(Parser *self) { break; case ErrorComparisonPreferLeft: case ErrorComparisonNone: - if (can_merge) { + if (ts_stack_merge(self->stack, j, i)) { made_changes = true; - ts_stack_force_merge(self->stack, j, i); i--; j = i; } break; case ErrorComparisonPreferRight: made_changes = true; - ts_stack_swap_versions(self->stack, i, j); - if (can_merge) { - ts_stack_force_merge(self->stack, j, i); + if (ts_stack_merge(self->stack, j, i)) { i--; j = i; + } else { + ts_stack_swap_versions(self->stack, i, j); } break; case ErrorComparisonTakeRight: diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 17a301fe..9186397f 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -197,15 +197,15 @@ static void stack_node_add_link(StackNode *self, StackLink link) { } } - if (self->link_count < MAX_LINK_COUNT) { - stack_node_retain(link.node); - if (link.tree) ts_tree_retain(link.tree); - self->links[self->link_count++] = link; + if (self->link_count == MAX_LINK_COUNT) return; - unsigned node_count = link.node->node_count; - if (link.tree) node_count += link.tree->node_count; - if (node_count > self->node_count) self->node_count = node_count; - } + stack_node_retain(link.node); + if (link.tree) ts_tree_retain(link.tree); + self->links[self->link_count++] = link; + + unsigned node_count = link.node->node_count; + if (link.tree) node_count += link.tree->node_count; + if (node_count > self->node_count) self->node_count = node_count; } static void stack_head_delete(StackHead *self, StackNodeArray *pool, TreePool *tree_pool) { @@ -570,12 +570,17 @@ StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { } bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { - if (ts_stack_can_merge(self, version1, version2)) { - ts_stack_force_merge(self, version1, version2); - return true; - } else { - return false; + if (!ts_stack_can_merge(self, version1, version2)) return false; + StackHead *head1 = &self->heads.contents[version1]; + StackHead *head2 = &self->heads.contents[version2]; + for (uint32_t i = 0; i < head2->node->link_count; i++) { + stack_node_add_link(head1->node, head2->node->links[i]); } + if (head2->node_count_at_last_error > head1->node_count_at_last_error) { + head1->node_count_at_last_error = head2->node_count_at_last_error; + } + ts_stack_remove_version(self, version2); + return true; } bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) { @@ -589,18 +594,6 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); } -void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion version2) { - StackHead *head1 = &self->heads.contents[version1]; - StackHead *head2 = &self->heads.contents[version2]; - for (uint32_t i = 0; i < head2->node->link_count; i++) { - stack_node_add_link(head1->node, head2->node->links[i]); - } - if (head2->node_count_at_last_error > head1->node_count_at_last_error) { - head1->node_count_at_last_error = head2->node_count_at_last_error; - } - ts_stack_remove_version(self, version2); -} - void ts_stack_halt(Stack *self, StackVersion version) { array_get(&self->heads, version)->status = StackStatusHalted; } diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 4704d90e..92a09b69 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -73,7 +73,8 @@ StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); // Remove any all trees from the given version of the stack. StackSliceArray ts_stack_pop_all(Stack *, StackVersion); -// Get the number of tree nodes on the stack since the most recent error. +// Get the maximum number of tree nodes reachable from this version of the stack +// since the last error was detected. unsigned ts_stack_node_count_since_error(const Stack *, StackVersion); int ts_stack_dynamic_precedence(Stack *, StackVersion); @@ -86,14 +87,16 @@ void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth); // given version of the stack. StackSummary *ts_stack_get_summary(Stack *, StackVersion); +// Get the total cost of all errors on the given version of the stack. unsigned ts_stack_error_cost(const Stack *, StackVersion version); +// Determine whether the given two stack versions can be merged. bool ts_stack_merge(Stack *, StackVersion, StackVersion); +// Merge the given two stack versions if possible, returning true +// if they were successfully merged and false otherwise. bool ts_stack_can_merge(Stack *, StackVersion, StackVersion); -void ts_stack_force_merge(Stack *, StackVersion, StackVersion); - TSSymbol ts_stack_resume(Stack *, StackVersion); void ts_stack_pause(Stack *, StackVersion, TSSymbol); From 34349f9cbb19870f3fd1f4e76a11a8c38368b207 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 12:07:14 -0700 Subject: [PATCH 06/16] Put back limit on error handling based on accepted tree count --- src/runtime/parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 96f98cdf..9baf4ce2 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1142,12 +1142,12 @@ static unsigned parser__condense_stack(Parser *self) { bool has_unpaused_version = false; for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { if (ts_stack_is_paused(self->stack, i)) { - if (!has_unpaused_version) { + if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) { LOG("resume version:%u", i); + min_error_cost = ts_stack_error_cost(self->stack, i); TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i); parser__handle_error(self, i, lookahead_symbol); has_unpaused_version = true; - min_error_cost = ts_stack_error_cost(self->stack, i); } else { ts_stack_remove_version(self->stack, i); i--; From a6cf2e87e7b146fff6f326e624fad118ea5a5a81 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 13:58:20 -0700 Subject: [PATCH 07/16] Fix halt_on_error tests --- src/runtime/stack.c | 4 +++- test/integration/real_grammars.cc | 1 - test/runtime/document_test.cc | 6 +++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 9186397f..885083e4 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -400,7 +400,9 @@ void ts_stack_set_last_external_token(Stack *self, StackVersion version, Tree *t unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { StackHead *head = array_get(&self->heads, version); unsigned result = head->node->error_cost; - if (head->node->state == ERROR_STATE) result += ERROR_COST_PER_RECOVERY; + if (head->node->state == ERROR_STATE || head->status == StackStatusPaused) { + result += ERROR_COST_PER_RECOVERY; + } return result; } diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index 6cee7870..37465add 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -34,7 +34,6 @@ for (auto &language_name : test_languages) { TSDocument *document; const bool debug_graphs_enabled = getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS"); - before_each([&]() { record_alloc::start(); document = ts_document_new(); diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc index 7c4bad6c..0be03657 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -22,6 +22,10 @@ describe("Document", [&]() { before_each([&]() { record_alloc::start(); document = ts_document_new(); + + if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { + ts_document_print_debugging_graphs(document, true); + } }); after_each([&]() { @@ -434,7 +438,7 @@ describe("Document", [&]() { root = ts_document_root_node(document); assert_node_string_equals( root, - "(ERROR (number) (null) (UNEXPECTED 'e'))"); + "(ERROR (number) (null))"); AssertThat(ts_node_end_byte(root), Equals(input_string.size())); }); From 09be0b6ef5902ce114494b994a9118880e4f4762 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 18:04:26 -0700 Subject: [PATCH 08/16] Store trees' children in TreeArrays, not w/ separate pointer and length --- src/runtime/array.h | 8 +- src/runtime/get_changed_ranges.c | 8 +- src/runtime/node.c | 20 ++-- src/runtime/parser.c | 53 +++++----- src/runtime/reusable_node.h | 12 +-- src/runtime/stack.c | 4 +- src/runtime/tree.c | 163 +++++++++++++++---------------- src/runtime/tree.h | 72 +++++++------- test/helpers/tree_helpers.cc | 11 ++- test/helpers/tree_helpers.h | 2 +- test/runtime/tree_test.cc | 88 ++++++++--------- 11 files changed, 220 insertions(+), 221 deletions(-) diff --git a/src/runtime/array.h b/src/runtime/array.h index 0fa08d5a..e4e7ff0f 100644 --- a/src/runtime/array.h +++ b/src/runtime/array.h @@ -12,18 +12,18 @@ extern "C" { #include #include "runtime/alloc.h" -#define Array(T) \ - struct { \ - T *contents; \ +#define Array(T) \ + struct { \ uint32_t size; \ uint32_t capacity; \ + T *contents; \ } #define array_init(self) \ ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) #define array_new() \ - { NULL, 0, 0 } + { 0, 0, NULL } #define array_get(self, index) \ (assert((uint32_t)index < (self)->size), &(self)->contents[index]) diff --git a/src/runtime/get_changed_ranges.c b/src/runtime/get_changed_ranges.c index 41fc1877..26211613 100644 --- a/src/runtime/get_changed_ranges.c +++ b/src/runtime/get_changed_ranges.c @@ -127,8 +127,8 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { TreePathEntry entry = *array_back(&self->path); Length position = entry.position; uint32_t structural_child_index = 0; - for (uint32_t i = 0; i < entry.tree->child_count; i++) { - Tree *child = entry.tree->children[i]; + for (uint32_t i = 0; i < entry.tree->children.size; i++) { + Tree *child = entry.tree->children.contents[i]; Length child_left = length_add(position, child->padding); Length child_right = length_add(child_left, child->size); @@ -179,11 +179,11 @@ static void iterator_advance(Iterator *self) { Tree *parent = array_back(&self->path)->tree; uint32_t child_index = entry.child_index + 1; - if (parent->child_count > child_index) { + if (parent->children.size > child_index) { Length position = length_add(entry.position, ts_tree_total_size(entry.tree)); uint32_t structural_child_index = entry.structural_child_index; if (!entry.tree->extra) structural_child_index++; - Tree *next_child = parent->children[child_index]; + Tree *next_child = parent->children.contents[child_index]; array_push(&self->path, ((TreePathEntry){ .tree = next_child, diff --git a/src/runtime/node.c b/src/runtime/node.c index 1f7487cb..c825a104 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -39,7 +39,7 @@ static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { static inline uint32_t ts_node__relevant_child_count(TSNode self, bool include_anonymous) { const Tree *tree = ts_node__tree(self); - if (tree->child_count > 0) { + if (tree->children.size > 0) { if (include_anonymous) { return tree->visible_child_count; } else { @@ -61,7 +61,7 @@ static inline TSNode ts_node__direct_parent(TSNode self, uint32_t *index) { } static inline TSNode ts_node__direct_child(TSNode self, uint32_t i) { - const Tree *child_tree = ts_node__tree(self)->children[i]; + const Tree *child_tree = ts_node__tree(self)->children.contents[i]; return ts_node_make( child_tree, ts_node__offset_byte(self) + child_tree->context.offset.bytes, @@ -78,7 +78,7 @@ static inline TSNode ts_node__child(TSNode self, uint32_t child_index, did_descend = false; uint32_t index = 0; - for (uint32_t i = 0; i < ts_node__tree(result)->child_count; i++) { + for (uint32_t i = 0; i < ts_node__tree(result)->children.size; i++) { TSNode child = ts_node__direct_child(result, i); if (ts_node__is_relevant(child, include_anonymous)) { if (index == child_index) @@ -134,7 +134,7 @@ static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) if (!result.data) break; - for (uint32_t i = index + 1; i < ts_node__tree(result)->child_count; i++) { + for (uint32_t i = index + 1; i < ts_node__tree(result)->children.size; i++) { TSNode child = ts_node__direct_child(result, i); if (ts_node__is_relevant(child, include_anonymous)) return child; @@ -160,7 +160,7 @@ static inline TSNode ts_node__first_child_for_byte(TSNode self, uint32_t goal, while (did_descend) { did_descend = false; - for (uint32_t i = 0; i < ts_node__tree(node)->child_count; i++) { + for (uint32_t i = 0; i < ts_node__tree(node)->children.size; i++) { TSNode child = ts_node__direct_child(node, i); if (ts_node_end_byte(child) > goal) { if (ts_node__is_relevant(child, include_anonymous)) { @@ -187,7 +187,7 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t mi while (did_descend) { did_descend = false; - for (uint32_t i = 0, n = ts_node__tree(node)->child_count; i < n; i++) { + for (uint32_t i = 0, n = ts_node__tree(node)->children.size; i < n; i++) { TSNode child = ts_node__direct_child(node, i); if (ts_node_end_byte(child) > max) { if (ts_node_start_byte(child) > min) break; @@ -214,7 +214,7 @@ static inline TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint mi while (did_descend) { did_descend = false; - for (uint32_t i = 0, n = ts_node__tree(node)->child_count; i < n; i++) { + for (uint32_t i = 0, n = ts_node__tree(node)->children.size; i < n; i++) { TSNode child = ts_node__direct_child(node, i); const Tree *child_tree = ts_node__tree(child); if (i > 0) start_position = point_add(start_position, child_tree->padding.extent); @@ -318,7 +318,7 @@ uint32_t ts_node_child_index(TSNode self) { uint32_t index = tree->context.index; if (!parent) return UINT32_MAX; for (uint32_t i = 0; i < index; i++) { - Tree *child = parent->children[i]; + Tree *child = parent->children.contents[i]; result += child->visible ? 1 : child->visible_child_count; } if (parent->visible) break; @@ -338,7 +338,7 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) { uint32_t ts_node_child_count(TSNode self) { const Tree *tree = ts_node__tree(self); - if (tree->child_count > 0) { + if (tree->children.size > 0) { return tree->visible_child_count; } else { return 0; @@ -347,7 +347,7 @@ uint32_t ts_node_child_count(TSNode self) { uint32_t ts_node_named_child_count(TSNode self) { const Tree *tree = ts_node__tree(self); - if (tree->child_count > 0) { + if (tree->children.size > 0) { return tree->named_child_count; } else { return 0; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 9baf4ce2..a377c42a 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -86,9 +86,9 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { TSStateId state = ts_stack_state(self->stack, slice.version); Tree *parent = *array_front(&slice.trees); - for (uint32_t j = 0; j < parent->child_count; j++) { - Tree *child = parent->children[j]; - pending = child->child_count > 0; + for (uint32_t j = 0; j < parent->children.size; j++) { + Tree *child = parent->children.contents[j]; + pending = child->children.size > 0; if (child->symbol == ts_builtin_sym_error) { state = ERROR_STATE; @@ -120,7 +120,7 @@ static void parser__breakdown_lookahead(Parser *self, Tree **lookahead, TSStateId state, ReusableNode *reusable_node) { bool did_break_down = false; - while (reusable_node->tree->child_count > 0 && reusable_node->tree->parse_state != state) { + while (reusable_node->tree->children.size > 0 && reusable_node->tree->parse_state != state) { LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol)); reusable_node_breakdown(reusable_node); did_break_down = true; @@ -447,7 +447,7 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId reason = "is_missing"; } else if (result->fragile_left || result->fragile_right) { reason = "is_fragile"; - } else if (self->in_ambiguity && result->child_count) { + } else if (self->in_ambiguity && result->children.size) { reason = "in_ambiguity"; } @@ -555,7 +555,7 @@ static void parser__shift(Parser *self, StackVersion version, TSStateId state, ts_tree_retain(lookahead); } - bool is_pending = lookahead->child_count > 0; + bool is_pending = lookahead->children.size > 0; ts_stack_push(self->stack, version, lookahead, is_pending, state); if (lookahead->has_external_tokens) { ts_stack_set_last_external_token( @@ -564,10 +564,10 @@ static void parser__shift(Parser *self, StackVersion version, TSStateId state, } } -static bool parser__replace_children(Parser *self, Tree *tree, Tree **children, uint32_t count) { +static bool parser__replace_children(Parser *self, Tree *tree, TreeArray *children) { self->scratch_tree = *tree; - self->scratch_tree.child_count = 0; - ts_tree_set_children(&self->scratch_tree, count, children, self->language); + self->scratch_tree.children.size = 0; + ts_tree_set_children(&self->scratch_tree, children, self->language); if (parser__select_tree(self, tree, &self->scratch_tree)) { *tree = self->scratch_tree; return true; @@ -589,13 +589,13 @@ static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymb // Extra tokens on top of the stack should not be included in this new parent // node. They will be re-pushed onto the stack after the parent node is // created and pushed. - uint32_t child_count = slice.trees.size; - while (child_count > 0 && slice.trees.contents[child_count - 1]->extra) { - child_count--; + TreeArray children = slice.trees; + while (children.size > 0 && children.contents[children.size - 1]->extra) { + children.size--; } Tree *parent = ts_tree_make_node(&self->tree_pool, - symbol, child_count, slice.trees.contents, alias_sequence_id, self->language + symbol, &children, alias_sequence_id, self->language ); // This pop operation may have caused multiple stack versions to collapse @@ -607,12 +607,12 @@ static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymb if (next_slice.version != slice.version) break; i++; - uint32_t child_count = next_slice.trees.size; - while (child_count > 0 && next_slice.trees.contents[child_count - 1]->extra) { - child_count--; + TreeArray children = next_slice.trees; + while (children.size > 0 && children.contents[children.size - 1]->extra) { + children.size--; } - if (parser__replace_children(self, parent, next_slice.trees.contents, child_count)) { + if (parser__replace_children(self, parent, &children)) { ts_tree_array_delete(&self->tree_pool, &slice.trees); slice = next_slice; } else { @@ -636,9 +636,8 @@ static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymb // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. ts_stack_push(self->stack, slice.version, parent, false, next_state); - for (uint32_t j = parent->child_count; j < slice.trees.size; j++) { - Tree *tree = slice.trees.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, next_state); + for (uint32_t j = parent->children.size; j < slice.trees.size; j++) { + ts_stack_push(self->stack, slice.version, slice.trees.contents[j], false, next_state); } } @@ -690,12 +689,12 @@ static void parser__accept(Parser *self, StackVersion version, Tree *child = trees.contents[j]; if (!child->extra) { root = ts_tree_make_copy(&self->tree_pool, child); - root->child_count = 0; - for (uint32_t k = 0; k < child->child_count; k++) { - ts_tree_retain(child->children[k]); + root->children.size = 0; + for (uint32_t k = 0; k < child->children.size; k++) { + ts_tree_retain(child->children.contents[k]); } - array_splice(&trees, j, 1, child->child_count, child->children); - ts_tree_set_children(root, trees.size, trees.contents, self->language); + array_splice(&trees, j, 1, child->children.size, child->children.contents); + ts_tree_set_children(root, &trees, self->language); ts_tree_release(&self->tree_pool, child); break; } @@ -1020,7 +1019,7 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re LOG("shift state:%u", next_state); } - if (lookahead->child_count > 0) { + if (lookahead->children.size > 0) { parser__breakdown_lookahead(self, &lookahead, state, reusable_node); next_state = ts_language_next_state(self->language, state, lookahead->symbol); } @@ -1052,7 +1051,7 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re } case TSParseActionTypeRecover: { - while (lookahead->child_count > 0) { + while (lookahead->children.size > 0) { parser__breakdown_lookahead(self, &lookahead, state, reusable_node); } parser__recover(self, version, lookahead); diff --git a/src/runtime/reusable_node.h b/src/runtime/reusable_node.h index 87d20dbf..04b9af7e 100644 --- a/src/runtime/reusable_node.h +++ b/src/runtime/reusable_node.h @@ -20,8 +20,8 @@ static inline void reusable_node_pop(ReusableNode *self) { while (self->tree) { Tree *parent = self->tree->context.parent; uint32_t next_index = self->tree->context.index + 1; - if (parent && parent->child_count > next_index) { - self->tree = parent->children[next_index]; + if (parent && parent->children.size > next_index) { + self->tree = parent->children.contents[next_index]; return; } self->tree = parent; @@ -30,17 +30,17 @@ static inline void reusable_node_pop(ReusableNode *self) { static inline ReusableNode reusable_node_after_leaf(const ReusableNode *self) { ReusableNode result = *self; - while (result.tree->child_count > 0) - result.tree = result.tree->children[0]; + while (result.tree->children.size > 0) + result.tree = result.tree->children.contents[0]; reusable_node_pop(&result); return result; } static inline bool reusable_node_breakdown(ReusableNode *self) { - if (self->tree->child_count == 0) { + if (self->tree->children.size == 0) { return false; } else { - self->tree = self->tree->children[0]; + self->tree = self->tree->children.contents[0]; return true; } } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 885083e4..aa2eaed4 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -147,7 +147,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p if (state == ERROR_STATE) { node->error_cost += - ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + + ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->children.size == 0) ? 1 : tree->visible_child_count) + ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; if (previous_node->links[0].tree) { @@ -173,7 +173,7 @@ static bool stack__tree_is_equivalent(const Tree *left, const Tree *right) { right && left->symbol == right->symbol && ((left->error_cost > 0 && right->error_cost > 0) || - (left->child_count == 0 && right->child_count == 0 && + (left->children.size == 0 && right->children.size == 0 && left->padding.bytes == right->padding.bytes && left->size.bytes == right->size.bytes && left->extra == right->extra && diff --git a/src/runtime/tree.c b/src/runtime/tree.c index f88895d2..06828f3b 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -169,8 +169,6 @@ Tree *ts_tree_make_leaf(TreePool *pool, TSSymbol symbol, Length padding, Length .ref_count = 1, .symbol = symbol, .size = size, - .child_count = 0, - .children = NULL, .visible_child_count = 0, .named_child_count = 0, .alias_sequence_id = 0, @@ -207,50 +205,50 @@ Tree *ts_tree_make_copy(TreePool *pool, Tree *self) { static void ts_tree__compress(Tree *self, unsigned count, const TSLanguage *language) { Tree *tree = self; for (unsigned i = 0; i < count; i++) { - if (tree->ref_count > 1 || tree->child_count != 2) break; + if (tree->ref_count > 1 || tree->children.size != 2) break; - Tree *child = tree->children[0]; + Tree *child = tree->children.contents[0]; if ( child->ref_count > 1 || - child->child_count != 2 || + child->children.size != 2 || child->symbol != tree->symbol ) break; - Tree *grandchild = child->children[0]; + Tree *grandchild = child->children.contents[0]; if ( grandchild->ref_count > 1 || - grandchild->child_count != 2 || + grandchild->children.size != 2 || grandchild->symbol != tree->symbol ) break; - tree->children[0] = grandchild; + tree->children.contents[0] = grandchild; grandchild->context.parent = tree; grandchild->context.index = -1; - child->children[0] = grandchild->children[1]; - child->children[0]->context.parent = child; - child->children[0]->context.index = -1; + child->children.contents[0] = grandchild->children.contents[1]; + child->children.contents[0]->context.parent = child; + child->children.contents[0]->context.index = -1; - grandchild->children[1] = child; - grandchild->children[1]->context.parent = grandchild; - grandchild->children[1]->context.index = -1; + grandchild->children.contents[1] = child; + grandchild->children.contents[1]->context.parent = grandchild; + grandchild->children.contents[1]->context.index = -1; tree = grandchild; } while (tree != self) { tree = tree->context.parent; - Tree *child = tree->children[0]; - Tree *grandchild = child->children[1]; - ts_tree_set_children(grandchild, 2, grandchild->children, language); - ts_tree_set_children(child, 2, child->children, language); - ts_tree_set_children(tree, 2, tree->children, language); + Tree *child = tree->children.contents[0]; + Tree *grandchild = child->children.contents[1]; + ts_tree_set_children(grandchild, &grandchild->children, language); + ts_tree_set_children(child, &child->children, language); + ts_tree_set_children(tree, &tree->children, language); } } void ts_tree__balance(Tree *self, const TSLanguage *language) { - if (self->children[0]->repeat_depth > self->children[1]->repeat_depth) { - unsigned n = self->children[0]->repeat_depth - self->children[1]->repeat_depth; + if (self->children.contents[0]->repeat_depth > self->children.contents[1]->repeat_depth) { + unsigned n = self->children.contents[0]->repeat_depth - self->children.contents[1]->repeat_depth; for (unsigned i = n / 2; i > 0; i /= 2) { ts_tree__compress(self, i, language); n -= i; @@ -273,8 +271,8 @@ void ts_tree_assign_parents(Tree *self, TreePool *pool, const TSLanguage *langua const TSSymbol *alias_sequence = ts_language_alias_sequence(language, tree->alias_sequence_id); uint32_t non_extra_index = 0; bool earlier_child_was_changed = false; - for (uint32_t i = 0; i < tree->child_count; i++) { - Tree *child = tree->children[i]; + for (uint32_t i = 0; i < tree->children.size; i++) { + Tree *child = tree->children.contents[i]; if (earlier_child_was_changed || child->context.parent != tree || child->context.index != i) { earlier_child_was_changed = true; child->context.parent = tree; @@ -296,12 +294,12 @@ void ts_tree_assign_parents(Tree *self, TreePool *pool, const TSLanguage *langua } } -void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, - const TSLanguage *language) { - if (self->child_count > 0 && children != self->children) ts_free(self->children); +void ts_tree_set_children(Tree *self, TreeArray *children, const TSLanguage *language) { + if (self->children.size > 0 && children->contents != self->children.contents) { + array_delete(&self->children); + } - self->children = children; - self->child_count = child_count; + self->children = *children; self->named_child_count = 0; self->visible_child_count = 0; self->error_cost = 0; @@ -313,8 +311,8 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, uint32_t non_extra_index = 0; const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id); - for (uint32_t i = 0; i < child_count; i++) { - Tree *child = children[i]; + for (uint32_t i = 0; i < self->children.size; i++) { + Tree *child = self->children.contents[i]; if (i == 0) { self->padding = child->padding; @@ -338,7 +336,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, } else if (child->visible) { self->visible_child_count++; if (child->named) self->named_child_count++; - } else if (child->child_count > 0) { + } else if (child->children.size > 0) { self->visible_child_count += child->visible_child_count; self->named_child_count += child->named_child_count; } @@ -357,62 +355,55 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, self->error_cost += ERROR_COST_PER_RECOVERY + ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes + ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row; - for (uint32_t i = 0; i < child_count; i++) { - if (!self->children[i]->extra) { + for (uint32_t i = 0; i < self->children.size; i++) { + if (!self->children.contents[i]->extra) { self->error_cost += ERROR_COST_PER_SKIPPED_TREE; } } } - if (child_count > 0) { - self->first_leaf = children[0]->first_leaf; - if (children[0]->fragile_left) { - self->fragile_left = true; - } - if (children[child_count - 1]->fragile_right) { - self->fragile_right = true; - } + if (self->children.size > 0) { + Tree *first_child = self->children.contents[0]; + Tree *last_child = self->children.contents[self->children.size - 1]; + self->first_leaf = first_child->first_leaf; + if (first_child->fragile_left) self->fragile_left = true; + if (last_child->fragile_right) self->fragile_right = true; if ( - self->child_count == 2 && + self->children.size == 2 && !self->visible && !self->named && - self->children[0]->symbol == self->symbol && - self->children[1]->symbol == self->symbol + first_child->symbol == self->symbol && + last_child->symbol == self->symbol ) { - if (self->children[0]->repeat_depth > self->children[1]->repeat_depth) { - self->repeat_depth = self->children[0]->repeat_depth + 1; + if (first_child->repeat_depth > last_child->repeat_depth) { + self->repeat_depth = first_child->repeat_depth + 1; } else { - self->repeat_depth = self->children[1]->repeat_depth + 1; + self->repeat_depth = last_child->repeat_depth + 1; } } } } -Tree *ts_tree_make_node(TreePool *pool, TSSymbol symbol, uint32_t child_count, Tree **children, +Tree *ts_tree_make_node(TreePool *pool, TSSymbol symbol, TreeArray *children, unsigned alias_sequence_id, const TSLanguage *language) { Tree *result = ts_tree_make_leaf(pool, symbol, length_zero(), length_zero(), language); result->alias_sequence_id = alias_sequence_id; - ts_tree_set_children(result, child_count, children, language); + ts_tree_set_children(result, children, language); return result; } Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLanguage *language) { for (uint32_t i = 0; i < children->size; i++) { Tree *child = children->contents[i]; - if (child->symbol == ts_builtin_sym_error && child->child_count > 0) { - array_splice(children, i, 1, child->child_count, child->children); - i += child->child_count - 1; - for (uint32_t j = 0; j < child->child_count; j++) - ts_tree_retain(child->children[j]); + if (child->symbol == ts_builtin_sym_error && child->children.size > 0) { + array_splice(children, i, 1, child->children.size, child->children.contents); + i += child->children.size - 1; + for (uint32_t j = 0; j < child->children.size; j++) + ts_tree_retain(child->children.contents[j]); ts_tree_release(pool, child); } } - Tree *result = ts_tree_make_node( - pool, ts_builtin_sym_error, - children->size, children->contents, - 0, language - ); - + Tree *result = ts_tree_make_node(pool, ts_builtin_sym_error, children, 0, language); result->fragile_left = true; result->fragile_right = true; return result; @@ -439,11 +430,11 @@ void ts_tree_release(TreePool *pool, Tree *self) { assert(tree->ref_count > 0); tree->ref_count--; if (tree->ref_count == 0) { - if (tree->child_count > 0) { - for (uint32_t i = 0; i < tree->child_count; i++) { - array_push(&pool->tree_stack, tree->children[i]); + if (tree->children.size > 0) { + for (uint32_t i = 0; i < tree->children.size; i++) { + array_push(&pool->tree_stack, tree->children.contents[i]); } - ts_free(tree->children); + array_delete(&tree->children); } else if (tree->has_external_tokens) { ts_external_token_state_delete(&tree->external_token_state); } @@ -484,12 +475,12 @@ bool ts_tree_eq(const Tree *self, const Tree *other) { if (self->padding.bytes != other->padding.bytes) return false; if (self->size.bytes != other->size.bytes) return false; if (self->symbol == ts_builtin_sym_error) return self->lookahead_char == other->lookahead_char; - if (self->child_count != other->child_count) return false; + if (self->children.size != other->children.size) return false; if (self->visible_child_count != other->visible_child_count) return false; if (self->named_child_count != other->named_child_count) return false; - for (uint32_t i = 0; i < self->child_count; i++) { - if (!ts_tree_eq(self->children[i], other->children[i])) { + for (uint32_t i = 0; i < self->children.size; i++) { + if (!ts_tree_eq(self->children.contents[i], other->children.contents[i])) { return false; } } @@ -501,13 +492,13 @@ int ts_tree_compare(const Tree *left, const Tree *right) { return -1; if (right->symbol < left->symbol) return 1; - if (left->child_count < right->child_count) + if (left->children.size < right->children.size) return -1; - if (right->child_count < left->child_count) + if (right->children.size < left->children.size) return 1; - for (uint32_t i = 0; i < left->child_count; i++) { - Tree *left_child = left->children[i]; - Tree *right_child = right->children[i]; + for (uint32_t i = 0; i < left->children.size; i++) { + Tree *left_child = left->children.contents[i]; + Tree *right_child = right->children.contents[i]; switch (ts_tree_compare(left_child, right_child)) { case -1: return -1; @@ -527,10 +518,10 @@ static inline long min_byte(long a, long b) { bool ts_tree_invalidate_lookahead(Tree *self, uint32_t edit_byte_offset) { if (edit_byte_offset >= self->bytes_scanned) return false; self->has_changes = true; - if (self->child_count > 0) { + if (self->children.size > 0) { uint32_t child_start_byte = 0; - for (uint32_t i = 0; i < self->child_count; i++) { - Tree *child = self->children[i]; + for (uint32_t i = 0; i < self->children.size; i++) { + Tree *child = self->children.contents[i]; if (child_start_byte > edit_byte_offset) break; ts_tree_invalidate_lookahead(child, edit_byte_offset - child_start_byte); child_start_byte += ts_tree_total_bytes(child); @@ -581,8 +572,8 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) { long remaining_bytes_to_delete = 0; TSPoint remaining_extent_to_delete = {0, 0}; Length child_left, child_right = length_zero(); - for (uint32_t i = 0; i < self->child_count; i++) { - Tree *child = self->children[i]; + for (uint32_t i = 0; i < self->children.size; i++) { + Tree *child = self->children.contents[i]; child_left = child_right; child_right = length_add(child_left, ts_tree_total_size(child)); @@ -628,9 +619,9 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) { Tree *ts_tree_last_external_token(Tree *tree) { if (!tree->has_external_tokens) return NULL; - while (tree->child_count > 0) { - for (uint32_t i = tree->child_count - 1; i + 1 > 0; i--) { - Tree *child = tree->children[i]; + while (tree->children.size > 0) { + for (uint32_t i = tree->children.size - 1; i + 1 > 0; i--) { + Tree *child = tree->children.contents[i]; if (child->has_external_tokens) { tree = child; break; @@ -676,7 +667,7 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu } if (visible) { - if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.bytes > 0) { + if (self->symbol == ts_builtin_sym_error && self->children.size == 0 && self->size.bytes > 0) { cursor += snprintf(*writer, limit, "(UNEXPECTED "); cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char); } else if (self->is_missing) { @@ -688,8 +679,8 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu } } - for (uint32_t i = 0; i < self->child_count; i++) { - Tree *child = self->children[i]; + for (uint32_t i = 0; i < self->children.size; i++) { + Tree *child = self->children.contents[i]; cursor += ts_tree__write_to_string(child, language, *writer, limit, false, include_all); } @@ -711,7 +702,7 @@ void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset, TSSymbol symbol = self->context.alias_symbol ? self->context.alias_symbol : self->symbol; fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, symbol)); - if (self->child_count == 0) + if (self->children.size == 0) fprintf(f, ", shape=plaintext"); if (self->extra) fprintf(f, ", fontcolor=gray"); @@ -719,8 +710,8 @@ void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset, fprintf(f, ", tooltip=\"address:%p\nrange:%u - %u\nstate:%d\nerror-cost:%u\nrepeat-depth:%u\"]\n", self, byte_offset, byte_offset + ts_tree_total_bytes(self), self->parse_state, self->error_cost, self->repeat_depth); - for (uint32_t i = 0; i < self->child_count; i++) { - const Tree *child = self->children[i]; + for (uint32_t i = 0; i < self->children.size; i++) { + const Tree *child = self->children.contents[i]; ts_tree__print_dot_graph(child, byte_offset, language, f); fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i); byte_offset += ts_tree_total_bytes(child); diff --git a/src/runtime/tree.h b/src/runtime/tree.h index ad104894..09dae3dd 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -17,12 +17,16 @@ extern TSStateId TS_TREE_STATE_NONE; typedef struct { union { char *long_data; - char short_data[sizeof(char *) + sizeof(unsigned)]; + char short_data[sizeof(char *) + sizeof(uint32_t)]; }; - unsigned length; + uint32_t length; } TSExternalTokenState; -typedef struct Tree { +typedef struct Tree Tree; + +typedef Array(Tree *) TreeArray; + +struct Tree { struct { struct Tree *parent; uint32_t index; @@ -31,35 +35,16 @@ typedef struct Tree { bool alias_is_named : 1; } context; - uint32_t child_count; - union { - struct { - struct Tree **children; - uint32_t visible_child_count; - uint32_t named_child_count; - unsigned short alias_sequence_id; - }; - TSExternalTokenState external_token_state; - int32_t lookahead_char; - }; - Length padding; Length size; - uint32_t bytes_scanned; - - TSSymbol symbol; - TSStateId parse_state; - unsigned error_cost; - unsigned node_count; - unsigned repeat_depth; - - struct { - TSSymbol symbol; - TSLexMode lex_mode; - } first_leaf; - uint32_t ref_count; - int dynamic_precedence; + uint32_t bytes_scanned; + uint32_t error_cost; + uint32_t node_count; + uint32_t repeat_depth; + uint32_t child_count; + int32_t dynamic_precedence; + bool visible : 1; bool named : 1; bool extra : 1; @@ -68,9 +53,30 @@ typedef struct Tree { bool has_changes : 1; bool has_external_tokens : 1; bool is_missing : 1; -} Tree; + TSSymbol symbol; + TSStateId parse_state; + struct { + TSSymbol symbol; + TSLexMode lex_mode; + } first_leaf; -typedef Array(Tree *) TreeArray; + union { + struct { + TreeArray children; + uint32_t visible_child_count; + uint32_t named_child_count; + uint16_t alias_sequence_id; + }; + struct { + uint32_t _2; + TSExternalTokenState external_token_state; + }; + struct { + uint32_t _1; + int32_t lookahead_char; + }; + }; +}; typedef struct { TreeArray free_trees; @@ -93,7 +99,7 @@ Tree *ts_tree_pool_allocate(TreePool *); void ts_tree_pool_free(TreePool *, Tree *); Tree *ts_tree_make_leaf(TreePool *, TSSymbol, Length, Length, const TSLanguage *); -Tree *ts_tree_make_node(TreePool *, TSSymbol, uint32_t, Tree **, unsigned, const TSLanguage *); +Tree *ts_tree_make_node(TreePool *, TSSymbol, TreeArray *, unsigned, const TSLanguage *); Tree *ts_tree_make_copy(TreePool *, Tree *child); Tree *ts_tree_make_error_node(TreePool *, TreeArray *, const TSLanguage *); Tree *ts_tree_make_error(TreePool *, Length, Length, int32_t, const TSLanguage *); @@ -104,7 +110,7 @@ bool ts_tree_eq(const Tree *tree1, const Tree *tree2); int ts_tree_compare(const Tree *tree1, const Tree *tree2); uint32_t ts_tree_start_column(const Tree *self); uint32_t ts_tree_end_column(const Tree *self); -void ts_tree_set_children(Tree *, uint32_t, Tree **, const TSLanguage *); +void ts_tree_set_children(Tree *, TreeArray *, const TSLanguage *); void ts_tree_assign_parents(Tree *, TreePool *, const TSLanguage *); void ts_tree_edit(Tree *, const TSInputEdit *edit); char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all); diff --git a/test/helpers/tree_helpers.cc b/test/helpers/tree_helpers.cc index abd6bc10..5142ca8d 100644 --- a/test/helpers/tree_helpers.cc +++ b/test/helpers/tree_helpers.cc @@ -16,11 +16,14 @@ const char *symbol_names[24] = { "twenty-two", "twenty-three" }; -Tree ** tree_array(std::vector trees) { - Tree ** result = (Tree **)calloc(trees.size(), sizeof(Tree *)); +TreeArray *tree_array(std::vector trees) { + static TreeArray result; + result.capacity = trees.size(); + result.size = trees.size(); + result.contents = (Tree **)calloc(trees.size(), sizeof(Tree *)); for (size_t i = 0; i < trees.size(); i++) - result[i] = trees[i]; - return result; + result.contents[i] = trees[i]; + return &result; } ostream &operator<<(std::ostream &stream, const Tree *tree) { diff --git a/test/helpers/tree_helpers.h b/test/helpers/tree_helpers.h index 19ae2c70..c28dcd98 100644 --- a/test/helpers/tree_helpers.h +++ b/test/helpers/tree_helpers.h @@ -6,7 +6,7 @@ #include extern const char *symbol_names[24]; -Tree ** tree_array(std::vector trees); +TreeArray *tree_array(std::vector trees); std::ostream &operator<<(std::ostream &stream, const Tree *tree); std::ostream &operator<<(std::ostream &stream, const TSNode &node); diff --git a/test/runtime/tree_test.cc b/test/runtime/tree_test.cc index 96f485f7..8669b6c1 100644 --- a/test/runtime/tree_test.cc +++ b/test/runtime/tree_test.cc @@ -7,11 +7,11 @@ void assert_consistent(const Tree *tree) { if (tree->child_count == 0) return; - AssertThat(tree->children[0]->padding, Equals(tree->padding)); + AssertThat(tree->children.contents[0]->padding, Equals(tree->padding)); Length total_children_size = length_zero(); - for (size_t i = 0; i < tree->child_count; i++) { - Tree *child = tree->children[i]; + for (size_t i = 0; i < tree->children.size; i++) { + Tree *child = tree->children.contents[i]; AssertThat(child->context.offset, Equals(total_children_size)); assert_consistent(child); total_children_size = length_add(total_children_size, ts_tree_total_size(child)); @@ -86,7 +86,7 @@ describe("Tree", []() { ts_tree_retain(tree1); ts_tree_retain(tree2); - parent1 = ts_tree_make_node(&pool, symbol3, 2, tree_array({ + parent1 = ts_tree_make_node(&pool, symbol3, tree_array({ tree1, tree2, }), 0, &language); @@ -114,7 +114,7 @@ describe("Tree", []() { ts_tree_retain(tree1); ts_tree_retain(tree2); - parent = ts_tree_make_node(&pool, symbol3, 2, tree_array({ + parent = ts_tree_make_node(&pool, symbol3, tree_array({ tree1, tree2, }), 0, &language); @@ -138,7 +138,7 @@ describe("Tree", []() { ts_tree_retain(tree1); ts_tree_retain(tree2); - parent = ts_tree_make_node(&pool, symbol3, 2, tree_array({ + parent = ts_tree_make_node(&pool, symbol3, tree_array({ tree1, tree2, }), 0, &language); @@ -162,7 +162,7 @@ describe("Tree", []() { ts_tree_retain(tree1); ts_tree_retain(tree2); - parent = ts_tree_make_node(&pool, symbol3, 2, tree_array({ + parent = ts_tree_make_node(&pool, symbol3, tree_array({ tree1, tree2, }), 0, &language); @@ -183,7 +183,7 @@ describe("Tree", []() { Tree *tree = nullptr; before_each([&]() { - tree = ts_tree_make_node(&pool, symbol1, 3, tree_array({ + tree = ts_tree_make_node(&pool, symbol1, tree_array({ ts_tree_make_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, &language), ts_tree_make_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, &language), ts_tree_make_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, &language), @@ -213,13 +213,13 @@ describe("Tree", []() { AssertThat(tree->padding, Equals({3, {0, 3}})); AssertThat(tree->size, Equals({13, {0, 13}})); - AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({3, {0, 3}})); - AssertThat(tree->children[0]->size, Equals({3, {0, 3}})); + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({3, {0, 3}})); + AssertThat(tree->children.contents[0]->size, Equals({3, {0, 3}})); - AssertThat(tree->children[1]->has_changes, IsFalse()); - AssertThat(tree->children[1]->padding, Equals({2, {0, 2}})); - AssertThat(tree->children[1]->size, Equals({3, {0, 3}})); + AssertThat(tree->children.contents[1]->has_changes, IsFalse()); + AssertThat(tree->children.contents[1]->padding, Equals({2, {0, 2}})); + AssertThat(tree->children.contents[1]->size, Equals({3, {0, 3}})); }); }); @@ -239,9 +239,9 @@ describe("Tree", []() { AssertThat(tree->padding, Equals({5, {0, 5}})); AssertThat(tree->size, Equals({11, {0, 11}})); - AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({5, {0, 5}})); - AssertThat(tree->children[0]->size, Equals({1, {0, 1}})); + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({5, {0, 5}})); + AssertThat(tree->children.contents[0]->size, Equals({1, {0, 1}})); }); }); @@ -263,11 +263,11 @@ describe("Tree", []() { AssertThat(tree->padding, Equals({4, {0, 4}})); AssertThat(tree->size, Equals({13, {0, 13}})); - AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({4, {0, 4}})); - AssertThat(tree->children[0]->size, Equals({3, {0, 3}})); + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({4, {0, 4}})); + AssertThat(tree->children.contents[0]->size, Equals({3, {0, 3}})); - AssertThat(tree->children[1]->has_changes, IsFalse()); + AssertThat(tree->children.contents[1]->has_changes, IsFalse()); }); }); @@ -287,11 +287,11 @@ describe("Tree", []() { AssertThat(tree->padding, Equals({2, {0, 2}})); AssertThat(tree->size, Equals({16, {0, 16}})); - AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({2, {0, 2}})); - AssertThat(tree->children[0]->size, Equals({6, {0, 6}})); + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({2, {0, 2}})); + AssertThat(tree->children.contents[0]->size, Equals({6, {0, 6}})); - AssertThat(tree->children[1]->has_changes, IsFalse()); + AssertThat(tree->children.contents[1]->has_changes, IsFalse()); }); }); @@ -313,23 +313,23 @@ describe("Tree", []() { AssertThat(tree->padding, Equals({4, {0, 4}})); AssertThat(tree->size, Equals({4, {0, 4}})); - AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({4, {0, 4}})); - AssertThat(tree->children[0]->size, Equals({0, {0, 0}})); + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({4, {0, 4}})); + AssertThat(tree->children.contents[0]->size, Equals({0, {0, 0}})); - AssertThat(tree->children[1]->has_changes, IsTrue()); - AssertThat(tree->children[1]->padding, Equals({0, {0, 0}})); - AssertThat(tree->children[1]->size, Equals({0, {0, 0}})); + AssertThat(tree->children.contents[1]->has_changes, IsTrue()); + AssertThat(tree->children.contents[1]->padding, Equals({0, {0, 0}})); + AssertThat(tree->children.contents[1]->size, Equals({0, {0, 0}})); - AssertThat(tree->children[2]->has_changes, IsTrue()); - AssertThat(tree->children[2]->padding, Equals({1, {0, 1}})); - AssertThat(tree->children[2]->size, Equals({3, {0, 3}})); + AssertThat(tree->children.contents[2]->has_changes, IsTrue()); + AssertThat(tree->children.contents[2]->padding, Equals({1, {0, 1}})); + AssertThat(tree->children.contents[2]->size, Equals({3, {0, 3}})); }); }); describe("edits within a tree's range of scanned bytes", [&]() { it("marks preceding trees as changed", [&]() { - tree->children[0]->bytes_scanned = 7; + tree->children.contents[0]->bytes_scanned = 7; TSInputEdit edit; edit.start_byte = 6; @@ -341,7 +341,7 @@ describe("Tree", []() { ts_tree_edit(tree, &edit); assert_consistent(tree); - AssertThat(tree->children[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); }); }); }); @@ -361,14 +361,14 @@ describe("Tree", []() { Tree *leaf_copy = ts_tree_make_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, &language); AssertThat(ts_tree_eq(leaf, leaf_copy), IsTrue()); - Tree *parent = ts_tree_make_node(&pool, symbol2, 2, tree_array({ + Tree *parent = ts_tree_make_node(&pool, symbol2, tree_array({ leaf, leaf_copy, }), 0, &language); ts_tree_retain(leaf); ts_tree_retain(leaf_copy); - Tree *parent_copy = ts_tree_make_node(&pool, symbol2, 2, tree_array({ + Tree *parent_copy = ts_tree_make_node(&pool, symbol2, tree_array({ leaf, leaf_copy, }), 0, &language); @@ -415,14 +415,14 @@ describe("Tree", []() { it("returns false for trees with different children", [&]() { Tree *leaf2 = ts_tree_make_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language); - Tree *parent = ts_tree_make_node(&pool, symbol2, 2, tree_array({ + Tree *parent = ts_tree_make_node(&pool, symbol2, tree_array({ leaf, leaf2, }), 0, &language); ts_tree_retain(leaf); ts_tree_retain(leaf2); - Tree *different_parent = ts_tree_make_node(&pool, symbol2, 2, tree_array({ + Tree *different_parent = ts_tree_make_node(&pool, symbol2, tree_array({ leaf2, leaf, }), 0, &language); @@ -450,14 +450,14 @@ describe("Tree", []() { it("returns the last serialized external token state in the given tree", [&]() { Tree *tree1, *tree2, *tree3, *tree4, *tree5, *tree6, *tree7, *tree8, *tree9; - tree1 = ts_tree_make_node(&pool, symbol1, 2, tree_array({ - (tree2 = ts_tree_make_node(&pool, symbol2, 3, tree_array({ + tree1 = ts_tree_make_node(&pool, symbol1, tree_array({ + (tree2 = ts_tree_make_node(&pool, symbol2, tree_array({ (tree3 = make_external(ts_tree_make_leaf(&pool, symbol3, padding, size, &language))), (tree4 = ts_tree_make_leaf(&pool, symbol4, padding, size, &language)), (tree5 = ts_tree_make_leaf(&pool, symbol5, padding, size, &language)), }), 0, &language)), - (tree6 = ts_tree_make_node(&pool, symbol6, 2, tree_array({ - (tree7 = ts_tree_make_node(&pool, symbol7, 1, tree_array({ + (tree6 = ts_tree_make_node(&pool, symbol6, tree_array({ + (tree7 = ts_tree_make_node(&pool, symbol7, tree_array({ (tree8 = ts_tree_make_leaf(&pool, symbol8, padding, size, &language)), }), 0, &language)), (tree9 = ts_tree_make_leaf(&pool, symbol9, padding, size, &language)), From 94ed1b696488e76a2b1380361372bd2a5de12553 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 6 Apr 2018 13:28:32 -0700 Subject: [PATCH 09/16] Make array_splice take an array, not a pointer and length --- src/runtime/array.h | 8 ++++---- src/runtime/parser.c | 17 ++++++++--------- src/runtime/tree.c | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/runtime/array.h b/src/runtime/array.h index e4e7ff0f..a0492526 100644 --- a/src/runtime/array.h +++ b/src/runtime/array.h @@ -47,14 +47,14 @@ extern "C" { (self)->contents[(self)->size++] = (element)) #define array_push_all(self, other) \ - array_splice((self), (self)->size, 0, (other)->size, (other)->contents) + array_splice((self), (self)->size, 0, (other)) -#define array_splice(self, index, old_count, new_count, new_elements) \ +#define array_splice(self, index, old_count, new_array) \ array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \ - new_count, (new_elements)) + (new_array)->size, (new_array)->contents) #define array_insert(self, index, element) \ - array_splice(self, index, 0, 1, &(element)) + array__splice((VoidArray *)(self), array__elem_size(self), index, 0, 1, &element) #define array_pop(self) ((self)->contents[--(self)->size]) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index a377c42a..0977a077 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -672,29 +672,28 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) { self->in_ambiguity = false; } -static void parser__accept(Parser *self, StackVersion version, - Tree *lookahead) { +static void parser__accept(Parser *self, StackVersion version, Tree *lookahead) { lookahead->extra = true; assert(lookahead->symbol == ts_builtin_sym_end); ts_tree_retain(lookahead); ts_stack_push(self->stack, version, lookahead, false, 1); - StackSliceArray pop = ts_stack_pop_all(self->stack, version); + StackSliceArray pop = ts_stack_pop_all(self->stack, version); for (uint32_t i = 0; i < pop.size; i++) { - StackSlice slice = pop.contents[i]; - TreeArray trees = slice.trees; + TreeArray trees = pop.contents[i].trees; Tree *root = NULL; for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { Tree *child = trees.contents[j]; if (!child->extra) { - root = ts_tree_make_copy(&self->tree_pool, child); - root->children.size = 0; for (uint32_t k = 0; k < child->children.size; k++) { ts_tree_retain(child->children.contents[k]); } - array_splice(&trees, j, 1, child->children.size, child->children.contents); - ts_tree_set_children(root, &trees, self->language); + array_splice(&trees, j, 1, &child->children); + root = ts_tree_make_node( + &self->tree_pool, child->symbol, &trees, + child->alias_sequence_id, self->language + ); ts_tree_release(&self->tree_pool, child); break; } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 06828f3b..f6dc542f 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -395,7 +395,7 @@ Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLangu for (uint32_t i = 0; i < children->size; i++) { Tree *child = children->contents[i]; if (child->symbol == ts_builtin_sym_error && child->children.size > 0) { - array_splice(children, i, 1, child->children.size, child->children.contents); + array_splice(children, i, 1, &child->children); i += child->children.size - 1; for (uint32_t j = 0; j < child->children.size; j++) ts_tree_retain(child->children.contents[j]); From 87098760de8b77a167d95943ba164a0a3c8fed8b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 6 Apr 2018 09:35:17 -0700 Subject: [PATCH 10/16] :art: --- src/runtime/language.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/runtime/language.c b/src/runtime/language.c index 7cdc71fc..7ef941ae 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -8,7 +8,6 @@ void ts_language_table_entry(const TSLanguage *self, TSStateId state, result->action_count = 0; result->is_reusable = false; result->actions = NULL; - return; } else { assert(symbol < self->token_count); uint32_t action_index = self->parse_table[state * self->symbol_count + symbol]; @@ -27,8 +26,7 @@ uint32_t ts_language_version(const TSLanguage *language) { return language->version; } -TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, - TSSymbol symbol) { +TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) { if (symbol == ts_builtin_sym_error) { return (TSSymbolMetadata){.visible = true, .named = true}; } else { From 3c737d82950337aea7a9d2ffb6d98bbb80957433 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 6 Apr 2018 09:35:17 -0700 Subject: [PATCH 11/16] Respect -D flag in fuzzing examples --- test/integration/fuzzing-examples.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/integration/fuzzing-examples.cc b/test/integration/fuzzing-examples.cc index c962440e..46cac15a 100644 --- a/test/integration/fuzzing-examples.cc +++ b/test/integration/fuzzing-examples.cc @@ -30,7 +30,10 @@ describe("examples found via fuzzing", [&]() { it(("parses example number " + to_string(i)).c_str(), [&]() { TSDocument *document = ts_document_new(); - // ts_document_print_debugging_graphs(document, true); + + if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { + ts_document_print_debugging_graphs(document, true); + } const string &language_name = examples[i].first; ts_document_set_language(document, load_real_language(language_name)); From a79a8c216ff2d56402f910f7c7f9beb578c9c2d4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 6 Apr 2018 09:35:17 -0700 Subject: [PATCH 12/16] Include each node count and error cost in stack dot graphs --- src/runtime/stack.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/runtime/stack.c b/src/runtime/stack.c index aa2eaed4..e05d6a06 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -711,9 +711,10 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf( f, - " tooltip=\"position: %u,%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", + " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", node->position.extent.row, node->position.extent.column, + node->node_count, node->error_cost, node->dynamic_precedence ); @@ -728,21 +729,21 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { if (!link.tree) { fprintf(f, "color=red"); - } else if (link.tree->symbol == ts_builtin_sym_error) { - fprintf(f, "label=\"ERROR\""); } else { - fprintf(f, "label=\""); - if (!link.tree->named) - fprintf(f, "'"); - const char *name = symbol_names[link.tree->symbol]; - for (const char *c = name; *c; c++) { - if (*c == '\"' || *c == '\\') - fprintf(f, "\\"); - fprintf(f, "%c", *c); + if (link.tree->symbol == ts_builtin_sym_error) { + fprintf(f, "label=\"ERROR\""); + } else { + fprintf(f, "label=\""); + if (!link.tree->named) fprintf(f, "'"); + const char *name = symbol_names[link.tree->symbol]; + for (const char *c = name; *c; c++) { + if (*c == '\"' || *c == '\\') fprintf(f, "\\"); + fprintf(f, "%c", *c); + } + if (!link.tree->named) fprintf(f, "'"); + fprintf(f, "\""); } - if (!link.tree->named) - fprintf(f, "'"); - fprintf(f, "\" labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"", + fprintf(f, "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"", link.tree->error_cost, link.tree->dynamic_precedence); } From 1eafcf0ba74bc38820943f0b8a3936882930135c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 6 Apr 2018 09:35:17 -0700 Subject: [PATCH 13/16] Make test script generate debug graphs even when assertions fail --- script/test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/test b/script/test index 34e66241..74025499 100755 --- a/script/test +++ b/script/test @@ -132,7 +132,7 @@ case ${mode} in if [[ -n $line_count ]]; then head -n $line_count $dot_file | dot -Tsvg >> $html_file else - cat $dot_file | dot -Tsvg >> $html_file + cat $dot_file | grep -v 'Assertion' | dot -Tsvg >> $html_file fi rm $dot_file echo "Wrote $html_file - $line_count" From 379a2fd1214960d1896915ba88df84a0a9b3ced1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 6 Apr 2018 09:35:17 -0700 Subject: [PATCH 14/16] Incrementally build a tree of skipped tokens Rather than pushing them to the stack individually --- .../build_tables/parse_table_builder.cc | 8 +- src/runtime/language.c | 8 +- src/runtime/language.h | 4 +- src/runtime/parser.c | 195 +++++++++++++----- src/runtime/stack.c | 50 ++--- src/runtime/stack.h | 2 +- src/runtime/tree.c | 33 ++- test/fixtures/error_corpus/c_errors.txt | 4 +- .../error_corpus/javascript_errors.txt | 45 +++- 9 files changed, 233 insertions(+), 116 deletions(-) diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index 51ed5cc4..6218fec6 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -150,6 +150,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder { MatchesLongerStringWithValidNextChar ); + parse_table.states[state_id].terminal_entries.clear(); + // Add all the tokens that have no conflict with other tokens. LookaheadSet non_conflicting_tokens; for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) { @@ -186,12 +188,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder { } } - for (const Symbol &symbol : grammar.extra_tokens) { - if (!parse_table.states[state_id].terminal_entries.count(symbol)) { - parse_table.add_terminal_action(state_id, symbol, ParseAction::ShiftExtra()); - } - } - for (size_t i = 0; i < grammar.external_tokens.size(); i++) { if (grammar.external_tokens[i].corresponding_internal_token == rules::NONE()) { parse_table.states[state_id].terminal_entries[Symbol::external(i)].actions.push_back(ParseAction::Recover()); diff --git a/src/runtime/language.c b/src/runtime/language.c index 7ef941ae..9bf1fc63 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -4,7 +4,7 @@ void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result) { - if (symbol == ts_builtin_sym_error) { + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { result->action_count = 0; result->is_reusable = false; result->actions = NULL; @@ -27,8 +27,10 @@ uint32_t ts_language_version(const TSLanguage *language) { } TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) { - if (symbol == ts_builtin_sym_error) { + if (symbol == ts_builtin_sym_error) { return (TSSymbolMetadata){.visible = true, .named = true}; + } else if (symbol == ts_builtin_sym_error_repeat) { + return (TSSymbolMetadata){.visible = false, .named = false}; } else { return language->symbol_metadata[symbol]; } @@ -37,6 +39,8 @@ TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbo const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) { if (symbol == ts_builtin_sym_error) { return "ERROR"; + } else if (symbol == ts_builtin_sym_error_repeat) { + return "_ERROR"; } else { return language->symbol_names[symbol]; } diff --git a/src/runtime/language.h b/src/runtime/language.h index 966d15df..64733242 100644 --- a/src/runtime/language.h +++ b/src/runtime/language.h @@ -8,6 +8,8 @@ extern "C" { #include "tree_sitter/parser.h" #include "runtime/tree.h" +#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) + typedef struct { const TSParseAction *actions; uint32_t action_count; @@ -51,7 +53,7 @@ static inline bool ts_language_has_reduce_action(const TSLanguage *self, static inline TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol) { - if (symbol == ts_builtin_sym_error) { + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { return 0; } else if (symbol < self->token_count) { uint32_t count; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 0977a077..23ce42b1 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -639,6 +639,20 @@ static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymb for (uint32_t j = parent->children.size; j < slice.trees.size; j++) { ts_stack_push(self->stack, slice.version, slice.trees.contents[j], false, next_state); } + + if (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + i++; + while (i < pop.size) { + StackSlice slice = pop.contents[i]; + ts_tree_array_delete(&self->tree_pool, &slice.trees); + ts_stack_halt(self->stack, slice.version); + i++; + } + while (ts_stack_version_count(self->stack) > slice.version + 1) { + ts_stack_remove_version(self->stack, slice.version + 1); + } + break; + } } for (StackVersion i = initial_version_count; i < ts_stack_version_count(self->stack); i++) { @@ -720,12 +734,23 @@ static void parser__accept(Parser *self, StackVersion version, Tree *lookahead) static bool parser__do_all_potential_reductions(Parser *self, StackVersion starting_version, TSSymbol lookahead_symbol) { - bool result = false; - for (StackVersion version = starting_version; - ts_stack_version_count(self->stack) < MAX_VERSION_COUNT;) { + uint32_t initial_version_count = ts_stack_version_count(self->stack); + + bool can_shift_lookahead_symbol = false; + StackVersion version = starting_version; + for (unsigned i = 0; true; i++) { uint32_t version_count = ts_stack_version_count(self->stack); if (version >= version_count) break; + bool merged = false; + for (StackVersion i = initial_version_count; i < version; i++) { + if (ts_stack_merge(self->stack, i, version)) { + merged = true; + break; + } + } + if (merged) continue; + TSStateId state = ts_stack_state(self->stack, version); bool has_shift_action = false; array_clear(&self->reduce_actions); @@ -747,7 +772,7 @@ static bool parser__do_all_potential_reductions(Parser *self, StackVersion start switch (action.type) { case TSParseActionTypeShift: case TSParseActionTypeRecover: - if (!action.params.extra) has_shift_action = true; + if (!action.params.extra && !action.params.repetition) has_shift_action = true; break; case TSParseActionTypeReduce: if (action.params.child_count > 0) @@ -763,9 +788,9 @@ static bool parser__do_all_potential_reductions(Parser *self, StackVersion start } } - bool has_reduce_action = self->reduce_actions.size > 0; for (uint32_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; + parser__reduce( self, version, action.symbol, action.count, action.dynamic_precedence, action.alias_sequence_id, @@ -774,14 +799,12 @@ static bool parser__do_all_potential_reductions(Parser *self, StackVersion start } if (has_shift_action) { - result = true; - } else { - if (has_reduce_action) { - ts_stack_renumber_version(self->stack, version_count, version); - continue; - } else if (lookahead_symbol != 0) { - ts_stack_remove_version(self->stack, version); - } + can_shift_lookahead_symbol = true; + } else if (self->reduce_actions.size > 0 && i < MAX_VERSION_COUNT) { + ts_stack_renumber_version(self->stack, version_count, version); + continue; + } else if (lookahead_symbol != 0) { + ts_stack_remove_version(self->stack, version); } if (version == starting_version) { @@ -790,7 +813,8 @@ static bool parser__do_all_potential_reductions(Parser *self, StackVersion start version++; } } - return result; + + return can_shift_lookahead_symbol; } static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) { @@ -830,7 +854,11 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo self, version_with_missing_tree, lookahead_symbol )) { - LOG("recover_with_missing symbol:%s, state:%u", SYM_NAME(missing_symbol), state_after_missing_symbol); + LOG( + "recover_with_missing symbol:%s, state:%u", + SYM_NAME(missing_symbol), + ts_stack_state(self->stack, version_with_missing_tree) + ); did_insert_missing_token = true; break; } @@ -894,13 +922,14 @@ static bool parser__recover_to_state(Parser *self, StackVersion version, unsigne continue; } - StackSliceArray error_pop = ts_stack_pop_error(self->stack, slice.version); - if (error_pop.size > 0) { - StackSlice error_slice = error_pop.contents[0]; - array_push_all(&error_slice.trees, &slice.trees); - array_delete(&slice.trees); - slice.trees = error_slice.trees; - ts_stack_renumber_version(self->stack, error_slice.version, slice.version); + TreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); + if (error_trees.size > 0) { + assert(error_trees.size == 1); + array_splice(&slice.trees, 0, 0, &error_trees.contents[0]->children); + for (unsigned j = 0; j < error_trees.contents[0]->children.size; j++) { + ts_tree_retain(slice.trees.contents[j]); + } + ts_tree_array_delete(&self->tree_pool, &error_trees); } TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&slice.trees); @@ -930,41 +959,51 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) unsigned previous_version_count = ts_stack_version_count(self->stack); Length position = ts_stack_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned depth_since_error = ts_stack_node_count_since_error(self->stack, version); + unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); + unsigned current_error_cost = ts_stack_error_cost(self->stack, version); - for (unsigned i = 0; i < summary->size; i++) { - StackSummaryEntry entry = summary->contents[i]; + if (summary && lookahead->symbol != ts_builtin_sym_error) { + for (unsigned i = 0; i < summary->size; i++) { + StackSummaryEntry entry = summary->contents[i]; - if (entry.state == ERROR_STATE) continue; - if (entry.position.bytes == position.bytes) continue; - unsigned depth = entry.depth + depth_since_error; - if (depth > MAX_SUMMARY_DEPTH) break; + if (entry.state == ERROR_STATE) continue; + if (entry.position.bytes == position.bytes) continue; + unsigned depth = entry.depth; + if (node_count_since_error > 0) depth++; - unsigned new_cost = - depth * ERROR_COST_PER_SKIPPED_TREE + - (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + - (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; - if (parser__better_version_exists(self, version, false, new_cost)) break; + bool would_merge = false; + for (unsigned j = 0; j < previous_version_count; j++) { + if ( + ts_stack_state(self->stack, j) == entry.state && + ts_stack_position(self->stack, j).bytes == position.bytes + ) { + would_merge = true; + break; + } + } - if (ts_language_has_actions(self->language, entry.state, lookahead->symbol)) { - if (parser__recover_to_state(self, version, depth, entry.state)) { - did_recover = true; - LOG("recover state:%u, depth:%u", entry.state, depth); - LOG_STACK(); - break; + if (would_merge) continue; + + unsigned new_cost = + current_error_cost + + entry.depth * ERROR_COST_PER_SKIPPED_TREE + + (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + + (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; + if (parser__better_version_exists(self, version, false, new_cost)) break; + + if (ts_language_has_actions(self->language, entry.state, lookahead->symbol)) { + if (parser__recover_to_state(self, version, depth, entry.state)) { + did_recover = true; + LOG("recover_to_previous state:%u, depth:%u", entry.state, depth); + LOG_STACK(); + break; + } } } } for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { - if (ts_stack_is_active(self->stack, i)) { - for (unsigned j = 0; j < i; j++) { - if (ts_stack_can_merge(self->stack, j, i)) { - ts_stack_remove_version(self->stack, i--); - break; - } - } - } else { + if (!ts_stack_is_active(self->stack, i)) { ts_stack_remove_version(self->stack, i--); } } @@ -983,15 +1022,56 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) return; } + unsigned new_cost = + current_error_cost + ERROR_COST_PER_SKIPPED_TREE + + ts_tree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + + ts_tree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; + + if (parser__better_version_exists(self, version, false, new_cost)) { + ts_stack_halt(self->stack, version); + return; + } + unsigned n; const TSParseAction *actions = ts_language_actions(self->language, 1, lookahead->symbol, &n); - bool extra = n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra; - parser__shift(self, version, ERROR_STATE, lookahead, extra); + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra) { + lookahead->extra = true; + } - if (parser__better_version_exists(self, version, true, ts_stack_error_cost(self->stack, version))) { - ts_stack_halt(self->stack, version); - } else { - LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol)); + LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol)); + ts_tree_retain(lookahead); + TreeArray children = array_new(); + array_grow(&children, 1); + array_push(&children, lookahead); + Tree *error_repeat = ts_tree_make_node( + &self->tree_pool, + ts_builtin_sym_error_repeat, + &children, + 0, + self->language + ); + + if (node_count_since_error > 0) { + StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); + assert(pop.size == 1); + assert(pop.contents[0].trees.size == 1); + ts_stack_renumber_version(self->stack, pop.contents[0].version, version); + array_push(&pop.contents[0].trees, error_repeat); + error_repeat = ts_tree_make_node( + &self->tree_pool, + ts_builtin_sym_error_repeat, + &pop.contents[0].trees, + 0, + self->language + ); + } + + ts_stack_push(self->stack, version, error_repeat, false, ERROR_STATE); + + if (lookahead->has_external_tokens) { + ts_stack_set_last_external_token( + self->stack, version, ts_tree_last_external_token(lookahead) + ); } } @@ -1011,6 +1091,10 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re if (action.params.repetition) break; TSStateId next_state; if (action.params.extra) { + + // TODO remove when TREE_SITTER_LANGUAGE_VERSION 9 is out. + if (state == ERROR_STATE) continue; + next_state = state; LOG("shift_extra"); } else { @@ -1065,7 +1149,8 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re ts_stack_renumber_version(self->stack, last_reduction_version, version); LOG_STACK(); } else if (state == ERROR_STATE) { - ts_stack_push(self->stack, version, lookahead, false, ERROR_STATE); + parser__recover(self, version, lookahead); + ts_tree_release(&self->tree_pool, lookahead); return; } else if (!parser__breakdown_top_of_stack(self, version)) { LOG("detect_error"); diff --git a/src/runtime/stack.c b/src/runtime/stack.c index e05d6a06..4d9f5efe 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -142,21 +142,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p node->error_cost += tree->error_cost; node->position = length_add(node->position, ts_tree_total_size(tree)); node->dynamic_precedence += tree->dynamic_precedence; - if (!tree->extra) { - node->node_count += tree->node_count; - - if (state == ERROR_STATE) { - node->error_cost += - ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->children.size == 0) ? 1 : tree->visible_child_count) + - ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + - ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; - if (previous_node->links[0].tree) { - node->error_cost += - ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes + - ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; - } - } - } + if (!tree->extra) node->node_count += tree->node_count; } } else { node->position = length_zero(); @@ -400,7 +386,9 @@ void ts_stack_set_last_external_token(Stack *self, StackVersion version, Tree *t unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { StackHead *head = array_get(&self->heads, version); unsigned result = head->node->error_cost; - if (head->node->state == ERROR_STATE || head->status == StackStatusPaused) { + if ( + head->status == StackStatusPaused || + (head->node->state == ERROR_STATE && !head->node->links[0].tree)) { result += ERROR_COST_PER_RECOVERY; } return result; @@ -408,6 +396,9 @@ unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { StackHead *head = array_get(&self->heads, version); + if (head->node->node_count < head->node_count_at_last_error) { + head->node_count_at_last_error = head->node->node_count; + } return head->node->node_count - head->node_count_at_last_error; } @@ -482,15 +473,21 @@ inline StackAction pop_error_callback(void *payload, const Iterator *iterator) { } } -StackSliceArray ts_stack_pop_error(Stack *self, StackVersion version) { +TreeArray ts_stack_pop_error(Stack *self, StackVersion version) { StackNode *node = array_get(&self->heads, version)->node; for (unsigned i = 0; i < node->link_count; i++) { if (node->links[i].tree && node->links[i].tree->symbol == ts_builtin_sym_error) { bool found_error = false; - return stack__iter(self, version, pop_error_callback, &found_error, true); + StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, true); + if (pop.size > 0) { + assert(pop.size == 1); + ts_stack_renumber_version(self, pop.contents[0].version, version); + return pop.contents[0].trees; + } + break; } } - return (StackSliceArray){.size = 0}; + return (TreeArray){.size = 0}; } inline StackAction pop_all_callback(void *payload, const Iterator *iterator) { @@ -550,8 +547,14 @@ void ts_stack_remove_version(Stack *self, StackVersion version) { void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { assert(v2 < v1); assert((uint32_t)v1 < self->heads.size); - stack_head_delete(&self->heads.contents[v2], &self->node_pool, self->tree_pool); - self->heads.contents[v2] = self->heads.contents[v1]; + StackHead *source_head = &self->heads.contents[v1]; + StackHead *target_head = &self->heads.contents[v2]; + if (target_head->summary && !source_head->summary) { + source_head->summary = target_head->summary; + target_head->summary = NULL; + } + stack_head_delete(target_head, &self->node_pool, self->tree_pool); + *target_head = *source_head; array_erase(&self->heads, v1); } @@ -578,8 +581,8 @@ bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { for (uint32_t i = 0; i < head2->node->link_count; i++) { stack_node_add_link(head1->node, head2->node->links[i]); } - if (head2->node_count_at_last_error > head1->node_count_at_last_error) { - head1->node_count_at_last_error = head2->node_count_at_last_error; + if (head1->node->state == ERROR_STATE) { + head1->node_count_at_last_error = head1->node->node_count; } ts_stack_remove_version(self, version2); return true; @@ -593,6 +596,7 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version head2->status == StackStatusActive && head1->node->state == head2->node->state && head1->node->position.bytes == head2->node->position.bytes && + head1->node->error_cost == head2->node->error_cost && ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); } diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 92a09b69..32e68e6e 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -65,7 +65,7 @@ void ts_stack_push(Stack *, StackVersion, Tree *, bool, TSStateId); StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count); // Remove an error at the top of the given version of the stack. -StackSliceArray ts_stack_pop_error(Stack *, StackVersion); +TreeArray ts_stack_pop_error(Stack *, StackVersion); // Remove any pending trees from the top of the given version of the stack. StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index f6dc542f..f0ffd1b4 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -324,7 +324,9 @@ void ts_tree_set_children(Tree *self, TreeArray *children, const TSLanguage *lan self->size = length_add(self->size, ts_tree_total_size(child)); } - self->error_cost += child->error_cost; + if (child->symbol != ts_builtin_sym_error_repeat) { + self->error_cost += child->error_cost; + } self->dynamic_precedence += child->dynamic_precedence; self->node_count += child->node_count; @@ -351,13 +353,18 @@ void ts_tree_set_children(Tree *self, TreeArray *children, const TSLanguage *lan if (!child->extra) non_extra_index++; } - if (self->symbol == ts_builtin_sym_error) { + if (self->symbol == ts_builtin_sym_error || self->symbol == ts_builtin_sym_error_repeat) { self->error_cost += ERROR_COST_PER_RECOVERY + ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes + ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row; for (uint32_t i = 0; i < self->children.size; i++) { - if (!self->children.contents[i]->extra) { + Tree *child = self->children.contents[i]; + if (child->extra) continue; + if (child->symbol == ts_builtin_sym_error && child->children.size == 0) continue; + if (child->visible) { self->error_cost += ERROR_COST_PER_SKIPPED_TREE; + } else { + self->error_cost += ERROR_COST_PER_SKIPPED_TREE * child->visible_child_count; } } } @@ -387,26 +394,16 @@ Tree *ts_tree_make_node(TreePool *pool, TSSymbol symbol, TreeArray *children, unsigned alias_sequence_id, const TSLanguage *language) { Tree *result = ts_tree_make_leaf(pool, symbol, length_zero(), length_zero(), language); result->alias_sequence_id = alias_sequence_id; + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { + result->fragile_left = true; + result->fragile_right = true; + } ts_tree_set_children(result, children, language); return result; } Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLanguage *language) { - for (uint32_t i = 0; i < children->size; i++) { - Tree *child = children->contents[i]; - if (child->symbol == ts_builtin_sym_error && child->children.size > 0) { - array_splice(children, i, 1, &child->children); - i += child->children.size - 1; - for (uint32_t j = 0; j < child->children.size; j++) - ts_tree_retain(child->children.contents[j]); - ts_tree_release(pool, child); - } - } - - Tree *result = ts_tree_make_node(pool, ts_builtin_sym_error, children, 0, language); - result->fragile_left = true; - result->fragile_right = true; - return result; + return ts_tree_make_node(pool, ts_builtin_sym_error, children, 0, language); } Tree *ts_tree_make_missing_leaf(TreePool *pool, TSSymbol symbol, const TSLanguage *language) { diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index c5833156..b2931b7d 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -81,7 +81,9 @@ int main() { (function_declarator (identifier) (parameter_list)) (compound_statement (if_statement - (field_expression (identifier) (MISSING)) + (field_expression + (identifier) + (MISSING)) (compound_statement (expression_statement (call_expression (identifier) (argument_list))) (expression_statement (call_expression (identifier) (argument_list))) diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index d435ba86..fb31d9e2 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -77,15 +77,12 @@ if ({a: 'b'} {c: 'd'}) { (ERROR (object (pair (property_identifier) (string)))) (object (pair (property_identifier) (string)))) (statement_block - (expression_statement (assignment_expression - (identifier) - (call_expression - (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))) - (ERROR) - (arguments (identifier)))) - (MISSING)) - (statement_block - (expression_statement (identifier)))))) + (expression_statement + (assignment_expression + (identifier) + (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))) + (MISSING)) + (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))))) =================================================== Extra tokens at the end of the file @@ -150,3 +147,33 @@ const a = `b c ${d +} f g` (variable_declarator (identifier) (template_string (template_substitution (identifier) (ERROR)))))) + +========================================================= +Long sequences of invalid tokens +========================================================= + +function main(x) { + console.log('a'); + what?????????????????????????????????????????????????? + console.log('b'); + return {}; +} + +--- + +(program + (function + (identifier) + (formal_parameters (identifier)) + (statement_block + (expression_statement + (call_expression + (member_expression (identifier) (property_identifier)) + (arguments (string)))) + (expression_statement + (identifier) + (ERROR + (call_expression + (member_expression (identifier) (property_identifier)) + (arguments (string))))) + (return_statement (object))))) From 33820253e8463b27776eeaad87c792848d26508a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 8 Apr 2018 13:49:20 -0700 Subject: [PATCH 15/16] Make stack_print_dot_graph function take a language as an argument --- src/runtime/parser.c | 8 ++++---- src/runtime/stack.c | 23 ++++++++++------------- src/runtime/stack.h | 2 +- src/runtime/tree.c | 3 +-- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 23ce42b1..bfb0ab37 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -19,10 +19,10 @@ parser__log(self); \ } -#define LOG_STACK() \ - if (self->print_debugging_graphs) { \ - ts_stack_print_dot_graph(self->stack, self->language->symbol_names, stderr); \ - fputs("\n\n", stderr); \ +#define LOG_STACK() \ + if (self->print_debugging_graphs) { \ + ts_stack_print_dot_graph(self->stack, self->language, stderr); \ + fputs("\n\n", stderr); \ } #define LOG_TREE() \ diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 4d9f5efe..76a6e89c 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,4 +1,5 @@ #include "runtime/alloc.h" +#include "runtime/language.h" #include "runtime/tree.h" #include "runtime/array.h" #include "runtime/stack.h" @@ -646,7 +647,7 @@ void ts_stack_clear(Stack *self) { })); } -bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { +bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) { bool was_recording_allocations = ts_toggle_allocation_recording(false); if (!f) f = stderr; @@ -734,19 +735,15 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { if (!link.tree) { fprintf(f, "color=red"); } else { - if (link.tree->symbol == ts_builtin_sym_error) { - fprintf(f, "label=\"ERROR\""); - } else { - fprintf(f, "label=\""); - if (!link.tree->named) fprintf(f, "'"); - const char *name = symbol_names[link.tree->symbol]; - for (const char *c = name; *c; c++) { - if (*c == '\"' || *c == '\\') fprintf(f, "\\"); - fprintf(f, "%c", *c); - } - if (!link.tree->named) fprintf(f, "'"); - fprintf(f, "\""); + fprintf(f, "label=\""); + if (link.tree->visible && !link.tree->named) fprintf(f, "'"); + const char *name = ts_language_symbol_name(language, link.tree->symbol); + for (const char *c = name; *c; c++) { + if (*c == '\"' || *c == '\\') fprintf(f, "\\"); + fprintf(f, "%c", *c); } + if (link.tree->visible && !link.tree->named) fprintf(f, "'"); + fprintf(f, "\""); fprintf(f, "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"", link.tree->error_cost, link.tree->dynamic_precedence); diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 32e68e6e..4a552323 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -120,7 +120,7 @@ void ts_stack_remove_version(Stack *, StackVersion); void ts_stack_clear(Stack *); -bool ts_stack_print_dot_graph(Stack *, const char **, FILE *); +bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *); typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index f0ffd1b4..b6dd7a9c 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -715,8 +715,7 @@ void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset, } } -void ts_tree_print_dot_graph(const Tree *self, const TSLanguage *language, - FILE *f) { +void ts_tree_print_dot_graph(const Tree *self, const TSLanguage *language, FILE *f) { fprintf(f, "digraph tree {\n"); fprintf(f, "edge [arrowhead=none]\n"); ts_tree__print_dot_graph(self, 0, language, f); From 3672a8ad879261df5d8d3241772174b691ac2026 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 8 Apr 2018 13:49:32 -0700 Subject: [PATCH 16/16] Remove unused tree method --- src/runtime/tree.c | 10 ---------- src/runtime/tree.h | 1 - 2 files changed, 11 deletions(-) diff --git a/src/runtime/tree.c b/src/runtime/tree.c index b6dd7a9c..ad58bfd3 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -68,16 +68,6 @@ void ts_tree_array_delete(TreePool *pool, TreeArray *self) { array_delete(self); } -uint32_t ts_tree_array_essential_count(const TreeArray *self) { - uint32_t result = 0; - for (uint32_t i = 0; i < self->size; i++) { - Tree *tree = self->contents[i]; - if (!tree->extra && tree->symbol != ts_builtin_sym_error) - result++; - } - return result; -} - TreeArray ts_tree_array_remove_last_n(TreeArray *self, uint32_t remove_count) { TreeArray result = array_new(); if (self->size == 0 || remove_count == 0) return result; diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 09dae3dd..5b655afb 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -88,7 +88,6 @@ const char *ts_external_token_state_data(const TSExternalTokenState *); bool ts_tree_array_copy(TreeArray, TreeArray *); void ts_tree_array_delete(TreePool *, TreeArray *); -uint32_t ts_tree_array_essential_count(const TreeArray *); TreeArray ts_tree_array_remove_last_n(TreeArray *, uint32_t); TreeArray ts_tree_array_remove_trailing_extras(TreeArray *); void ts_tree_array_reverse(TreeArray *);