From e59558c83b24ce07f142506add1763413d1fcecc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Apr 2018 09:47:01 -0700 Subject: [PATCH] Allow stack versions to be temporarily paused This way, when detecting an error, we can defer the decision about whether to bail or recover until all stack versions are processed. --- src/runtime/parser.c | 257 ++++++++++++------------ src/runtime/stack.c | 64 ++++-- src/runtime/stack.h | 10 +- test/fixtures/error_corpus/c_errors.txt | 21 ++ 4 files changed, 206 insertions(+), 146 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 34b4506b..a6dfa53a 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -171,6 +171,18 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err return ErrorComparisonNone; } +static ErrorStatus parser__version_status(Parser *self, StackVersion version) { + unsigned cost = ts_stack_error_cost(self->stack, version); + bool is_paused = ts_stack_is_paused(self->stack, version); + if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; + return (ErrorStatus) { + .cost = cost, + .push_count = ts_stack_push_count(self->stack, version), + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE + }; +} + static bool parser__better_version_exists(Parser *self, StackVersion version, bool is_in_error, unsigned cost) { if (self->finished_tree && self->finished_tree->error_cost <= cost) return true; @@ -185,14 +197,9 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { if (i == version || - ts_stack_is_halted(self->stack, i) || + !ts_stack_is_active(self->stack, i) || ts_stack_position(self->stack, i).bytes < position.bytes) continue; - ErrorStatus status_i = { - .cost = ts_stack_error_cost(self->stack, i), - .is_in_error = ts_stack_state(self->stack, i) == ERROR_STATE, - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, i), - .push_count = ts_stack_push_count(self->stack, i) - }; + ErrorStatus status_i = parser__version_status(self, i); switch (parser__compare_versions(self, status, status_i)) { case ErrorComparisonTakeRight: return true; @@ -206,83 +213,6 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, return false; } -static unsigned parser__condense_stack(Parser *self) { - bool made_changes = false; - unsigned min_error_cost = UINT_MAX; - for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { - if (ts_stack_is_halted(self->stack, i)) { - ts_stack_remove_version(self->stack, i); - i--; - continue; - } - - ErrorStatus status_i = { - .cost = ts_stack_error_cost(self->stack, i), - .push_count = ts_stack_push_count(self->stack, i), - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, i), - .is_in_error = ts_stack_state(self->stack, i) == ERROR_STATE, - }; - if (!status_i.is_in_error && status_i.cost < min_error_cost) { - min_error_cost = status_i.cost; - } - - for (StackVersion j = 0; j < i; j++) { - ErrorStatus status_j = { - .cost = ts_stack_error_cost(self->stack, j), - .push_count = ts_stack_push_count(self->stack, j), - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, j), - .is_in_error = ts_stack_state(self->stack, j) == ERROR_STATE, - }; - - bool can_merge = ts_stack_can_merge(self->stack, j, i); - switch (parser__compare_versions(self, status_j, status_i)) { - case ErrorComparisonTakeLeft: - made_changes = true; - ts_stack_remove_version(self->stack, i); - i--; - j = i; - break; - case ErrorComparisonPreferLeft: - case ErrorComparisonNone: - if (can_merge) { - made_changes = true; - ts_stack_force_merge(self->stack, j, i); - i--; - j = i; - } - break; - case ErrorComparisonPreferRight: - made_changes = true; - ts_stack_swap_versions(self->stack, i, j); - if (can_merge) { - ts_stack_force_merge(self->stack, j, i); - i--; - j = i; - } - break; - case ErrorComparisonTakeRight: - made_changes = true; - ts_stack_remove_version(self->stack, j); - i--; - j--; - break; - } - } - } - - while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { - ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); - made_changes = true; - } - - if (made_changes) { - LOG("condense"); - LOG_STACK(); - } - - return min_error_cost; -} - static void parser__restore_external_scanner(Parser *self, Tree *external_token) { if (external_token) { self->language->external_scanner.deserialize( @@ -334,9 +264,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta self->lexer.token_end_position = self->lexer.current_position; } - if (error_mode && self->lexer.token_end_position.bytes <= current_position.bytes) { - LOG("disregard_empty_token"); - } else { + if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) { found_external_token = true; break; } @@ -360,7 +288,6 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta } if (!error_mode) { - LOG("retry_in_error_mode"); error_mode = true; lex_mode = self->language->lex_modes[ERROR_STATE]; valid_external_tokens = ts_language_enabled_external_tokens( @@ -797,7 +724,8 @@ static void parser__accept(Parser *self, StackVersion version, static bool parser__do_all_potential_reductions(Parser *self, StackVersion starting_version, TSSymbol lookahead_symbol) { bool result = false; - for (StackVersion version = starting_version;;) { + for (StackVersion version = starting_version; + ts_stack_version_count(self->stack) < MAX_VERSION_COUNT;) { uint32_t version_count = ts_stack_version_count(self->stack); if (version >= version_count) break; @@ -869,24 +797,7 @@ static bool parser__do_all_potential_reductions(Parser *self, StackVersion start } static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) { - // If enough parse versions have already completed, just halt this version. - if (self->accept_count > MAX_VERSION_COUNT) { - ts_stack_halt(self->stack, version); - LOG("bail_after_too_many_tries"); - return; - } - - // If there are other in-progress versions that are clearly better than this one, - // just halt this version. - unsigned new_cost = ts_stack_error_cost(self->stack, version) + ERROR_COST_PER_SKIPPED_TREE; - if (parser__better_version_exists(self, version, true, new_cost)) { - ts_stack_halt(self->stack, version); - LOG("bail_on_error"); - return; - } - // Perform any reductions that could have happened in this state, regardless of the lookahead. - LOG("handle_error"); uint32_t previous_version_count = ts_stack_version_count(self->stack); parser__do_all_potential_reductions(self, version, 0); uint32_t version_count = ts_stack_version_count(self->stack); @@ -923,7 +834,6 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo lookahead_symbol )) { LOG("recover_with_missing symbol:%s, state:%u", SYM_NAME(missing_symbol), state_after_missing_symbol); - LOG_STACK(); did_insert_missing_token = true; break; } @@ -1023,13 +933,15 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) unsigned previous_version_count = ts_stack_version_count(self->stack); Length position = ts_stack_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); + unsigned depth_since_error = ts_stack_depth_since_error(self->stack, version); for (unsigned i = 0; i < summary->size; i++) { StackSummaryEntry entry = summary->contents[i]; if (entry.state == ERROR_STATE) continue; if (entry.position.bytes == position.bytes) continue; - unsigned depth = entry.depth + ts_stack_depth_since_error(self->stack, version); + unsigned depth = entry.depth + depth_since_error; + if (depth > MAX_SUMMARY_DEPTH) break; unsigned new_cost = depth * ERROR_COST_PER_SKIPPED_TREE + @@ -1041,21 +953,22 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) if (parser__recover_to_state(self, version, depth, entry.state)) { did_recover = true; LOG("recover state:%u, depth:%u", entry.state, depth); + LOG_STACK(); break; } } } for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { - if (ts_stack_is_halted(self->stack, i)) { - ts_stack_remove_version(self->stack, i--); - } else { + if (ts_stack_is_active(self->stack, i)) { for (unsigned j = 0; j < i; j++) { if (ts_stack_can_merge(self->stack, j, i)) { ts_stack_remove_version(self->stack, i--); break; } } + } else { + ts_stack_remove_version(self->stack, i--); } } @@ -1154,21 +1067,14 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re if (last_reduction_version != STACK_VERSION_NONE) { ts_stack_renumber_version(self->stack, last_reduction_version, version); LOG_STACK(); + } else if (state == ERROR_STATE) { + ts_stack_push(self->stack, version, lookahead, false, ERROR_STATE); + return; } else if (!parser__breakdown_top_of_stack(self, version)) { - if (state == ERROR_STATE) { - ts_stack_push(self->stack, version, lookahead, false, ERROR_STATE); - return; - } - - parser__handle_error(self, version, lookahead->first_leaf.symbol); - if (ts_stack_is_halted(self->stack, version)) { - ts_tree_release(&self->tree_pool, lookahead); - return; - } else if (lookahead->size.bytes == 0) { - ts_tree_release(&self->tree_pool, lookahead); - state = ts_stack_state(self->stack, version); - lookahead = parser__get_lookahead(self, version, &state, reusable_node, &table_entry); - } + LOG("detect_error"); + ts_stack_pause(self->stack, version, lookahead->first_leaf.symbol); + ts_tree_release(&self->tree_pool, lookahead); + return; } state = ts_stack_state(self->stack, version); @@ -1176,6 +1082,93 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re } } +static unsigned parser__condense_stack(Parser *self) { + bool made_changes = false; + unsigned min_error_cost = UINT_MAX; + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { + if (ts_stack_is_halted(self->stack, i)) { + ts_stack_remove_version(self->stack, i); + i--; + continue; + } + + ErrorStatus status_i = parser__version_status(self, i); + if (!status_i.is_in_error && status_i.cost < min_error_cost) { + min_error_cost = status_i.cost; + } + + for (StackVersion j = 0; j < i; j++) { + ErrorStatus status_j = parser__version_status(self, j); + + bool can_merge = ts_stack_can_merge(self->stack, j, i); + switch (parser__compare_versions(self, status_j, status_i)) { + case ErrorComparisonTakeLeft: + made_changes = true; + ts_stack_remove_version(self->stack, i); + i--; + j = i; + break; + case ErrorComparisonPreferLeft: + case ErrorComparisonNone: + if (can_merge) { + made_changes = true; + ts_stack_force_merge(self->stack, j, i); + i--; + j = i; + } + break; + case ErrorComparisonPreferRight: + made_changes = true; + ts_stack_swap_versions(self->stack, i, j); + if (can_merge) { + ts_stack_force_merge(self->stack, j, i); + i--; + j = i; + } + break; + case ErrorComparisonTakeRight: + made_changes = true; + ts_stack_remove_version(self->stack, j); + i--; + j--; + break; + } + } + } + + while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); + made_changes = true; + } + + if (ts_stack_version_count(self->stack) > 0) { + bool has_unpaused_version = false; + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { + if (ts_stack_is_paused(self->stack, i)) { + if (!has_unpaused_version) { + LOG("resume version:%u", i); + TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i); + parser__handle_error(self, i, lookahead_symbol); + has_unpaused_version = true; + } else { + ts_stack_remove_version(self->stack, i); + i--; + n--; + } + } else { + has_unpaused_version = true; + } + } + } + + if (made_changes) { + LOG("condense"); + LOG_STACK(); + } + + return min_error_cost; +} + bool parser_init(Parser *self) { ts_lexer_init(&self->lexer); array_init(&self->reduce_actions); @@ -1219,13 +1212,7 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err for (version = 0; version < ts_stack_version_count(self->stack); version++) { reusable_node = self->reusable_node; - while (!ts_stack_is_halted(self->stack, version)) { - position = ts_stack_position(self->stack, version).bytes; - if (position > last_position || (version > 0 && position == last_position)) { - last_position = position; - break; - } - + while (ts_stack_is_active(self->stack, version)) { LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", version, ts_stack_version_count(self->stack), ts_stack_state(self->stack, version), @@ -1234,6 +1221,12 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err parser__advance(self, version, &reusable_node); LOG_STACK(); + + position = ts_stack_position(self->stack, version).bytes; + if (position > last_position || (version > 0 && position == last_position)) { + last_position = position; + break; + } } } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index bd60a9ec..1ea8a181 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -49,12 +49,19 @@ typedef struct { typedef Array(StackNode *) StackNodeArray; +typedef enum { + StackStatusActive, + StackStatusPaused, + StackStatusHalted, +} StackStatus; + typedef struct { StackNode *node; Tree *last_external_token; - uint32_t push_count; - bool is_halted; StackSummary *summary; + uint32_t push_count; + TSSymbol lookahead_when_paused; + StackStatus status; } StackHead; struct Stack { @@ -215,7 +222,8 @@ static StackVersion ts_stack__add_version(Stack *self, StackVersion original_ver .node = node, .push_count = self->heads.contents[original_version].push_count, .last_external_token = last_external_token, - .is_halted = false, + .status = StackStatusActive, + .lookahead_when_paused = 0, }; array_push(&self->heads, head); stack_node_retain(node); @@ -581,7 +589,8 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version StackHead *head1 = &self->heads.contents[version1]; StackHead *head2 = &self->heads.contents[version2]; return - !head1->is_halted && !head2->is_halted && + head1->status == StackStatusActive && + head2->status == StackStatusActive && head1->node->state == head2->node->state && head1->node->position.bytes == head2->node->position.bytes && head1->node->depth == head2->node->depth && @@ -598,11 +607,34 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi } void ts_stack_halt(Stack *self, StackVersion version) { - array_get(&self->heads, version)->is_halted = true; + array_get(&self->heads, version)->status = StackStatusHalted; } -bool ts_stack_is_halted(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->is_halted; +void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) { + StackHead *head = array_get(&self->heads, version); + head->status = StackStatusPaused; + head->lookahead_when_paused = lookahead; +} + +bool ts_stack_is_active(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusActive; +} + +bool ts_stack_is_halted(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusHalted; +} + +bool ts_stack_is_paused(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusPaused; +} + +TSSymbol ts_stack_resume(Stack *self, StackVersion version) { + StackHead *head = array_get(&self->heads, version); + assert(head->status == StackStatusPaused); + TSSymbol result = head->lookahead_when_paused; + head->status = StackStatusActive; + head->lookahead_when_paused = 0; + return result; } void ts_stack_clear(Stack *self) { @@ -614,7 +646,8 @@ void ts_stack_clear(Stack *self) { array_push(&self->heads, ((StackHead){ .node = self->base_node, .last_external_token = NULL, - .is_halted = false, + .status = StackStatusActive, + .lookahead_when_paused = 0, })); } @@ -631,13 +664,18 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { array_clear(&self->iterators); for (uint32_t i = 0; i < self->heads.size; i++) { - if (ts_stack_is_halted(self, i)) continue; StackHead *head = &self->heads.contents[i]; + if (head->status == StackStatusHalted) continue; + fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); - fprintf( - f, - "node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, " - "labeltooltip=\"push_count: %u\ndepth: %u", i, head->node, i, head->push_count, head->node->depth + fprintf(f, "node_head_%u -> node_%p [", i, head->node); + + if (head->status == StackStatusPaused) { + fprintf(f, "color=red "); + } + fprintf(f, + "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"push_count: %u\ndepth: %u", + i, head->push_count, head->node->depth ); if (head->last_external_token) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 14f0f2a7..0be0a4dd 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -102,9 +102,17 @@ bool ts_stack_can_merge(Stack *, StackVersion, StackVersion); void ts_stack_force_merge(Stack *, StackVersion, StackVersion); +TSSymbol ts_stack_resume(Stack *, StackVersion); + +void ts_stack_pause(Stack *, StackVersion, TSSymbol); + void ts_stack_halt(Stack *, StackVersion); -bool ts_stack_is_halted(Stack *, StackVersion); +bool ts_stack_is_active(const Stack *, StackVersion); + +bool ts_stack_is_paused(const Stack *, StackVersion); + +bool ts_stack_is_halted(const Stack *, StackVersion); void ts_stack_renumber_version(Stack *, StackVersion, StackVersion); diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index d1d76d78..c5833156 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -141,3 +141,24 @@ int y = 5; (translation_unit (declaration (primitive_type) (ERROR (identifier)) (identifier)) (declaration (primitive_type) (init_declarator (identifier) (number_literal)))) + +========================================== +Declarations with missing variable names +========================================== + +int a() { + struct x = 1; + int = 2; +} + +--- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator (identifier) (parameter_list)) + (compound_statement + (struct_specifier (type_identifier)) + (ERROR (number_literal)) + (primitive_type) + (ERROR (number_literal)))))