diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index e4b27d12..d9a8e197 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -139,12 +139,9 @@ typedef struct TSLanguage { } \ } -#define RECOVER(state_value) \ - { \ - { \ - .type = TSParseActionTypeRecover, \ - .params = {.state = state_value} \ - } \ +#define RECOVER() \ + { \ + { .type = TSParseActionTypeRecover } \ } #define SHIFT_EXTRA() \ diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index 4b983fa5..8666ce1d 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -44,7 +44,6 @@ struct ParseStateQueueEntry { class ParseTableBuilderImpl : public ParseTableBuilder { const SyntaxGrammar grammar; const LexicalGrammar lexical_grammar; - unordered_map recovery_item_sets_by_lookahead; unordered_map state_ids_by_item_set; vector item_sets_by_state_id; deque parse_state_queue; @@ -54,7 +53,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder { set fragile_reductions; vector following_tokens_by_token; vector coincident_tokens_by_token; - bool processing_recovery_states; public: ParseTableBuilderImpl(const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar) @@ -62,8 +60,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { lexical_grammar(lexical_grammar), item_set_builder(syntax_grammar, lexical_grammar), following_tokens_by_token(lexical_grammar.variables.size()), - coincident_tokens_by_token(lexical_grammar.variables.size()), - processing_recovery_states(false) { + coincident_tokens_by_token(lexical_grammar.variables.size()) { for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) { coincident_tokens_by_token[i].insert(rules::END_OF_INPUT()); @@ -109,10 +106,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { coincident_tokens_by_token ); - processing_recovery_states = true; build_error_parse_state(error_state_id); - process_part_state_queue(); - mark_fragile_actions(); remove_duplicate_parse_states(); @@ -142,8 +136,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder { } void build_error_parse_state(ParseStateId state_id) { - ParseState error_state; - for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) { Symbol token = Symbol::terminal(i); const LexicalVariable &variable = lexical_grammar.variables[i]; @@ -158,38 +150,21 @@ class ParseTableBuilderImpl : public ParseTableBuilder { } } if (!exclude_from_recovery_state) { - add_out_of_context_parse_state(&error_state, Symbol::terminal(i)); + parse_table.add_terminal_action(state_id, Symbol::terminal(i), ParseAction::Recover()); } } for (const Symbol &symbol : grammar.extra_tokens) { - if (!error_state.terminal_entries.count(symbol)) { - error_state.terminal_entries[symbol].actions.push_back(ParseAction::ShiftExtra()); + if (!parse_table.states[state_id].terminal_entries.count(symbol)) { + parse_table.add_terminal_action(state_id, symbol, ParseAction::ShiftExtra()); } } for (size_t i = 0; i < grammar.external_tokens.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol::external(i)); + parse_table.states[state_id].terminal_entries[Symbol::external(i)].actions.push_back(ParseAction::Recover()); } - for (size_t i = 0; i < grammar.variables.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol::non_terminal(i)); - } - - error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0)); - parse_table.states[state_id] = error_state; - } - - void add_out_of_context_parse_state(ParseState *error_state, const rules::Symbol &symbol) { - const ParseItemSet &item_set = recovery_item_sets_by_lookahead[symbol]; - if (!item_set.entries.empty()) { - ParseStateId state = add_parse_state({}, item_set); - if (symbol.is_non_terminal()) { - error_state->nonterminal_entries[symbol.index] = state; - } else { - error_state->terminal_entries[symbol].actions.assign({ ParseAction::Recover(state) }); - } - } + parse_table.add_terminal_action(state_id, END_OF_INPUT(), ParseAction::Recover()); } ParseStateId add_parse_state(SymbolSequence &&preceding_symbols, const ParseItemSet &item_set) { @@ -241,7 +216,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { parse_table.add_terminal_action(state_id, lookahead, action); } else { ParseAction &existing_action = entry.actions[0]; - if (existing_action.type == ParseActionTypeAccept || processing_recovery_states) { + if (existing_action.type == ParseActionTypeAccept) { entry.actions.push_back(action); } else { if (action.precedence > existing_action.precedence) { @@ -281,11 +256,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder { ParseItemSet &next_item_set = pair.second; ParseStateId next_state_id = add_parse_state(append_symbol(sequence, lookahead), next_item_set); - if (!processing_recovery_states) { - recovery_item_sets_by_lookahead[lookahead].add(next_item_set); - if (!parse_table.states[state_id].terminal_entries[lookahead].actions.empty()) { - lookaheads_with_conflicts.insert(lookahead); - } + if (!parse_table.states[state_id].terminal_entries[lookahead].actions.empty()) { + lookaheads_with_conflicts.insert(lookahead); } parse_table.add_terminal_action(state_id, lookahead, ParseAction::Shift(next_state_id)); @@ -297,9 +269,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder { ParseItemSet &next_item_set = pair.second; ParseStateId next_state_id = add_parse_state(append_symbol(sequence, lookahead), next_item_set); parse_table.set_nonterminal_action(state_id, lookahead.index, next_state_id); - if (!processing_recovery_states) { - recovery_item_sets_by_lookahead[lookahead].add(next_item_set); - } } for (Symbol lookahead : lookaheads_with_conflicts) { @@ -310,8 +279,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { ParseAction shift_extra = ParseAction::ShiftExtra(); ParseState &state = parse_table.states[state_id]; for (const Symbol &extra_symbol : grammar.extra_tokens) { - if (!state.terminal_entries.count(extra_symbol) || - state.has_shift_action() || processing_recovery_states) { + if (!state.terminal_entries.count(extra_symbol) || state.has_shift_action()) { parse_table.add_terminal_action(state_id, extra_symbol, shift_extra); } } diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index cbc656a8..d11b3084 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -656,7 +656,7 @@ class CCodeGenerator { add(")"); break; case ParseActionTypeRecover: - add("RECOVER(" + to_string(action.state_index) + ")"); + add("RECOVER()"); break; default: {} } diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index e79e02dd..4d10907c 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -40,10 +40,9 @@ ParseAction ParseAction::Shift(ParseStateId state_index) { return result; } -ParseAction ParseAction::Recover(ParseStateId state_index) { +ParseAction ParseAction::Recover() { ParseAction result; result.type = ParseActionTypeRecover; - result.state_index = state_index; return result; } @@ -133,7 +132,7 @@ bool ParseState::has_shift_action() const { void ParseState::each_referenced_state(function fn) { for (auto &entry : terminal_entries) for (ParseAction &action : entry.second.actions) - if ((action.type == ParseActionTypeShift && !action.extra) || action.type == ParseActionTypeRecover) + if (action.type == ParseActionTypeShift && !action.extra) fn(&action.state_index); for (auto &entry : nonterminal_entries) fn(&entry.second); diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 40a44dfe..39e0080b 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -27,7 +27,7 @@ struct ParseAction { static ParseAction Accept(); static ParseAction Error(); static ParseAction Shift(ParseStateId state_index); - static ParseAction Recover(ParseStateId state_index); + static ParseAction Recover(); static ParseAction Reduce(rules::Symbol symbol, size_t child_count, int precedence, int dynamic_precedence, rules::Associativity, unsigned alias_sequence_id); diff --git a/src/runtime/error_costs.c b/src/runtime/error_costs.c index ac055f45..d165572e 100644 --- a/src/runtime/error_costs.c +++ b/src/runtime/error_costs.c @@ -4,25 +4,21 @@ static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; static const unsigned MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE = 24; ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_mergeable) { - if (a.count < b.count) { - if (are_mergeable || - a.cost <= b.cost || - a.count + 1 < b.count || - b.push_count > MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE) { + ErrorComparison result = ErrorComparisonNone; + + if (!a.recovering && b.recovering) { + if (a.push_count > MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE) { return ErrorComparisonTakeLeft; } else { - return ErrorComparisonPreferLeft; + result = ErrorComparisonPreferLeft; } } - if (b.count < a.count) { - if (are_mergeable || - b.cost <= a.cost || - b.count + 1 < a.count || - a.push_count > MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE) { + if (!b.recovering && a.recovering) { + if (b.push_count > MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE) { return ErrorComparisonTakeRight; } else { - return ErrorComparisonPreferRight; + result = ErrorComparisonPreferRight; } } @@ -42,5 +38,5 @@ ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_merg } } - return ErrorComparisonNone; + return result; } diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index f65b9c93..380b96bd 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -13,10 +13,10 @@ extern "C" { #define ERROR_COST_PER_SKIPPED_CHAR 1 typedef struct { - unsigned count; unsigned cost; unsigned push_count; unsigned depth; + bool recovering; } ErrorStatus; typedef enum { diff --git a/src/runtime/language.c b/src/runtime/language.c index 8f6c37ac..cb4e7383 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -2,33 +2,22 @@ #include "runtime/tree.h" #include "runtime/error_costs.h" -static const TSParseAction SHIFT_ERROR = { - .type = TSParseActionTypeShift, - .params.state = ERROR_STATE, -}; - void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result) { - uint32_t action_index; if (symbol == ts_builtin_sym_error) { - if (state == ERROR_STATE) { - result->action_count = 1; - result->is_reusable = false; - result->depends_on_lookahead = false; - result->actions = &SHIFT_ERROR; - return; - } - action_index = 0; + result->action_count = 0; + result->is_reusable = false; + result->actions = NULL; + return; } else { assert(symbol < self->token_count); - action_index = self->parse_table[state * self->symbol_count + symbol]; + uint32_t action_index = self->parse_table[state * self->symbol_count + symbol]; + const TSParseActionEntry *entry = &self->parse_actions[action_index]; + result->action_count = entry->count; + result->is_reusable = entry->reusable; + result->depends_on_lookahead = entry->depends_on_lookahead; + result->actions = (const TSParseAction *)(entry + 1); } - - const TSParseActionEntry *entry = &self->parse_actions[action_index]; - result->action_count = entry->count; - result->is_reusable = entry->reusable; - result->depends_on_lookahead = entry->depends_on_lookahead; - result->actions = (const TSParseAction *)(entry + 1); } uint32_t ts_language_symbol_count(const TSLanguage *language) { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 0429da3b..60aa7da4 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -35,23 +35,6 @@ #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) static const uint32_t MAX_VERSION_COUNT = 10; -static const uint32_t MAX_PRECEDING_TREES_TO_SKIP = 32; - -typedef struct { - Parser *parser; - TSSymbol lookahead_symbol; - TreeArray *trees_above_error; - uint32_t tree_count_above_error; - bool found_repair; - ReduceAction best_repair; - TSStateId best_repair_next_state; - uint32_t best_repair_skip_count; -} ErrorRepairSession; - -typedef struct { - Parser *parser; - TSSymbol lookahead_symbol; -} SkipPrecedingTreesSession; static void parser__log(Parser *self) { if (self->lexer.logger.log) { @@ -110,8 +93,8 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); LOG_STACK(); - ts_stack_decrease_push_count(self->stack, slice.version, - parent->child_count + 1); + ts_stack_decrease_push_count(self->stack, slice.version, parent->child_count + 1); + ts_tree_release(parent); array_delete(&slice.trees); } @@ -148,7 +131,7 @@ static bool parser__condense_stack(Parser *self) { } ErrorStatus right_error_status = ts_stack_error_status(self->stack, i); - if (right_error_status.count == 0) all_versions_have_error = false; + if (!right_error_status.recovering) all_versions_have_error = false; for (StackVersion j = 0; j < i; j++) { bool can_merge = ts_stack_can_merge(self->stack, i, j); @@ -210,7 +193,6 @@ static bool parser__condense_stack(Parser *self) { } static void parser__restore_external_scanner(Parser *self, Tree *external_token) { - LOG("restore_external_scanner"); if (external_token) { self->language->external_scanner.deserialize( self->external_scanner_payload, @@ -222,8 +204,7 @@ static void parser__restore_external_scanner(Parser *self, Tree *external_token) } } -static Tree *parser__lex(Parser *self, StackVersion version) { - TSStateId parse_state = ts_stack_top_state(self->stack, version); +static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_state) { Length start_position = ts_stack_top_position(self->stack, version); Tree *external_token = ts_stack_last_external_token(self->stack, version); TSLexMode lex_mode = self->language->lex_modes[parse_state]; @@ -289,7 +270,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) { break; } - if (lex_mode.lex_state != self->language->lex_modes[ERROR_STATE].lex_state) { + if (!error_mode) { LOG("retry_in_error_mode"); error_mode = true; lex_mode = self->language->lex_modes[ERROR_STATE]; @@ -463,7 +444,7 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId } } - result = parser__lex(self, version); + result = parser__lex(self, version, *state); parser__set_cached_token(self, position.bytes, last_external_token, result); ts_language_table_entry(self->language, *state, result->symbol, table_entry); return result; @@ -580,14 +561,12 @@ static bool parser__replace_children(Parser *self, Tree *tree, Tree **children, } } -static StackPopResult parser__reduce(Parser *self, StackVersion version, - TSSymbol symbol, uint32_t count, - int dynamic_precedence, uint16_t alias_sequence_id, - bool fragile, bool allow_skipping) { +static StackPopResult parser__reduce(Parser *self, StackVersion version, TSSymbol symbol, + uint32_t count, int dynamic_precedence, + uint16_t alias_sequence_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); StackPopResult pop = ts_stack_pop_count(self->stack, version, count); - if (pop.stopped_at_error) return pop; for (uint32_t i = 0; i < pop.slices.size; i++) { StackSlice slice = pop.slices.contents[i]; @@ -639,24 +618,6 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version, parent->parse_state = state; } - // If this pop operation terminated at the end of an error region, then - // create two stack versions: one in which the parent node is interpreted - // normally, and one in which the parent node is skipped. - if (state == ERROR_STATE && allow_skipping && child_count > 1) { - StackVersion other_version = ts_stack_copy_version(self->stack, slice.version); - - ts_stack_push(self->stack, other_version, parent, false, ERROR_STATE); - for (uint32_t j = parent->child_count; j < slice.trees.size; j++) { - Tree *tree = slice.trees.contents[j]; - ts_stack_push(self->stack, other_version, tree, false, ERROR_STATE); - } - - ErrorStatus error_status = ts_stack_error_status(self->stack, other_version); - if (parser__better_version_exists(self, version, error_status)) { - ts_stack_remove_version(self->stack, other_version); - } - } - // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. ts_stack_push(self->stack, slice.version, parent, false, next_state); @@ -680,211 +641,6 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version, return pop; } -static const TSParseAction *parser__reductions_after_sequence(Parser *self, - TSStateId start_state, - const TreeArray *trees_below, - uint32_t tree_count_below, - const TreeArray *trees_above, - TSSymbol lookahead_symbol, - uint32_t *count) { - TSStateId state = start_state; - uint32_t child_count = 0; - *count = 0; - - for (uint32_t i = 0; i < trees_below->size; i++) { - if (child_count == tree_count_below) - break; - Tree *tree = trees_below->contents[trees_below->size - 1 - i]; - if (tree->extra) continue; - TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol); - if (next_state == ERROR_STATE) - return NULL; - if (next_state != state) { - child_count++; - state = next_state; - } - } - - for (uint32_t i = 0; i < trees_above->size; i++) { - Tree *tree = trees_above->contents[i]; - if (tree->extra) continue; - TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol); - if (next_state == ERROR_STATE) - return NULL; - if (next_state != state) { - child_count++; - state = next_state; - } - } - - const TSParseAction *actions = - ts_language_actions(self->language, state, lookahead_symbol, count); - - if (*count > 0 && actions[*count - 1].type != TSParseActionTypeReduce) { - (*count)--; - } - - while (*count > 0 && actions[0].params.child_count < child_count) { - actions++; - (*count)--; - } - - while (*count > 0 && actions[*count - 1].params.child_count > child_count) { - (*count)--; - } - - return actions; -} - -static StackIterateAction parser__repair_error_callback(void *payload, TSStateId state, - const TreeArray *trees, - uint32_t tree_count) { - ErrorRepairSession *session = payload; - Parser *self = session->parser; - TSSymbol lookahead_symbol = session->lookahead_symbol; - ReduceActionSet *repairs = &self->reduce_actions; - TreeArray *trees_above_error = session->trees_above_error; - uint32_t tree_count_above_error = session->tree_count_above_error; - - StackIterateAction result = StackIterateNone; - - uint32_t last_repair_count = -1; - uint32_t repair_reduction_count = 0; - const TSParseAction *repair_reductions = NULL; - - for (uint32_t i = 0; i < repairs->size; i++) { - ReduceAction *repair = &repairs->contents[i]; - uint32_t count_needed_below_error = repair->count - tree_count_above_error; - if (count_needed_below_error > tree_count) - break; - - uint32_t skip_count = tree_count - count_needed_below_error; - if (session->found_repair && skip_count >= session->best_repair_skip_count) { - array_erase(repairs, i--); - continue; - } - - TSStateId state_after_repair = ts_language_next_state(self->language, state, repair->symbol); - if (state == ERROR_STATE || state_after_repair == ERROR_STATE) - continue; - - uint32_t action_count; - ts_language_actions(self->language, state_after_repair, lookahead_symbol, &action_count); - if (action_count == 0) - continue; - - if (count_needed_below_error != last_repair_count) { - last_repair_count = count_needed_below_error; - repair_reductions = parser__reductions_after_sequence( - self, state, trees, count_needed_below_error, trees_above_error, - lookahead_symbol, &repair_reduction_count); - } - - for (uint32_t j = 0; j < repair_reduction_count; j++) { - if (repair_reductions[j].params.symbol == repair->symbol) { - result |= StackIteratePop; - session->found_repair = true; - session->best_repair = *repair; - session->best_repair_skip_count = skip_count; - session->best_repair_next_state = state_after_repair; - array_erase(repairs, i--); - break; - } - } - } - - if (repairs->size == 0) - result |= StackIterateStop; - - return result; -} - -static bool parser__repair_error(Parser *self, StackSlice slice, - TSSymbol lookahead_symbol, TableEntry entry) { - LOG("repair_error"); - ErrorRepairSession session = { - .parser = self, - .lookahead_symbol = lookahead_symbol, - .found_repair = false, - .trees_above_error = &slice.trees, - .tree_count_above_error = ts_tree_array_essential_count(&slice.trees), - }; - - array_clear(&self->reduce_actions); - for (uint32_t i = 0; i < entry.action_count; i++) { - TSParseAction action = entry.actions[i]; - if (action.type == TSParseActionTypeReduce) { - TSSymbol symbol = action.params.symbol; - uint32_t child_count = action.params.child_count; - if ((child_count > session.tree_count_above_error) || - (child_count == session.tree_count_above_error && - !ts_language_symbol_metadata(self->language, symbol).visible)) - array_push(&self->reduce_actions, ((ReduceAction){ - .symbol = symbol, - .count = child_count, - .alias_sequence_id = action.params.alias_sequence_id, - })); - } - } - - StackPopResult pop = ts_stack_iterate( - self->stack, slice.version, parser__repair_error_callback, &session); - - if (!session.found_repair) { - LOG("no_repair_found"); - ts_stack_remove_version(self->stack, slice.version); - ts_tree_array_delete(&slice.trees); - return false; - } - - ReduceAction repair = session.best_repair; - TSStateId next_state = session.best_repair_next_state; - uint32_t skip_count = session.best_repair_skip_count; - - StackSlice new_slice = array_pop(&pop.slices); - TreeArray children = new_slice.trees; - ts_stack_renumber_version(self->stack, new_slice.version, slice.version); - - for (uint32_t i = pop.slices.size - 1; i + 1 > 0; i--) { - StackSlice other_slice = pop.slices.contents[i]; - ts_tree_array_delete(&other_slice.trees); - if (other_slice.version != pop.slices.contents[i + 1].version) - ts_stack_remove_version(self->stack, other_slice.version); - } - - TreeArray skipped_children = ts_tree_array_remove_last_n(&children, skip_count); - TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&skipped_children); - Tree *error = ts_tree_make_error_node(&skipped_children, self->language); - error->extra = true; - array_push(&children, error); - array_push_all(&children, &trailing_extras); - trailing_extras.size = 0; - array_delete(&trailing_extras); - - for (uint32_t i = 0; i < slice.trees.size; i++) - array_push(&children, slice.trees.contents[i]); - array_delete(&slice.trees); - - Tree *parent = ts_tree_make_node( - repair.symbol, children.size, children.contents, - repair.alias_sequence_id, self->language - ); - ts_stack_push(self->stack, slice.version, parent, false, next_state); - ts_tree_release(parent); - ts_stack_decrease_push_count(self->stack, slice.version, error->child_count); - - ErrorStatus error_status = ts_stack_error_status(self->stack, slice.version); - if (parser__better_version_exists(self, slice.version, error_status)) { - LOG("no_better_repair_found"); - ts_stack_halt(self->stack, slice.version); - return false; - } else { - LOG("repair_found sym:%s, child_count:%u, cost:%u", SYM_NAME(repair.symbol), - repair.count, parent->error_cost); - return true; - } -} - static void parser__start(Parser *self, TSInput input, Tree *previous_tree) { if (previous_tree) { LOG("parse_after_edit"); @@ -986,18 +742,12 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version) bool did_reduce = false; for (uint32_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; - StackPopResult reduction = parser__reduce( + parser__reduce( self, version, action.symbol, action.count, action.dynamic_precedence, action.alias_sequence_id, - true, false + true ); - if (reduction.stopped_at_error) { - ts_tree_array_delete(&reduction.slices.contents[0].trees); - ts_stack_remove_version(self->stack, reduction.slices.contents[0].version); - continue; - } else { - did_reduce = true; - } + did_reduce = true; } if (did_reduce) { @@ -1012,59 +762,12 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version) } } -static StackIterateAction parser__skip_preceding_trees_callback( - void *payload, TSStateId state, const TreeArray *trees, uint32_t tree_count) { - if (trees->size > MAX_PRECEDING_TREES_TO_SKIP) return StackIterateStop; - if (tree_count > 0 && state != ERROR_STATE) { - uint32_t bytes_skipped = 0; - for (uint32_t i = 0; i < trees->size; i++) { - bytes_skipped += ts_tree_total_bytes(trees->contents[i]); - } - if (bytes_skipped == 0) return StackIterateNone; - SkipPrecedingTreesSession *session = payload; - Parser *self = session->parser; - TSSymbol lookahead_symbol = session->lookahead_symbol; - uint32_t action_count; - const TSParseAction *actions = - ts_language_actions(self->language, state, lookahead_symbol, &action_count); - if (action_count > 0 && actions[0].type == TSParseActionTypeReduce) { - return StackIteratePop | StackIterateStop; - } - } - return StackIterateNone; -} - -static bool parser__skip_preceding_trees(Parser *self, StackVersion version, - TSSymbol lookahead_symbol) { - SkipPrecedingTreesSession session = { self, lookahead_symbol }; - StackPopResult pop = ts_stack_iterate( - self->stack, version, parser__skip_preceding_trees_callback, &session); - - StackVersion previous_version = STACK_VERSION_NONE; - for (uint32_t i = 0; i < pop.slices.size; i++) { - StackSlice slice = pop.slices.contents[i]; - if (slice.version == previous_version) { - ts_tree_array_delete(&slice.trees); - continue; - } - - previous_version = slice.version; - Tree *error = ts_tree_make_error_node(&slice.trees, self->language); - error->extra = true; - TSStateId state = ts_stack_top_state(self->stack, slice.version); - ts_stack_push(self->stack, slice.version, error, false, state); - ts_tree_release(error); - } - - return pop.slices.size > 0; -} - -static void parser__handle_error(Parser *self, StackVersion version, - TSSymbol lookahead_symbol) { +static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) { // If there are other stack versions that are clearly better than this one, // just halt this version. ErrorStatus error_status = ts_stack_error_status(self->stack, version); - error_status.count++; + error_status.recovering = true; + error_status.cost += ERROR_COST_PER_SKIPPED_TREE; if (parser__better_version_exists(self, version, error_status)) { ts_stack_halt(self->stack, version); LOG("bail_on_error"); @@ -1073,16 +776,6 @@ static void parser__handle_error(Parser *self, StackVersion version, LOG("handle_error"); - // If the current lookahead symbol would have been valid in some previous - // state on the stack, create one stack version that repairs the error - // immediately by simply skipping all of the trees that came after that state. - if (ts_stack_version_count(self->stack) < MAX_VERSION_COUNT) { - if (parser__skip_preceding_trees(self, version, lookahead_symbol)) { - LOG("skip_preceding_trees"); - LOG_STACK(); - } - } - // Perform any reductions that could have happened in this state, regardless // of the lookahead. uint32_t previous_version_count = ts_stack_version_count(self->stack); @@ -1103,6 +796,9 @@ static void parser__handle_error(Parser *self, StackVersion version, ts_stack_push(self->stack, previous_version_count, NULL, false, ERROR_STATE); ts_stack_force_merge(self->stack, version, previous_version_count); } + + ts_stack_record_summary(self->stack, version); + LOG_STACK(); } static void parser__halt_parse(Parser *self) { @@ -1130,8 +826,84 @@ static void parser__halt_parse(Parser *self) { ts_tree_release(eof); } -static void parser__recover(Parser *self, StackVersion version, TSStateId state, - Tree *lookahead) { +static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) { + unsigned previous_version_count = ts_stack_version_count(self->stack); + StackSummary *summary = ts_stack_get_summary(self->stack, version); + for (unsigned i = 0; i < summary->size; i++) { + StackSummaryEntry entry = summary->contents[i]; + if (entry.state == ERROR_STATE) continue; + unsigned depth = entry.depth + ts_stack_depth_since_error(self->stack, version); + + unsigned count = 0; + if (ts_language_actions(self->language, entry.state, lookahead->symbol, &count) && count > 0) { + LOG("recover state:%u, depth:%u", entry.state, depth); + StackPopResult pop = ts_stack_pop_count(self->stack, version, depth); + StackVersion previous_version = STACK_VERSION_NONE; + for (unsigned j = 0; j < pop.slices.size; j++) { + StackSlice slice = pop.slices.contents[j]; + if (slice.version == previous_version) { + ts_tree_array_delete(&slice.trees); + continue; + } + + if (ts_stack_top_state(self->stack, slice.version) != entry.state) { + ts_tree_array_delete(&slice.trees); + ts_stack_halt(self->stack, slice.version); + continue; + } + + StackPopResult error_pop = ts_stack_pop_error(self->stack, slice.version); + if (error_pop.slices.size > 0) { + StackSlice error_slice = error_pop.slices.contents[0]; + array_push_all(&error_slice.trees, &slice.trees); + array_delete(&slice.trees); + slice.trees = error_slice.trees; + ts_stack_renumber_version(self->stack, error_slice.version, slice.version); + } + + TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&slice.trees); + if (slice.trees.size > 0) { + Tree *error = ts_tree_make_error_node(&slice.trees, self->language); + error->extra = true; + ts_stack_push(self->stack, slice.version, error, false, entry.state); + ts_tree_release(error); + } else { + array_delete(&slice.trees); + } + previous_version = slice.version; + + for (unsigned k = 0; k < trailing_extras.size; k++) { + Tree *tree = trailing_extras.contents[k]; + ts_stack_push(self->stack, slice.version, tree, false, entry.state); + ts_tree_release(tree); + } + + array_delete(&trailing_extras); + } + break; + } + } + + for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { + if (ts_stack_is_halted(self->stack, i)) { + ts_stack_remove_version(self->stack, i); + i--; + } else { + for (unsigned j = 0; j < i; j++) { + if (ts_stack_can_merge(self->stack, j, i)) { + ts_stack_remove_version(self->stack, i); + i--; + break; + } + } + } + } + + if (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + ts_stack_halt(self->stack, version); + return; + } + if (lookahead->symbol == ts_builtin_sym_end) { LOG("recover_eof"); TreeArray children = array_new(); @@ -1142,20 +914,14 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state, return; } - LOG("recover state:%u", state); + LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol)); + bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra; + parser__shift(self, version, ERROR_STATE, lookahead, can_be_extra); - if (ts_stack_version_count(self->stack) < MAX_VERSION_COUNT) { - StackVersion new_version = ts_stack_copy_version(self->stack, version); - bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra; - parser__shift(self, new_version, ERROR_STATE, lookahead, can_be_extra); - - ErrorStatus error_status = ts_stack_error_status(self->stack, new_version); - if (parser__better_version_exists(self, version, error_status)) { - ts_stack_remove_version(self->stack, new_version); - } + ErrorStatus error_status = ts_stack_error_status(self->stack, version); + if (parser__better_version_exists(self, version, error_status)) { + ts_stack_halt(self->stack, version); } - - parser__shift(self, version, state, lookahead, false); } static void parser__advance(Parser *self, StackVersion version, ReusableNode *reusable_node) { @@ -1164,7 +930,6 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re Tree *lookahead = parser__get_lookahead(self, version, &state, reusable_node, &table_entry); for (;;) { - bool reduction_stopped_at_error = false; StackVersion last_reduction_version = STACK_VERSION_NONE; for (uint32_t i = 0; i < table_entry.action_count; i++) { @@ -1193,26 +958,18 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re } case TSParseActionTypeReduce: { - if (reduction_stopped_at_error) continue; LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackPopResult reduction = parser__reduce( self, version, action.params.symbol, action.params.child_count, action.params.dynamic_precedence, action.params.alias_sequence_id, - action.params.fragile, true + action.params.fragile ); StackSlice slice = *array_front(&reduction.slices); - if (reduction.stopped_at_error) { - reduction_stopped_at_error = true; - if (!parser__repair_error(self, slice, lookahead->first_leaf.symbol, table_entry)) { - break; - } - } last_reduction_version = slice.version; break; } case TSParseActionTypeAccept: { - if (ts_stack_error_status(self->stack, version).count > 0) continue; LOG("accept"); parser__accept(self, version, lookahead); ts_tree_release(lookahead); @@ -1221,13 +978,9 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re case TSParseActionTypeRecover: { while (lookahead->child_count > 0) { - reusable_node_breakdown(reusable_node); - ts_tree_release(lookahead); - lookahead = reusable_node->tree; - ts_tree_retain(lookahead); + parser__breakdown_lookahead(self, &lookahead, state, reusable_node); } - - parser__recover(self, version, action.params.state, lookahead); + parser__recover(self, version, lookahead); if (lookahead == reusable_node->tree) reusable_node_pop(reusable_node); ts_tree_release(lookahead); return; @@ -1307,12 +1060,13 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err do { for (version = 0; version < ts_stack_version_count(self->stack); version++) { reusable_node = self->reusable_node; - last_position = position; while (!ts_stack_is_halted(self->stack, version)) { - position = ts_stack_top_position(self->stack, version).chars; - if (position > last_position || (version > 0 && position == last_position)) + position = ts_stack_top_position(self->stack, version).bytes; + if (position > last_position || (version > 0 && position == last_position)) { + last_position = position; break; + } LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", version, ts_stack_version_count(self->stack), diff --git a/src/runtime/stack.c b/src/runtime/stack.c index ce5f9a6d..f3907ffd 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -21,8 +21,6 @@ typedef struct StackNode StackNode; typedef struct { StackNode *node; Tree *tree; - uint32_t push_count; - uint32_t depth; bool is_pending; } StackLink; @@ -33,24 +31,16 @@ struct StackNode { short unsigned int link_count; uint32_t ref_count; unsigned error_cost; - unsigned error_count; + unsigned depth; }; typedef struct { StackNode *node; TreeArray trees; uint32_t tree_count; - uint32_t push_count; - uint32_t depth; bool is_pending; } Iterator; -typedef struct { - uint32_t goal_tree_count; - bool found_error; - bool found_valid_path; -} StackPopSession; - typedef struct { void *payload; StackIterateCallback callback; @@ -62,8 +52,8 @@ typedef struct { StackNode *node; Tree *last_external_token; uint32_t push_count; - uint32_t depth; bool is_halted; + StackSummary *summary; } StackHead; struct Stack { @@ -117,7 +107,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p StackNode *node = pool->size > 0 ? array_pop(pool) : ts_malloc(sizeof(StackNode)); - *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; + *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0}; if (previous_node) { stack_node_retain(previous_node); @@ -127,30 +117,31 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p .node = previous_node, .tree = tree, .is_pending = is_pending, - .push_count = 0, - .depth = 0, }; node->position = previous_node->position; - node->error_count = previous_node->error_count; node->error_cost = previous_node->error_cost; if (tree) { + node->depth = previous_node->depth; + if (!tree->extra) node->depth++; ts_tree_retain(tree); node->error_cost += tree->error_cost; node->position = length_add(node->position, ts_tree_total_size(tree)); if (state == ERROR_STATE && !tree->extra) { node->error_cost += - ERROR_COST_PER_SKIPPED_TREE * (tree->visible ? 1 : tree->visible_child_count) + - ERROR_COST_PER_SKIPPED_CHAR * (tree->padding.chars + tree->size.chars) + - ERROR_COST_PER_SKIPPED_LINE * (tree->padding.extent.row + tree->size.extent.row); + ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + + ERROR_COST_PER_SKIPPED_CHAR * tree->size.chars + + ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; + if (previous_node->links[0].tree) { + node->error_cost += + ERROR_COST_PER_SKIPPED_CHAR * tree->padding.chars + + ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; + } } - } else { - node->error_count++; } } else { node->position = length_zero(); - node->error_count = 0; node->error_cost = 0; } @@ -195,17 +186,19 @@ static void stack_head_delete(StackHead *self, StackNodeArray *pool) { if (self->last_external_token) { ts_tree_release(self->last_external_token); } + if (self->summary) { + array_delete(self->summary); + ts_free(self->summary); + } stack_node_release(self->node, pool); } } -static StackVersion ts_stack__add_version(Stack *self, StackNode *node, - uint32_t push_count, uint32_t depth, - Tree *last_external_token) { +static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, + StackNode *node, Tree *last_external_token) { StackHead head = { .node = node, - .depth = depth, - .push_count = push_count, + .push_count = self->heads.contents[original_version].push_count, .last_external_token = last_external_token, .is_halted = false, }; @@ -215,38 +208,35 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node, return (StackVersion)(self->heads.size - 1); } -static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees, - uint32_t push_count, uint32_t depth, - Tree *last_external_token) { +static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node, + TreeArray *trees, Tree *last_external_token) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == node) { - StackSlice slice = { *trees, version }; + StackSlice slice = {*trees, version}; array_insert(&self->slices, i + 1, slice); return; } } - StackVersion version = ts_stack__add_version(self, node, push_count, depth, last_external_token); + StackVersion version = ts_stack__add_version(self, original_version, node, last_external_token); StackSlice slice = { *trees, version }; array_push(&self->slices, slice); } inline StackPopResult stack__iter(Stack *self, StackVersion version, - StackIterateInternalCallback callback, void *payload) { + StackIterateInternalCallback callback, void *payload, + bool include_trees) { array_clear(&self->slices); array_clear(&self->iterators); StackHead *head = array_get(&self->heads, version); - uint32_t starting_push_count = head->push_count; Tree *last_external_token = head->last_external_token; Iterator iterator = { .node = head->node, .trees = array_new(), .tree_count = 0, .is_pending = true, - .push_count = 0, - .depth = head->depth, }; array_push(&self->iterators, iterator); @@ -266,10 +256,9 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version, ts_tree_array_reverse(&trees); ts_stack__add_slice( self, + version, node, &trees, - starting_push_count + iterator->push_count, - iterator->depth, last_external_token ); } @@ -298,28 +287,27 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version, } next_iterator->node = link.node; - next_iterator->push_count += link.push_count; - if (link.depth > 0) { - next_iterator->depth = link.depth; - } if (link.tree) { + if (include_trees) { + array_push(&next_iterator->trees, link.tree); + ts_tree_retain(link.tree); + } + if (!link.tree->extra) { next_iterator->tree_count++; - next_iterator->depth--; if (!link.is_pending) { next_iterator->is_pending = false; } } - array_push(&next_iterator->trees, link.tree); - ts_tree_retain(link.tree); } else { + next_iterator->tree_count++; next_iterator->is_pending = false; } } } } - return (StackPopResult){ false, self->slices }; + return (StackPopResult){self->slices}; } Stack *ts_stack_new() { @@ -375,8 +363,7 @@ unsigned ts_stack_push_count(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->push_count; } -void ts_stack_decrease_push_count(Stack *self, StackVersion version, - unsigned decrement) { +void ts_stack_decrease_push_count(Stack *self, StackVersion version, unsigned decrement) { array_get(&self->heads, version)->push_count -= decrement; } @@ -395,29 +382,18 @@ ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) { StackHead *head = array_get(&self->heads, version); return (ErrorStatus){ .cost = head->node->error_cost, - .count = head->node->error_count, + .recovering = head->node->state == ERROR_STATE, .push_count = head->push_count, - .depth = head->depth, }; } -unsigned ts_stack_error_count(const Stack *self, StackVersion version) { - StackNode *node = array_get(&self->heads, version)->node; - return node->error_count; -} - -void ts_stack_push(Stack *self, StackVersion version, Tree *tree, - bool is_pending, TSStateId state) { +void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) { StackHead *head = array_get(&self->heads, version); - StackNode *new_node = stack_node_new(head->node, tree, is_pending, state, &self->node_pool); + StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool); if (state == ERROR_STATE) { - new_node->links[0].push_count = head->push_count; - new_node->links[0].depth = head->depth; head->push_count = 0; - head->depth = 0; - } else { + } else if (!tree->extra) { head->push_count++; - if (!tree->extra) head->depth++; } stack_node_release(head->node, &self->node_pool); head->node = new_node; @@ -431,55 +407,20 @@ inline StackIterateAction iterate_callback(void *payload, const Iterator *iterat StackPopResult ts_stack_iterate(Stack *self, StackVersion version, StackIterateCallback callback, void *payload) { StackIterateSession session = {payload, callback}; - return stack__iter(self, version, iterate_callback, &session); + return stack__iter(self, version, iterate_callback, &session, true); } inline StackIterateAction pop_count_callback(void *payload, const Iterator *iterator) { - StackPopSession *pop_session = (StackPopSession *)payload; - - if (iterator->tree_count == pop_session->goal_tree_count) { - pop_session->found_valid_path = true; + unsigned *goal_tree_count = payload; + if (iterator->tree_count == *goal_tree_count) { return StackIteratePop | StackIterateStop; + } else { + return StackIterateNone; } - - if (iterator->node->state == ERROR_STATE) { - if (pop_session->found_valid_path || pop_session->found_error) { - return StackIterateStop; - } else { - pop_session->found_error = true; - return StackIteratePop | StackIterateStop; - } - } - return StackIterateNone; } -StackPopResult ts_stack_pop_count(Stack *self, StackVersion version, - uint32_t count) { - StackPopSession session = { - .goal_tree_count = count, - .found_error = false, - .found_valid_path = false, - }; - - StackPopResult pop = stack__iter(self, version, pop_count_callback, &session); - - if (session.found_error) { - if (session.found_valid_path) { - StackSlice error_slice = pop.slices.contents[0]; - ts_tree_array_delete(&error_slice.trees); - array_erase(&pop.slices, 0); - if (array_front(&pop.slices)->version != error_slice.version) { - ts_stack_remove_version(self, error_slice.version); - for (StackVersion i = 0; i < pop.slices.size; i++) { - pop.slices.contents[i].version--; - } - } - } else { - pop.stopped_at_error = true; - } - } - - return pop; +StackPopResult ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { + return stack__iter(self, version, pop_count_callback, &count, true); } inline StackIterateAction pop_pending_callback(void *payload, const Iterator *iterator) { @@ -495,7 +436,7 @@ inline StackIterateAction pop_pending_callback(void *payload, const Iterator *it } StackPopResult ts_stack_pop_pending(Stack *self, StackVersion version) { - StackPopResult pop = stack__iter(self, version, pop_pending_callback, NULL); + StackPopResult pop = stack__iter(self, version, pop_pending_callback, NULL, true); if (pop.slices.size > 0) { ts_stack_renumber_version(self, pop.slices.contents[0].version, version); pop.slices.contents[0].version = version; @@ -503,12 +444,71 @@ StackPopResult ts_stack_pop_pending(Stack *self, StackVersion version) { return pop; } +inline StackIterateAction pop_error_callback(void *payload, const Iterator *iterator) { + if (iterator->trees.size > 0) { + bool *found_error = payload; + if (!*found_error && iterator->trees.contents[0]->symbol == ts_builtin_sym_error) { + *found_error = true; + return StackIteratePop | StackIterateStop; + } else { + return StackIterateStop; + } + } else { + return StackIterateNone; + } +} + +StackPopResult ts_stack_pop_error(Stack *self, StackVersion version) { + StackNode *node = array_get(&self->heads, version)->node; + for (unsigned i = 0; i < node->link_count; i++) { + if (node->links[i].tree && node->links[i].tree->symbol == ts_builtin_sym_error) { + bool found_error = false; + return stack__iter(self, version, pop_error_callback, &found_error, true); + } + } + return (StackPopResult){.slices = array_new()}; +} + inline StackIterateAction pop_all_callback(void *payload, const Iterator *iterator) { return iterator->node->link_count == 0 ? StackIteratePop : StackIterateNone; } StackPopResult ts_stack_pop_all(Stack *self, StackVersion version) { - return stack__iter(self, version, pop_all_callback, NULL); + return stack__iter(self, version, pop_all_callback, NULL, true); +} + +inline StackIterateAction summarize_stack_callback(void *payload, const Iterator *iterator) { + StackSummary *summary = payload; + TSStateId state = iterator->node->state; + unsigned depth = iterator->tree_count; + for (unsigned i = summary->size - 1; i + 1 > 0; i--) { + StackSummaryEntry entry = summary->contents[i]; + if (entry.depth < depth) break; + if (entry.depth == depth && entry.state == state) return StackIterateNone; + } + array_push(summary, ((StackSummaryEntry){.depth = depth, .state = state})); + return StackIterateNone; +} + +void ts_stack_record_summary(Stack *self, StackVersion version) { + StackSummary *result = ts_malloc(sizeof(StackSummary)); + array_init(result); + stack__iter(self, version, summarize_stack_callback, result, false); + self->heads.contents[version].summary = result; +} + +StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { + return array_get(&self->heads, version)->summary; +} + +unsigned ts_stack_depth_since_error(Stack *self, StackVersion version) { + unsigned result = 0; + StackNode *node = array_get(&self->heads, version)->node; + while (node->state == 0) { + result++; + node = node->links[0].node; + } + return result - 1; } void ts_stack_remove_version(Stack *self, StackVersion version) { @@ -536,6 +536,7 @@ StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { StackHead *head = array_back(&self->heads); stack_node_retain(head->node); if (head->last_external_token) ts_tree_retain(head->last_external_token); + head->summary = NULL; return self->heads.size - 1; } @@ -554,9 +555,8 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version return head1->node->state == head2->node->state && head1->node->position.chars == head2->node->position.chars && - ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token) && - ((head1->node->error_count == 0 && head2->node->error_count == 0) || - (head1->depth == head2->depth)); + head1->node->depth == head2->node->depth && + ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); } void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion version2) { @@ -565,8 +565,6 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi for (uint32_t i = 0; i < head2->node->link_count; i++) { stack_node_add_link(head1->node, head2->node->links[i]); } - if (head2->push_count > head1->push_count) head1->push_count = head2->push_count; - if (head2->depth > head1->depth) head1->depth = head2->depth; ts_stack_remove_version(self, version2); } @@ -587,8 +585,6 @@ void ts_stack_clear(Stack *self) { array_push(&self->heads, ((StackHead){ .node = self->base_node, .last_external_token = NULL, - .depth = 0, - .push_count = 0, .is_halted = false, })); } @@ -612,8 +608,8 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf( f, "node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, " - "labeltooltip=\"push_count: %u\ndepth: %u", - i, head->node, i, head->push_count, head->depth); + "labeltooltip=\"push_count: %u\ndepth: %u", i, head->node, i, head->push_count, head->node->depth + ); if (head->last_external_token) { TSExternalTokenState *state = &head->last_external_token->external_token_state; @@ -654,10 +650,11 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { else fprintf(f, "label=\"%d\"", node->state); - fprintf(f, - " tooltip=\"position: %u,%u\nerror_count: %u\nerror_cost: %u\"];\n", - node->position.extent.row, node->position.extent.column, node->error_count, - node->error_cost); + fprintf( + f, + " tooltip=\"position: %u,%u\nerror_cost: %u\"];\n", + node->position.extent.row, node->position.extent.column, node->error_cost + ); for (int j = 0; j < node->link_count; j++) { StackLink link = node->links[j]; @@ -668,7 +665,7 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf(f, "fontcolor=gray "); if (!link.tree) { - fprintf(f, "color=red, tooltip=\"push_count: %u, depth: %u\"", link.push_count, link.depth); + fprintf(f, "color=red"); } else if (link.tree->symbol == ts_builtin_sym_error) { fprintf(f, "label=\"ERROR\""); } else { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 1eeb5cd0..36e41c50 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -23,7 +23,6 @@ typedef struct { typedef Array(StackSlice) StackSliceArray; typedef struct { - bool stopped_at_error; StackSliceArray slices; } StackPopResult; @@ -34,6 +33,13 @@ enum { StackIteratePop = 2, }; +typedef struct { + unsigned depth; + TSStateId state; +} StackSummaryEntry; + +typedef Array(StackSummaryEntry) StackSummary; + typedef StackIterateAction (*StackIterateCallback)(void *, TSStateId state, const TreeArray *trees, uint32_t tree_count); @@ -89,10 +95,18 @@ StackPopResult ts_stack_pop_count(Stack *, StackVersion, uint32_t count); StackPopResult ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *); +StackPopResult ts_stack_pop_error(Stack *, StackVersion); + StackPopResult ts_stack_pop_pending(Stack *, StackVersion); StackPopResult ts_stack_pop_all(Stack *, StackVersion); +unsigned ts_stack_depth_since_error(Stack *, StackVersion); + +void ts_stack_record_summary(Stack *, StackVersion); + +StackSummary *ts_stack_get_summary(Stack *, StackVersion); + ErrorStatus ts_stack_error_status(const Stack *, StackVersion); bool ts_stack_merge(Stack *, StackVersion, StackVersion); diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index f9280ce3..194fa795 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -9,9 +9,11 @@ int x // no semicolon int a; #ifdef __cplusplus -extern "C" +extern "C" { #endif +int c() { return 5; } + int b; #ifdef __cplusplus @@ -23,20 +25,23 @@ int c; --- (translation_unit - (preproc_ifdef (identifier) + (preproc_ifdef + (identifier) (ERROR (type_identifier) (identifier)) (comment)) - (declaration (type_identifier) (identifier)) - - (preproc_ifdef (identifier) - (ERROR (string_literal))) - - (declaration (type_identifier) (identifier)) - - (preproc_ifdef (identifier) - (ERROR)) - + (preproc_ifdef + (identifier) + (linkage_specification + (string_literal) + (declaration_list + (ERROR) + (function_definition + (type_identifier) + (function_declarator (identifier) (parameter_list)) + (compound_statement (return_statement (number_literal)))) + (declaration (type_identifier) (identifier)) + (ERROR (identifier))))) (declaration (type_identifier) (identifier))) ======================================== @@ -76,8 +81,8 @@ int main() { (declaration (type_identifier) (init_declarator (identifier) (parenthesized_expression - (ERROR (number_literal)) - (number_literal))))))) + (number_literal) + (ERROR (number_literal)))))))) ======================================== Errors in declarations @@ -124,13 +129,15 @@ int b() { (compound_statement (declaration (type_identifier) + (ERROR (identifier)) (init_declarator (identifier) - (ERROR (identifier) (identifier)) + (ERROR (identifier)) (number_literal))) (declaration (type_identifier) + (ERROR (identifier)) (init_declarator (identifier) - (ERROR (identifier) (identifier)) + (ERROR (identifier)) (number_literal)))))) diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 45eb1c33..ef67f85c 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -12,12 +12,13 @@ e f; (program (if_statement (parenthesized_expression - (ERROR (identifier)) - (identifier)) + (identifier) + (ERROR (identifier))) (statement_block (ERROR (identifier)) (expression_statement (identifier)))) - (expression_statement (ERROR (identifier)) (identifier))) + (ERROR (identifier)) + (expression_statement (identifier))) ======================================================= multiple invalid tokens right after the viable prefix @@ -33,16 +34,13 @@ h i j k; (program (if_statement (parenthesized_expression - (ERROR (identifier)) (identifier) - (ERROR (identifier))) + (ERROR (identifier) (identifier))) (statement_block - (expression_statement - (identifier) - (ERROR (jsx_attribute (property_identifier)) (jsx_attribute (property_identifier)) (identifier))))) - (expression_statement - (identifier) - (ERROR (jsx_attribute (property_identifier)) (jsx_attribute (property_identifier)) (identifier)))) + (ERROR (identifier) (identifier) (identifier)) + (expression_statement (identifier)))) + (ERROR (identifier) (identifier) (identifier)) + (expression_statement (identifier))) =================================================== one invalid subtree right after the viable prefix @@ -136,3 +134,17 @@ var x = !!! (function (identifier) (formal_parameters) (statement_block)) (function (identifier) (formal_parameters) (statement_block)) (ERROR (identifier))) + +========================================================= +Errors inside of a template string substitution +========================================================= + +const a = `b c ${d +} f g` + +--- + +(program + (lexical_declaration + (variable_declarator + (identifier) + (template_string (template_substitution (identifier) (ERROR)))))) diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 2f60c0f0..c5ddce41 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -166,7 +166,7 @@ describe("Parser", [&]() { ts_document_set_language(document, load_real_language("javascript")); set_text("a; ' this string never ends"); assert_root_node( - "(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))"); + "(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))"); }); }); @@ -198,7 +198,7 @@ describe("Parser", [&]() { free(string); - assert_root_node("(ERROR (UNEXPECTED INVALID))"); + assert_root_node("(program (ERROR (UNEXPECTED INVALID)))"); }); }); diff --git a/test/runtime/stack_test.cc b/test/runtime/stack_test.cc index e8a258ed..cb5abf2d 100644 --- a/test/runtime/stack_test.cc +++ b/test/runtime/stack_test.cc @@ -124,12 +124,6 @@ describe("Stack", [&]() { {1, 3}, }))); }); - - it("increments the version's push count", [&]() { - AssertThat(ts_stack_push_count(stack, 0), Equals(0)); - ts_stack_push(stack, 0, trees[0], false, stateA); - AssertThat(ts_stack_push_count(stack, 0), Equals(1)); - }); }); describe("merge()", [&]() { @@ -221,7 +215,6 @@ describe("Stack", [&]() { // ↑ // └─* StackPopResult pop = ts_stack_pop_count(stack, 0, 2); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(1)); AssertThat(ts_stack_version_count(stack), Equals(2)); @@ -240,7 +233,6 @@ describe("Stack", [&]() { // ↑ // └─* StackPopResult pop = ts_stack_pop_count(stack, 0, 2); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(1)); StackSlice slice = pop.slices.contents[0]; @@ -250,40 +242,6 @@ describe("Stack", [&]() { free_slice_array(&pop.slices); }); - it("stops popping entries early if it reaches an error tree", [&]() { - // . <──0── A <──1── B <──2── C <──3── ERROR <──4── D* - ts_stack_push(stack, 0, trees[3], false, ERROR_STATE); - ts_stack_push(stack, 0, trees[4], false, stateD); - - // . <──0── A <──1── B <──2── C <──3── ERROR <──4── D* - // ↑ - // └─* - StackPopResult pop = ts_stack_pop_count(stack, 0, 3); - AssertThat(pop.stopped_at_error, Equals(true)); - - AssertThat(ts_stack_version_count(stack), Equals(2)); - AssertThat(ts_stack_top_state(stack, 1), Equals(ERROR_STATE)); - - AssertThat(pop.slices.size, Equals(1)); - StackSlice slice = pop.slices.contents[0]; - AssertThat(slice.version, Equals(1)); - AssertThat(slice.trees, Equals(vector({ trees[4] }))); - - free_slice_array(&pop.slices); - }); - - it("preserves the push count of the popped version", [&]() { - // . <──0── A <──1── B <──2── C* - // ↑ - // └─* - StackPopResult pop = ts_stack_pop_count(stack, 0, 2); - - AssertThat(ts_stack_push_count(stack, 0), Equals(3)); - AssertThat(ts_stack_push_count(stack, 1), Equals(3)); - - free_slice_array(&pop.slices); - }); - describe("when the version has been merged", [&]() { before_each([&]() { // . <──0── A <──1── B <──2── C <──3── D <──10── I* @@ -475,7 +433,6 @@ describe("Stack", [&]() { ts_stack_push(stack, 0, trees[1], true, stateB); StackPopResult pop = ts_stack_pop_pending(stack, 0); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(1)); AssertThat(get_stack_entries(stack, 0), Equals(vector({ @@ -496,7 +453,6 @@ describe("Stack", [&]() { ts_stack_push(stack, 0, trees[3], false, stateB); StackPopResult pop = ts_stack_pop_pending(stack, 0); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(1)); AssertThat(pop.slices.contents[0].trees, Equals(vector({ trees[1], trees[2], trees[3] }))); @@ -513,7 +469,6 @@ describe("Stack", [&]() { ts_stack_push(stack, 0, trees[1], false, stateB); StackPopResult pop = ts_stack_pop_pending(stack, 0); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(0)); AssertThat(get_stack_entries(stack, 0), Equals(vector({