diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index c639bd37..06250a5b 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -84,6 +84,7 @@ const char *ts_node_type(TSNode, const TSDocument *); char *ts_node_string(TSNode, const TSDocument *); bool ts_node_eq(TSNode, TSNode); bool ts_node_is_named(TSNode); +bool ts_node_is_missing(TSNode); bool ts_node_has_changes(TSNode); bool ts_node_has_error(TSNode); TSNode ts_node_parent(TSNode); diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index 60119aa1..5ba1fc62 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -2,6 +2,7 @@ #define RUNTIME_ERROR_COSTS_H_ #define ERROR_STATE 0 +#define ERROR_COST_PER_MISSING_TREE 150 #define ERROR_COST_PER_SKIPPED_TREE 100 #define ERROR_COST_PER_SKIPPED_LINE 30 #define ERROR_COST_PER_SKIPPED_CHAR 1 diff --git a/src/runtime/language.h b/src/runtime/language.h index 11d7aeeb..ee01d3ec 100644 --- a/src/runtime/language.h +++ b/src/runtime/language.h @@ -33,6 +33,22 @@ static inline const TSParseAction *ts_language_actions(const TSLanguage *self, return entry.actions; } +static inline bool ts_language_has_actions(const TSLanguage *self, + TSStateId state, + TSSymbol symbol) { + TableEntry entry; + ts_language_table_entry(self, state, symbol, &entry); + return entry.action_count > 0; +} + +static inline bool ts_language_has_reduce_action(const TSLanguage *self, + TSStateId state, + TSSymbol symbol) { + TableEntry entry; + ts_language_table_entry(self, state, symbol, &entry); + return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce; +} + static inline TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol) { diff --git a/src/runtime/node.c b/src/runtime/node.c index a70a6cba..0a8c0a54 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -278,6 +278,11 @@ bool ts_node_is_named(TSNode self) { return tree->context.alias_symbol ? tree->context.alias_is_named : tree->named; } +bool ts_node_is_missing(TSNode self) { + const Tree *tree = ts_node__tree(self); + return tree->is_missing; +} + bool ts_node_has_changes(TSNode self) { return ts_node__tree(self)->has_changes; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index e4610f01..65e267de 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -170,7 +170,6 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft; if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight; - return ErrorComparisonNone; } @@ -511,6 +510,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId reason = "has_changes"; } else if (result->symbol == ts_builtin_sym_error) { reason = "is_error"; + } else if (result->is_missing) { + reason = "is_missing"; } else if (result->fragile_left || result->fragile_right) { reason = "is_fragile"; } else if (self->in_ambiguity && result->child_count) { @@ -783,7 +784,9 @@ static void parser__accept(Parser *self, StackVersion version, ts_stack_halt(self->stack, version); } -static void parser__do_potential_reductions(Parser *self, StackVersion starting_version) { +static bool parser__do_all_potential_reductions(Parser *self, StackVersion starting_version, + TSSymbol lookahead_symbol) { + bool result = false; for (StackVersion version = starting_version;;) { uint32_t version_count = ts_stack_version_count(self->stack); if (version >= version_count) break; @@ -792,7 +795,16 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_ bool has_shift_action = false; array_clear(&self->reduce_actions); - for (TSSymbol symbol = 0; symbol < self->language->token_count; symbol++) { + TSSymbol first_symbol, end_symbol; + if (lookahead_symbol != 0) { + first_symbol = lookahead_symbol; + end_symbol = lookahead_symbol + 1; + } else { + first_symbol = 1; + end_symbol = self->language->token_count; + } + + for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) { TableEntry entry; ts_language_table_entry(self->language, state, symbol, &entry); for (uint32_t i = 0; i < entry.action_count; i++) { @@ -816,6 +828,7 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_ } } + bool has_reduce_action = self->reduce_actions.size > 0; for (uint32_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; parser__reduce( @@ -825,9 +838,15 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_ ); } - if (self->reduce_actions.size > 0 && !has_shift_action) { - ts_stack_renumber_version(self->stack, version_count, version); - continue; + if (has_shift_action) { + result = true; + } else { + if (has_reduce_action) { + ts_stack_renumber_version(self->stack, version_count, version); + continue; + } else if (lookahead_symbol != 0) { + ts_stack_remove_version(self->stack, version); + } } if (version == starting_version) { @@ -836,6 +855,7 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_ version++; } } + return result; } static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) { @@ -848,18 +868,58 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo return; } + // Perform any reductions that could have happened in this state, regardless of the lookahead. LOG("handle_error"); - - // Perform any reductions that could have happened in this state, regardless - // of the lookahead. uint32_t previous_version_count = ts_stack_version_count(self->stack); - parser__do_potential_reductions(self, version); + parser__do_all_potential_reductions(self, version, 0); + uint32_t version_count = ts_stack_version_count(self->stack); // Push a discontinuity onto the stack. Merge all of the stack versions that // were created in the previous step. - ts_stack_push(self->stack, version, NULL, false, ERROR_STATE); - while (ts_stack_version_count(self->stack) > previous_version_count) { - ts_stack_push(self->stack, previous_version_count, NULL, false, ERROR_STATE); + bool did_insert_missing_token = false; + for (StackVersion v = version; v < version_count;) { + if (!did_insert_missing_token) { + TSStateId state = ts_stack_top_state(self->stack, v); + for (TSSymbol missing_symbol = 1; + missing_symbol < self->language->token_count; + missing_symbol++) { + TSStateId state_after_missing_symbol = ts_language_next_state( + self->language, state, missing_symbol + ); + if (state_after_missing_symbol == 0) continue; + + if (ts_language_has_reduce_action( + self->language, + state_after_missing_symbol, + lookahead_symbol + )) { + StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); + Tree *missing_tree = ts_tree_make_missing_leaf(&self->tree_pool, missing_symbol, self->language); + ts_stack_push( + self->stack, version_with_missing_tree, + missing_tree, false, + state_after_missing_symbol + ); + ts_tree_release(&self->tree_pool, missing_tree); + + if (parser__do_all_potential_reductions( + self, version_with_missing_tree, + lookahead_symbol + )) { + LOG("recover_with_missing symbol:%s, state:%u", SYM_NAME(missing_symbol), state_after_missing_symbol); + LOG_STACK(); + did_insert_missing_token = true; + break; + } + } + } + } + + ts_stack_push(self->stack, v, NULL, false, ERROR_STATE); + v = (v == version) ? previous_version_count : v + 1; + } + + for (unsigned i = previous_version_count; i < version_count; i++) { ts_stack_force_merge(self->stack, version, previous_version_count); } @@ -892,8 +952,8 @@ static void parser__halt_parse(Parser *self) { ts_tree_release(&self->tree_pool, eof); } -static StackVersion parser__recover_to_state(Parser *self, StackVersion version, - unsigned depth, TSStateId goal_state) { +static bool parser__recover_to_state(Parser *self, StackVersion version, unsigned depth, + TSStateId goal_state) { StackPopResult pop = ts_stack_pop_count(self->stack, version, depth); StackVersion previous_version = STACK_VERSION_NONE; @@ -943,7 +1003,7 @@ static StackVersion parser__recover_to_state(Parser *self, StackVersion version, array_delete(&trailing_extras); } - return previous_version; + return previous_version != STACK_VERSION_NONE; } static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) { @@ -951,8 +1011,10 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) unsigned previous_version_count = ts_stack_version_count(self->stack); Length position = ts_stack_top_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); + for (unsigned i = 0; i < summary->size; i++) { StackSummaryEntry entry = summary->contents[i]; + if (entry.state == ERROR_STATE) continue; if (entry.position.bytes == position.bytes) continue; unsigned depth = entry.depth + ts_stack_depth_since_error(self->stack, version); @@ -963,10 +1025,8 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; if (parser__better_version_exists(self, version, false, new_cost)) break; - unsigned count = 0; - if (ts_language_actions(self->language, entry.state, lookahead->symbol, &count) && count > 0) { - StackVersion recovered_version = parser__recover_to_state(self, version, depth, entry.state); - if (recovered_version != STACK_VERSION_NONE) { + if (ts_language_has_actions(self->language, entry.state, lookahead->symbol)) { + if (parser__recover_to_state(self, version, depth, entry.state)) { did_recover = true; LOG("recover state:%u, depth:%u", entry.state, depth); break; @@ -976,13 +1036,11 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { if (ts_stack_is_halted(self->stack, i)) { - ts_stack_remove_version(self->stack, i); - i--; + ts_stack_remove_version(self->stack, i--); } else { for (unsigned j = 0; j < i; j++) { if (ts_stack_can_merge(self->stack, j, i)) { - ts_stack_remove_version(self->stack, i); - i--; + ts_stack_remove_version(self->stack, i--); break; } } @@ -1004,7 +1062,6 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) return; } - LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol)); unsigned n; const TSParseAction *actions = ts_language_actions(self->language, 1, lookahead->symbol, &n); bool extra = n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra; @@ -1012,6 +1069,8 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) if (parser__better_version_exists(self, version, true, ts_stack_error_cost(self->stack, version))) { ts_stack_halt(self->stack, version); + } else { + LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol)); } } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 255029dc..5794a427 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -324,14 +324,23 @@ Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLangu } } - Tree *result = - ts_tree_make_node(pool, ts_builtin_sym_error, children->size, children->contents, 0, language); + Tree *result = ts_tree_make_node( + pool, ts_builtin_sym_error, + children->size, children->contents, + 0, language + ); result->fragile_left = true; result->fragile_right = true; return result; } +Tree *ts_tree_make_missing_leaf(TreePool *pool, TSSymbol symbol, const TSLanguage *language) { + Tree *result = ts_tree_make_leaf(pool, symbol, length_zero(), length_zero(), language); + result->is_missing = true; + result->error_cost = ERROR_COST_PER_MISSING_TREE; + return result; +} void ts_tree_retain(Tree *self) { assert(self->ref_count > 0); self->ref_count++; @@ -574,6 +583,7 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu bool visible = include_all || is_root || + self->is_missing || (self->visible && self->named) || self->context.alias_is_named; @@ -585,9 +595,12 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.bytes > 0) { cursor += snprintf(*writer, limit, "(UNEXPECTED "); cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char); + } else if (self->is_missing) { + cursor += snprintf(*writer, limit, "(MISSING"); } else { TSSymbol symbol = self->context.alias_symbol ? self->context.alias_symbol : self->symbol; - cursor += snprintf(*writer, limit, "(%s", ts_language_symbol_name(language, symbol)); + const char *symbol_name = ts_language_symbol_name(language, symbol); + cursor += snprintf(*writer, limit, "(%s", symbol_name); } } @@ -601,11 +614,9 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu return cursor - string; } -char *ts_tree_string(const Tree *self, const TSLanguage *language, - bool include_all) { - static char SCRATCH[1]; - size_t size = - ts_tree__write_to_string(self, language, SCRATCH, 0, true, include_all) + 1; +char *ts_tree_string(const Tree *self, const TSLanguage *language, bool include_all) { + char scratch_string[1]; + size_t size = ts_tree__write_to_string(self, language, scratch_string, 0, true, include_all) + 1; char *result = ts_malloc(size * sizeof(char)); ts_tree__write_to_string(self, language, result, size, true, include_all); return result; diff --git a/src/runtime/tree.h b/src/runtime/tree.h index e273fcd8..9d33561d 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -65,6 +65,7 @@ typedef struct Tree { bool fragile_right : 1; bool has_changes : 1; bool has_external_tokens : 1; + bool is_missing : 1; } Tree; typedef Array(Tree *) TreeArray; @@ -94,6 +95,7 @@ Tree *ts_tree_make_node(TreePool *, TSSymbol, uint32_t, Tree **, unsigned, const Tree *ts_tree_make_copy(TreePool *, Tree *child); Tree *ts_tree_make_error_node(TreePool *, TreeArray *, const TSLanguage *); Tree *ts_tree_make_error(TreePool *, Length, Length, int32_t, const TSLanguage *); +Tree *ts_tree_make_missing_leaf(TreePool *, TSSymbol, const TSLanguage *); void ts_tree_retain(Tree *tree); void ts_tree_release(TreePool *, Tree *tree); bool ts_tree_eq(const Tree *tree1, const Tree *tree2);