Merge pull request #119 from tree-sitter/recover-by-inserting-missing-tokens
Add the ability to recover from errors by inserting missing tokens
This commit is contained in:
commit
ee9df753ff
12 changed files with 359 additions and 203 deletions
|
|
@ -84,6 +84,7 @@ const char *ts_node_type(TSNode, const TSDocument *);
|
|||
char *ts_node_string(TSNode, const TSDocument *);
|
||||
bool ts_node_eq(TSNode, TSNode);
|
||||
bool ts_node_is_named(TSNode);
|
||||
bool ts_node_is_missing(TSNode);
|
||||
bool ts_node_has_changes(TSNode);
|
||||
bool ts_node_has_error(TSNode);
|
||||
TSNode ts_node_parent(TSNode);
|
||||
|
|
|
|||
|
|
@ -94,6 +94,10 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) {
|
|||
edit.bytes_removed = max_bytes - edit.start_byte;
|
||||
|
||||
ts_tree_edit(self->tree, &edit);
|
||||
|
||||
if (self->parser.print_debugging_graphs) {
|
||||
ts_tree_print_dot_graph(self->tree, self->parser.language, stderr);
|
||||
}
|
||||
}
|
||||
|
||||
void ts_document_parse(TSDocument *self) {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#define RUNTIME_ERROR_COSTS_H_
|
||||
|
||||
#define ERROR_STATE 0
|
||||
#define ERROR_COST_PER_MISSING_TREE 150
|
||||
#define ERROR_COST_PER_SKIPPED_TREE 100
|
||||
#define ERROR_COST_PER_SKIPPED_LINE 30
|
||||
#define ERROR_COST_PER_SKIPPED_CHAR 1
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
#include "runtime/error_costs.h"
|
||||
#include <assert.h>
|
||||
|
||||
// #define DEBUG_GET_CHANGED_RANGES
|
||||
|
||||
typedef Array(TSRange) RangeArray;
|
||||
|
||||
static void range_array_add(RangeArray *results, TSPoint start, TSPoint end) {
|
||||
|
|
@ -83,7 +85,8 @@ static void iterator_get_visible_state(const Iterator *self, Tree **tree,
|
|||
uint32_t i = self->path.size - 1;
|
||||
|
||||
if (self->in_padding) {
|
||||
while (self->path.contents[i].child_index == 0) i--;
|
||||
if (i == 0) return;
|
||||
i--;
|
||||
}
|
||||
|
||||
for (; i + 1 > 0; i--) {
|
||||
|
|
@ -216,7 +219,8 @@ IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *ne
|
|||
iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
|
||||
iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
|
||||
|
||||
assert(old_tree && new_tree);
|
||||
if (!old_tree && !new_tree) return IteratorMatches;
|
||||
if (!old_tree || !new_tree) return IteratorDiffers;
|
||||
|
||||
if (old_alias_symbol == new_alias_symbol) {
|
||||
if (old_start == new_start) {
|
||||
|
|
@ -240,19 +244,21 @@ IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *ne
|
|||
return IteratorDiffers;
|
||||
}
|
||||
|
||||
// static inline void iterator_print_state(Iterator *self) {
|
||||
// TreePathEntry entry = *array_back(&self->path);
|
||||
// TSPoint start = iterator_start_position(self).extent;
|
||||
// TSPoint end = iterator_end_position(self).extent;
|
||||
// const char *name = ts_language_symbol_name(self->language, entry.tree->symbol);
|
||||
// printf(
|
||||
// "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
|
||||
// name, self->in_padding ? "(p)" : " ",
|
||||
// self->visible_depth,
|
||||
// start.row, start.column,
|
||||
// end.row, end.column
|
||||
// );
|
||||
// }
|
||||
#ifdef DEBUG_GET_CHANGED_RANGES
// Debug-only helper, compiled in only when DEBUG_GET_CHANGED_RANGES is defined.
// Prints a one-line description of the iterator's current (last) path entry to
// stdout: the symbol name, a "(p)" marker when the iterator is in padding, the
// visible depth, and the start/end points of the current position.
static inline void iterator_print_state(Iterator *self) {
  TreePathEntry current_entry = *array_back(&self->path);
  const char *symbol_name = ts_language_symbol_name(self->language, current_entry.tree->symbol);
  const char *padding_marker = self->in_padding ? "(p)" : " ";
  TSPoint start_point = iterator_start_position(self).extent;
  TSPoint end_point = iterator_end_position(self).extent;

  printf(
    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
    symbol_name,
    padding_marker,
    self->visible_depth,
    start_point.row,
    start_point.column,
    end_point.row,
    end_point.column
  );
}
#endif
|
||||
|
||||
unsigned ts_tree_get_changed_ranges(Tree *old_tree, Tree *new_tree,
|
||||
TreePath *path1, TreePath *path2,
|
||||
|
|
@ -273,11 +279,13 @@ unsigned ts_tree_get_changed_ranges(Tree *old_tree, Tree *new_tree,
|
|||
}
|
||||
|
||||
do {
|
||||
// printf("At [%-2u, %-2u] Compare ", position.extent.row, position.extent.column);
|
||||
// iterator_print_state(&old_iter);
|
||||
// printf("\tvs\t");
|
||||
// iterator_print_state(&new_iter);
|
||||
// puts("");
|
||||
#ifdef DEBUG_GET_CHANGED_RANGES
|
||||
printf("At [%-2u, %-2u] Compare ", position.extent.row, position.extent.column);
|
||||
iterator_print_state(&old_iter);
|
||||
printf("\tvs\t");
|
||||
iterator_print_state(&new_iter);
|
||||
puts("");
|
||||
#endif
|
||||
|
||||
bool is_changed = false;
|
||||
switch (iterator_compare(&old_iter, &new_iter)) {
|
||||
|
|
@ -326,11 +334,13 @@ unsigned ts_tree_get_changed_ranges(Tree *old_tree, Tree *new_tree,
|
|||
while (new_iter.visible_depth > old_iter.visible_depth) iterator_ascend(&new_iter);
|
||||
|
||||
if (is_changed) {
|
||||
// printf(
|
||||
// " change: [[%u, %u] - [%u, %u]]\n",
|
||||
// position.extent.row, position.extent.column,
|
||||
// next_position.extent.row, next_position.extent.column
|
||||
// );
|
||||
#ifdef DEBUG_GET_CHANGED_RANGES
|
||||
printf(
|
||||
" change: [[%u, %u] - [%u, %u]]\n",
|
||||
position.extent.row, position.extent.column,
|
||||
next_position.extent.row, next_position.extent.column
|
||||
);
|
||||
#endif
|
||||
|
||||
range_array_add(&results, position.extent, next_position.extent);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,22 @@ static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
|
|||
return entry.actions;
|
||||
}
|
||||
|
||||
static inline bool ts_language_has_actions(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.action_count > 0;
|
||||
}
|
||||
|
||||
static inline bool ts_language_has_reduce_action(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce;
|
||||
}
|
||||
|
||||
static inline TSStateId ts_language_next_state(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
|
|
|
|||
|
|
@ -278,6 +278,11 @@ bool ts_node_is_named(TSNode self) {
|
|||
return tree->context.alias_symbol ? tree->context.alias_is_named : tree->named;
|
||||
}
|
||||
|
||||
// Returns the `is_missing` flag of the tree underlying this node.
bool ts_node_is_missing(TSNode self) {
  return ts_node__tree(self)->is_missing;
}
|
||||
|
||||
// Returns the `has_changes` flag of the tree underlying this node.
bool ts_node_has_changes(TSNode self) {
  const Tree *tree = ts_node__tree(self);
  return tree->has_changes;
}
|
||||
|
|
|
|||
|
|
@ -170,7 +170,6 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err
|
|||
|
||||
if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
|
||||
if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
|
||||
|
||||
return ErrorComparisonNone;
|
||||
}
|
||||
|
||||
|
|
@ -178,6 +177,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version,
|
|||
bool is_in_error, unsigned cost) {
|
||||
if (self->finished_tree && self->finished_tree->error_cost <= cost) return true;
|
||||
|
||||
Length position = ts_stack_top_position(self->stack, version);
|
||||
ErrorStatus status = {
|
||||
.cost = cost,
|
||||
.is_in_error = is_in_error,
|
||||
|
|
@ -186,7 +186,9 @@ static bool parser__better_version_exists(Parser *self, StackVersion version,
|
|||
};
|
||||
|
||||
for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
|
||||
if (i == version || ts_stack_is_halted(self->stack, i)) continue;
|
||||
if (i == version ||
|
||||
ts_stack_is_halted(self->stack, i) ||
|
||||
ts_stack_top_position(self->stack, i).bytes < position.bytes) continue;
|
||||
ErrorStatus status_i = {
|
||||
.cost = ts_stack_error_cost(self->stack, i),
|
||||
.is_in_error = ts_stack_top_state(self->stack, i) == ERROR_STATE,
|
||||
|
|
@ -511,6 +513,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId
|
|||
reason = "has_changes";
|
||||
} else if (result->symbol == ts_builtin_sym_error) {
|
||||
reason = "is_error";
|
||||
} else if (result->is_missing) {
|
||||
reason = "is_missing";
|
||||
} else if (result->fragile_left || result->fragile_right) {
|
||||
reason = "is_fragile";
|
||||
} else if (self->in_ambiguity && result->child_count) {
|
||||
|
|
@ -783,7 +787,9 @@ static void parser__accept(Parser *self, StackVersion version,
|
|||
ts_stack_halt(self->stack, version);
|
||||
}
|
||||
|
||||
static void parser__do_potential_reductions(Parser *self, StackVersion starting_version) {
|
||||
static bool parser__do_all_potential_reductions(Parser *self, StackVersion starting_version,
|
||||
TSSymbol lookahead_symbol) {
|
||||
bool result = false;
|
||||
for (StackVersion version = starting_version;;) {
|
||||
uint32_t version_count = ts_stack_version_count(self->stack);
|
||||
if (version >= version_count) break;
|
||||
|
|
@ -792,7 +798,16 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_
|
|||
bool has_shift_action = false;
|
||||
array_clear(&self->reduce_actions);
|
||||
|
||||
for (TSSymbol symbol = 0; symbol < self->language->token_count; symbol++) {
|
||||
TSSymbol first_symbol, end_symbol;
|
||||
if (lookahead_symbol != 0) {
|
||||
first_symbol = lookahead_symbol;
|
||||
end_symbol = lookahead_symbol + 1;
|
||||
} else {
|
||||
first_symbol = 1;
|
||||
end_symbol = self->language->token_count;
|
||||
}
|
||||
|
||||
for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self->language, state, symbol, &entry);
|
||||
for (uint32_t i = 0; i < entry.action_count; i++) {
|
||||
|
|
@ -816,6 +831,7 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_
|
|||
}
|
||||
}
|
||||
|
||||
bool has_reduce_action = self->reduce_actions.size > 0;
|
||||
for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
|
||||
ReduceAction action = self->reduce_actions.contents[i];
|
||||
parser__reduce(
|
||||
|
|
@ -825,9 +841,15 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_
|
|||
);
|
||||
}
|
||||
|
||||
if (self->reduce_actions.size > 0 && !has_shift_action) {
|
||||
ts_stack_renumber_version(self->stack, version_count, version);
|
||||
continue;
|
||||
if (has_shift_action) {
|
||||
result = true;
|
||||
} else {
|
||||
if (has_reduce_action) {
|
||||
ts_stack_renumber_version(self->stack, version_count, version);
|
||||
continue;
|
||||
} else if (lookahead_symbol != 0) {
|
||||
ts_stack_remove_version(self->stack, version);
|
||||
}
|
||||
}
|
||||
|
||||
if (version == starting_version) {
|
||||
|
|
@ -836,6 +858,7 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_
|
|||
version++;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) {
|
||||
|
|
@ -848,18 +871,58 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo
|
|||
return;
|
||||
}
|
||||
|
||||
// Perform any reductions that could have happened in this state, regardless of the lookahead.
|
||||
LOG("handle_error");
|
||||
|
||||
// Perform any reductions that could have happened in this state, regardless
|
||||
// of the lookahead.
|
||||
uint32_t previous_version_count = ts_stack_version_count(self->stack);
|
||||
parser__do_potential_reductions(self, version);
|
||||
parser__do_all_potential_reductions(self, version, 0);
|
||||
uint32_t version_count = ts_stack_version_count(self->stack);
|
||||
|
||||
// Push a discontinuity onto the stack. Merge all of the stack versions that
|
||||
// were created in the previous step.
|
||||
ts_stack_push(self->stack, version, NULL, false, ERROR_STATE);
|
||||
while (ts_stack_version_count(self->stack) > previous_version_count) {
|
||||
ts_stack_push(self->stack, previous_version_count, NULL, false, ERROR_STATE);
|
||||
bool did_insert_missing_token = false;
|
||||
for (StackVersion v = version; v < version_count;) {
|
||||
if (!did_insert_missing_token) {
|
||||
TSStateId state = ts_stack_top_state(self->stack, v);
|
||||
for (TSSymbol missing_symbol = 1;
|
||||
missing_symbol < self->language->token_count;
|
||||
missing_symbol++) {
|
||||
TSStateId state_after_missing_symbol = ts_language_next_state(
|
||||
self->language, state, missing_symbol
|
||||
);
|
||||
if (state_after_missing_symbol == 0) continue;
|
||||
|
||||
if (ts_language_has_reduce_action(
|
||||
self->language,
|
||||
state_after_missing_symbol,
|
||||
lookahead_symbol
|
||||
)) {
|
||||
StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
|
||||
Tree *missing_tree = ts_tree_make_missing_leaf(&self->tree_pool, missing_symbol, self->language);
|
||||
ts_stack_push(
|
||||
self->stack, version_with_missing_tree,
|
||||
missing_tree, false,
|
||||
state_after_missing_symbol
|
||||
);
|
||||
ts_tree_release(&self->tree_pool, missing_tree);
|
||||
|
||||
if (parser__do_all_potential_reductions(
|
||||
self, version_with_missing_tree,
|
||||
lookahead_symbol
|
||||
)) {
|
||||
LOG("recover_with_missing symbol:%s, state:%u", SYM_NAME(missing_symbol), state_after_missing_symbol);
|
||||
LOG_STACK();
|
||||
did_insert_missing_token = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ts_stack_push(self->stack, v, NULL, false, ERROR_STATE);
|
||||
v = (v == version) ? previous_version_count : v + 1;
|
||||
}
|
||||
|
||||
for (unsigned i = previous_version_count; i < version_count; i++) {
|
||||
ts_stack_force_merge(self->stack, version, previous_version_count);
|
||||
}
|
||||
|
||||
|
|
@ -892,13 +955,69 @@ static void parser__halt_parse(Parser *self) {
|
|||
ts_tree_release(&self->tree_pool, eof);
|
||||
}
|
||||
|
||||
static bool parser__recover_to_state(Parser *self, StackVersion version, unsigned depth,
|
||||
TSStateId goal_state) {
|
||||
StackPopResult pop = ts_stack_pop_count(self->stack, version, depth);
|
||||
StackVersion previous_version = STACK_VERSION_NONE;
|
||||
|
||||
for (unsigned i = 0; i < pop.slices.size; i++) {
|
||||
StackSlice slice = pop.slices.contents[i];
|
||||
|
||||
if (slice.version == previous_version) {
|
||||
ts_tree_array_delete(&self->tree_pool, &slice.trees);
|
||||
array_erase(&pop.slices, i--);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ts_stack_top_state(self->stack, slice.version) != goal_state) {
|
||||
ts_stack_halt(self->stack, slice.version);
|
||||
ts_tree_array_delete(&self->tree_pool, &slice.trees);
|
||||
array_erase(&pop.slices, i--);
|
||||
continue;
|
||||
}
|
||||
|
||||
StackPopResult error_pop = ts_stack_pop_error(self->stack, slice.version);
|
||||
if (error_pop.slices.size > 0) {
|
||||
StackSlice error_slice = error_pop.slices.contents[0];
|
||||
array_push_all(&error_slice.trees, &slice.trees);
|
||||
array_delete(&slice.trees);
|
||||
slice.trees = error_slice.trees;
|
||||
ts_stack_renumber_version(self->stack, error_slice.version, slice.version);
|
||||
}
|
||||
|
||||
TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&slice.trees);
|
||||
|
||||
if (slice.trees.size > 0) {
|
||||
Tree *error = ts_tree_make_error_node(&self->tree_pool, &slice.trees, self->language);
|
||||
error->extra = true;
|
||||
ts_stack_push(self->stack, slice.version, error, false, goal_state);
|
||||
ts_tree_release(&self->tree_pool, error);
|
||||
} else {
|
||||
array_delete(&slice.trees);
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < trailing_extras.size; j++) {
|
||||
Tree *tree = trailing_extras.contents[j];
|
||||
ts_stack_push(self->stack, slice.version, tree, false, goal_state);
|
||||
ts_tree_release(&self->tree_pool, tree);
|
||||
}
|
||||
|
||||
previous_version = slice.version;
|
||||
array_delete(&trailing_extras);
|
||||
}
|
||||
|
||||
return previous_version != STACK_VERSION_NONE;
|
||||
}
|
||||
|
||||
static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) {
|
||||
bool did_recover = false;
|
||||
unsigned previous_version_count = ts_stack_version_count(self->stack);
|
||||
Length position = ts_stack_top_position(self->stack, version);
|
||||
StackSummary *summary = ts_stack_get_summary(self->stack, version);
|
||||
|
||||
for (unsigned i = 0; i < summary->size; i++) {
|
||||
StackSummaryEntry entry = summary->contents[i];
|
||||
|
||||
if (entry.state == ERROR_STATE) continue;
|
||||
if (entry.position.bytes == position.bytes) continue;
|
||||
unsigned depth = entry.depth + ts_stack_depth_since_error(self->stack, version);
|
||||
|
|
@ -909,66 +1028,22 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
|
|||
(position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
|
||||
if (parser__better_version_exists(self, version, false, new_cost)) break;
|
||||
|
||||
unsigned count = 0;
|
||||
if (ts_language_actions(self->language, entry.state, lookahead->symbol, &count) && count > 0) {
|
||||
LOG("recover state:%u, depth:%u", entry.state, depth);
|
||||
StackPopResult pop = ts_stack_pop_count(self->stack, version, depth);
|
||||
StackVersion previous_version = STACK_VERSION_NONE;
|
||||
for (unsigned j = 0; j < pop.slices.size; j++) {
|
||||
StackSlice slice = pop.slices.contents[j];
|
||||
if (slice.version == previous_version) {
|
||||
ts_tree_array_delete(&self->tree_pool, &slice.trees);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ts_stack_top_state(self->stack, slice.version) != entry.state) {
|
||||
ts_tree_array_delete(&self->tree_pool, &slice.trees);
|
||||
ts_stack_halt(self->stack, slice.version);
|
||||
continue;
|
||||
}
|
||||
|
||||
StackPopResult error_pop = ts_stack_pop_error(self->stack, slice.version);
|
||||
if (error_pop.slices.size > 0) {
|
||||
StackSlice error_slice = error_pop.slices.contents[0];
|
||||
array_push_all(&error_slice.trees, &slice.trees);
|
||||
array_delete(&slice.trees);
|
||||
slice.trees = error_slice.trees;
|
||||
ts_stack_renumber_version(self->stack, error_slice.version, slice.version);
|
||||
}
|
||||
|
||||
TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&slice.trees);
|
||||
if (slice.trees.size > 0) {
|
||||
Tree *error = ts_tree_make_error_node(&self->tree_pool, &slice.trees, self->language);
|
||||
error->extra = true;
|
||||
ts_stack_push(self->stack, slice.version, error, false, entry.state);
|
||||
ts_tree_release(&self->tree_pool, error);
|
||||
} else {
|
||||
array_delete(&slice.trees);
|
||||
}
|
||||
previous_version = slice.version;
|
||||
|
||||
for (unsigned k = 0; k < trailing_extras.size; k++) {
|
||||
Tree *tree = trailing_extras.contents[k];
|
||||
ts_stack_push(self->stack, slice.version, tree, false, entry.state);
|
||||
ts_tree_release(&self->tree_pool, tree);
|
||||
}
|
||||
|
||||
array_delete(&trailing_extras);
|
||||
if (ts_language_has_actions(self->language, entry.state, lookahead->symbol)) {
|
||||
if (parser__recover_to_state(self, version, depth, entry.state)) {
|
||||
did_recover = true;
|
||||
LOG("recover state:%u, depth:%u", entry.state, depth);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
|
||||
if (ts_stack_is_halted(self->stack, i)) {
|
||||
ts_stack_remove_version(self->stack, i);
|
||||
i--;
|
||||
ts_stack_remove_version(self->stack, i--);
|
||||
} else {
|
||||
for (unsigned j = 0; j < i; j++) {
|
||||
if (ts_stack_can_merge(self->stack, j, i)) {
|
||||
ts_stack_remove_version(self->stack, i);
|
||||
i--;
|
||||
ts_stack_remove_version(self->stack, i--);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -990,7 +1065,6 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
|
|||
return;
|
||||
}
|
||||
|
||||
LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol));
|
||||
unsigned n;
|
||||
const TSParseAction *actions = ts_language_actions(self->language, 1, lookahead->symbol, &n);
|
||||
bool extra = n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra;
|
||||
|
|
@ -998,6 +1072,8 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
|
|||
|
||||
if (parser__better_version_exists(self, version, true, ts_stack_error_cost(self->stack, version))) {
|
||||
ts_stack_halt(self->stack, version);
|
||||
} else {
|
||||
LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -324,14 +324,23 @@ Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLangu
|
|||
}
|
||||
}
|
||||
|
||||
Tree *result =
|
||||
ts_tree_make_node(pool, ts_builtin_sym_error, children->size, children->contents, 0, language);
|
||||
Tree *result = ts_tree_make_node(
|
||||
pool, ts_builtin_sym_error,
|
||||
children->size, children->contents,
|
||||
0, language
|
||||
);
|
||||
|
||||
result->fragile_left = true;
|
||||
result->fragile_right = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Creates a leaf for `symbol` with zero padding and zero size, flags it as
// missing, and sets its error cost to ERROR_COST_PER_MISSING_TREE.
Tree *ts_tree_make_missing_leaf(TreePool *pool, TSSymbol symbol, const TSLanguage *language) {
  Tree *missing_leaf = ts_tree_make_leaf(
    pool, symbol,
    length_zero(), length_zero(),
    language
  );

  missing_leaf->error_cost = ERROR_COST_PER_MISSING_TREE;
  missing_leaf->is_missing = true;
  return missing_leaf;
}
|
||||
void ts_tree_retain(Tree *self) {
|
||||
assert(self->ref_count > 0);
|
||||
self->ref_count++;
|
||||
|
|
@ -574,6 +583,7 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu
|
|||
bool visible =
|
||||
include_all ||
|
||||
is_root ||
|
||||
self->is_missing ||
|
||||
(self->visible && self->named) ||
|
||||
self->context.alias_is_named;
|
||||
|
||||
|
|
@ -585,9 +595,12 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu
|
|||
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.bytes > 0) {
|
||||
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
|
||||
cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
|
||||
} else if (self->is_missing) {
|
||||
cursor += snprintf(*writer, limit, "(MISSING");
|
||||
} else {
|
||||
TSSymbol symbol = self->context.alias_symbol ? self->context.alias_symbol : self->symbol;
|
||||
cursor += snprintf(*writer, limit, "(%s", ts_language_symbol_name(language, symbol));
|
||||
const char *symbol_name = ts_language_symbol_name(language, symbol);
|
||||
cursor += snprintf(*writer, limit, "(%s", symbol_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -601,11 +614,9 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu
|
|||
return cursor - string;
|
||||
}
|
||||
|
||||
char *ts_tree_string(const Tree *self, const TSLanguage *language,
|
||||
bool include_all) {
|
||||
static char SCRATCH[1];
|
||||
size_t size =
|
||||
ts_tree__write_to_string(self, language, SCRATCH, 0, true, include_all) + 1;
|
||||
char *ts_tree_string(const Tree *self, const TSLanguage *language, bool include_all) {
|
||||
char scratch_string[1];
|
||||
size_t size = ts_tree__write_to_string(self, language, scratch_string, 0, true, include_all) + 1;
|
||||
char *result = ts_malloc(size * sizeof(char));
|
||||
ts_tree__write_to_string(self, language, result, size, true, include_all);
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ typedef struct Tree {
|
|||
bool fragile_right : 1;
|
||||
bool has_changes : 1;
|
||||
bool has_external_tokens : 1;
|
||||
bool is_missing : 1;
|
||||
} Tree;
|
||||
|
||||
typedef Array(Tree *) TreeArray;
|
||||
|
|
@ -94,6 +95,7 @@ Tree *ts_tree_make_node(TreePool *, TSSymbol, uint32_t, Tree **, unsigned, const
|
|||
Tree *ts_tree_make_copy(TreePool *, Tree *child);
|
||||
Tree *ts_tree_make_error_node(TreePool *, TreeArray *, const TSLanguage *);
|
||||
Tree *ts_tree_make_error(TreePool *, Length, Length, int32_t, const TSLanguage *);
|
||||
Tree *ts_tree_make_missing_leaf(TreePool *, TSSymbol, const TSLanguage *);
|
||||
void ts_tree_retain(Tree *tree);
|
||||
void ts_tree_release(TreePool *, Tree *tree);
|
||||
bool ts_tree_eq(const Tree *tree1, const Tree *tree2);
|
||||
|
|
|
|||
143
test/fixtures/error_corpus/c_errors.txt
vendored
143
test/fixtures/error_corpus/c_errors.txt
vendored
|
|
@ -1,52 +1,97 @@
|
|||
========================================
|
||||
Errors inside ifdefs
|
||||
========================================
|
||||
=======================================
|
||||
Statements with missing semicolons
|
||||
=======================================
|
||||
|
||||
#ifdef something
|
||||
int x // no semicolon
|
||||
#endif
|
||||
int main() {
|
||||
puts("hello")
|
||||
puts("world")
|
||||
}
|
||||
|
||||
int a;
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(function_definition
|
||||
(primitive_type)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(compound_statement
|
||||
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING))
|
||||
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING)))))
|
||||
|
||||
==============================================
|
||||
Top-level declarations with missing semicolons
|
||||
==============================================
|
||||
|
||||
int x
|
||||
static int b
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(declaration (primitive_type) (identifier) (MISSING))
|
||||
(declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING)))
|
||||
|
||||
==========================================
|
||||
Partial declaration lists inside ifdefs
|
||||
==========================================
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int c() { return 5; }
|
||||
|
||||
// ok
|
||||
int b;
|
||||
|
||||
int c() {
|
||||
return 5;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
int c;
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(preproc_ifdef (identifier)
|
||||
(linkage_specification (string_literal) (declaration_list
|
||||
(preproc_call (preproc_directive))
|
||||
(comment)
|
||||
(declaration (primitive_type) (identifier))
|
||||
(function_definition (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement (return_statement (number_literal))))
|
||||
(preproc_ifdef (identifier) (MISSING))))))
|
||||
|
||||
==========================================
|
||||
If statements with incomplete expressions
|
||||
==========================================
|
||||
|
||||
int main() {
|
||||
if (a.) {
|
||||
b();
|
||||
c();
|
||||
|
||||
if () d();
|
||||
}
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(preproc_ifdef
|
||||
(identifier)
|
||||
(ERROR (primitive_type) (identifier))
|
||||
(comment))
|
||||
(declaration (primitive_type) (identifier))
|
||||
(preproc_ifdef
|
||||
(identifier)
|
||||
(linkage_specification
|
||||
(string_literal)
|
||||
(declaration_list
|
||||
(preproc_call (preproc_directive))
|
||||
(function_definition
|
||||
(primitive_type)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(compound_statement (return_statement (number_literal))))
|
||||
(declaration (primitive_type) (identifier))
|
||||
(ERROR (identifier)))))
|
||||
(declaration (primitive_type) (identifier)))
|
||||
(function_definition
|
||||
(primitive_type)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(compound_statement
|
||||
(if_statement
|
||||
(field_expression (identifier) (MISSING))
|
||||
(compound_statement
|
||||
(expression_statement (call_expression (identifier) (argument_list)))
|
||||
(expression_statement (call_expression (identifier) (argument_list)))
|
||||
(if_statement
|
||||
(MISSING)
|
||||
(expression_statement (call_expression (identifier) (argument_list)))))))))
|
||||
|
||||
========================================
|
||||
Errors inside blocks
|
||||
========================================
|
||||
====================================
|
||||
Invalid characters in declarations
|
||||
====================================
|
||||
|
||||
int main() {
|
||||
int x;
|
||||
|
|
@ -63,9 +108,9 @@ int main() {
|
|||
(declaration (primitive_type) (identifier))
|
||||
(ERROR (primitive_type) (UNEXPECTED '$')))))
|
||||
|
||||
========================================
|
||||
Errors inside expressions
|
||||
========================================
|
||||
=========================================
|
||||
Extra values in parenthesized expressions
|
||||
=========================================
|
||||
|
||||
int main() {
|
||||
int x = (123 123);
|
||||
|
|
@ -85,7 +130,7 @@ int main() {
|
|||
(ERROR (number_literal))))))))
|
||||
|
||||
========================================
|
||||
Errors in declarations
|
||||
Extra identifiers in declarations
|
||||
========================================
|
||||
|
||||
float x WTF;
|
||||
|
|
@ -98,44 +143,28 @@ int y = 5;
|
|||
(declaration (primitive_type) (init_declarator (identifier) (number_literal))))
|
||||
|
||||
==========================================
|
||||
Errors at the beginnings of blocks
|
||||
Declarations with missing variable names
|
||||
==========================================
|
||||
|
||||
int a() {
|
||||
struct x = 1;
|
||||
struct y = 2;
|
||||
}
|
||||
|
||||
int b() {
|
||||
w x y z = 3;
|
||||
w x y z = 4;
|
||||
int = 2;
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(function_definition
|
||||
(primitive_type)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(compound_statement
|
||||
(struct_specifier (type_identifier))
|
||||
(ERROR (number_literal))
|
||||
(struct_specifier (type_identifier))
|
||||
(ERROR (number_literal))))
|
||||
|
||||
(function_definition
|
||||
(primitive_type)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(compound_statement
|
||||
(declaration
|
||||
(type_identifier)
|
||||
(ERROR (identifier) (identifier))
|
||||
(struct_specifier (type_identifier))
|
||||
(init_declarator
|
||||
(identifier)
|
||||
(MISSING)
|
||||
(number_literal)))
|
||||
(declaration
|
||||
(type_identifier)
|
||||
(ERROR (identifier) (identifier))
|
||||
(primitive_type)
|
||||
(init_declarator
|
||||
(identifier)
|
||||
(MISSING)
|
||||
(number_literal))))))
|
||||
|
|
|
|||
91
test/fixtures/error_corpus/javascript_errors.txt
vendored
91
test/fixtures/error_corpus/javascript_errors.txt
vendored
|
|
@ -1,5 +1,45 @@
|
|||
===================================================
|
||||
one invalid token right after the viable prefix
|
||||
Missing default values for function parameters
|
||||
===================================================
|
||||
|
||||
class A {
|
||||
constructor (a, b = ) {
|
||||
this.a = a
|
||||
}
|
||||
|
||||
foo() {}
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(class (identifier) (class_body
|
||||
(method_definition
|
||||
(property_identifier)
|
||||
(formal_parameters (identifier) (identifier) (ERROR))
|
||||
(statement_block (expression_statement (assignment_expression (member_expression (this) (property_identifier)) (identifier)))))
|
||||
(method_definition
|
||||
(property_identifier)
|
||||
(formal_parameters)
|
||||
(statement_block)))))
|
||||
|
||||
===================================================
|
||||
Missing object-literal values
|
||||
===================================================
|
||||
|
||||
{
|
||||
a: b,
|
||||
c:
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(program (expression_statement (object
|
||||
(pair (property_identifier) (identifier))
|
||||
(pair (property_identifier) (yield_expression (MISSING))))))
|
||||
|
||||
===================================================
|
||||
Extra identifiers in expressions
|
||||
===================================================
|
||||
|
||||
if (a b) {
|
||||
|
|
@ -17,33 +57,12 @@ e f;
|
|||
(statement_block
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier))))
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier)))
|
||||
|
||||
=======================================================
|
||||
multiple invalid tokens right after the viable prefix
|
||||
=======================================================
|
||||
|
||||
if (a b c) {
|
||||
d e f g;
|
||||
}
|
||||
h i j k;
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(if_statement
|
||||
(parenthesized_expression
|
||||
(identifier)
|
||||
(ERROR (identifier) (identifier)))
|
||||
(statement_block
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier) (ERROR (identifier) (identifier)))))
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier) (ERROR (identifier) (identifier))))
|
||||
(expression_statement
|
||||
(identifier)
|
||||
(ERROR (identifier))))
|
||||
|
||||
===================================================
|
||||
one invalid subtree right after the viable prefix
|
||||
Extra complex literals in expressions
|
||||
===================================================
|
||||
|
||||
if ({a: 'b'} {c: 'd'}) {
|
||||
|
|
@ -68,7 +87,7 @@ if ({a: 'b'} {c: 'd'}) {
|
|||
(statement_block (expression_statement (identifier)))))))))
|
||||
|
||||
===================================================
|
||||
one invalid token at the end of the file
|
||||
Extra tokens at the end of the file
|
||||
===================================================
|
||||
|
||||
// skip the equals sign
|
||||
|
|
@ -79,24 +98,6 @@ a.b =
|
|||
(comment)
|
||||
(ERROR (member_expression (identifier) (property_identifier))))
|
||||
|
||||
=================================================================
|
||||
An invalid token at the end of a construct with extra line breaks
|
||||
=================================================================
|
||||
|
||||
a(
|
||||
b,
|
||||
c,.
|
||||
);
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(expression_statement
|
||||
(call_expression (identifier) (arguments
|
||||
(identifier)
|
||||
(identifier)
|
||||
(ERROR)))))
|
||||
|
||||
===================================================
|
||||
Errors after a sequence of function declarations
|
||||
===================================================
|
||||
|
|
|
|||
6
test/fixtures/error_corpus/json_errors.txt
vendored
6
test/fixtures/error_corpus/json_errors.txt
vendored
|
|
@ -47,7 +47,7 @@ errors inside objects
|
|||
errors inside nested objects
|
||||
==========================================
|
||||
|
||||
{ "key1": { "key2": 1, 2 }, [, "key3": 3 }
|
||||
{ "key1": { "key2": 1, 2 }, "key3": 3 [ }
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -55,5 +55,5 @@ errors inside nested objects
|
|||
(pair (string) (object
|
||||
(pair (string) (number))
|
||||
(ERROR (number))))
|
||||
(ERROR)
|
||||
(pair (string) (number))))
|
||||
(pair (string) (number))
|
||||
(ERROR)))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue