Start work on repairing errors by inserting missing tokens
This commit is contained in:
parent
f2dc620610
commit
d3c85f288d
7 changed files with 128 additions and 33 deletions
|
|
@ -84,6 +84,7 @@ const char *ts_node_type(TSNode, const TSDocument *);
|
|||
char *ts_node_string(TSNode, const TSDocument *);
|
||||
bool ts_node_eq(TSNode, TSNode);
|
||||
bool ts_node_is_named(TSNode);
|
||||
bool ts_node_is_missing(TSNode);
|
||||
bool ts_node_has_changes(TSNode);
|
||||
bool ts_node_has_error(TSNode);
|
||||
TSNode ts_node_parent(TSNode);
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#define RUNTIME_ERROR_COSTS_H_
|
||||
|
||||
#define ERROR_STATE 0
|
||||
#define ERROR_COST_PER_MISSING_TREE 150
|
||||
#define ERROR_COST_PER_SKIPPED_TREE 100
|
||||
#define ERROR_COST_PER_SKIPPED_LINE 30
|
||||
#define ERROR_COST_PER_SKIPPED_CHAR 1
|
||||
|
|
|
|||
|
|
@ -33,6 +33,22 @@ static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
|
|||
return entry.actions;
|
||||
}
|
||||
|
||||
static inline bool ts_language_has_actions(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.action_count > 0;
|
||||
}
|
||||
|
||||
static inline bool ts_language_has_reduce_action(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce;
|
||||
}
|
||||
|
||||
static inline TSStateId ts_language_next_state(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
|
|
|
|||
|
|
@ -278,6 +278,11 @@ bool ts_node_is_named(TSNode self) {
|
|||
return tree->context.alias_symbol ? tree->context.alias_is_named : tree->named;
|
||||
}
|
||||
|
||||
// Returns the is_missing flag of the tree that backs this node.
bool ts_node_is_missing(TSNode self) {
  return ts_node__tree(self)->is_missing;
}
|
||||
|
||||
// Returns the has_changes flag of the tree that backs this node.
bool ts_node_has_changes(TSNode self) {
  const Tree *backing_tree = ts_node__tree(self);
  return backing_tree->has_changes;
}
|
||||
|
|
|
|||
|
|
@ -170,7 +170,6 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err
|
|||
|
||||
if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
|
||||
if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
|
||||
|
||||
return ErrorComparisonNone;
|
||||
}
|
||||
|
||||
|
|
@ -511,6 +510,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId
|
|||
reason = "has_changes";
|
||||
} else if (result->symbol == ts_builtin_sym_error) {
|
||||
reason = "is_error";
|
||||
} else if (result->is_missing) {
|
||||
reason = "is_missing";
|
||||
} else if (result->fragile_left || result->fragile_right) {
|
||||
reason = "is_fragile";
|
||||
} else if (self->in_ambiguity && result->child_count) {
|
||||
|
|
@ -783,7 +784,9 @@ static void parser__accept(Parser *self, StackVersion version,
|
|||
ts_stack_halt(self->stack, version);
|
||||
}
|
||||
|
||||
static void parser__do_potential_reductions(Parser *self, StackVersion starting_version) {
|
||||
static bool parser__do_all_potential_reductions(Parser *self, StackVersion starting_version,
|
||||
TSSymbol lookahead_symbol) {
|
||||
bool result = false;
|
||||
for (StackVersion version = starting_version;;) {
|
||||
uint32_t version_count = ts_stack_version_count(self->stack);
|
||||
if (version >= version_count) break;
|
||||
|
|
@ -792,7 +795,16 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_
|
|||
bool has_shift_action = false;
|
||||
array_clear(&self->reduce_actions);
|
||||
|
||||
for (TSSymbol symbol = 0; symbol < self->language->token_count; symbol++) {
|
||||
TSSymbol first_symbol, end_symbol;
|
||||
if (lookahead_symbol != 0) {
|
||||
first_symbol = lookahead_symbol;
|
||||
end_symbol = lookahead_symbol + 1;
|
||||
} else {
|
||||
first_symbol = 1;
|
||||
end_symbol = self->language->token_count;
|
||||
}
|
||||
|
||||
for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self->language, state, symbol, &entry);
|
||||
for (uint32_t i = 0; i < entry.action_count; i++) {
|
||||
|
|
@ -816,6 +828,7 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_
|
|||
}
|
||||
}
|
||||
|
||||
bool has_reduce_action = self->reduce_actions.size > 0;
|
||||
for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
|
||||
ReduceAction action = self->reduce_actions.contents[i];
|
||||
parser__reduce(
|
||||
|
|
@ -825,9 +838,15 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_
|
|||
);
|
||||
}
|
||||
|
||||
if (self->reduce_actions.size > 0 && !has_shift_action) {
|
||||
ts_stack_renumber_version(self->stack, version_count, version);
|
||||
continue;
|
||||
if (has_shift_action) {
|
||||
result = true;
|
||||
} else {
|
||||
if (has_reduce_action) {
|
||||
ts_stack_renumber_version(self->stack, version_count, version);
|
||||
continue;
|
||||
} else if (lookahead_symbol != 0) {
|
||||
ts_stack_remove_version(self->stack, version);
|
||||
}
|
||||
}
|
||||
|
||||
if (version == starting_version) {
|
||||
|
|
@ -836,6 +855,7 @@ static void parser__do_potential_reductions(Parser *self, StackVersion starting_
|
|||
version++;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) {
|
||||
|
|
@ -848,18 +868,58 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo
|
|||
return;
|
||||
}
|
||||
|
||||
// Perform any reductions that could have happened in this state, regardless of the lookahead.
|
||||
LOG("handle_error");
|
||||
|
||||
// Perform any reductions that could have happened in this state, regardless
|
||||
// of the lookahead.
|
||||
uint32_t previous_version_count = ts_stack_version_count(self->stack);
|
||||
parser__do_potential_reductions(self, version);
|
||||
parser__do_all_potential_reductions(self, version, 0);
|
||||
uint32_t version_count = ts_stack_version_count(self->stack);
|
||||
|
||||
// Push a discontinuity onto the stack. Merge all of the stack versions that
|
||||
// were created in the previous step.
|
||||
ts_stack_push(self->stack, version, NULL, false, ERROR_STATE);
|
||||
while (ts_stack_version_count(self->stack) > previous_version_count) {
|
||||
ts_stack_push(self->stack, previous_version_count, NULL, false, ERROR_STATE);
|
||||
bool did_insert_missing_token = false;
|
||||
for (StackVersion v = version; v < version_count;) {
|
||||
if (!did_insert_missing_token) {
|
||||
TSStateId state = ts_stack_top_state(self->stack, v);
|
||||
for (TSSymbol missing_symbol = 1;
|
||||
missing_symbol < self->language->token_count;
|
||||
missing_symbol++) {
|
||||
TSStateId state_after_missing_symbol = ts_language_next_state(
|
||||
self->language, state, missing_symbol
|
||||
);
|
||||
if (state_after_missing_symbol == 0) continue;
|
||||
|
||||
if (ts_language_has_reduce_action(
|
||||
self->language,
|
||||
state_after_missing_symbol,
|
||||
lookahead_symbol
|
||||
)) {
|
||||
StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
|
||||
Tree *missing_tree = ts_tree_make_missing_leaf(&self->tree_pool, missing_symbol, self->language);
|
||||
ts_stack_push(
|
||||
self->stack, version_with_missing_tree,
|
||||
missing_tree, false,
|
||||
state_after_missing_symbol
|
||||
);
|
||||
ts_tree_release(&self->tree_pool, missing_tree);
|
||||
|
||||
if (parser__do_all_potential_reductions(
|
||||
self, version_with_missing_tree,
|
||||
lookahead_symbol
|
||||
)) {
|
||||
LOG("recover_with_missing symbol:%s, state:%u", SYM_NAME(missing_symbol), state_after_missing_symbol);
|
||||
LOG_STACK();
|
||||
did_insert_missing_token = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ts_stack_push(self->stack, v, NULL, false, ERROR_STATE);
|
||||
v = (v == version) ? previous_version_count : v + 1;
|
||||
}
|
||||
|
||||
for (unsigned i = previous_version_count; i < version_count; i++) {
|
||||
ts_stack_force_merge(self->stack, version, previous_version_count);
|
||||
}
|
||||
|
||||
|
|
@ -892,8 +952,8 @@ static void parser__halt_parse(Parser *self) {
|
|||
ts_tree_release(&self->tree_pool, eof);
|
||||
}
|
||||
|
||||
static StackVersion parser__recover_to_state(Parser *self, StackVersion version,
|
||||
unsigned depth, TSStateId goal_state) {
|
||||
static bool parser__recover_to_state(Parser *self, StackVersion version, unsigned depth,
|
||||
TSStateId goal_state) {
|
||||
StackPopResult pop = ts_stack_pop_count(self->stack, version, depth);
|
||||
StackVersion previous_version = STACK_VERSION_NONE;
|
||||
|
||||
|
|
@ -943,7 +1003,7 @@ static StackVersion parser__recover_to_state(Parser *self, StackVersion version,
|
|||
array_delete(&trailing_extras);
|
||||
}
|
||||
|
||||
return previous_version;
|
||||
return previous_version != STACK_VERSION_NONE;
|
||||
}
|
||||
|
||||
static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) {
|
||||
|
|
@ -951,8 +1011,10 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
|
|||
unsigned previous_version_count = ts_stack_version_count(self->stack);
|
||||
Length position = ts_stack_top_position(self->stack, version);
|
||||
StackSummary *summary = ts_stack_get_summary(self->stack, version);
|
||||
|
||||
for (unsigned i = 0; i < summary->size; i++) {
|
||||
StackSummaryEntry entry = summary->contents[i];
|
||||
|
||||
if (entry.state == ERROR_STATE) continue;
|
||||
if (entry.position.bytes == position.bytes) continue;
|
||||
unsigned depth = entry.depth + ts_stack_depth_since_error(self->stack, version);
|
||||
|
|
@ -963,10 +1025,8 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
|
|||
(position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
|
||||
if (parser__better_version_exists(self, version, false, new_cost)) break;
|
||||
|
||||
unsigned count = 0;
|
||||
if (ts_language_actions(self->language, entry.state, lookahead->symbol, &count) && count > 0) {
|
||||
StackVersion recovered_version = parser__recover_to_state(self, version, depth, entry.state);
|
||||
if (recovered_version != STACK_VERSION_NONE) {
|
||||
if (ts_language_has_actions(self->language, entry.state, lookahead->symbol)) {
|
||||
if (parser__recover_to_state(self, version, depth, entry.state)) {
|
||||
did_recover = true;
|
||||
LOG("recover state:%u, depth:%u", entry.state, depth);
|
||||
break;
|
||||
|
|
@ -976,13 +1036,11 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
|
|||
|
||||
for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
|
||||
if (ts_stack_is_halted(self->stack, i)) {
|
||||
ts_stack_remove_version(self->stack, i);
|
||||
i--;
|
||||
ts_stack_remove_version(self->stack, i--);
|
||||
} else {
|
||||
for (unsigned j = 0; j < i; j++) {
|
||||
if (ts_stack_can_merge(self->stack, j, i)) {
|
||||
ts_stack_remove_version(self->stack, i);
|
||||
i--;
|
||||
ts_stack_remove_version(self->stack, i--);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -1004,7 +1062,6 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
|
|||
return;
|
||||
}
|
||||
|
||||
LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol));
|
||||
unsigned n;
|
||||
const TSParseAction *actions = ts_language_actions(self->language, 1, lookahead->symbol, &n);
|
||||
bool extra = n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra;
|
||||
|
|
@ -1012,6 +1069,8 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
|
|||
|
||||
if (parser__better_version_exists(self, version, true, ts_stack_error_cost(self->stack, version))) {
|
||||
ts_stack_halt(self->stack, version);
|
||||
} else {
|
||||
LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -324,14 +324,23 @@ Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLangu
|
|||
}
|
||||
}
|
||||
|
||||
Tree *result =
|
||||
ts_tree_make_node(pool, ts_builtin_sym_error, children->size, children->contents, 0, language);
|
||||
Tree *result = ts_tree_make_node(
|
||||
pool, ts_builtin_sym_error,
|
||||
children->size, children->contents,
|
||||
0, language
|
||||
);
|
||||
|
||||
result->fragile_left = true;
|
||||
result->fragile_right = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Builds a leaf for `symbol` via ts_tree_make_leaf, passing length_zero() for
// both length arguments, then flags it as missing and charges it
// ERROR_COST_PER_MISSING_TREE. The new leaf is returned to the caller.
Tree *ts_tree_make_missing_leaf(TreePool *pool, TSSymbol symbol, const TSLanguage *language) {
  Tree *leaf = ts_tree_make_leaf(
    pool, symbol,
    length_zero(), length_zero(),
    language
  );

  leaf->error_cost = ERROR_COST_PER_MISSING_TREE;
  leaf->is_missing = true;
  return leaf;
}
|
||||
void ts_tree_retain(Tree *self) {
|
||||
assert(self->ref_count > 0);
|
||||
self->ref_count++;
|
||||
|
|
@ -574,6 +583,7 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu
|
|||
bool visible =
|
||||
include_all ||
|
||||
is_root ||
|
||||
self->is_missing ||
|
||||
(self->visible && self->named) ||
|
||||
self->context.alias_is_named;
|
||||
|
||||
|
|
@ -585,9 +595,12 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu
|
|||
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.bytes > 0) {
|
||||
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
|
||||
cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
|
||||
} else if (self->is_missing) {
|
||||
cursor += snprintf(*writer, limit, "(MISSING");
|
||||
} else {
|
||||
TSSymbol symbol = self->context.alias_symbol ? self->context.alias_symbol : self->symbol;
|
||||
cursor += snprintf(*writer, limit, "(%s", ts_language_symbol_name(language, symbol));
|
||||
const char *symbol_name = ts_language_symbol_name(language, symbol);
|
||||
cursor += snprintf(*writer, limit, "(%s", symbol_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -601,11 +614,9 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu
|
|||
return cursor - string;
|
||||
}
|
||||
|
||||
char *ts_tree_string(const Tree *self, const TSLanguage *language,
|
||||
bool include_all) {
|
||||
static char SCRATCH[1];
|
||||
size_t size =
|
||||
ts_tree__write_to_string(self, language, SCRATCH, 0, true, include_all) + 1;
|
||||
char *ts_tree_string(const Tree *self, const TSLanguage *language, bool include_all) {
|
||||
char scratch_string[1];
|
||||
size_t size = ts_tree__write_to_string(self, language, scratch_string, 0, true, include_all) + 1;
|
||||
char *result = ts_malloc(size * sizeof(char));
|
||||
ts_tree__write_to_string(self, language, result, size, true, include_all);
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ typedef struct Tree {
|
|||
bool fragile_right : 1;
|
||||
bool has_changes : 1;
|
||||
bool has_external_tokens : 1;
|
||||
bool is_missing : 1;
|
||||
} Tree;
|
||||
|
||||
typedef Array(Tree *) TreeArray;
|
||||
|
|
@ -94,6 +95,7 @@ Tree *ts_tree_make_node(TreePool *, TSSymbol, uint32_t, Tree **, unsigned, const
|
|||
Tree *ts_tree_make_copy(TreePool *, Tree *child);
|
||||
Tree *ts_tree_make_error_node(TreePool *, TreeArray *, const TSLanguage *);
|
||||
Tree *ts_tree_make_error(TreePool *, Length, Length, int32_t, const TSLanguage *);
|
||||
Tree *ts_tree_make_missing_leaf(TreePool *, TSSymbol, const TSLanguage *);
|
||||
void ts_tree_retain(Tree *tree);
|
||||
void ts_tree_release(TreePool *, Tree *tree);
|
||||
bool ts_tree_eq(const Tree *tree1, const Tree *tree2);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue