Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery states in the parse table. For each token T, there was an error-recovery state in which the parser looked for *any* token that could follow T. Unfortunately, sometimes the set of tokens that could follow T contained conflicts. For example, in JS, the token '}' can be followed by the open-ended 'template_chars' token, but also by ordinary tokens like 'identifier'. So with the old algorithm, when recovering from an unexpected '}' token, the lexer had no way to distinguish identifiers from template_chars.

This commit drops the error-recovery states. Instead, when we encounter an unexpected token T, we recover from the error by finding a previous state S in the stack in which T would be valid, popping all of the nodes after S, and wrapping them in an error. This way, the lexer is always invoked in a normal parse state, in which it is looking for a non-conflicting set of tokens.

Eliminating the error-recovery states also shrinks the lex state machine significantly.

Signed-off-by: Rick Winfrey <rewinfrey@github.com>
This commit is contained in:
parent
8b3941764f
commit
99d048e016
15 changed files with 327 additions and 639 deletions
|
|
@ -21,8 +21,6 @@ typedef struct StackNode StackNode;
|
|||
typedef struct {
|
||||
StackNode *node;
|
||||
Tree *tree;
|
||||
uint32_t push_count;
|
||||
uint32_t depth;
|
||||
bool is_pending;
|
||||
} StackLink;
|
||||
|
||||
|
|
@ -33,24 +31,16 @@ struct StackNode {
|
|||
short unsigned int link_count;
|
||||
uint32_t ref_count;
|
||||
unsigned error_cost;
|
||||
unsigned error_count;
|
||||
unsigned depth;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
StackNode *node;
|
||||
TreeArray trees;
|
||||
uint32_t tree_count;
|
||||
uint32_t push_count;
|
||||
uint32_t depth;
|
||||
bool is_pending;
|
||||
} Iterator;
|
||||
|
||||
typedef struct {
|
||||
uint32_t goal_tree_count;
|
||||
bool found_error;
|
||||
bool found_valid_path;
|
||||
} StackPopSession;
|
||||
|
||||
typedef struct {
|
||||
void *payload;
|
||||
StackIterateCallback callback;
|
||||
|
|
@ -62,8 +52,8 @@ typedef struct {
|
|||
StackNode *node;
|
||||
Tree *last_external_token;
|
||||
uint32_t push_count;
|
||||
uint32_t depth;
|
||||
bool is_halted;
|
||||
StackSummary *summary;
|
||||
} StackHead;
|
||||
|
||||
struct Stack {
|
||||
|
|
@ -117,7 +107,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p
|
|||
StackNode *node = pool->size > 0 ?
|
||||
array_pop(pool) :
|
||||
ts_malloc(sizeof(StackNode));
|
||||
*node = (StackNode){.ref_count = 1, .link_count = 0, .state = state};
|
||||
*node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0};
|
||||
|
||||
if (previous_node) {
|
||||
stack_node_retain(previous_node);
|
||||
|
|
@ -127,30 +117,31 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p
|
|||
.node = previous_node,
|
||||
.tree = tree,
|
||||
.is_pending = is_pending,
|
||||
.push_count = 0,
|
||||
.depth = 0,
|
||||
};
|
||||
|
||||
node->position = previous_node->position;
|
||||
node->error_count = previous_node->error_count;
|
||||
node->error_cost = previous_node->error_cost;
|
||||
|
||||
if (tree) {
|
||||
node->depth = previous_node->depth;
|
||||
if (!tree->extra) node->depth++;
|
||||
ts_tree_retain(tree);
|
||||
node->error_cost += tree->error_cost;
|
||||
node->position = length_add(node->position, ts_tree_total_size(tree));
|
||||
if (state == ERROR_STATE && !tree->extra) {
|
||||
node->error_cost +=
|
||||
ERROR_COST_PER_SKIPPED_TREE * (tree->visible ? 1 : tree->visible_child_count) +
|
||||
ERROR_COST_PER_SKIPPED_CHAR * (tree->padding.chars + tree->size.chars) +
|
||||
ERROR_COST_PER_SKIPPED_LINE * (tree->padding.extent.row + tree->size.extent.row);
|
||||
ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) +
|
||||
ERROR_COST_PER_SKIPPED_CHAR * tree->size.chars +
|
||||
ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row;
|
||||
if (previous_node->links[0].tree) {
|
||||
node->error_cost +=
|
||||
ERROR_COST_PER_SKIPPED_CHAR * tree->padding.chars +
|
||||
ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
node->error_count++;
|
||||
}
|
||||
} else {
|
||||
node->position = length_zero();
|
||||
node->error_count = 0;
|
||||
node->error_cost = 0;
|
||||
}
|
||||
|
||||
|
|
@ -195,17 +186,19 @@ static void stack_head_delete(StackHead *self, StackNodeArray *pool) {
|
|||
if (self->last_external_token) {
|
||||
ts_tree_release(self->last_external_token);
|
||||
}
|
||||
if (self->summary) {
|
||||
array_delete(self->summary);
|
||||
ts_free(self->summary);
|
||||
}
|
||||
stack_node_release(self->node, pool);
|
||||
}
|
||||
}
|
||||
|
||||
static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
|
||||
uint32_t push_count, uint32_t depth,
|
||||
Tree *last_external_token) {
|
||||
static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version,
|
||||
StackNode *node, Tree *last_external_token) {
|
||||
StackHead head = {
|
||||
.node = node,
|
||||
.depth = depth,
|
||||
.push_count = push_count,
|
||||
.push_count = self->heads.contents[original_version].push_count,
|
||||
.last_external_token = last_external_token,
|
||||
.is_halted = false,
|
||||
};
|
||||
|
|
@ -215,38 +208,35 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
|
|||
return (StackVersion)(self->heads.size - 1);
|
||||
}
|
||||
|
||||
static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
|
||||
uint32_t push_count, uint32_t depth,
|
||||
Tree *last_external_token) {
|
||||
static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node,
|
||||
TreeArray *trees, Tree *last_external_token) {
|
||||
for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
|
||||
StackVersion version = self->slices.contents[i].version;
|
||||
if (self->heads.contents[version].node == node) {
|
||||
StackSlice slice = { *trees, version };
|
||||
StackSlice slice = {*trees, version};
|
||||
array_insert(&self->slices, i + 1, slice);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
StackVersion version = ts_stack__add_version(self, node, push_count, depth, last_external_token);
|
||||
StackVersion version = ts_stack__add_version(self, original_version, node, last_external_token);
|
||||
StackSlice slice = { *trees, version };
|
||||
array_push(&self->slices, slice);
|
||||
}
|
||||
|
||||
inline StackPopResult stack__iter(Stack *self, StackVersion version,
|
||||
StackIterateInternalCallback callback, void *payload) {
|
||||
StackIterateInternalCallback callback, void *payload,
|
||||
bool include_trees) {
|
||||
array_clear(&self->slices);
|
||||
array_clear(&self->iterators);
|
||||
|
||||
StackHead *head = array_get(&self->heads, version);
|
||||
uint32_t starting_push_count = head->push_count;
|
||||
Tree *last_external_token = head->last_external_token;
|
||||
Iterator iterator = {
|
||||
.node = head->node,
|
||||
.trees = array_new(),
|
||||
.tree_count = 0,
|
||||
.is_pending = true,
|
||||
.push_count = 0,
|
||||
.depth = head->depth,
|
||||
};
|
||||
array_push(&self->iterators, iterator);
|
||||
|
||||
|
|
@ -266,10 +256,9 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
ts_tree_array_reverse(&trees);
|
||||
ts_stack__add_slice(
|
||||
self,
|
||||
version,
|
||||
node,
|
||||
&trees,
|
||||
starting_push_count + iterator->push_count,
|
||||
iterator->depth,
|
||||
last_external_token
|
||||
);
|
||||
}
|
||||
|
|
@ -298,28 +287,27 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
}
|
||||
|
||||
next_iterator->node = link.node;
|
||||
next_iterator->push_count += link.push_count;
|
||||
if (link.depth > 0) {
|
||||
next_iterator->depth = link.depth;
|
||||
}
|
||||
if (link.tree) {
|
||||
if (include_trees) {
|
||||
array_push(&next_iterator->trees, link.tree);
|
||||
ts_tree_retain(link.tree);
|
||||
}
|
||||
|
||||
if (!link.tree->extra) {
|
||||
next_iterator->tree_count++;
|
||||
next_iterator->depth--;
|
||||
if (!link.is_pending) {
|
||||
next_iterator->is_pending = false;
|
||||
}
|
||||
}
|
||||
array_push(&next_iterator->trees, link.tree);
|
||||
ts_tree_retain(link.tree);
|
||||
} else {
|
||||
next_iterator->tree_count++;
|
||||
next_iterator->is_pending = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (StackPopResult){ false, self->slices };
|
||||
return (StackPopResult){self->slices};
|
||||
}
|
||||
|
||||
Stack *ts_stack_new() {
|
||||
|
|
@ -375,8 +363,7 @@ unsigned ts_stack_push_count(const Stack *self, StackVersion version) {
|
|||
return array_get(&self->heads, version)->push_count;
|
||||
}
|
||||
|
||||
void ts_stack_decrease_push_count(Stack *self, StackVersion version,
|
||||
unsigned decrement) {
|
||||
void ts_stack_decrease_push_count(Stack *self, StackVersion version, unsigned decrement) {
|
||||
array_get(&self->heads, version)->push_count -= decrement;
|
||||
}
|
||||
|
||||
|
|
@ -395,29 +382,18 @@ ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) {
|
|||
StackHead *head = array_get(&self->heads, version);
|
||||
return (ErrorStatus){
|
||||
.cost = head->node->error_cost,
|
||||
.count = head->node->error_count,
|
||||
.recovering = head->node->state == ERROR_STATE,
|
||||
.push_count = head->push_count,
|
||||
.depth = head->depth,
|
||||
};
|
||||
}
|
||||
|
||||
unsigned ts_stack_error_count(const Stack *self, StackVersion version) {
|
||||
StackNode *node = array_get(&self->heads, version)->node;
|
||||
return node->error_count;
|
||||
}
|
||||
|
||||
void ts_stack_push(Stack *self, StackVersion version, Tree *tree,
|
||||
bool is_pending, TSStateId state) {
|
||||
void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) {
|
||||
StackHead *head = array_get(&self->heads, version);
|
||||
StackNode *new_node = stack_node_new(head->node, tree, is_pending, state, &self->node_pool);
|
||||
StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool);
|
||||
if (state == ERROR_STATE) {
|
||||
new_node->links[0].push_count = head->push_count;
|
||||
new_node->links[0].depth = head->depth;
|
||||
head->push_count = 0;
|
||||
head->depth = 0;
|
||||
} else {
|
||||
} else if (!tree->extra) {
|
||||
head->push_count++;
|
||||
if (!tree->extra) head->depth++;
|
||||
}
|
||||
stack_node_release(head->node, &self->node_pool);
|
||||
head->node = new_node;
|
||||
|
|
@ -431,55 +407,20 @@ inline StackIterateAction iterate_callback(void *payload, const Iterator *iterat
|
|||
StackPopResult ts_stack_iterate(Stack *self, StackVersion version,
|
||||
StackIterateCallback callback, void *payload) {
|
||||
StackIterateSession session = {payload, callback};
|
||||
return stack__iter(self, version, iterate_callback, &session);
|
||||
return stack__iter(self, version, iterate_callback, &session, true);
|
||||
}
|
||||
|
||||
inline StackIterateAction pop_count_callback(void *payload, const Iterator *iterator) {
|
||||
StackPopSession *pop_session = (StackPopSession *)payload;
|
||||
|
||||
if (iterator->tree_count == pop_session->goal_tree_count) {
|
||||
pop_session->found_valid_path = true;
|
||||
unsigned *goal_tree_count = payload;
|
||||
if (iterator->tree_count == *goal_tree_count) {
|
||||
return StackIteratePop | StackIterateStop;
|
||||
} else {
|
||||
return StackIterateNone;
|
||||
}
|
||||
|
||||
if (iterator->node->state == ERROR_STATE) {
|
||||
if (pop_session->found_valid_path || pop_session->found_error) {
|
||||
return StackIterateStop;
|
||||
} else {
|
||||
pop_session->found_error = true;
|
||||
return StackIteratePop | StackIterateStop;
|
||||
}
|
||||
}
|
||||
return StackIterateNone;
|
||||
}
|
||||
|
||||
StackPopResult ts_stack_pop_count(Stack *self, StackVersion version,
|
||||
uint32_t count) {
|
||||
StackPopSession session = {
|
||||
.goal_tree_count = count,
|
||||
.found_error = false,
|
||||
.found_valid_path = false,
|
||||
};
|
||||
|
||||
StackPopResult pop = stack__iter(self, version, pop_count_callback, &session);
|
||||
|
||||
if (session.found_error) {
|
||||
if (session.found_valid_path) {
|
||||
StackSlice error_slice = pop.slices.contents[0];
|
||||
ts_tree_array_delete(&error_slice.trees);
|
||||
array_erase(&pop.slices, 0);
|
||||
if (array_front(&pop.slices)->version != error_slice.version) {
|
||||
ts_stack_remove_version(self, error_slice.version);
|
||||
for (StackVersion i = 0; i < pop.slices.size; i++) {
|
||||
pop.slices.contents[i].version--;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pop.stopped_at_error = true;
|
||||
}
|
||||
}
|
||||
|
||||
return pop;
|
||||
StackPopResult ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
|
||||
return stack__iter(self, version, pop_count_callback, &count, true);
|
||||
}
|
||||
|
||||
inline StackIterateAction pop_pending_callback(void *payload, const Iterator *iterator) {
|
||||
|
|
@ -495,7 +436,7 @@ inline StackIterateAction pop_pending_callback(void *payload, const Iterator *it
|
|||
}
|
||||
|
||||
StackPopResult ts_stack_pop_pending(Stack *self, StackVersion version) {
|
||||
StackPopResult pop = stack__iter(self, version, pop_pending_callback, NULL);
|
||||
StackPopResult pop = stack__iter(self, version, pop_pending_callback, NULL, true);
|
||||
if (pop.slices.size > 0) {
|
||||
ts_stack_renumber_version(self, pop.slices.contents[0].version, version);
|
||||
pop.slices.contents[0].version = version;
|
||||
|
|
@ -503,12 +444,71 @@ StackPopResult ts_stack_pop_pending(Stack *self, StackVersion version) {
|
|||
return pop;
|
||||
}
|
||||
|
||||
inline StackIterateAction pop_error_callback(void *payload, const Iterator *iterator) {
|
||||
if (iterator->trees.size > 0) {
|
||||
bool *found_error = payload;
|
||||
if (!*found_error && iterator->trees.contents[0]->symbol == ts_builtin_sym_error) {
|
||||
*found_error = true;
|
||||
return StackIteratePop | StackIterateStop;
|
||||
} else {
|
||||
return StackIterateStop;
|
||||
}
|
||||
} else {
|
||||
return StackIterateNone;
|
||||
}
|
||||
}
|
||||
|
||||
// Pop an ERROR tree from the top of the given stack version.
//
// The iteration is only started when at least one link of the head node
// carries a tree whose symbol is `ts_builtin_sym_error`; otherwise a result
// with an empty slice array is returned and the stack is left untouched.
StackPopResult ts_stack_pop_error(Stack *self, StackVersion version) {
  StackNode *top = array_get(&self->heads, version)->node;

  bool has_error_link = false;
  for (unsigned i = 0; i < top->link_count && !has_error_link; i++) {
    Tree *tree = top->links[i].tree;
    has_error_link = tree && tree->symbol == ts_builtin_sym_error;
  }

  if (!has_error_link) {
    return (StackPopResult){.slices = array_new()};
  }

  bool found_error = false;
  return stack__iter(self, version, pop_error_callback, &found_error, true);
}
|
||||
|
||||
inline StackIterateAction pop_all_callback(void *payload, const Iterator *iterator) {
|
||||
return iterator->node->link_count == 0 ? StackIteratePop : StackIterateNone;
|
||||
}
|
||||
|
||||
StackPopResult ts_stack_pop_all(Stack *self, StackVersion version) {
|
||||
return stack__iter(self, version, pop_all_callback, NULL);
|
||||
return stack__iter(self, version, pop_all_callback, NULL, true);
|
||||
}
|
||||
|
||||
inline StackIterateAction summarize_stack_callback(void *payload, const Iterator *iterator) {
|
||||
StackSummary *summary = payload;
|
||||
TSStateId state = iterator->node->state;
|
||||
unsigned depth = iterator->tree_count;
|
||||
for (unsigned i = summary->size - 1; i + 1 > 0; i--) {
|
||||
StackSummaryEntry entry = summary->contents[i];
|
||||
if (entry.depth < depth) break;
|
||||
if (entry.depth == depth && entry.state == state) return StackIterateNone;
|
||||
}
|
||||
array_push(summary, ((StackSummaryEntry){.depth = depth, .state = state}));
|
||||
return StackIterateNone;
|
||||
}
|
||||
|
||||
// Build a summary of the given stack version and attach it to that version's
// head.
//
// The summary is a heap-allocated array of (depth, state) entries collected
// by `summarize_stack_callback` while iterating the stack without collecting
// trees. The head holds the summary afterwards; `stack_head_delete` releases
// it with the same `array_delete` + `ts_free` sequence used below.
void ts_stack_record_summary(Stack *self, StackVersion version) {
  StackSummary *result = ts_malloc(sizeof(StackSummary));
  array_init(result);
  stack__iter(self, version, summarize_stack_callback, result, false);

  // Look the head up only after iterating, then release any summary recorded
  // earlier for this version so that re-recording does not leak it.
  StackHead *head = &self->heads.contents[version];
  if (head->summary) {
    array_delete(head->summary);
    ts_free(head->summary);
  }
  head->summary = result;
}
|
||||
|
||||
// Return the summary pointer stored on the given version's head. The head's
// pointer is returned as-is, with no copy made.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->summary;
}
|
||||
|
||||
// Count how many consecutive nodes, starting at the head of `version` and
// following each node's first link, are in parse state 0, and return that
// count minus one.
//
// NOTE(review): state 0 is presumably ERROR_STATE, which the rest of this
// file compares against by name — confirm and use the named constant.
//
// Returns 0 when the head node is not in state 0. The unguarded `result - 1`
// would otherwise wrap around to UINT_MAX in that case.
unsigned ts_stack_depth_since_error(Stack *self, StackVersion version) {
  unsigned result = 0;
  StackNode *node = array_get(&self->heads, version)->node;
  while (node && node->state == 0) {
    result++;
    // Stop at a node with no links instead of reading an unused link slot.
    node = node->link_count > 0 ? node->links[0].node : NULL;
  }
  return result > 0 ? result - 1 : 0;
}
|
||||
|
||||
void ts_stack_remove_version(Stack *self, StackVersion version) {
|
||||
|
|
@ -536,6 +536,7 @@ StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
|
|||
StackHead *head = array_back(&self->heads);
|
||||
stack_node_retain(head->node);
|
||||
if (head->last_external_token) ts_tree_retain(head->last_external_token);
|
||||
head->summary = NULL;
|
||||
return self->heads.size - 1;
|
||||
}
|
||||
|
||||
|
|
@ -554,9 +555,8 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version
|
|||
return
|
||||
head1->node->state == head2->node->state &&
|
||||
head1->node->position.chars == head2->node->position.chars &&
|
||||
ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token) &&
|
||||
((head1->node->error_count == 0 && head2->node->error_count == 0) ||
|
||||
(head1->depth == head2->depth));
|
||||
head1->node->depth == head2->node->depth &&
|
||||
ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token);
|
||||
}
|
||||
|
||||
void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion version2) {
|
||||
|
|
@ -565,8 +565,6 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi
|
|||
for (uint32_t i = 0; i < head2->node->link_count; i++) {
|
||||
stack_node_add_link(head1->node, head2->node->links[i]);
|
||||
}
|
||||
if (head2->push_count > head1->push_count) head1->push_count = head2->push_count;
|
||||
if (head2->depth > head1->depth) head1->depth = head2->depth;
|
||||
ts_stack_remove_version(self, version2);
|
||||
}
|
||||
|
||||
|
|
@ -587,8 +585,6 @@ void ts_stack_clear(Stack *self) {
|
|||
array_push(&self->heads, ((StackHead){
|
||||
.node = self->base_node,
|
||||
.last_external_token = NULL,
|
||||
.depth = 0,
|
||||
.push_count = 0,
|
||||
.is_halted = false,
|
||||
}));
|
||||
}
|
||||
|
|
@ -612,8 +608,8 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
|
|||
fprintf(
|
||||
f,
|
||||
"node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, "
|
||||
"labeltooltip=\"push_count: %u\ndepth: %u",
|
||||
i, head->node, i, head->push_count, head->depth);
|
||||
"labeltooltip=\"push_count: %u\ndepth: %u", i, head->node, i, head->push_count, head->node->depth
|
||||
);
|
||||
|
||||
if (head->last_external_token) {
|
||||
TSExternalTokenState *state = &head->last_external_token->external_token_state;
|
||||
|
|
@ -654,10 +650,11 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
|
|||
else
|
||||
fprintf(f, "label=\"%d\"", node->state);
|
||||
|
||||
fprintf(f,
|
||||
" tooltip=\"position: %u,%u\nerror_count: %u\nerror_cost: %u\"];\n",
|
||||
node->position.extent.row, node->position.extent.column, node->error_count,
|
||||
node->error_cost);
|
||||
fprintf(
|
||||
f,
|
||||
" tooltip=\"position: %u,%u\nerror_cost: %u\"];\n",
|
||||
node->position.extent.row, node->position.extent.column, node->error_cost
|
||||
);
|
||||
|
||||
for (int j = 0; j < node->link_count; j++) {
|
||||
StackLink link = node->links[j];
|
||||
|
|
@ -668,7 +665,7 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
|
|||
fprintf(f, "fontcolor=gray ");
|
||||
|
||||
if (!link.tree) {
|
||||
fprintf(f, "color=red, tooltip=\"push_count: %u, depth: %u\"", link.push_count, link.depth);
|
||||
fprintf(f, "color=red");
|
||||
} else if (link.tree->symbol == ts_builtin_sym_error) {
|
||||
fprintf(f, "label=\"ERROR\"");
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue