Improve heuristics for pruning parse versions based on errors
* Rewrite the error cost comparison in terms of explicit, discrete conditions. * Allow merging versions have different error costs. * Store the depth of each stack version since the last error. Use this state to prevent incorrect merging. * Sort the stack versions in order of preference and put a hard limit on the version count.
This commit is contained in:
parent
445be0736a
commit
009d6d1534
9 changed files with 237 additions and 145 deletions
|
|
@ -17,7 +17,8 @@ typedef struct StackNode StackNode;
|
|||
typedef struct {
|
||||
StackNode *node;
|
||||
Tree *tree;
|
||||
unsigned push_count;
|
||||
uint32_t push_count;
|
||||
uint32_t depth;
|
||||
bool is_pending;
|
||||
} StackLink;
|
||||
|
||||
|
|
@ -32,11 +33,12 @@ struct StackNode {
|
|||
};
|
||||
|
||||
typedef struct {
|
||||
StackNode *node;
|
||||
TreeArray trees;
|
||||
uint32_t tree_count;
|
||||
StackNode *node;
|
||||
uint32_t push_count;
|
||||
uint32_t depth;
|
||||
bool is_pending;
|
||||
unsigned push_count;
|
||||
} Iterator;
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -49,9 +51,10 @@ typedef Array(StackNode *) StackNodeArray;
|
|||
|
||||
typedef struct {
|
||||
StackNode *node;
|
||||
bool is_halted;
|
||||
unsigned push_count;
|
||||
Tree *last_external_token;
|
||||
uint32_t push_count;
|
||||
uint32_t depth;
|
||||
bool is_halted;
|
||||
} StackHead;
|
||||
|
||||
struct Stack {
|
||||
|
|
@ -106,6 +109,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p
|
|||
.tree = tree,
|
||||
.is_pending = is_pending,
|
||||
.push_count = 0,
|
||||
.depth = 0,
|
||||
};
|
||||
|
||||
node->position = previous_node->position;
|
||||
|
|
@ -118,7 +122,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p
|
|||
node->position = length_add(node->position, ts_tree_total_size(tree));
|
||||
if (state == ERROR_STATE && !tree->extra) {
|
||||
node->error_cost +=
|
||||
ERROR_COST_PER_SKIPPED_TREE +
|
||||
ERROR_COST_PER_SKIPPED_TREE * (tree->visible ? 1 : tree->visible_child_count) +
|
||||
ERROR_COST_PER_SKIPPED_CHAR * (tree->padding.chars + tree->size.chars) +
|
||||
ERROR_COST_PER_SKIPPED_LINE * (tree->padding.extent.row + tree->size.extent.row);
|
||||
}
|
||||
|
|
@ -134,17 +138,27 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p
|
|||
return node;
|
||||
}
|
||||
|
||||
static bool stack__tree_is_equivalent(const Tree *left, const Tree *right) {
|
||||
return left == right || (
|
||||
left &&
|
||||
right &&
|
||||
left->child_count == 0 && right->child_count == 0 &&
|
||||
left->symbol == right->symbol &&
|
||||
left->padding.bytes == right->padding.bytes &&
|
||||
left->size.bytes == right->size.bytes &&
|
||||
left->extra == right->extra &&
|
||||
ts_tree_external_token_state_eq(left, right));
|
||||
}
|
||||
|
||||
static void stack_node_add_link(StackNode *self, StackLink link) {
|
||||
for (int i = 0; i < self->link_count; i++) {
|
||||
StackLink existing_link = self->links[i];
|
||||
if (existing_link.tree == link.tree ||
|
||||
(existing_link.tree && link.tree &&
|
||||
ts_tree_tokens_eq(existing_link.tree, link.tree))) {
|
||||
if (existing_link.node == link.node)
|
||||
return;
|
||||
if (stack__tree_is_equivalent(existing_link.tree, link.tree)) {
|
||||
if (existing_link.node == link.node) return;
|
||||
if (existing_link.node->state == link.node->state) {
|
||||
for (int j = 0; j < link.node->link_count; j++)
|
||||
for (int j = 0; j < link.node->link_count; j++) {
|
||||
stack_node_add_link(existing_link.node, link.node->links[j]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -152,31 +166,29 @@ static void stack_node_add_link(StackNode *self, StackLink link) {
|
|||
|
||||
if (self->link_count < MAX_LINK_COUNT) {
|
||||
stack_node_retain(link.node);
|
||||
if (link.tree)
|
||||
ts_tree_retain(link.tree);
|
||||
|
||||
self->links[self->link_count++] = (StackLink){
|
||||
.node = link.node,
|
||||
.tree = link.tree,
|
||||
.is_pending = link.is_pending,
|
||||
.push_count = link.push_count,
|
||||
};
|
||||
if (link.tree) ts_tree_retain(link.tree);
|
||||
self->links[self->link_count++] = link;
|
||||
}
|
||||
}
|
||||
|
||||
static void stack_head_delete(StackHead *self, StackNodeArray *pool) {
|
||||
if (self->last_external_token) ts_tree_release(self->last_external_token);
|
||||
stack_node_release(self->node, pool);
|
||||
if (self->node) {
|
||||
if (self->last_external_token) {
|
||||
ts_tree_release(self->last_external_token);
|
||||
}
|
||||
stack_node_release(self->node, pool);
|
||||
}
|
||||
}
|
||||
|
||||
static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
|
||||
unsigned push_count,
|
||||
uint32_t push_count, uint32_t depth,
|
||||
Tree *last_external_token) {
|
||||
StackHead head = {
|
||||
.node = node,
|
||||
.is_halted = false,
|
||||
.depth = depth,
|
||||
.push_count = push_count,
|
||||
.last_external_token = last_external_token,
|
||||
.is_halted = false,
|
||||
};
|
||||
array_push(&self->heads, head);
|
||||
stack_node_retain(node);
|
||||
|
|
@ -185,7 +197,7 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
|
|||
}
|
||||
|
||||
static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
|
||||
unsigned push_count,
|
||||
uint32_t push_count, uint32_t depth,
|
||||
Tree *last_external_token) {
|
||||
for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
|
||||
StackVersion version = self->slices.contents[i].version;
|
||||
|
|
@ -196,7 +208,7 @@ static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
|
|||
}
|
||||
}
|
||||
|
||||
StackVersion version = ts_stack__add_version(self, node, push_count, last_external_token);
|
||||
StackVersion version = ts_stack__add_version(self, node, push_count, depth, last_external_token);
|
||||
StackSlice slice = { *trees, version };
|
||||
array_push(&self->slices, slice);
|
||||
}
|
||||
|
|
@ -207,7 +219,7 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
array_clear(&self->iterators);
|
||||
|
||||
StackHead *head = array_get(&self->heads, version);
|
||||
unsigned push_count = head->push_count;
|
||||
uint32_t starting_push_count = head->push_count;
|
||||
Tree *last_external_token = head->last_external_token;
|
||||
Iterator iterator = {
|
||||
.node = head->node,
|
||||
|
|
@ -215,6 +227,7 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
.tree_count = 0,
|
||||
.is_pending = true,
|
||||
.push_count = 0,
|
||||
.depth = head->depth,
|
||||
};
|
||||
array_push(&self->iterators, iterator);
|
||||
|
||||
|
|
@ -224,9 +237,14 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
StackNode *node = iterator->node;
|
||||
bool is_done = node == self->base_node;
|
||||
|
||||
StackIterateAction action =
|
||||
callback(payload, node->state, &iterator->trees, iterator->tree_count,
|
||||
is_done, iterator->is_pending);
|
||||
StackIterateAction action = callback(
|
||||
payload,
|
||||
node->state,
|
||||
&iterator->trees,
|
||||
iterator->tree_count,
|
||||
is_done,
|
||||
iterator->is_pending
|
||||
);
|
||||
|
||||
bool should_pop = action & StackIteratePop;
|
||||
bool should_stop = action & StackIterateStop || node->link_count == 0;
|
||||
|
|
@ -236,8 +254,14 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
if (!should_stop)
|
||||
ts_tree_array_copy(trees, &trees);
|
||||
array_reverse(&trees);
|
||||
ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count,
|
||||
last_external_token);
|
||||
ts_stack__add_slice(
|
||||
self,
|
||||
node,
|
||||
&trees,
|
||||
starting_push_count + iterator->push_count,
|
||||
iterator->depth,
|
||||
last_external_token
|
||||
);
|
||||
}
|
||||
|
||||
if (should_stop) {
|
||||
|
|
@ -265,9 +289,13 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
|
||||
next_iterator->node = link.node;
|
||||
next_iterator->push_count += link.push_count;
|
||||
if (link.depth > 0) {
|
||||
next_iterator->depth = link.depth;
|
||||
}
|
||||
if (link.tree) {
|
||||
if (!link.tree->extra) {
|
||||
next_iterator->tree_count++;
|
||||
next_iterator->depth--;
|
||||
if (!link.is_pending)
|
||||
next_iterator->is_pending = false;
|
||||
}
|
||||
|
|
@ -296,13 +324,7 @@ Stack *ts_stack_new() {
|
|||
array_grow(&self->node_pool, MAX_NODE_POOL_SIZE);
|
||||
|
||||
self->base_node = stack_node_new(NULL, NULL, false, 1, &self->node_pool);
|
||||
stack_node_retain(self->base_node);
|
||||
array_push(&self->heads, ((StackHead){
|
||||
self->base_node,
|
||||
false,
|
||||
0,
|
||||
NULL
|
||||
}));
|
||||
ts_stack_clear(self);
|
||||
|
||||
return self;
|
||||
}
|
||||
|
|
@ -378,9 +400,12 @@ void ts_stack_push(Stack *self, StackVersion version, Tree *tree,
|
|||
StackNode *new_node = stack_node_new(head->node, tree, is_pending, state, &self->node_pool);
|
||||
if (state == ERROR_STATE) {
|
||||
new_node->links[0].push_count = head->push_count;
|
||||
new_node->links[0].depth = head->depth;
|
||||
head->push_count = 0;
|
||||
head->depth = 0;
|
||||
} else {
|
||||
head->push_count++;
|
||||
if (!tree->extra) head->depth++;
|
||||
}
|
||||
stack_node_release(head->node, &self->node_pool);
|
||||
head->node = new_node;
|
||||
|
|
@ -415,9 +440,13 @@ INLINE StackIterateAction pop_count_callback(void *payload, TSStateId state,
|
|||
StackPopResult ts_stack_pop_count(Stack *self, StackVersion version,
|
||||
uint32_t count) {
|
||||
StackPopSession session = {
|
||||
.goal_tree_count = count, .found_error = false, .found_valid_path = false,
|
||||
.goal_tree_count = count,
|
||||
.found_error = false,
|
||||
.found_valid_path = false,
|
||||
};
|
||||
|
||||
StackPopResult pop = stack__iter(self, version, pop_count_callback, &session);
|
||||
|
||||
if (session.found_error) {
|
||||
if (session.found_valid_path) {
|
||||
StackSlice error_slice = pop.slices.contents[0];
|
||||
|
|
@ -425,13 +454,15 @@ StackPopResult ts_stack_pop_count(Stack *self, StackVersion version,
|
|||
array_erase(&pop.slices, 0);
|
||||
if (array_front(&pop.slices)->version != error_slice.version) {
|
||||
ts_stack_remove_version(self, error_slice.version);
|
||||
for (StackVersion i = 0; i < pop.slices.size; i++)
|
||||
for (StackVersion i = 0; i < pop.slices.size; i++) {
|
||||
pop.slices.contents[i].version--;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pop.stopped_at_error = true;
|
||||
}
|
||||
}
|
||||
|
||||
return pop;
|
||||
}
|
||||
|
||||
|
|
@ -482,6 +513,12 @@ void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
|
|||
array_erase(&self->heads, v1);
|
||||
}
|
||||
|
||||
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
|
||||
StackHead temporary_head = self->heads.contents[v1];
|
||||
self->heads.contents[v1] = self->heads.contents[v2];
|
||||
self->heads.contents[v2] = temporary_head;
|
||||
}
|
||||
|
||||
StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
|
||||
assert(version < self->heads.size);
|
||||
array_push(&self->heads, self->heads.contents[version]);
|
||||
|
|
@ -491,28 +528,38 @@ StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
|
|||
return self->heads.size - 1;
|
||||
}
|
||||
|
||||
bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version) {
|
||||
StackHead *head = &self->heads.contents[version];
|
||||
StackHead *new_head = &self->heads.contents[new_version];
|
||||
StackNode *node = head->node;
|
||||
StackNode *new_node = new_head->node;
|
||||
|
||||
if (new_node->state == node->state &&
|
||||
new_node->position.chars == node->position.chars &&
|
||||
new_node->error_count == node->error_count &&
|
||||
new_node->error_cost == node->error_cost &&
|
||||
ts_tree_external_token_state_eq(new_head->last_external_token, head->last_external_token)) {
|
||||
for (uint32_t j = 0; j < new_node->link_count; j++)
|
||||
stack_node_add_link(node, new_node->links[j]);
|
||||
if (new_head->push_count > head->push_count)
|
||||
head->push_count = new_head->push_count;
|
||||
ts_stack_remove_version(self, new_version);
|
||||
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
|
||||
if (ts_stack_can_merge(self, version1, version2)) {
|
||||
ts_stack_force_merge(self, version1, version2);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
|
||||
StackHead *head1 = &self->heads.contents[version1];
|
||||
StackHead *head2 = &self->heads.contents[version2];
|
||||
return
|
||||
head1->node->state == head2->node->state &&
|
||||
head1->node->position.chars == head2->node->position.chars &&
|
||||
ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token) &&
|
||||
((head1->node->error_count == 0 && head2->node->error_count == 0) ||
|
||||
(head1->depth == head2->depth));
|
||||
}
|
||||
|
||||
void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion version2) {
|
||||
StackHead *head1 = &self->heads.contents[version1];
|
||||
StackHead *head2 = &self->heads.contents[version2];
|
||||
for (uint32_t i = 0; i < head2->node->link_count; i++) {
|
||||
stack_node_add_link(head1->node, head2->node->links[i]);
|
||||
}
|
||||
if (head2->push_count > head1->push_count) {
|
||||
head1->push_count = head2->push_count;
|
||||
}
|
||||
ts_stack_remove_version(self, version2);
|
||||
}
|
||||
|
||||
void ts_stack_halt(Stack *self, StackVersion version) {
|
||||
array_get(&self->heads, version)->is_halted = true;
|
||||
}
|
||||
|
|
@ -528,10 +575,11 @@ void ts_stack_clear(Stack *self) {
|
|||
}
|
||||
array_clear(&self->heads);
|
||||
array_push(&self->heads, ((StackHead){
|
||||
self->base_node,
|
||||
false,
|
||||
0,
|
||||
NULL
|
||||
.node = self->base_node,
|
||||
.last_external_token = NULL,
|
||||
.depth = 0,
|
||||
.push_count = 0,
|
||||
.is_halted = false,
|
||||
}));
|
||||
}
|
||||
|
||||
|
|
@ -548,15 +596,14 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
|
|||
|
||||
array_clear(&self->iterators);
|
||||
for (uint32_t i = 0; i < self->heads.size; i++) {
|
||||
if (ts_stack_is_halted(self, i)) continue;
|
||||
StackHead *head = &self->heads.contents[i];
|
||||
if (head->is_halted)
|
||||
continue;
|
||||
fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
|
||||
fprintf(
|
||||
f,
|
||||
"node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, "
|
||||
"labeltooltip=\"push_count: %u",
|
||||
i, head->node, i, head->push_count);
|
||||
"labeltooltip=\"push_count: %u\ndepth: %u",
|
||||
i, head->node, i, head->push_count, head->depth);
|
||||
|
||||
if (head->last_external_token) {
|
||||
const TSExternalTokenState *s = &head->last_external_token->external_token_state;
|
||||
|
|
@ -614,7 +661,7 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
|
|||
fprintf(f, "fontcolor=gray ");
|
||||
|
||||
if (!link.tree) {
|
||||
fprintf(f, "color=red, tooltip=\"push_count: %u\"", link.push_count);
|
||||
fprintf(f, "color=red, tooltip=\"push_count: %u, depth: %u\"", link.push_count, link.depth);
|
||||
} else if (link.tree->symbol == ts_builtin_sym_error) {
|
||||
fprintf(f, "label=\"ERROR\"");
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue