Rework logic for when to abandon parses with errors

This commit is contained in:
Max Brunsfeld 2016-05-29 22:36:47 -07:00
parent 6535704870
commit ea47fdc0fe
5 changed files with 206 additions and 134 deletions

View file

@ -156,6 +156,9 @@ describe("The Corpus", []() {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
// ts_document_set_debugger(document, log_debugger_make(true));
// ts_document_print_debugging_graphs(document, true);
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
ts_document_parse(document);

View file

@ -583,7 +583,7 @@ static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice,
TSSymbol symbol = repair.symbol;
StackSlice new_slice = array_pop(&pop.slices);
TreeArray children_below = new_slice.trees;
TreeArray children = new_slice.trees;
ts_stack_renumber_version(self->stack, new_slice.version, slice.version);
for (size_t i = pop.slices.size - 1; i + 1 > 0; i--) {
@ -593,38 +593,29 @@ static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice,
ts_stack_remove_version(self->stack, other_slice.version);
}
LOG_ACTION("repair_found sym:%s, child_count:%lu, skipped:%lu",
SYM_NAME(symbol), repair.count + count_above_error, skip_count);
TreeArray skipped_children = array_new();
CHECK(array_grow(&skipped_children, skip_count));
for (size_t i = repair.count; i < children.size; i++)
array_push(&skipped_children, children.contents[i]);
if (skip_count > 0) {
TreeArray skipped_children = array_new();
CHECK(array_grow(&skipped_children, skip_count));
for (size_t i = repair.count; i < children_below.size; i++)
array_push(&skipped_children, children_below.contents[i]);
TSTree *error = ts_tree_make_error_node(&skipped_children);
CHECK(error);
children_below.size = repair.count;
array_push(&children_below, error);
}
TSTree *error = ts_tree_make_error_node(&skipped_children);
CHECK(error);
children.size = repair.count;
array_push(&children, error);
for (size_t i = 0; i < slice.trees.size; i++)
array_push(&children_below, slice.trees.contents[i]);
array_push(&children, slice.trees.contents[i]);
array_delete(&slice.trees);
TSTree *parent =
ts_tree_make_node(symbol, children_below.size, children_below.contents,
ts_tree_make_node(symbol, children.size, children.contents,
ts_language_symbol_metadata(self->language, symbol));
CHECK(parent);
CHECK(ts_parser__push(self, slice.version, parent, next_state));
for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
size_t error_length = ts_stack_error_length(self->stack, i);
if ((error_length >= parent->error_size) ||
(error_length == 0 &&
ts_stack_last_repaired_error_size(self->stack, i) > parent->error_size))
ts_stack_halt(self->stack, i);
}
LOG_ACTION("repair_found sym:%s, child_count:%lu, skipped:%lu",
SYM_NAME(symbol), repair.count + count_above_error,
parent->error_size);
return RepairSucceeded;
@ -692,41 +683,64 @@ error:
static bool ts_parser__handle_error(TSParser *self, StackVersion version,
TSStateId state, TSTree *lookahead) {
size_t previous_version_count = ts_stack_version_count(self->stack);
bool has_shift_action = false;
array_clear(&self->reduce_actions);
for (TSSymbol symbol = 0; symbol < self->language->symbol_count; symbol++) {
size_t action_count;
const TSParseAction *actions =
ts_language_actions(self->language, state, symbol, &action_count);
for (size_t i = 0; i < action_count; i++) {
TSParseAction action = actions[i];
if (action.type == TSParseActionTypeReduce && !action.extra)
CHECK(ts_reduce_action_set_add(
&self->reduce_actions,
(ReduceAction){
.symbol = action.data.symbol, .count = action.data.child_count,
}));
if (action.extra)
continue;
switch (action.type) {
case TSParseActionTypeShift:
case TSParseActionTypeRecover:
has_shift_action = true;
break;
case TSParseActionTypeReduce:
if (action.data.child_count > 0)
CHECK(ts_reduce_action_set_add(
&self->reduce_actions,
(ReduceAction){
.symbol = action.data.symbol, .count = action.data.child_count,
}));
default:
break;
}
}
}
StackVersion scratch_version = ts_stack_split(self->stack, version);
CHECK(scratch_version != STACK_VERSION_NONE);
CHECK(ts_stack_push(self->stack, version, NULL, false, ts_parse_state_error));
size_t previous_version_count = ts_stack_version_count(self->stack);
bool did_reduce = false;
for (size_t i = 0; i < self->reduce_actions.size; i++) {
ReduceAction action = self->reduce_actions.contents[i];
Reduction reduction = ts_parser__reduce(self, scratch_version, action.symbol,
Reduction reduction = ts_parser__reduce(self, version, action.symbol,
action.count, false, true);
CHECK(reduction.status != ReduceFailed);
assert(reduction.status == ReduceSucceeded);
while (ts_stack_version_count(self->stack) > previous_version_count) {
CHECK(ts_stack_push(self->stack, previous_version_count, NULL, false,
ts_parse_state_error));
assert(ts_stack_merge(self->stack, version, previous_version_count));
switch (reduction.status) {
case ReduceFailed:
goto error;
case ReduceStoppedAtError:
ts_tree_array_delete(&reduction.slice.trees);
ts_stack_remove_version(self->stack, reduction.slice.version);
continue;
default:
did_reduce = true;
break;
}
}
ts_stack_remove_version(self->stack, scratch_version);
if (did_reduce && !has_shift_action)
ts_stack_renumber_version(self->stack, previous_version_count, version);
CHECK(ts_stack_push(self->stack, version, NULL, false, ts_parse_state_error));
while (ts_stack_version_count(self->stack) > previous_version_count) {
CHECK(ts_stack_push(self->stack, previous_version_count, NULL, false,
ts_parse_state_error));
assert(ts_stack_merge(self->stack, version, previous_version_count));
}
return true;
@ -736,27 +750,13 @@ error:
static bool ts_parser__recover(TSParser *self, StackVersion version,
TSStateId state, TSTree *lookahead) {
size_t error_length = ts_stack_error_length(self->stack, version);
LOG_ACTION("recover state:%u", state);
bool has_repaired = false;
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++)
if (i != version && ts_stack_error_length(self->stack, i) == 0 &&
ts_stack_last_repaired_error_size(self->stack, i) <= error_length) {
has_repaired = true;
break;
}
if (has_repaired) {
LOG_ACTION("final_recover state:%u, error_length:%lu ", state, error_length);
} else {
StackVersion new_version = ts_stack_duplicate_version(self->stack, version);
CHECK(new_version != STACK_VERSION_NONE);
CHECK(ts_parser__shift(
self, new_version, ts_parse_state_error, lookahead,
ts_language_symbol_metadata(self->language, lookahead->symbol).extra));
LOG_ACTION("recover_and_discard state:%u, error_length:%lu", state,
error_length);
}
StackVersion new_version = ts_stack_duplicate_version(self->stack, version);
CHECK(new_version != STACK_VERSION_NONE);
CHECK(ts_parser__shift(
self, new_version, ts_parse_state_error, lookahead,
ts_language_symbol_metadata(self->language, lookahead->symbol).extra));
CHECK(ts_parser__shift(self, version, state, lookahead, false));
return true;
@ -812,15 +812,10 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
break;
}
if (ts_stack_version_count(self->stack) == 1 && !self->finished_tree) {
LOG_ACTION("handle_error");
CHECK(ts_parser__handle_error(self, version, state, lookahead));
break;
} else {
LOG_ACTION("bail version:%d", version);
ts_stack_remove_version(self->stack, version);
return ParseActionRemoved;
}
LOG_ACTION("handle_error");
CHECK(ts_parser__handle_error(self, version, state, lookahead));
error_repair_failed = false;
break;
}
case TSParseActionTypeShift: {
@ -867,7 +862,9 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
case RepairFailed:
goto error;
case RepairNoneFound:
error_repair_failed = true;
if (last_reduction_version == STACK_VERSION_NONE) {
error_repair_failed = true;
}
break;
case RepairSucceeded:
last_reduction_version = reduction.slice.version;
@ -949,10 +946,9 @@ void ts_parser_set_debugger(TSParser *self, TSDebugger debugger) {
TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
ts_parser__start(self, input, previous_tree);
size_t max_position = 0;
ReusableNode current_reusable_node, next_reusable_node = { previous_tree, 0 };
ReusableNode reusable_node, current_reusable_node = { previous_tree, 0 };
for (;;) {
current_reusable_node = next_reusable_node;
TSTree *lookahead = NULL;
size_t last_position, position = 0;
@ -960,19 +956,13 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
for (StackVersion version = 0;
version < ts_stack_version_count(self->stack);) {
if (ts_stack_is_halted(self->stack, version)) {
version++;
continue;
}
ReusableNode reusable_node = current_reusable_node;
reusable_node = current_reusable_node;
for (bool removed = false; !removed;) {
last_position = position;
size_t new_position = ts_stack_top_position(self->stack, version).chars;
if (new_position > max_position) {
max_position = new_position;
next_reusable_node = reusable_node;
version++;
break;
} else if (new_position == max_position && version > 0) {
@ -1010,7 +1000,13 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
}
}
ts_stack_merge_all(self->stack);
current_reusable_node = reusable_node;
if (ts_stack_condense(self->stack)) {
LOG_ACTION("condense");
LOG_STACK();
}
ts_tree_release(lookahead);
if (ts_stack_version_count(self->stack) == 0) {

View file

@ -6,6 +6,7 @@
#include "runtime/length.h"
#include <assert.h>
#include <stdio.h>
#include <limits.h>
#define MAX_LINK_COUNT 8
#define MAX_NODE_POOL_SIZE 50
@ -26,7 +27,9 @@ struct StackNode {
StackLink links[MAX_LINK_COUNT];
short unsigned int link_count;
short unsigned int ref_count;
size_t error_length;
unsigned min_error_cost;
unsigned max_error_cost;
unsigned error_depth;
};
typedef struct {
@ -90,22 +93,35 @@ static StackNode *stack_node_new(StackNode *next, TSTree *tree, bool is_pending,
else if (!(node = ts_malloc(sizeof(StackNode))))
return NULL;
bool is_error = (state == ts_parse_state_error);
*node = (StackNode){
.ref_count = 1,
.link_count = 0,
.links = {},
.state = state,
.position = position,
.error_length = (state == ts_parse_state_error) ? 1 : 0,
.error_depth = 0,
.min_error_cost = is_error ? 1 : 0,
.max_error_cost = is_error ? 1 : 0,
};
if (next) {
if (tree)
ts_tree_retain(tree);
stack_node_retain(next);
node->link_count = 1;
node->links[0] = (StackLink){ next, tree, is_pending };
node->error_length += next->error_length;
node->link_count = 1;
node->min_error_cost += next->min_error_cost;
node->max_error_cost += next->max_error_cost;
node->error_depth = next->error_depth;
if (tree) {
ts_tree_retain(tree);
node->min_error_cost += tree->error_size;
node->max_error_cost += tree->error_size;
} else {
node->error_depth++;
}
}
return node;
@ -127,8 +143,16 @@ static void stack_node_add_link(StackNode *self, StackLink link) {
if (self->link_count < MAX_LINK_COUNT) {
stack_node_retain(link.node);
if (link.tree)
if (link.tree) {
ts_tree_retain(link.tree);
size_t min_error_cost = link.tree->error_size + link.node->min_error_cost;
size_t max_error_cost = link.tree->error_size + link.node->max_error_cost;
if (min_error_cost < self->min_error_cost)
self->min_error_cost = min_error_cost;
if (max_error_cost < self->max_error_cost)
self->max_error_cost = max_error_cost;
}
self->links[self->link_count++] = (StackLink){
link.node, link.tree, link.is_pending,
};
@ -257,7 +281,7 @@ Stack *ts_stack_new() {
if (!array_grow(&self->pop_paths, 4))
goto error;
if (!array_grow(&self->node_pool, 20))
if (!array_grow(&self->node_pool, MAX_NODE_POOL_SIZE))
goto error;
self->base_node =
@ -315,10 +339,6 @@ TSLength ts_stack_top_position(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->node->position;
}
/*
 * Read the `error_length` recorded on the node at the head of the given
 * stack version.
 */
size_t ts_stack_error_length(const Stack *self, StackVersion version) {
  const StackNode *top = array_get(&self->heads, version)->node;
  return top->error_length;
}
size_t ts_stack_last_repaired_error_size(const Stack *self,
StackVersion version) {
StackNode *node = array_get(&self->heads, version)->node;
@ -333,14 +353,6 @@ size_t ts_stack_last_repaired_error_size(const Stack *self,
return 0;
}
/*
 * Flag the head of the given stack version as halted by setting its
 * `is_halted` field. The head itself is left in the `heads` array.
 */
void ts_stack_halt(Stack *self, StackVersion version) {
array_get(&self->heads, version)->is_halted = true;
}
/*
 * Report whether the head of the given stack version has its `is_halted`
 * flag set.
 */
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->is_halted;
}
bool ts_stack_push(Stack *self, StackVersion version, TSTree *tree,
bool is_pending, TSStateId state) {
StackNode *node = array_get(&self->heads, version)->node;
@ -394,7 +406,8 @@ StackPopResult ts_stack_pop_count(Stack *self, StackVersion version,
ts_stack_remove_version(self, error_slice.version);
ts_tree_array_delete(&error_slice.trees);
array_erase(&pop.slices, 0);
pop.slices.contents[0].version--;
for (StackVersion i = 0; i < pop.slices.size; i++)
pop.slices.contents[i].version--;
} else {
pop.status = StackPopStoppedAtError;
}
@ -470,7 +483,8 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version)
StackNode *new_node = self->heads.contents[new_version].node;
if (new_node->state == node->state &&
new_node->position.chars == node->position.chars) {
new_node->position.chars == node->position.chars &&
new_node->error_depth == node->error_depth) {
for (size_t j = 0; j < new_node->link_count; j++)
stack_node_add_link(node, new_node->links[j]);
ts_stack_remove_version(self, new_version);
@ -482,26 +496,8 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version)
void ts_stack_merge_from(Stack *self, StackVersion start_version) {
for (size_t i = start_version; i < self->heads.size; i++) {
if (self->heads.contents[i].is_halted) {
ts_stack_remove_version(self, i);
i--;
continue;
}
StackNode *node = self->heads.contents[i].node;
for (size_t j = start_version; j < i; j++) {
StackNode *prior_node = self->heads.contents[j].node;
if (prior_node->state == node->state &&
prior_node->position.chars == node->position.chars) {
if (prior_node->error_length < node->error_length) {
ts_stack_remove_version(self, i);
} else if (node->error_length < prior_node->error_length) {
ts_stack_remove_version(self, j);
} else {
for (size_t k = 0; k < node->link_count; k++)
stack_node_add_link(prior_node, node->links[k]);
ts_stack_remove_version(self, i);
}
if (ts_stack_merge(self, j, i)) {
i--;
break;
}
@ -513,6 +509,84 @@ void ts_stack_merge_all(Stack *self) {
ts_stack_merge_from(self, 0);
}
/*
 * Remove the link at index `i` from `self`, closing the gap in the `links`
 * array and dropping the references that the link held.
 *
 * self      - node whose outgoing link is removed.
 * i         - index of the link; must be less than `self->link_count`.
 * node_pool - pool handed to `stack_node_release` for the linked node.
 *
 * A link's tree may be NULL (nodes are pushed with a NULL tree when entering
 * the error state), so the tree is only released when present, mirroring the
 * NULL check done before retaining a link's tree elsewhere in this file.
 */
void stack_node_remove_link(StackNode *self, size_t i,
                            StackNodeArray *node_pool) {
  assert(i < self->link_count);

  /* Copy the link out first so the array can be compacted before the
   * references it holds are released. */
  StackLink removed = self->links[i];

  self->link_count--;
  memmove(&self->links[i], &self->links[i + 1],
          (self->link_count - i) * sizeof(StackLink));

  if (removed.tree)
    ts_tree_release(removed.tree);
  stack_node_release(removed.node, node_pool);
}
/*
 * Drop every path below `self` whose error cost is at least `cost`.
 *
 * For each outgoing link, the budget left for the linked node is `cost`
 * minus the error size of the link's tree (a NULL tree contributes nothing):
 *   - if even the linked node's cheapest path (`min_error_cost`) meets or
 *     exceeds that budget, the whole link is removed;
 *   - otherwise, if its most expensive path (`max_error_cost`) meets or
 *     exceeds the budget, the linked node is pruned recursively;
 *   - otherwise the link is kept untouched.
 *
 * A link whose tree alone costs more than `cost` is removed outright. The
 * subtraction is never performed in that case, because with unsigned
 * arithmetic it would wrap around to a huge budget and wrongly keep the link.
 *
 * NOTE(review): `self->min_error_cost` / `self->max_error_cost` are not
 * recomputed after links are removed, and recursion edits linked nodes in
 * place; confirm callers do not rely on refreshed bounds or on those nodes
 * being unshared.
 */
void stack_node_prune_paths_with_error_cost(StackNode *self, size_t cost,
                                            StackNodeArray *node_pool) {
  size_t i = 0;
  while (i < self->link_count) {
    StackLink link = self->links[i];
    size_t tree_cost = link.tree ? link.tree->error_size : 0;

    /* The tree by itself already exceeds the budget: no path through this
     * link can be cheap enough. Removing shifts the next link into slot i. */
    if (tree_cost > cost) {
      stack_node_remove_link(self, i, node_pool);
      continue;
    }

    size_t link_cost = cost - tree_cost;
    if (link.node->min_error_cost >= link_cost) {
      stack_node_remove_link(self, i, node_pool);
      continue;
    }

    if (link.node->max_error_cost >= link_cost)
      stack_node_prune_paths_with_error_cost(link.node, link_cost, node_pool);

    i++;
  }
}
/*
 * Reduce the set of stack versions in two passes.
 *
 * Pass 1 tries to merge each version into an earlier one and, among the
 * versions that remain, records the smallest `error_depth` together with the
 * smallest `min_error_cost` seen at that depth.
 *
 * Pass 2 compares every version against that best one:
 *   - versions more than one error level deeper are removed;
 *   - versions exactly one level deeper are removed when even their cheapest
 *     path costs at least the best cost, or have their paths pruned when only
 *     their most expensive path does.
 *
 * Returns true if any version was merged or removed, or if path pruning was
 * invoked on any version.
 */
bool ts_stack_condense(Stack *self) {
  bool changed = false;
  unsigned best_depth = UINT_MAX;
  unsigned best_cost = UINT_MAX;

  /* Pass 1: merge duplicates and find the best (depth, cost) pair. */
  size_t index = 0;
  while (index < self->heads.size) {
    StackNode *head_node = self->heads.contents[index].node;

    bool merged = false;
    for (size_t earlier = 0; earlier < index && !merged; earlier++)
      merged = ts_stack_merge(self, earlier, index);

    if (merged) {
      /* Re-examine the same index without advancing. */
      changed = true;
      continue;
    }

    bool is_shallower = head_node->error_depth < best_depth;
    bool is_cheaper_at_same_depth = head_node->error_depth == best_depth &&
                                    head_node->min_error_cost < best_cost;
    if (is_shallower || is_cheaper_at_same_depth) {
      best_depth = head_node->error_depth;
      best_cost = head_node->min_error_cost;
    }

    index++;
  }

  /* Pass 2: discard or prune versions that cannot beat the best one. */
  unsigned depth_limit = best_depth + 1;
  index = 0;
  while (index < self->heads.size) {
    StackNode *head_node = self->heads.contents[index].node;
    bool too_deep = head_node->error_depth > depth_limit;
    bool at_limit = head_node->error_depth == depth_limit;

    if (too_deep || (at_limit && head_node->min_error_cost >= best_cost)) {
      changed = true;
      ts_stack_remove_version(self, index);
      continue;
    }

    if (at_limit && head_node->max_error_cost >= best_cost) {
      changed = true;
      stack_node_prune_paths_with_error_cost(head_node, best_cost,
                                             &self->node_pool);
    }

    index++;
  }

  return changed;
}
void ts_stack_clear(Stack *self) {
stack_node_retain(self->base_node);
for (size_t i = 0; i < self->heads.size; i++)
@ -571,8 +645,13 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
node->links[0].tree->extra)
fprintf(f, "shape=point margin=0 label=\"\"");
else
fprintf(f, "label=%d", node->state);
fprintf(f, "];\n");
fprintf(f, "label=\"%d\"", node->state);
fprintf(f, " tooltip=\"error-count:%u, error-cost:", node->error_depth);
if (node->min_error_cost == node->max_error_cost)
fprintf(f, "%u", node->min_error_cost);
else
fprintf(f, "%u-%u", node->min_error_cost, node->max_error_cost);
fprintf(f, "\"];\n");
for (int j = 0; j < node->link_count; j++) {
StackLink link = node->links[j];

View file

@ -73,14 +73,6 @@ TSStateId ts_stack_top_state(const Stack *, StackVersion);
*/
TSLength ts_stack_top_position(const Stack *, StackVersion);
size_t ts_stack_error_length(const Stack *, StackVersion);
size_t ts_stack_last_repaired_error_size(const Stack *, StackVersion);
void ts_stack_halt(Stack *, StackVersion);
bool ts_stack_is_halted(const Stack *, StackVersion);
/*
* Push a tree and state onto the given head of the stack. This could cause
* the version to merge with an existing version.
@ -111,6 +103,8 @@ void ts_stack_merge_from(Stack *, StackVersion);
void ts_stack_merge_all(Stack *);
bool ts_stack_condense(Stack *);
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
StackVersion ts_stack_duplicate_version(Stack *, StackVersion);

View file

@ -92,7 +92,7 @@ recur:
offset = ts_length_zero();
for (size_t i = 0; i < self->child_count; i++) {
TSTree *child = self->children[i];
if (child->context.parent != self) {
if (child->context.parent != self || child->context.index != i) {
child->context.parent = self;
child->context.index = i;
child->context.offset = offset;