Abort erroneous parse versions more eagerly

This commit is contained in:
Max Brunsfeld 2016-06-02 14:04:48 -07:00
parent 9b67b21dcd
commit 00a0939504
5 changed files with 125 additions and 130 deletions

View file

@ -156,9 +156,6 @@ describe("The Corpus", []() {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
// ts_document_set_debugger(document, log_debugger_make(true));
// ts_document_print_debugging_graphs(document, true);
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
ts_document_parse(document);

View file

@ -121,13 +121,13 @@ describe("Stack", [&]() {
});
});
describe("merge_all()", [&]() {
describe("merge()", [&]() {
before_each([&]() {
// . <──0── A <──1── B*
// ↑
// └───2─── C*
ts_stack_push(stack, 0, trees[0], false, stateA);
ts_stack_split(stack, 0);
ts_stack_duplicate_version(stack, 0);
ts_stack_push(stack, 0, trees[1], false, stateB);
ts_stack_push(stack, 1, trees[2], false, stateC);
});
@ -142,7 +142,7 @@ describe("Stack", [&]() {
// . <──0── A <──1── B <──3── D*
// ↑ |
// └───2─── C <──4───┘
ts_stack_merge_all(stack);
AssertThat(ts_stack_merge(stack, 0, 1), IsTrue());
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
{stateD, 0},
@ -154,7 +154,7 @@ describe("Stack", [&]() {
});
it("does not combine versions that have different states", [&]() {
ts_stack_merge_all(stack);
AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
});
@ -166,7 +166,7 @@ describe("Stack", [&]() {
ts_stack_push(stack, 0, trees[3], false, stateD);
ts_stack_push(stack, 1, trees[4], false, stateD);
ts_stack_merge_all(stack);
AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
});
@ -183,7 +183,7 @@ describe("Stack", [&]() {
// . <──0── A <──1── B <──3── D <──5── E*
// ↑ |
// └───2─── C <──4───┘
ts_stack_merge_all(stack);
AssertThat(ts_stack_merge(stack, 0, 1), IsTrue());
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
{stateE, 0},
@ -272,7 +272,7 @@ describe("Stack", [&]() {
ts_stack_push(stack, 1, trees[4], false, stateE);
ts_stack_push(stack, 1, trees[5], false, stateF);
ts_stack_push(stack, 1, trees[6], false, stateD);
ts_stack_merge_all(stack);
ts_stack_merge(stack, 0, 1);
ts_stack_push(stack, 0, trees[10], false, stateI);
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
@ -393,7 +393,7 @@ describe("Stack", [&]() {
ts_stack_push(stack, 1, trees[8], false, stateH);
ts_stack_push(stack, 1, trees[9], false, stateD);
ts_stack_push(stack, 1, trees[10], false, stateI);
ts_stack_merge_all(stack);
ts_stack_merge(stack, 0, 1);
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({

View file

@ -1,6 +1,7 @@
#include "runtime/parser.h"
#include <assert.h>
#include <stdio.h>
#include <limits.h>
#include <stdbool.h>
#include "tree_sitter/runtime.h"
#include "tree_sitter/parser.h"
@ -43,6 +44,8 @@
goto error; \
}
static const unsigned ERROR_COST_THRESHOLD = 5;
static const TSParseAction ERROR_ACTION = {.type = TSParseActionTypeError };
static const size_t NO_ERROR_DEPTH = (size_t)(-1);
@ -62,12 +65,6 @@ typedef struct {
size_t best_repair_skip_count;
} ErrorRepairSession;
typedef enum {
ParseActionFailed,
ParseActionUpdated,
ParseActionRemoved,
} ParseActionResult;
typedef struct {
enum {
ReduceFailed,
@ -122,7 +119,8 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self,
} else if (!last_child->extra) {
TSParseAction action =
ts_language_last_action(self->language, state, last_child->symbol);
assert(action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover);
assert(action.type == TSParseActionTypeShift ||
action.type == TSParseActionTypeRecover);
state = action.data.to_state;
}
@ -180,6 +178,33 @@ static void ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) {
} while (ts_tree_is_fragile(reusable_node->tree));
}
/*
 * Condense the parser's stack: remove every version that has been marked as
 * halted, and try to merge each remaining version into one of the versions
 * that precede it.
 *
 * Returns true if the stack was modified in any way (a halted version was
 * removed or two versions were merged), false if it was left untouched. The
 * caller uses this result to decide whether to log the condensed stack, so
 * removals must be reported as well as merges.
 */
static bool ts_parser__condense_stack(TSParser *self) {
  bool result = false;
  StackVersion i = 0;

  while (i < ts_stack_version_count(self->stack)) {
    if (ts_stack_is_halted(self->stack, i)) {
      ts_stack_remove_version(self->stack, i);
      result = true;
      /* Do not advance: index i is re-examined on the next iteration
       * (presumably the following version has shifted into this slot). */
      continue;
    }

    bool did_merge = false;
    for (StackVersion j = 0; j < i; j++) {
      if (ts_stack_merge(self->stack, j, i)) {
        did_merge = true;
        break;
      }
    }

    if (did_merge) {
      result = true;
      /* Version i was folded into an earlier version; revisit index i. */
      continue;
    }

    i++;
  }

  return result;
}
static bool ts_parser__can_reuse(TSParser *self, StackVersion version,
TSTree *tree) {
if (tree->symbol == ts_builtin_sym_error)
@ -441,7 +466,15 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
}
}
ts_stack_merge_from(self->stack, initial_version_count);
for (StackVersion i = initial_version_count;
i < ts_stack_version_count(self->stack); i++) {
for (StackVersion j = initial_version_count; j < i; j++) {
if (ts_stack_merge(self->stack, j, i)) {
i--;
break;
}
}
}
return (Reduction){ ReduceSucceeded, pop.slices.contents[0] };
@ -617,6 +650,20 @@ static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice,
SYM_NAME(symbol), repair.count + count_above_error,
parent->error_size);
unsigned my_error_cost = ts_stack_error_cost(self->stack, slice.version);
unsigned my_error_depth = ts_stack_error_depth(self->stack, slice.version);
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
if (i != slice.version) {
unsigned error_cost = ts_stack_error_cost(self->stack, i);
unsigned error_depth = ts_stack_error_depth(self->stack, i);
if (error_depth > my_error_depth + 1 ||
(error_depth == my_error_depth + 1 && error_cost >= my_error_cost)) {
LOG_ACTION("halt_other version:%u", i);
ts_stack_halt(self->stack, i);
}
}
}
return RepairSucceeded;
error:
@ -673,7 +720,7 @@ static bool ts_parser__accept(TSParser *self, StackVersion version) {
}
ts_stack_remove_version(self->stack, pop.slices.contents[0].version);
ts_stack_remove_version(self->stack, version);
ts_stack_halt(self->stack, version);
return true;
@ -750,6 +797,30 @@ error:
static bool ts_parser__recover(TSParser *self, StackVersion version,
TSStateId state, TSTree *lookahead) {
if (lookahead->symbol == ts_builtin_sym_end) {
LOG_ACTION("recover_eof");
TreeArray children = array_new();
TSTree *parent = ts_tree_make_error_node(&children);
return ts_parser__push(self, version, parent, 1);
}
unsigned my_error_cost = ts_stack_error_cost(self->stack, version);
unsigned my_error_depth = ts_stack_error_depth(self->stack, version);
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
if (i != version) {
unsigned error_cost = ts_stack_error_cost(self->stack, i);
unsigned error_depth = ts_stack_error_depth(self->stack, i);
if (error_depth < my_error_depth - 1 ||
(error_depth == my_error_depth - 1 && error_cost <= my_error_cost) ||
(error_depth == my_error_depth &&
error_cost + ERROR_COST_THRESHOLD <= my_error_cost)) {
ts_stack_halt(self->stack, version);
LOG_ACTION("bail_on_error");
return true;
}
}
}
LOG_ACTION("recover state:%u", state);
StackVersion new_version = ts_stack_duplicate_version(self->stack, version);
@ -765,15 +836,8 @@ error:
return false;
}
/*
 * Handle error recovery at end of input: build an error node from a newly
 * created child array and push it onto the given stack version.
 *
 * Returns whatever ts_parser__push reports for that push.
 * NOTE(review): the meaning of the final argument (1) to ts_parser__push is
 * not visible here -- confirm against its definition.
 */
static bool ts_parser__recover_eof(TSParser *self, StackVersion version) {
  TreeArray no_children = array_new();
  TSTree *error_node = ts_tree_make_error_node(&no_children);
  bool pushed = ts_parser__push(self, version, error_node, 1);
  return pushed;
}
static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
StackVersion version,
TSTree *lookahead) {
static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
TSTree *lookahead) {
for (;;) {
TSStateId state = ts_stack_top_state(self->stack, version);
@ -830,7 +894,7 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
CHECK(ts_parser__shift(self, version, next_state, lookahead,
action.extra));
return ParseActionUpdated;
return true;
}
case TSParseActionTypeReduce: {
@ -879,18 +943,13 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
case TSParseActionTypeAccept: {
LOG_ACTION("accept");
CHECK(ts_parser__accept(self, version));
return ParseActionRemoved;
return true;
}
case TSParseActionTypeRecover: {
if (lookahead->symbol == ts_builtin_sym_end) {
LOG_ACTION("recover_eof");
CHECK(ts_parser__recover_eof(self, version));
} else {
CHECK(ts_parser__recover(self, version, action.data.to_state,
lookahead));
}
return ParseActionUpdated;
CHECK(ts_parser__recover(self, version, action.data.to_state,
lookahead));
return true;
}
}
}
@ -900,7 +959,7 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
}
error:
return ParseActionFailed;
return false;
}
bool ts_parser_init(TSParser *self) {
@ -958,7 +1017,12 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
version < ts_stack_version_count(self->stack);) {
reusable_node = current_reusable_node;
for (bool removed = false; !removed;) {
for (;;) {
if (ts_stack_is_halted(self->stack, version)) {
version++;
break;
}
last_position = position;
size_t new_position = ts_stack_top_position(self->stack, version).chars;
if (new_position > max_position) {
@ -987,22 +1051,16 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
LOG_ACTION("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
ts_tree_total_chars(lookahead));
switch (ts_parser__consume_lookahead(self, version, lookahead)) {
case ParseActionFailed:
ts_tree_release(lookahead);
goto error;
case ParseActionRemoved:
removed = true;
break;
case ParseActionUpdated:
break;
if (!ts_parser__consume_lookahead(self, version, lookahead)) {
ts_tree_release(lookahead);
goto error;
}
}
}
current_reusable_node = reusable_node;
if (ts_stack_condense(self->stack)) {
if (ts_parser__condense_stack(self)) {
LOG_ACTION("condense");
LOG_STACK();
}

View file

@ -6,7 +6,6 @@
#include "runtime/length.h"
#include <assert.h>
#include <stdio.h>
#include <limits.h>
#define MAX_LINK_COUNT 8
#define MAX_NODE_POOL_SIZE 50
@ -339,6 +338,14 @@ TSLength ts_stack_top_position(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->node->position;
}
/*
 * Return the min_error_cost recorded on the node at the head of the given
 * stack version.
 */
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
  const StackNode *top = array_get(&self->heads, version)->node;
  return top->min_error_cost;
}
/*
 * Return the error_depth recorded on the node at the head of the given
 * stack version.
 */
unsigned ts_stack_error_depth(const Stack *self, StackVersion version) {
  const StackNode *top = array_get(&self->heads, version)->node;
  return top->error_depth;
}
size_t ts_stack_last_repaired_error_size(const Stack *self,
StackVersion version) {
StackNode *node = array_get(&self->heads, version)->node;
@ -364,7 +371,7 @@ bool ts_stack_push(Stack *self, StackVersion version, TSTree *tree,
if (!new_node)
return false;
stack_node_release(node, &self->node_pool);
self->heads.contents[version] = (StackHead){ new_node, false };
self->heads.contents[version].node = new_node;
return true;
}
@ -473,13 +480,6 @@ StackVersion ts_stack_duplicate_version(Stack *self, StackVersion version) {
return self->heads.size - 1;
}
/*
 * Append a copy of the given version's head to the list of heads, retaining
 * the node it points to so that both heads hold a reference.
 *
 * Returns the index of the newly appended version, or STACK_VERSION_NONE if
 * the head could not be appended.
 */
StackVersion ts_stack_split(Stack *self, StackVersion version) {
  StackHead original = self->heads.contents[version];
  if (array_push(&self->heads, original)) {
    stack_node_retain(original.node);
    return self->heads.size - 1;
  }
  return STACK_VERSION_NONE;
}
bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version) {
StackNode *node = self->heads.contents[version].node;
StackNode *new_node = self->heads.contents[new_version].node;
@ -496,21 +496,6 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version)
}
}
/*
 * Starting at start_version, try to merge every version into one of the
 * versions between start_version and itself. Versions below start_version
 * are never touched.
 *
 * When a merge succeeds the same index is examined again rather than
 * advanced past (presumably ts_stack_merge removes the merged version, so a
 * different version now occupies that index -- confirm against its
 * definition).
 */
void ts_stack_merge_from(Stack *self, StackVersion start_version) {
  size_t candidate = start_version;
  while (candidate < self->heads.size) {
    bool absorbed = false;
    for (size_t target = start_version; target < candidate && !absorbed;
         target++) {
      absorbed = ts_stack_merge(self, target, candidate);
    }
    if (!absorbed) {
      candidate++;
    }
  }
}
/*
 * Attempt to merge versions across the entire stack by delegating to
 * ts_stack_merge_from with a starting version of 0.
 */
void ts_stack_merge_all(Stack *self) {
ts_stack_merge_from(self, 0);
}
void stack_node_remove_link(StackNode *self, size_t i,
StackNodeArray *node_pool) {
self->link_count--;
@ -536,57 +521,12 @@ void stack_node_prune_paths_with_error_cost(StackNode *self, size_t cost,
}
}
bool ts_stack_condense(Stack *self) {
bool did_condense = false;
unsigned min_error_cost = UINT_MAX;
unsigned min_error_depth = UINT_MAX;
for (size_t i = 0; i < self->heads.size; i++) {
StackNode *node = self->heads.contents[i].node;
/*
 * Mark the given stack version as halted by setting the is_halted flag on
 * its head. The version itself is left in place.
 */
void ts_stack_halt(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  head->is_halted = true;
}
bool did_remove = false;
for (size_t j = 0; j < i; j++) {
if (ts_stack_merge(self, j, i)) {
did_condense = true;
did_remove = true;
break;
}
}
if (did_remove) {
i--;
continue;
}
if (node->error_depth < min_error_depth ||
(node->error_depth == min_error_depth &&
node->min_error_cost < min_error_cost)) {
min_error_depth = node->error_depth;
min_error_cost = node->min_error_cost;
}
}
for (size_t i = 0; i < self->heads.size; i++) {
StackNode *node = self->heads.contents[i].node;
if (node->error_depth > min_error_depth + 1) {
did_condense = true;
ts_stack_remove_version(self, i);
i--;
continue;
} else if (node->error_depth == min_error_depth + 1) {
if (node->min_error_cost >= min_error_cost) {
did_condense = true;
ts_stack_remove_version(self, i);
i--;
continue;
} else if (node->max_error_cost >= min_error_cost) {
did_condense = true;
stack_node_prune_paths_with_error_cost(node, min_error_cost,
&self->node_pool);
}
}
}
return did_condense;
/*
 * Report whether the given stack version has its is_halted flag set.
 */
bool ts_stack_is_halted(Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->is_halted;
}
void ts_stack_clear(Stack *self) {

View file

@ -95,15 +95,15 @@ StackPopResult ts_stack_pop_pending(Stack *, StackVersion);
StackPopResult ts_stack_pop_all(Stack *, StackVersion);
StackVersion ts_stack_split(Stack *, StackVersion);
unsigned ts_stack_error_depth(const Stack *, StackVersion);
unsigned ts_stack_error_cost(const Stack *, StackVersion);
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
void ts_stack_merge_from(Stack *, StackVersion);
void ts_stack_halt(Stack *, StackVersion);
void ts_stack_merge_all(Stack *);
bool ts_stack_condense(Stack *);
bool ts_stack_is_halted(Stack *, StackVersion);
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);