Abort erroneous parse versions more eagerly
This commit is contained in:
parent
9b67b21dcd
commit
00a0939504
5 changed files with 125 additions and 130 deletions
|
|
@ -156,9 +156,6 @@ describe("The Corpus", []() {
|
|||
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
|
||||
|
||||
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
|
||||
// ts_document_set_debugger(document, log_debugger_make(true));
|
||||
// ts_document_print_debugging_graphs(document, true);
|
||||
|
||||
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
|
||||
ts_document_parse(document);
|
||||
|
||||
|
|
|
|||
|
|
@ -121,13 +121,13 @@ describe("Stack", [&]() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("merge_all()", [&]() {
|
||||
describe("merge()", [&]() {
|
||||
before_each([&]() {
|
||||
// . <──0── A <──1── B*
|
||||
// ↑
|
||||
// └───2─── C*
|
||||
ts_stack_push(stack, 0, trees[0], false, stateA);
|
||||
ts_stack_split(stack, 0);
|
||||
ts_stack_duplicate_version(stack, 0);
|
||||
ts_stack_push(stack, 0, trees[1], false, stateB);
|
||||
ts_stack_push(stack, 1, trees[2], false, stateC);
|
||||
});
|
||||
|
|
@ -142,7 +142,7 @@ describe("Stack", [&]() {
|
|||
// . <──0── A <──1── B <──3── D*
|
||||
// ↑ |
|
||||
// └───2─── C <──4───┘
|
||||
ts_stack_merge_all(stack);
|
||||
AssertThat(ts_stack_merge(stack, 0, 1), IsTrue());
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
|
||||
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
|
||||
{stateD, 0},
|
||||
|
|
@ -154,7 +154,7 @@ describe("Stack", [&]() {
|
|||
});
|
||||
|
||||
it("does not combine versions that have different states", [&]() {
|
||||
ts_stack_merge_all(stack);
|
||||
AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
|
||||
});
|
||||
|
||||
|
|
@ -166,7 +166,7 @@ describe("Stack", [&]() {
|
|||
ts_stack_push(stack, 0, trees[3], false, stateD);
|
||||
ts_stack_push(stack, 1, trees[4], false, stateD);
|
||||
|
||||
ts_stack_merge_all(stack);
|
||||
AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
|
||||
});
|
||||
|
||||
|
|
@ -183,7 +183,7 @@ describe("Stack", [&]() {
|
|||
// . <──0── A <──1── B <──3── D <──5── E*
|
||||
// ↑ |
|
||||
// └───2─── C <──4───┘
|
||||
ts_stack_merge_all(stack);
|
||||
AssertThat(ts_stack_merge(stack, 0, 1), IsTrue());
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
|
||||
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
|
||||
{stateE, 0},
|
||||
|
|
@ -272,7 +272,7 @@ describe("Stack", [&]() {
|
|||
ts_stack_push(stack, 1, trees[4], false, stateE);
|
||||
ts_stack_push(stack, 1, trees[5], false, stateF);
|
||||
ts_stack_push(stack, 1, trees[6], false, stateD);
|
||||
ts_stack_merge_all(stack);
|
||||
ts_stack_merge(stack, 0, 1);
|
||||
ts_stack_push(stack, 0, trees[10], false, stateI);
|
||||
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
|
||||
|
|
@ -393,7 +393,7 @@ describe("Stack", [&]() {
|
|||
ts_stack_push(stack, 1, trees[8], false, stateH);
|
||||
ts_stack_push(stack, 1, trees[9], false, stateD);
|
||||
ts_stack_push(stack, 1, trees[10], false, stateI);
|
||||
ts_stack_merge_all(stack);
|
||||
ts_stack_merge(stack, 0, 1);
|
||||
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
|
||||
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "runtime/parser.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include "tree_sitter/runtime.h"
|
||||
#include "tree_sitter/parser.h"
|
||||
|
|
@ -43,6 +44,8 @@
|
|||
goto error; \
|
||||
}
|
||||
|
||||
// Error-cost margin used by ts_parser__recover when deciding whether a version
// should give up: the version is halted if another version at the same error
// depth has an error cost lower by at least this amount.
static const unsigned ERROR_COST_THRESHOLD = 5;
|
||||
|
||||
static const TSParseAction ERROR_ACTION = {.type = TSParseActionTypeError };
|
||||
|
||||
static const size_t NO_ERROR_DEPTH = (size_t)(-1);
|
||||
|
|
@ -62,12 +65,6 @@ typedef struct {
|
|||
size_t best_repair_skip_count;
|
||||
} ErrorRepairSession;
|
||||
|
||||
// Outcome reported by ts_parser__consume_lookahead to the main parse loop.
typedef enum {
|
||||
// Returned from the `error:` label; the caller releases the lookahead and
// jumps to its own error handling.
ParseActionFailed,
|
||||
// Returned after a shift or a recover action; the caller keeps going.
ParseActionUpdated,
|
||||
// Returned after an accept action; the caller stops processing this version.
ParseActionRemoved,
|
||||
} ParseActionResult;
|
||||
|
||||
typedef struct {
|
||||
enum {
|
||||
ReduceFailed,
|
||||
|
|
@ -122,7 +119,8 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self,
|
|||
} else if (!last_child->extra) {
|
||||
TSParseAction action =
|
||||
ts_language_last_action(self->language, state, last_child->symbol);
|
||||
assert(action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover);
|
||||
assert(action.type == TSParseActionTypeShift ||
|
||||
action.type == TSParseActionTypeRecover);
|
||||
state = action.data.to_state;
|
||||
}
|
||||
|
||||
|
|
@ -180,6 +178,33 @@ static void ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) {
|
|||
} while (ts_tree_is_fragile(reusable_node->tree));
|
||||
}
|
||||
|
||||
// Walks every version of the parser's stack, removing versions that have been
// halted and trying to merge each remaining version into an earlier one.
// Returns true if at least one merge succeeded; removing a halted version
// alone does not set the result.
static bool ts_parser__condense_stack(TSParser *self) {
|
||||
bool result = false;
|
||||
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
|
||||
if (ts_stack_is_halted(self->stack, i)) {
|
||||
ts_stack_remove_version(self->stack, i);
|
||||
// Step back so that the loop's i++ lands on the same index again.
// NOTE(review): presumably later versions shift down after removal, and
// for i == 0 this relies on StackVersion being an unsigned type whose
// decrement wraps and is undone by i++ -- confirm.
i--;
|
||||
continue;
|
||||
}
|
||||

||||
bool did_merge = false;
|
||||
// Try each earlier version j as a merge target for version i; stop at the
// first one that accepts it.
for (size_t j = 0; j < i; j++) {
|
||||

||||
if (ts_stack_merge(self->stack, j, i)) {
|
||||
did_merge = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||

||||
if (did_merge) {
|
||||
result = true;
|
||||
// Version i was merged away, so re-examine whatever now sits at index i.
i--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool ts_parser__can_reuse(TSParser *self, StackVersion version,
|
||||
TSTree *tree) {
|
||||
if (tree->symbol == ts_builtin_sym_error)
|
||||
|
|
@ -441,7 +466,15 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
|
|||
}
|
||||
}
|
||||
|
||||
ts_stack_merge_from(self->stack, initial_version_count);
|
||||
for (StackVersion i = initial_version_count;
|
||||
i < ts_stack_version_count(self->stack); i++) {
|
||||
for (StackVersion j = initial_version_count; j < i; j++) {
|
||||
if (ts_stack_merge(self->stack, j, i)) {
|
||||
i--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (Reduction){ ReduceSucceeded, pop.slices.contents[0] };
|
||||
|
||||
|
|
@ -617,6 +650,20 @@ static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice,
|
|||
SYM_NAME(symbol), repair.count + count_above_error,
|
||||
parent->error_size);
|
||||
|
||||
unsigned my_error_cost = ts_stack_error_cost(self->stack, slice.version);
|
||||
unsigned my_error_depth = ts_stack_error_depth(self->stack, slice.version);
|
||||
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
|
||||
if (i != slice.version) {
|
||||
unsigned error_cost = ts_stack_error_cost(self->stack, i);
|
||||
unsigned error_depth = ts_stack_error_depth(self->stack, i);
|
||||
if (error_depth > my_error_depth + 1 ||
|
||||
(error_depth == my_error_depth + 1 && error_cost >= my_error_cost)) {
|
||||
LOG_ACTION("halt_other version:%u", i);
|
||||
ts_stack_halt(self->stack, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return RepairSucceeded;
|
||||
|
||||
error:
|
||||
|
|
@ -673,7 +720,7 @@ static bool ts_parser__accept(TSParser *self, StackVersion version) {
|
|||
}
|
||||
|
||||
ts_stack_remove_version(self->stack, pop.slices.contents[0].version);
|
||||
ts_stack_remove_version(self->stack, version);
|
||||
ts_stack_halt(self->stack, version);
|
||||
|
||||
return true;
|
||||
|
||||
|
|
@ -750,6 +797,30 @@ error:
|
|||
|
||||
static bool ts_parser__recover(TSParser *self, StackVersion version,
|
||||
TSStateId state, TSTree *lookahead) {
|
||||
if (lookahead->symbol == ts_builtin_sym_end) {
|
||||
LOG_ACTION("recover_eof");
|
||||
TreeArray children = array_new();
|
||||
TSTree *parent = ts_tree_make_error_node(&children);
|
||||
return ts_parser__push(self, version, parent, 1);
|
||||
}
|
||||
|
||||
unsigned my_error_cost = ts_stack_error_cost(self->stack, version);
|
||||
unsigned my_error_depth = ts_stack_error_depth(self->stack, version);
|
||||
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
|
||||
if (i != version) {
|
||||
unsigned error_cost = ts_stack_error_cost(self->stack, i);
|
||||
unsigned error_depth = ts_stack_error_depth(self->stack, i);
|
||||
if (error_depth < my_error_depth - 1 ||
|
||||
(error_depth == my_error_depth - 1 && error_cost <= my_error_cost) ||
|
||||
(error_depth == my_error_depth &&
|
||||
error_cost + ERROR_COST_THRESHOLD <= my_error_cost)) {
|
||||
ts_stack_halt(self->stack, version);
|
||||
LOG_ACTION("bail_on_error");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LOG_ACTION("recover state:%u", state);
|
||||
|
||||
StackVersion new_version = ts_stack_duplicate_version(self->stack, version);
|
||||
|
|
@ -765,15 +836,8 @@ error:
|
|||
return false;
|
||||
}
|
||||
|
||||
// Handles error recovery when the lookahead is the end of input: builds an
// error node from an empty child array and pushes it onto the given version.
// Returns the result of ts_parser__push.
static bool ts_parser__recover_eof(TSParser *self, StackVersion version) {
|
||||
TreeArray children = array_new();
|
||||
TSTree *parent = ts_tree_make_error_node(&children);
|
||||
// NOTE(review): the last argument is presumably the parse state to push
// the error node with -- confirm against ts_parser__push.
return ts_parser__push(self, version, parent, 1);
|
||||
}
|
||||
|
||||
static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
|
||||
StackVersion version,
|
||||
TSTree *lookahead) {
|
||||
static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
|
||||
TSTree *lookahead) {
|
||||
for (;;) {
|
||||
TSStateId state = ts_stack_top_state(self->stack, version);
|
||||
|
||||
|
|
@ -830,7 +894,7 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
|
|||
|
||||
CHECK(ts_parser__shift(self, version, next_state, lookahead,
|
||||
action.extra));
|
||||
return ParseActionUpdated;
|
||||
return true;
|
||||
}
|
||||
|
||||
case TSParseActionTypeReduce: {
|
||||
|
|
@ -879,18 +943,13 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
|
|||
case TSParseActionTypeAccept: {
|
||||
LOG_ACTION("accept");
|
||||
CHECK(ts_parser__accept(self, version));
|
||||
return ParseActionRemoved;
|
||||
return true;
|
||||
}
|
||||
|
||||
case TSParseActionTypeRecover: {
|
||||
if (lookahead->symbol == ts_builtin_sym_end) {
|
||||
LOG_ACTION("recover_eof");
|
||||
CHECK(ts_parser__recover_eof(self, version));
|
||||
} else {
|
||||
CHECK(ts_parser__recover(self, version, action.data.to_state,
|
||||
lookahead));
|
||||
}
|
||||
return ParseActionUpdated;
|
||||
CHECK(ts_parser__recover(self, version, action.data.to_state,
|
||||
lookahead));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -900,7 +959,7 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
|
|||
}
|
||||
|
||||
error:
|
||||
return ParseActionFailed;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ts_parser_init(TSParser *self) {
|
||||
|
|
@ -958,7 +1017,12 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
|
|||
version < ts_stack_version_count(self->stack);) {
|
||||
reusable_node = current_reusable_node;
|
||||
|
||||
for (bool removed = false; !removed;) {
|
||||
for (;;) {
|
||||
if (ts_stack_is_halted(self->stack, version)) {
|
||||
version++;
|
||||
break;
|
||||
}
|
||||
|
||||
last_position = position;
|
||||
size_t new_position = ts_stack_top_position(self->stack, version).chars;
|
||||
if (new_position > max_position) {
|
||||
|
|
@ -987,22 +1051,16 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
|
|||
LOG_ACTION("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
|
||||
ts_tree_total_chars(lookahead));
|
||||
|
||||
switch (ts_parser__consume_lookahead(self, version, lookahead)) {
|
||||
case ParseActionFailed:
|
||||
ts_tree_release(lookahead);
|
||||
goto error;
|
||||
case ParseActionRemoved:
|
||||
removed = true;
|
||||
break;
|
||||
case ParseActionUpdated:
|
||||
break;
|
||||
if (!ts_parser__consume_lookahead(self, version, lookahead)) {
|
||||
ts_tree_release(lookahead);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
current_reusable_node = reusable_node;
|
||||
|
||||
if (ts_stack_condense(self->stack)) {
|
||||
if (ts_parser__condense_stack(self)) {
|
||||
LOG_ACTION("condense");
|
||||
LOG_STACK();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@
|
|||
#include "runtime/length.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define MAX_LINK_COUNT 8
|
||||
#define MAX_NODE_POOL_SIZE 50
|
||||
|
|
@ -339,6 +338,14 @@ TSLength ts_stack_top_position(const Stack *self, StackVersion version) {
|
|||
return array_get(&self->heads, version)->node->position;
|
||||
}
|
||||
|
||||
// Returns the min_error_cost recorded on the node at the head of the given
// stack version.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
|
||||
return array_get(&self->heads, version)->node->min_error_cost;
|
||||
}
|
||||
|
||||
// Returns the error_depth recorded on the node at the head of the given stack
// version.
unsigned ts_stack_error_depth(const Stack *self, StackVersion version) {
|
||||
return array_get(&self->heads, version)->node->error_depth;
|
||||
}
|
||||
|
||||
size_t ts_stack_last_repaired_error_size(const Stack *self,
|
||||
StackVersion version) {
|
||||
StackNode *node = array_get(&self->heads, version)->node;
|
||||
|
|
@ -364,7 +371,7 @@ bool ts_stack_push(Stack *self, StackVersion version, TSTree *tree,
|
|||
if (!new_node)
|
||||
return false;
|
||||
stack_node_release(node, &self->node_pool);
|
||||
self->heads.contents[version] = (StackHead){ new_node, false };
|
||||
self->heads.contents[version].node = new_node;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -473,13 +480,6 @@ StackVersion ts_stack_duplicate_version(Stack *self, StackVersion version) {
|
|||
return self->heads.size - 1;
|
||||
}
|
||||
|
||||
// Creates a new stack version whose head is a copy of the given version's
// head. Returns the index of the new version, or STACK_VERSION_NONE if the
// head could not be appended.
StackVersion ts_stack_split(Stack *self, StackVersion version) {
|
||||
if (!array_push(&self->heads, self->heads.contents[version]))
|
||||
return STACK_VERSION_NONE;
|
||||
// The node is now referenced by two heads, so take an extra reference.
stack_node_retain(self->heads.contents[version].node);
|
||||
return self->heads.size - 1;
|
||||
}
|
||||
|
||||
bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version) {
|
||||
StackNode *node = self->heads.contents[version].node;
|
||||
StackNode *new_node = self->heads.contents[new_version].node;
|
||||
|
|
@ -496,21 +496,6 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version)
|
|||
}
|
||||
}
|
||||
|
||||
// Tries to merge every version at or after start_version into an earlier
// version that is also at or after start_version. Versions before
// start_version are never used as merge targets.
void ts_stack_merge_from(Stack *self, StackVersion start_version) {
|
||||
for (size_t i = start_version; i < self->heads.size; i++) {
|
||||
for (size_t j = start_version; j < i; j++) {
|
||||
if (ts_stack_merge(self, j, i)) {
|
||||
// Version i was merged away; step back so the outer i++ re-examines
// whatever now occupies index i.
i--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Attempts to merge all versions of the stack, starting from version 0.
void ts_stack_merge_all(Stack *self) {
|
||||
ts_stack_merge_from(self, 0);
|
||||
}
|
||||
|
||||
void stack_node_remove_link(StackNode *self, size_t i,
|
||||
StackNodeArray *node_pool) {
|
||||
self->link_count--;
|
||||
|
|
@ -536,57 +521,12 @@ void stack_node_prune_paths_with_error_cost(StackNode *self, size_t cost,
|
|||
}
|
||||
}
|
||||
|
||||
bool ts_stack_condense(Stack *self) {
|
||||
bool did_condense = false;
|
||||
unsigned min_error_cost = UINT_MAX;
|
||||
unsigned min_error_depth = UINT_MAX;
|
||||
for (size_t i = 0; i < self->heads.size; i++) {
|
||||
StackNode *node = self->heads.contents[i].node;
|
||||
// Marks the given stack version as halted by setting the is_halted flag on its
// head. The version itself is left in the heads array.
void ts_stack_halt(Stack *self, StackVersion version) {
|
||||
array_get(&self->heads, version)->is_halted = true;
|
||||
}
|
||||
|
||||
bool did_remove = false;
|
||||
for (size_t j = 0; j < i; j++) {
|
||||
if (ts_stack_merge(self, j, i)) {
|
||||
did_condense = true;
|
||||
did_remove = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (did_remove) {
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (node->error_depth < min_error_depth ||
|
||||
(node->error_depth == min_error_depth &&
|
||||
node->min_error_cost < min_error_cost)) {
|
||||
min_error_depth = node->error_depth;
|
||||
min_error_cost = node->min_error_cost;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < self->heads.size; i++) {
|
||||
StackNode *node = self->heads.contents[i].node;
|
||||
if (node->error_depth > min_error_depth + 1) {
|
||||
did_condense = true;
|
||||
ts_stack_remove_version(self, i);
|
||||
i--;
|
||||
continue;
|
||||
} else if (node->error_depth == min_error_depth + 1) {
|
||||
if (node->min_error_cost >= min_error_cost) {
|
||||
did_condense = true;
|
||||
ts_stack_remove_version(self, i);
|
||||
i--;
|
||||
continue;
|
||||
} else if (node->max_error_cost >= min_error_cost) {
|
||||
did_condense = true;
|
||||
stack_node_prune_paths_with_error_cost(node, min_error_cost,
|
||||
&self->node_pool);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return did_condense;
|
||||
// Returns the is_halted flag stored on the head of the given stack version.
bool ts_stack_is_halted(Stack *self, StackVersion version) {
|
||||
return array_get(&self->heads, version)->is_halted;
|
||||
}
|
||||
|
||||
void ts_stack_clear(Stack *self) {
|
||||
|
|
|
|||
|
|
@ -95,15 +95,15 @@ StackPopResult ts_stack_pop_pending(Stack *, StackVersion);
|
|||
|
||||
StackPopResult ts_stack_pop_all(Stack *, StackVersion);
|
||||
|
||||
StackVersion ts_stack_split(Stack *, StackVersion);
|
||||
unsigned ts_stack_error_depth(const Stack *, StackVersion);
|
||||
|
||||
unsigned ts_stack_error_cost(const Stack *, StackVersion);
|
||||
|
||||
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
|
||||
|
||||
void ts_stack_merge_from(Stack *, StackVersion);
|
||||
void ts_stack_halt(Stack *, StackVersion);
|
||||
|
||||
void ts_stack_merge_all(Stack *);
|
||||
|
||||
bool ts_stack_condense(Stack *);
|
||||
bool ts_stack_is_halted(Stack *, StackVersion);
|
||||
|
||||
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue