Select ambiguous alternatives by minimizing error size
This commit is contained in:
parent
1fb6065f02
commit
fd4c33209e
10 changed files with 329 additions and 174 deletions
|
|
@ -48,7 +48,10 @@ extern "C" {
|
|||
|
||||
#define array_splice(self, index, old_count, new_count, new_elements) \
|
||||
array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \
|
||||
new_count, new_elements)
|
||||
new_count, (new_elements))
|
||||
|
||||
/*
 * Insert a single element at `index`.
 *
 * Expands to array_splice with zero elements removed and one element added.
 * The address of `element` is taken, so the argument must be an lvalue.
 */
#define array_insert(self, index, element) \
|
||||
array_splice(self, index, 0, 1, &(element))
|
||||
|
||||
#define array_pop(self) ((self)->contents[--(self)->size])
|
||||
|
||||
|
|
@ -107,15 +110,14 @@ static inline bool array__grow(VoidArray *self, size_t element_size,
|
|||
static inline bool array__splice(VoidArray *self, size_t element_size,
|
||||
size_t index, size_t old_count,
|
||||
size_t new_count, void *elements) {
|
||||
assert(index + old_count <= self->size);
|
||||
assert(index < self->size);
|
||||
size_t new_size = self->size + new_count - old_count;
|
||||
size_t old_end = index + old_count;
|
||||
size_t new_end = index + new_count;
|
||||
if (new_size >= self->capacity) {
|
||||
assert(old_end <= self->size);
|
||||
|
||||
if (new_size >= self->capacity)
|
||||
if (!array__grow(self, element_size, new_size))
|
||||
return false;
|
||||
}
|
||||
|
||||
char *contents = (char *)self->contents;
|
||||
if (self->size > old_end)
|
||||
|
|
|
|||
|
|
@ -258,26 +258,20 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
|
|||
return self->language->lex_fn(&self->lexer, lex_state, false);
|
||||
}
|
||||
|
||||
static int ts_parser__select_tree(void *data, TSTree *left, TSTree *right) {
|
||||
if (!left || left->symbol == ts_builtin_sym_error)
|
||||
return 1;
|
||||
if (!right || right->symbol == ts_builtin_sym_error)
|
||||
return -1;
|
||||
|
||||
TSParser *self = data;
|
||||
int comparison = ts_tree_compare(left, right);
|
||||
switch (comparison) {
|
||||
case -1:
|
||||
LOG_ACTION("select tree:%s, over_tree:%s", SYM_NAME(left->symbol),
|
||||
SYM_NAME(right->symbol));
|
||||
break;
|
||||
case 1:
|
||||
LOG_ACTION("select tree:%s, over_tree:%s", SYM_NAME(right->symbol),
|
||||
SYM_NAME(left->symbol));
|
||||
break;
|
||||
static bool ts_parser__select_tree(TSParser *self, TSTree *left, TSTree *right) {
|
||||
if (!left)
|
||||
return true;
|
||||
if (!right)
|
||||
return false;
|
||||
if (right->error_size < left->error_size) {
|
||||
LOG_ACTION("select_smaller_error symbol:%s, over_symbol:%s", SYM_NAME(right->symbol), SYM_NAME(left->symbol));
|
||||
return true;
|
||||
}
|
||||
|
||||
return comparison;
|
||||
if (left->error_size < right->error_size) {
|
||||
LOG_ACTION("select_smaller_error symbol:%s, over_symbol:%s", SYM_NAME(left->symbol), SYM_NAME(right->symbol));
|
||||
return false;
|
||||
}
|
||||
return ts_tree_compare(right, left) < 0;
|
||||
}
|
||||
|
||||
static void ts_parser__remove_version(TSParser *self, StackVersion version) {
|
||||
|
|
@ -319,6 +313,25 @@ error:
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
 * Try replacing `tree`'s children with the alternative set `children`
 * (`count` entries).
 *
 * A candidate node is assembled in self->scratch_tree using `tree`'s symbol
 * and then compared against `tree` with ts_parser__select_tree. When that
 * call returns true, the candidate's size, padding, error_size, children
 * pointer and child counts are copied into `tree` and true is returned.
 * Otherwise `tree` is left unmodified and false is returned.
 *
 * tree->children is overwritten here without freeing the previous array.
 * NOTE(review): presumably the caller owns and releases that array - confirm.
 */
static bool ts_parser__switch_children(TSParser *self, TSTree *tree,
|
||||
TSTree **children, size_t count) {
|
||||
self->scratch_tree.symbol = tree->symbol;
|
||||
/* ts_tree_set_children frees the node's current children array only when
 * child_count > 0, so zeroing the count first keeps it from freeing
 * whatever pointer the scratch node still holds. */
self->scratch_tree.child_count = 0;
|
||||
ts_tree_set_children(&self->scratch_tree, count, children);
|
||||
if (ts_parser__select_tree(self, tree, &self->scratch_tree)) {
|
||||
/* The candidate was selected: adopt its computed fields. */
tree->size = self->scratch_tree.size;
|
||||
tree->padding = self->scratch_tree.padding;
|
||||
tree->error_size = self->scratch_tree.error_size;
|
||||
tree->children = self->scratch_tree.children;
|
||||
tree->child_count = self->scratch_tree.child_count;
|
||||
tree->named_child_count = self->scratch_tree.named_child_count;
|
||||
tree->visible_child_count = self->scratch_tree.visible_child_count;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
|
||||
TSSymbol symbol, unsigned count, bool extra,
|
||||
bool fragile) {
|
||||
|
|
@ -356,6 +369,25 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
|
|||
goto error;
|
||||
}
|
||||
|
||||
while (i + 1 < pop.slices.size) {
|
||||
StackSlice next_slice = pop.slices.contents[i + 1];
|
||||
if (next_slice.version != slice.version)
|
||||
break;
|
||||
i++;
|
||||
|
||||
size_t child_count = next_slice.trees.size;
|
||||
while (child_count > 0 && next_slice.trees.contents[child_count - 1]->extra)
|
||||
child_count--;
|
||||
|
||||
if (ts_parser__switch_children(self, parent, next_slice.trees.contents,
|
||||
child_count)) {
|
||||
ts_tree_array_delete(&slice.trees);
|
||||
slice = next_slice;
|
||||
} else {
|
||||
ts_tree_array_delete(&next_slice.trees);
|
||||
}
|
||||
}
|
||||
|
||||
TSStateId state = ts_stack_top_state(self->stack, slice.version);
|
||||
if (fragile || self->is_split || ts_stack_version_count(self->stack) > 1) {
|
||||
parent->fragile_left = true;
|
||||
|
|
@ -376,7 +408,7 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
|
|||
}
|
||||
|
||||
CHECK(ts_parser__push(self, slice.version, parent, new_state));
|
||||
for (size_t j = child_count; j < slice.trees.size; j++) {
|
||||
for (size_t j = parent->child_count; j < slice.trees.size; j++) {
|
||||
TSTree *tree = slice.trees.contents[j];
|
||||
CHECK(ts_parser__push(self, slice.version, tree, new_state));
|
||||
}
|
||||
|
|
@ -490,15 +522,16 @@ static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice,
|
|||
TreeArray children_below = new_slice.trees;
|
||||
ts_stack_renumber_version(self->stack, new_slice.version, slice.version);
|
||||
|
||||
while (pop.slices.size) {
|
||||
StackSlice other_slice = array_pop(&pop.slices);
|
||||
for (size_t i = pop.slices.size - 1; i + 1 > 0; i--) {
|
||||
StackSlice other_slice = pop.slices.contents[i];
|
||||
ts_tree_array_delete(&other_slice.trees);
|
||||
ts_stack_remove_version(self->stack, other_slice.version);
|
||||
if (other_slice.version != pop.slices.contents[i + 1].version)
|
||||
ts_stack_remove_version(self->stack, other_slice.version);
|
||||
}
|
||||
|
||||
LOG_ACTION(
|
||||
"repair_found sym:%s, child_count:%lu, match_count:%lu, skipped:%lu",
|
||||
SYM_NAME(symbol), repair.count_below_error, repair.in_progress_state_count,
|
||||
SYM_NAME(symbol), repair.count_below_error + count_above_error, repair.in_progress_state_count,
|
||||
skip_count);
|
||||
|
||||
if (skip_count > 0) {
|
||||
|
|
@ -538,43 +571,51 @@ static void ts_parser__start(TSParser *self, TSInput input,
|
|||
|
||||
ts_lexer_set_input(&self->lexer, input);
|
||||
ts_stack_clear(self->stack);
|
||||
ts_stack_set_tree_selection_callback(self->stack, self,
|
||||
ts_parser__select_tree);
|
||||
|
||||
self->finished_tree = NULL;
|
||||
}
|
||||
|
||||
/*
 * Handle an "accept" action for stack version `version`.
 *
 * The trees on that version are popped with ts_stack_pop_all. Scanning from
 * the end, the last tree not marked `extra` is taken as the root. Its slot
 * in the popped array is replaced by its own children (array_splice), and
 * the resulting array becomes the root's children (ts_tree_set_children).
 * The root is then compared with self->finished_tree through
 * ts_parser__select_tree; the preferred tree is kept in self->finished_tree
 * and the other one is released.
 *
 * Returns true on success and false through the `error` label.
 * NOTE(review): CHECK presumably jumps to `error` when its argument is
 * false - confirm against the macro definition.
 *
 * NOTE(review): this span comes from a rendered commit diff, so statements
 * of the pre-change body (single TreeArray, integer `comparison`) and of the
 * post-change body (loop over pop.slices, boolean selection) appear
 * interleaved below. Read it as two versions, not as one function body.
 */
static bool ts_parser__accept(TSParser *self, StackVersion version) {
|
||||
LOG_ACTION("accept");
|
||||
TreeArray trees = ts_stack_pop_all(self->stack, version);
|
||||
CHECK(trees.contents);
|
||||
StackPopResult pop = ts_stack_pop_all(self->stack, version);
|
||||
CHECK(pop.status);
|
||||
CHECK(pop.slices.size);
|
||||
|
||||
/* `i + 1 > 0` is the countdown test for an unsigned index: the loop ends
 * once `i` wraps around after reaching zero. */
for (size_t i = trees.size - 1; i + 1 > 0; i--) {
|
||||
if (!trees.contents[i]->extra) {
|
||||
TSTree *root = trees.contents[i];
|
||||
CHECK(array_splice(&trees, i, 1, root->child_count, root->children));
|
||||
for (size_t i = 0; i < pop.slices.size; i++) {
|
||||
StackSlice slice = pop.slices.contents[i];
|
||||
TreeArray trees = slice.trees;
|
||||
|
||||
ts_tree_set_children(root, trees.size, trees.contents);
|
||||
if (!trees.size)
|
||||
array_delete(&trees);
|
||||
for (size_t j = trees.size - 1; j + 1 > 0; j--) {
|
||||
if (!trees.contents[j]->extra) {
|
||||
TSTree *root = trees.contents[j];
|
||||
|
||||
ts_stack_remove_version(self->stack, version);
|
||||
int comparison = ts_parser__select_tree(self, self->finished_tree, root);
|
||||
if (comparison > 0) {
|
||||
ts_tree_release(self->finished_tree);
|
||||
self->finished_tree = root;
|
||||
} else {
|
||||
ts_tree_release(root);
|
||||
/* Replace the root's slot with the root's own children, then hand the
 * whole array to the root as its new child list. */
CHECK(array_splice(&trees, j, 1, root->child_count, root->children));
|
||||
ts_tree_set_children(root, trees.size, trees.contents);
|
||||
if (!trees.size)
|
||||
array_delete(&trees);
|
||||
|
||||
/* Walks the children that precede index j and, for each non-extra one,
 * increases root->error_size.
 * NOTE(review): the loop index is `k`, but the amount added is
 * root->children[j]->size.chars, which is the same value on every
 * iteration. Confirm whether root->children[k]->size.chars was intended. */
for (size_t k = j - 1; k + 1 > 0; k--)
|
||||
if (!root->children[k]->extra)
|
||||
root->error_size += root->children[j]->size.chars;
|
||||
|
||||
/* ts_parser__select_tree returning true means `root` replaces the
 * currently finished tree; the tree that loses is released. */
if (ts_parser__select_tree(self, self->finished_tree, root)) {
|
||||
ts_tree_release(self->finished_tree);
|
||||
self->finished_tree = root;
|
||||
} else {
|
||||
ts_tree_release(root);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ts_stack_remove_version(self->stack, pop.slices.contents[0].version);
|
||||
ts_stack_remove_version(self->stack, version);
|
||||
|
||||
return true;
|
||||
|
||||
error:
|
||||
ts_tree_array_delete(&trees);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -597,10 +638,8 @@ static ParseActionResult ts_parser__handle_error(TSParser *self,
|
|||
CHECK(error);
|
||||
CHECK(ts_parser__push(self, version, error, 0));
|
||||
|
||||
TreeArray trees = ts_stack_pop_all(self->stack, version);
|
||||
CHECK(trees.contents);
|
||||
TSTree *parent = ts_tree_make_node(
|
||||
ts_builtin_sym_start, trees.size, trees.contents,
|
||||
TSTree *parent = ts_tree_make_leaf(
|
||||
ts_builtin_sym_start, ts_length_zero(), ts_length_zero(),
|
||||
ts_language_symbol_metadata(language, ts_builtin_sym_start));
|
||||
CHECK(parent);
|
||||
CHECK(ts_parser__push(self, version, parent, 0));
|
||||
|
|
@ -682,7 +721,7 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self,
|
|||
break;
|
||||
}
|
||||
|
||||
if (ts_stack_version_count(self->stack) == 1) {
|
||||
if (ts_stack_version_count(self->stack) == 1 && !self->finished_tree) {
|
||||
return ts_parser__handle_error(self, version, lookahead);
|
||||
} else {
|
||||
ts_parser__remove_version(self, version);
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ typedef struct {
|
|||
TSTree *finished_tree;
|
||||
bool is_split;
|
||||
bool print_debugging_graphs;
|
||||
TSTree scratch_tree;
|
||||
} TSParser;
|
||||
|
||||
bool ts_parser_init(TSParser *);
|
||||
|
|
|
|||
|
|
@ -47,8 +47,6 @@ struct Stack {
|
|||
StackSliceArray slices;
|
||||
Array(PopPath) pop_paths;
|
||||
StackNodeArray node_pool;
|
||||
void *tree_selection_payload;
|
||||
TreeSelectionFunction tree_selection_function;
|
||||
StackNode *base_node;
|
||||
};
|
||||
|
||||
|
|
@ -131,10 +129,6 @@ static void stack_node_add_link(StackNode *self, StackLink link) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Default tree-selection callback: ignores the payload and both trees and
 * always returns 0, i.e. it expresses no preference between them.
 */
static int ts_stack__default_tree_selection(void *p, TSTree *t1, TSTree *t2) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static StackVersion ts_stack__add_version(Stack *self, StackNode *node) {
|
||||
if (!array_push(&self->heads, node))
|
||||
return STACK_VERSION_NONE;
|
||||
|
|
@ -142,55 +136,24 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node) {
|
|||
return (StackVersion)(self->heads.size - 1);
|
||||
}
|
||||
|
||||
/*
 * Decide whether `slice` keeps its current trees or adopts `trees`.
 *
 * The candidate array wins when it is longer than the slice's array. When
 * both have the same length, the trees are compared pairwise with the
 * stack's tree-selection callback and the first non-zero result decides:
 * a negative value keeps the existing trees, a positive value takes the
 * candidate. In every other case the existing trees are kept.
 *
 * Whichever array loses is passed to ts_tree_array_delete, so after the
 * call `*trees` has either been moved into the slice or deleted.
 */
static void ts_stack__update_slice(Stack *self, StackSlice *slice,
|
||||
TreeArray *trees) {
|
||||
bool should_update = false;
|
||||
if (slice->trees.size < trees->size) {
|
||||
should_update = true;
|
||||
} else if (slice->trees.size == trees->size) {
|
||||
for (size_t i = 0; i < slice->trees.size; i++) {
|
||||
TSTree *tree = slice->trees.contents[i];
|
||||
TSTree *new_tree = trees->contents[i];
|
||||
int comparison = self->tree_selection_function(
|
||||
self->tree_selection_payload, tree, new_tree);
|
||||
/* A zero result is a tie for this pair, so move on to the next pair. */
if (comparison < 0) {
|
||||
break;
|
||||
} else if (comparison > 0) {
|
||||
should_update = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (should_update) {
|
||||
ts_tree_array_delete(&slice->trees);
|
||||
slice->trees = *trees;
|
||||
} else {
|
||||
ts_tree_array_delete(trees);
|
||||
}
|
||||
}
|
||||
|
||||
static bool ts_stack__add_slice(Stack *self, size_t previous_version_count,
|
||||
StackNode *node, TreeArray *trees) {
|
||||
for (size_t i = 0; i < self->slices.size; i++) {
|
||||
StackSlice *previous_slice = &self->slices.contents[i];
|
||||
size_t version_index = previous_version_count + i;
|
||||
if (self->heads.contents[version_index] == node) {
|
||||
ts_stack__update_slice(self, previous_slice, trees);
|
||||
return true;
|
||||
static bool ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees) {
|
||||
for (size_t i = self->slices.size - 1; i + 1 > 0; i--) {
|
||||
StackVersion version = self->slices.contents[i].version;
|
||||
if (self->heads.contents[version] == node) {
|
||||
StackSlice slice = {*trees, version};
|
||||
return array_insert(&self->slices, i + 1, slice);
|
||||
}
|
||||
}
|
||||
|
||||
StackVersion version = ts_stack__add_version(self, node);
|
||||
if (version == STACK_VERSION_NONE)
|
||||
return false;
|
||||
StackSlice slice = {.version = version, .trees = *trees };
|
||||
StackSlice slice = {*trees, version};
|
||||
return array_push(&self->slices, slice);
|
||||
}
|
||||
|
||||
INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
|
||||
StackIterateCallback callback, void *payload) {
|
||||
size_t previous_version_count = self->heads.size;
|
||||
array_clear(&self->slices);
|
||||
|
||||
PopPath pop_path = {
|
||||
|
|
@ -219,7 +182,7 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
TreeArray trees =
|
||||
should_stop ? path->trees : ts_tree_array_copy(&path->trees);
|
||||
array_reverse(&trees);
|
||||
if (!ts_stack__add_slice(self, previous_version_count, node, &trees))
|
||||
if (!ts_stack__add_slice(self, node, &trees))
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
|
@ -275,8 +238,6 @@ Stack *ts_stack_new() {
|
|||
array_init(&self->slices);
|
||||
array_init(&self->pop_paths);
|
||||
array_init(&self->node_pool);
|
||||
self->tree_selection_payload = NULL;
|
||||
self->tree_selection_function = ts_stack__default_tree_selection;
|
||||
|
||||
if (!array_grow(&self->heads, 4))
|
||||
goto error;
|
||||
|
|
@ -426,13 +387,8 @@ INLINE StackIterateAction pop_all_callback(void *payload, TSStateId state,
|
|||
return is_done ? (StackIteratePop | StackIterateStop) : StackIterateNone;
|
||||
}
|
||||
|
||||
TreeArray ts_stack_pop_all(Stack *self, StackVersion version) {
|
||||
StackPopResult pop = stack__iter(self, version, pop_all_callback, NULL);
|
||||
if (pop.status != StackPopSucceeded)
|
||||
return (TreeArray)array_new();
|
||||
assert(pop.slices.size == 1);
|
||||
ts_stack_renumber_version(self, pop.slices.contents[0].version, version);
|
||||
return pop.slices.contents[0].trees;
|
||||
StackPopResult ts_stack_pop_all(Stack *self, StackVersion version) {
|
||||
return stack__iter(self, version, pop_all_callback, NULL);
|
||||
}
|
||||
|
||||
void ts_stack_remove_version(Stack *self, StackVersion version) {
|
||||
|
|
@ -475,12 +431,6 @@ void ts_stack_clear(Stack *self) {
|
|||
array_push(&self->heads, self->base_node);
|
||||
}
|
||||
|
||||
/*
 * Install the tree-selection callback on the stack.
 *
 * Stores `function` together with the opaque `payload` pointer, which is
 * passed back as the callback's first argument when it is invoked.
 */
void ts_stack_set_tree_selection_callback(Stack *self, void *payload,
|
||||
TreeSelectionFunction function) {
|
||||
self->tree_selection_payload = payload;
|
||||
self->tree_selection_function = function;
|
||||
}
|
||||
|
||||
int ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
|
||||
fprintf(f, "digraph stack {\n");
|
||||
fprintf(f, "rankdir=\"RL\";\n");
|
||||
|
|
|
|||
|
|
@ -45,8 +45,6 @@ typedef StackIterateAction (*StackIterateCallback)(void *, TSStateId state,
|
|||
bool is_done,
|
||||
bool is_pending);
|
||||
|
||||
typedef int (*TreeSelectionFunction)(void *, TSTree *tree1, TSTree *tree2);
|
||||
|
||||
/*
|
||||
* Create a parse stack.
|
||||
*/
|
||||
|
|
@ -94,7 +92,7 @@ StackPopResult ts_stack_iterate(Stack *, StackVersion, StackIterateCallback,
|
|||
|
||||
StackPopResult ts_stack_pop_pending(Stack *, StackVersion);
|
||||
|
||||
TreeArray ts_stack_pop_all(Stack *, StackVersion);
|
||||
StackPopResult ts_stack_pop_all(Stack *, StackVersion);
|
||||
|
||||
void ts_stack_merge(Stack *);
|
||||
|
||||
|
|
@ -110,9 +108,6 @@ void ts_stack_remove_version(Stack *, StackVersion);
|
|||
*/
|
||||
void ts_stack_clear(Stack *);
|
||||
|
||||
void ts_stack_set_tree_selection_callback(Stack *, void *,
|
||||
TreeSelectionFunction);
|
||||
|
||||
int ts_stack_print_dot_graph(Stack *, const char **, FILE *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -110,10 +110,13 @@ recur:
|
|||
void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) {
|
||||
if (self->child_count > 0)
|
||||
ts_free(self->children);
|
||||
|
||||
self->children = children;
|
||||
self->child_count = child_count;
|
||||
self->named_child_count = 0;
|
||||
self->visible_child_count = 0;
|
||||
size_t error_size = 0;
|
||||
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSTree *child = children[i];
|
||||
|
||||
|
|
@ -136,9 +139,16 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) {
|
|||
if (child->symbol == ts_builtin_sym_error) {
|
||||
self->fragile_left = self->fragile_right = true;
|
||||
self->parse_state = TS_TREE_STATE_ERROR;
|
||||
} else {
|
||||
error_size += child->error_size;
|
||||
}
|
||||
}
|
||||
|
||||
if (self->symbol == ts_builtin_sym_error)
|
||||
self->error_size = self->size.chars;
|
||||
else
|
||||
self->error_size = error_size;
|
||||
|
||||
if (child_count > 0) {
|
||||
self->lex_state = children[0]->lex_state;
|
||||
if (children[0]->fragile_left)
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@ struct TSTree {
|
|||
TSSymbol symbol;
|
||||
TSStateId lex_state;
|
||||
TSStateId parse_state;
|
||||
size_t error_size;
|
||||
|
||||
unsigned short ref_count;
|
||||
bool visible : 1;
|
||||
bool named : 1;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue