Fix incremental parsing
Stop collapsing hidden symbols when they are reduced. Sadly, collapsing them breaks the ability to re-use parse trees during incremental parsing. Instead, for now, keep hidden nodes in the tree and skip them only when stringifying parse trees.
This commit is contained in:
parent
7e94a4f1b2
commit
fbe8b0a905
8 changed files with 89 additions and 68 deletions
|
|
@ -269,30 +269,44 @@ static ts_lr_parser * ts_lr_parser_make() {
|
|||
return result;
|
||||
}
|
||||
|
||||
// static const char * ts_symbol_names[];
|
||||
// static void dump_stack(ts_lr_parser *parser) {
|
||||
// for (size_t i = 0; i < parser->stack.size; i++) {
|
||||
// printf("\n%ld %s", i, ts_symbol_names[parser->stack.entries[i].node->symbol]);
|
||||
// }
|
||||
// }
|
||||
|
||||
static size_t ts_lr_parser_breakdown_stack(ts_lr_parser *parser, ts_input_edit *edit) {
|
||||
if (parser->stack.size == 0) return 0;
|
||||
|
||||
ts_tree *node = ts_stack_top_node(&parser->stack);
|
||||
parser->stack.size--;
|
||||
state_id parse_state = 0;
|
||||
|
||||
size_t position = 0;
|
||||
size_t left_position = 0;
|
||||
size_t right_position = node->offset + node->size;
|
||||
size_t child_count;
|
||||
ts_tree ** children = ts_tree_children(node, &child_count);
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
ts_tree *child = children[i];
|
||||
|
||||
position += child->offset + child->size;
|
||||
ts_tree_retain(child);
|
||||
ts_stack_push(&parser->stack, parse_state, child);
|
||||
parse_state = ts_parse_actions[parse_state][child->symbol].data.to_state;
|
||||
while (right_position > edit->position || children) {
|
||||
parser->stack.size--;
|
||||
ts_tree *child;
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
child = children[i];
|
||||
right_position = left_position + child->offset + child->size;
|
||||
ts_tree_retain(child);
|
||||
state_id parse_state = ts_parse_actions[ts_stack_top_state(&parser->stack)][child->symbol].data.to_state;
|
||||
ts_stack_push(&parser->stack, parse_state, child);
|
||||
if (right_position >= edit->position) break;
|
||||
left_position = right_position;
|
||||
}
|
||||
ts_tree_release(node);
|
||||
node = child;
|
||||
children = ts_tree_children(node, &child_count);
|
||||
}
|
||||
|
||||
ts_tree_release(node);
|
||||
return position;
|
||||
return right_position;
|
||||
}
|
||||
|
||||
static void ts_lr_parser_initialize(ts_lr_parser *parser, ts_input input, ts_input_edit *edit) {
|
||||
if (!edit) ts_stack_shrink(&parser->stack, 0);
|
||||
parser->lookahead = NULL;
|
||||
parser->next_lookahead = NULL;
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ typedef struct ts_tree {
|
|||
size_t ref_count;
|
||||
size_t offset;
|
||||
size_t size;
|
||||
short int is_hidden;
|
||||
union {
|
||||
struct {
|
||||
size_t count;
|
||||
|
|
@ -73,6 +74,7 @@ void ts_document_edit(ts_document *doc, ts_input_edit edit);
|
|||
|
||||
const ts_tree * ts_document_tree(const ts_document *doc);
|
||||
const char * ts_document_string(const ts_document *doc);
|
||||
const char * ts_document_symbol_name(const ts_document *document, const ts_tree *tree);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,11 +52,16 @@ describe("json", []() {
|
|||
describe("tracking the positions of AST nodes", [&]() {
|
||||
it("records the widths and offsets of nodes", [&]() {
|
||||
ts_document_set_input_string(doc, " [12, 5]");
|
||||
|
||||
const ts_tree *tree = ts_document_tree(doc);
|
||||
|
||||
// TODO - make this better
|
||||
const ts_tree *array = ts_tree_children(tree, NULL)[0];
|
||||
const ts_tree *number1 = ts_tree_children(array, NULL)[0];
|
||||
const ts_tree *number2 = ts_tree_children(array, NULL)[1];
|
||||
const ts_tree *number1 = ts_tree_children(ts_tree_children(array, NULL)[1], NULL)[0];
|
||||
const ts_tree *number2 = ts_tree_children(ts_tree_children(ts_tree_children(array, NULL)[2], NULL)[1], NULL)[0];
|
||||
|
||||
AssertThat(ts_document_symbol_name(doc, array), Equals("array"));
|
||||
AssertThat(ts_document_symbol_name(doc, number1), Equals("number"));
|
||||
AssertThat(ts_document_symbol_name(doc, number2), Equals("number"));
|
||||
|
||||
AssertThat(number1->offset, Equals<size_t>(0));
|
||||
AssertThat(number1->size, Equals<size_t>(2));
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ extern "C" ts_parser ts_parser_json();
|
|||
|
||||
START_TEST
|
||||
|
||||
describe("parsing", [&]() {
|
||||
describe("incremental parsing", [&]() {
|
||||
ts_document *doc;
|
||||
SpyReader *reader;
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ describe("parsing", [&]() {
|
|||
|
||||
it("re-reads only the changed portion of the input", [&]() {
|
||||
AssertThat(reader->strings_read.size(), Equals<size_t>(2));
|
||||
AssertThat(reader->strings_read[1], Equals("\"key2\": 4 }"));
|
||||
AssertThat(reader->strings_read[1], Equals(", \"key2\": 4 }"));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ int main(int argc, char *argv[]) {
|
|||
"",
|
||||
"--no-color",
|
||||
"--only="
|
||||
"re-reads only"
|
||||
""
|
||||
};
|
||||
return bandit::run(4, const_cast<char **>(args));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,6 +37,10 @@ void ts_document_edit(ts_document *document, ts_input_edit edit) {
|
|||
document->tree = ts_parser_parse(&document->parser, document->input, &edit);
|
||||
}
|
||||
|
||||
// Returns the name of `tree`'s symbol by indexing the document parser's
// symbol_names table with tree->symbol. Neither pointer is NULL-checked here.
const char * ts_document_symbol_name(const ts_document *document, const ts_tree *tree) {
|
||||
return document->parser.symbol_names[tree->symbol];
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const char *string;
|
||||
size_t position;
|
||||
|
|
|
|||
|
|
@ -49,26 +49,14 @@ void ts_stack_shrink(ts_stack *stack, size_t new_size) {
|
|||
stack->size = new_size;
|
||||
}
|
||||
|
||||
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *collapse_flags) {
|
||||
size_t new_stack_size = stack->size - immediate_child_count;
|
||||
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int child_count, const int *collapse_flags) {
|
||||
size_t new_stack_size = stack->size - child_count;
|
||||
|
||||
int child_count = 0;
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
ts_tree *child = stack->entries[new_stack_size + i].node;
|
||||
if (collapse_flags[i]) {
|
||||
size_t grandchild_count;
|
||||
ts_tree_children(child, &grandchild_count);
|
||||
child_count += grandchild_count;
|
||||
} else {
|
||||
child_count++;
|
||||
}
|
||||
}
|
||||
|
||||
int child_index = 0;
|
||||
size_t size = 0, offset = 0;
|
||||
ts_tree **children = malloc(child_count * sizeof(ts_tree *));
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
for (int i = 0; i < child_count; i++) {
|
||||
ts_tree *child = stack->entries[new_stack_size + i].node;
|
||||
child->is_hidden = collapse_flags[i];
|
||||
if (i == 0) {
|
||||
offset = child->offset;
|
||||
size = child->size;
|
||||
|
|
@ -76,15 +64,7 @@ ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child
|
|||
size += child->offset + child->size;
|
||||
}
|
||||
|
||||
if (collapse_flags[i]) {
|
||||
size_t grandchild_count;
|
||||
ts_tree ** grandchildren = ts_tree_children(child, &grandchild_count);
|
||||
memcpy(children + child_index, grandchildren, (grandchild_count * sizeof(ts_tree *)));
|
||||
child_index += grandchild_count;
|
||||
} else {
|
||||
children[child_index] = child;
|
||||
child_index++;
|
||||
}
|
||||
children[i] = child;
|
||||
}
|
||||
|
||||
ts_tree *lookahead = ts_tree_make_node(symbol, child_count, children, size, offset);
|
||||
|
|
|
|||
|
|
@ -3,11 +3,13 @@
|
|||
#include <stdio.h>
|
||||
|
||||
static ts_tree * ts_tree_make(ts_symbol symbol, size_t size, size_t offset) {
|
||||
ts_tree *result = (ts_tree *)malloc(sizeof(ts_tree));
|
||||
result->ref_count = 1;
|
||||
result->symbol = symbol;
|
||||
result->size = size;
|
||||
result->offset = offset;
|
||||
ts_tree *result = malloc(sizeof(ts_tree));
|
||||
*result = (ts_tree) {
|
||||
.ref_count = 1,
|
||||
.symbol = symbol,
|
||||
.size = size,
|
||||
.offset = offset,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -77,33 +79,47 @@ ts_tree ** ts_tree_children(const ts_tree *tree, size_t *count) {
|
|||
return tree->data.children.contents;
|
||||
}
|
||||
|
||||
static size_t tree_write_to_string(const ts_tree *tree, const char **symbol_names, char *string, size_t limit) {
|
||||
static const char *NULL_TREE_STRING = "(NULL)";
|
||||
static const char *ERROR_TREE_STRING = "(ERROR)";
|
||||
static size_t tree_write_to_string(const ts_tree *tree, const char **symbol_names, char *string, size_t limit, int is_beginning) {
|
||||
char *cursor = string;
|
||||
size_t result = 0;
|
||||
|
||||
if (!tree)
|
||||
return snprintf(string, limit, "%s", NULL_TREE_STRING);
|
||||
if (tree->symbol == ts_builtin_sym_error)
|
||||
return snprintf(string, limit, "%s", ERROR_TREE_STRING);
|
||||
|
||||
size_t result = snprintf(string, limit, "(%s", symbol_names[tree->symbol]);
|
||||
char *cursor = string + result;
|
||||
for (size_t i = 0; i < tree->data.children.count; i++) {
|
||||
ts_tree *child = tree->data.children.contents[i];
|
||||
result += snprintf(cursor, limit, " ");
|
||||
result += tree_write_to_string(child, symbol_names, cursor + 1, limit);
|
||||
cursor = (limit > 0) ? string + result : string;
|
||||
if (!tree) {
|
||||
return snprintf(cursor, limit, "(NULL)");
|
||||
}
|
||||
|
||||
if (!tree->is_hidden) {
|
||||
if (!is_beginning) {
|
||||
result += snprintf(cursor, limit, " ");
|
||||
if (limit > 0) cursor = string + result;
|
||||
}
|
||||
|
||||
if (tree->symbol == ts_builtin_sym_error) {
|
||||
result += snprintf(cursor, limit, "(ERROR)");
|
||||
return result;
|
||||
}
|
||||
|
||||
result += snprintf(cursor, limit, "(%s", symbol_names[tree->symbol]);
|
||||
if (limit > 0) cursor = string + result;
|
||||
}
|
||||
|
||||
return result + snprintf(cursor, limit, ")");
|
||||
for (size_t i = 0; i < tree->data.children.count; i++) {
|
||||
ts_tree *child = tree->data.children.contents[i];
|
||||
result += tree_write_to_string(child, symbol_names, cursor, limit, 0);
|
||||
if (limit > 0) cursor = string + result;
|
||||
}
|
||||
|
||||
if (!tree->is_hidden) {
|
||||
result += snprintf(cursor, limit, ")");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static char SCRATCH_STRING[1];
|
||||
|
||||
char * ts_tree_string(const ts_tree *tree, const char **symbol_names) {
|
||||
size_t size = tree_write_to_string(tree, symbol_names, SCRATCH_STRING, 0) + 1;
|
||||
static char SCRATCH_STRING[100];
|
||||
size_t size = tree_write_to_string(tree, symbol_names, SCRATCH_STRING, 0, 1) + 1;
|
||||
char *result = malloc(size * sizeof(char));
|
||||
tree_write_to_string(tree, symbol_names, result, size);
|
||||
tree_write_to_string(tree, symbol_names, result, size, 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue