Fix incremental parsing

Stop collapsing hidden symbols upon reducing them.
Sadly, collapsing them messes up the ability to re-use parse
trees. Instead, for now, hide these nodes when
stringifying parse trees.
This commit is contained in:
Max Brunsfeld 2014-03-19 19:27:31 -07:00
parent 7e94a4f1b2
commit fbe8b0a905
8 changed files with 89 additions and 68 deletions

View file

@ -269,30 +269,44 @@ static ts_lr_parser * ts_lr_parser_make() {
return result;
}
// static const char * ts_symbol_names[];
// static void dump_stack(ts_lr_parser *parser) {
// for (size_t i = 0; i < parser->stack.size; i++) {
// printf("\n%ld %s", i, ts_symbol_names[parser->stack.entries[i].node->symbol]);
// }
// }
static size_t ts_lr_parser_breakdown_stack(ts_lr_parser *parser, ts_input_edit *edit) {
if (parser->stack.size == 0) return 0;
ts_tree *node = ts_stack_top_node(&parser->stack);
parser->stack.size--;
state_id parse_state = 0;
size_t position = 0;
size_t left_position = 0;
size_t right_position = node->offset + node->size;
size_t child_count;
ts_tree ** children = ts_tree_children(node, &child_count);
for (size_t i = 0; i < child_count; i++) {
ts_tree *child = children[i];
position += child->offset + child->size;
ts_tree_retain(child);
ts_stack_push(&parser->stack, parse_state, child);
parse_state = ts_parse_actions[parse_state][child->symbol].data.to_state;
while (right_position > edit->position || children) {
parser->stack.size--;
ts_tree *child;
for (size_t i = 0; i < child_count; i++) {
child = children[i];
right_position = left_position + child->offset + child->size;
ts_tree_retain(child);
state_id parse_state = ts_parse_actions[ts_stack_top_state(&parser->stack)][child->symbol].data.to_state;
ts_stack_push(&parser->stack, parse_state, child);
if (right_position >= edit->position) break;
left_position = right_position;
}
ts_tree_release(node);
node = child;
children = ts_tree_children(node, &child_count);
}
ts_tree_release(node);
return position;
return right_position;
}
static void ts_lr_parser_initialize(ts_lr_parser *parser, ts_input input, ts_input_edit *edit) {
if (!edit) ts_stack_shrink(&parser->stack, 0);
parser->lookahead = NULL;
parser->next_lookahead = NULL;

View file

@ -16,6 +16,7 @@ typedef struct ts_tree {
size_t ref_count;
size_t offset;
size_t size;
short int is_hidden;
union {
struct {
size_t count;
@ -73,6 +74,7 @@ void ts_document_edit(ts_document *doc, ts_input_edit edit);
const ts_tree * ts_document_tree(const ts_document *doc);
const char * ts_document_string(const ts_document *doc);
const char * ts_document_symbol_name(const ts_document *document, const ts_tree *tree);
#ifdef __cplusplus
}

View file

@ -52,11 +52,16 @@ describe("json", []() {
describe("tracking the positions of AST nodes", [&]() {
it("records the widths and offsets of nodes", [&]() {
ts_document_set_input_string(doc, " [12, 5]");
const ts_tree *tree = ts_document_tree(doc);
// TODO - make this better
const ts_tree *array = ts_tree_children(tree, NULL)[0];
const ts_tree *number1 = ts_tree_children(array, NULL)[0];
const ts_tree *number2 = ts_tree_children(array, NULL)[1];
const ts_tree *number1 = ts_tree_children(ts_tree_children(array, NULL)[1], NULL)[0];
const ts_tree *number2 = ts_tree_children(ts_tree_children(ts_tree_children(array, NULL)[2], NULL)[1], NULL)[0];
AssertThat(ts_document_symbol_name(doc, array), Equals("array"));
AssertThat(ts_document_symbol_name(doc, number1), Equals("number"));
AssertThat(ts_document_symbol_name(doc, number2), Equals("number"));
AssertThat(number1->offset, Equals<size_t>(0));
AssertThat(number1->size, Equals<size_t>(2));

View file

@ -5,7 +5,7 @@ extern "C" ts_parser ts_parser_json();
START_TEST
describe("parsing", [&]() {
describe("incremental parsing", [&]() {
ts_document *doc;
SpyReader *reader;
@ -66,7 +66,7 @@ describe("parsing", [&]() {
it("re-reads only the changed portion of the input", [&]() {
AssertThat(reader->strings_read.size(), Equals<size_t>(2));
AssertThat(reader->strings_read[1], Equals("\"key2\": 4 }"));
AssertThat(reader->strings_read[1], Equals(", \"key2\": 4 }"));
});
});
});

View file

@ -5,7 +5,7 @@ int main(int argc, char *argv[]) {
"",
"--no-color",
"--only="
"re-reads only"
""
};
return bandit::run(4, const_cast<char **>(args));
}

View file

@ -37,6 +37,10 @@ void ts_document_edit(ts_document *document, ts_input_edit edit) {
document->tree = ts_parser_parse(&document->parser, document->input, &edit);
}
/*
 * Look up the name associated with a tree node's symbol.
 *
 * Indexes the `symbol_names` table of the document's parser with the
 * symbol stored on `tree` and returns that entry unchanged. Neither
 * pointer is checked for NULL and the symbol is not range-checked.
 */
const char * ts_document_symbol_name(const ts_document *document, const ts_tree *tree) {
  const char *name = document->parser.symbol_names[tree->symbol];
  return name;
}
typedef struct {
const char *string;
size_t position;

View file

@ -49,26 +49,14 @@ void ts_stack_shrink(ts_stack *stack, size_t new_size) {
stack->size = new_size;
}
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *collapse_flags) {
size_t new_stack_size = stack->size - immediate_child_count;
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int child_count, const int *collapse_flags) {
size_t new_stack_size = stack->size - child_count;
int child_count = 0;
for (int i = 0; i < immediate_child_count; i++) {
ts_tree *child = stack->entries[new_stack_size + i].node;
if (collapse_flags[i]) {
size_t grandchild_count;
ts_tree_children(child, &grandchild_count);
child_count += grandchild_count;
} else {
child_count++;
}
}
int child_index = 0;
size_t size = 0, offset = 0;
ts_tree **children = malloc(child_count * sizeof(ts_tree *));
for (int i = 0; i < immediate_child_count; i++) {
for (int i = 0; i < child_count; i++) {
ts_tree *child = stack->entries[new_stack_size + i].node;
child->is_hidden = collapse_flags[i];
if (i == 0) {
offset = child->offset;
size = child->size;
@ -76,15 +64,7 @@ ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child
size += child->offset + child->size;
}
if (collapse_flags[i]) {
size_t grandchild_count;
ts_tree ** grandchildren = ts_tree_children(child, &grandchild_count);
memcpy(children + child_index, grandchildren, (grandchild_count * sizeof(ts_tree *)));
child_index += grandchild_count;
} else {
children[child_index] = child;
child_index++;
}
children[i] = child;
}
ts_tree *lookahead = ts_tree_make_node(symbol, child_count, children, size, offset);

View file

@ -3,11 +3,13 @@
#include <stdio.h>
static ts_tree * ts_tree_make(ts_symbol symbol, size_t size, size_t offset) {
ts_tree *result = (ts_tree *)malloc(sizeof(ts_tree));
result->ref_count = 1;
result->symbol = symbol;
result->size = size;
result->offset = offset;
ts_tree *result = malloc(sizeof(ts_tree));
*result = (ts_tree) {
.ref_count = 1,
.symbol = symbol,
.size = size,
.offset = offset,
};
return result;
}
@ -77,33 +79,47 @@ ts_tree ** ts_tree_children(const ts_tree *tree, size_t *count) {
return tree->data.children.contents;
}
static size_t tree_write_to_string(const ts_tree *tree, const char **symbol_names, char *string, size_t limit) {
static const char *NULL_TREE_STRING = "(NULL)";
static const char *ERROR_TREE_STRING = "(ERROR)";
static size_t tree_write_to_string(const ts_tree *tree, const char **symbol_names, char *string, size_t limit, int is_beginning) {
char *cursor = string;
size_t result = 0;
if (!tree)
return snprintf(string, limit, "%s", NULL_TREE_STRING);
if (tree->symbol == ts_builtin_sym_error)
return snprintf(string, limit, "%s", ERROR_TREE_STRING);
size_t result = snprintf(string, limit, "(%s", symbol_names[tree->symbol]);
char *cursor = string + result;
for (size_t i = 0; i < tree->data.children.count; i++) {
ts_tree *child = tree->data.children.contents[i];
result += snprintf(cursor, limit, " ");
result += tree_write_to_string(child, symbol_names, cursor + 1, limit);
cursor = (limit > 0) ? string + result : string;
if (!tree) {
return snprintf(cursor, limit, "(NULL)");
}
if (!tree->is_hidden) {
if (!is_beginning) {
result += snprintf(cursor, limit, " ");
if (limit > 0) cursor = string + result;
}
if (tree->symbol == ts_builtin_sym_error) {
result += snprintf(cursor, limit, "(ERROR)");
return result;
}
result += snprintf(cursor, limit, "(%s", symbol_names[tree->symbol]);
if (limit > 0) cursor = string + result;
}
return result + snprintf(cursor, limit, ")");
for (size_t i = 0; i < tree->data.children.count; i++) {
ts_tree *child = tree->data.children.contents[i];
result += tree_write_to_string(child, symbol_names, cursor, limit, 0);
if (limit > 0) cursor = string + result;
}
if (!tree->is_hidden) {
result += snprintf(cursor, limit, ")");
}
return result;
}
static char SCRATCH_STRING[1];
char * ts_tree_string(const ts_tree *tree, const char **symbol_names) {
size_t size = tree_write_to_string(tree, symbol_names, SCRATCH_STRING, 0) + 1;
static char SCRATCH_STRING[100];
size_t size = tree_write_to_string(tree, symbol_names, SCRATCH_STRING, 0, 1) + 1;
char *result = malloc(size * sizeof(char));
tree_write_to_string(tree, symbol_names, result, size);
tree_write_to_string(tree, symbol_names, result, size, 1);
return result;
}