Keep track of AST node sizes and positions

This commit is contained in:
Max Brunsfeld 2014-03-01 15:44:25 -08:00
parent 5045d5cccc
commit 72a0c0e09c
5 changed files with 51 additions and 8 deletions

View file

@ -39,6 +39,8 @@ typedef struct {
typedef struct {
const char *input;
size_t position;
size_t token_end_position;
size_t token_start_position;
ts_tree *lookahead_node;
ts_tree *prev_lookahead_node;
ts_state lex_state;
@ -52,6 +54,8 @@ static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *
static ts_parser ts_parser_make(const char *input) {
ts_parser result = {
.input = input,
.token_start_position = 0,
.token_end_position = 0,
.position = 0,
.lookahead_node = NULL,
.prev_lookahead_node = NULL,
@ -106,9 +110,17 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
}
int child_index = 0;
size_t size = 0, offset = 0;
ts_tree **children = malloc(child_count * sizeof(ts_tree *));
for (int i = 0; i < immediate_child_count; i++) {
ts_tree *child = parser->stack[new_stack_size + i].node;
if (i == 0) {
offset = child->offset;
size = child->size;
} else {
size += child->offset + child->size;
}
if (collapse_flags[i]) {
size_t grandchild_count = ts_tree_child_count(child);
memcpy(children + child_index, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *)));
@ -120,7 +132,7 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
}
parser->prev_lookahead_node = parser->lookahead_node;
parser->lookahead_node = ts_tree_make_node(symbol, child_count, children);
parser->lookahead_node = ts_tree_make_node(symbol, child_count, children, size, offset);
ts_parser_shrink_stack(parser, new_stack_size);
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser));
}
@ -134,7 +146,10 @@ static void ts_parser_advance(ts_parser *parser, ts_state lex_state) {
/*
 * Creates a leaf node for the token that was just lexed and stores it as the
 * parser's lookahead node.
 *
 * parser: parser whose position fields describe the token just consumed.
 * symbol: symbol of the recognized token; also used to index ts_symbol_names
 *         for the debug message.
 *
 * The leaf records two measurements derived from the parser's positions:
 *   size   - distance from token_start_position to the current position.
 *   offset - distance from token_end_position (recorded at the end of this
 *            function for the previously emitted token) to
 *            token_start_position.
 */
static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) {
DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]);
/* NOTE(review): this listing appears to be a diff view with its +/- markers
 * stripped; the zero-size call on the next line looks like the pre-change
 * version of the assignment made three lines further down. Confirm against
 * the repository before treating both as live code. */
parser->lookahead_node = ts_tree_make_leaf(symbol, 0, 0);
size_t size = parser->position - parser->token_start_position;
size_t offset = parser->token_start_position - parser->token_end_position;
parser->lookahead_node = ts_tree_make_leaf(symbol, size, offset);
/* Remember where this token ended so the next token's offset can be computed. */
parser->token_end_position = parser->position;
}
static ts_tree * ts_parser_tree(ts_parser *parser) {
@ -145,6 +160,7 @@ static ts_tree * ts_parser_tree(ts_parser *parser) {
/*
 * Advances the parser past any run of whitespace at the current position and
 * then records the resulting position as the start of the next token.
 *
 * parser: parser whose position is advanced; token_start_position is
 *         overwritten with the position reached.
 */
static void ts_parser_skip_whitespace(ts_parser *parser) {
  /* The <ctype.h> classifiers are only defined for values representable as
   * unsigned char (or EOF); the cast keeps a negative char value from being
   * passed through. */
  while (isspace((unsigned char)ts_parser_lookahead_char(parser))) {
    parser->position++;
  }
  parser->token_start_position = parser->position;
}
static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) {

View file

@ -14,6 +14,8 @@ static const ts_symbol ts_builtin_sym_end = -2;
typedef struct ts_tree {
ts_symbol symbol;
size_t ref_count;
size_t offset;
size_t size;
union {
struct {
size_t count;
@ -28,7 +30,7 @@ typedef struct ts_tree {
} ts_tree;
/* Creates a leaf tree for `symbol` with the given size and offset. */
ts_tree * ts_tree_make_leaf(ts_symbol symbol, size_t size, size_t offset);
/* NOTE(review): two ts_tree_make_node prototypes follow. This listing appears
 * to be a diff view with +/- markers stripped, so the three-argument form is
 * presumably the pre-change declaration and the five-argument form its
 * replacement; confirm against the repository. */
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children);
/* Creates an interior tree for `symbol` holding `child_count` children, with
 * the given size and offset. */
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children, size_t size, size_t offset);
/* Creates an error tree carrying the offending lookahead character and the
 * list of expected input symbols, with the given size and offset. */
ts_tree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const ts_symbol *expected_inputs, size_t size, size_t offset);
/* Reference-count management for trees (ts_tree has a ref_count field). */
void ts_tree_retain(ts_tree *tree);
void ts_tree_release(ts_tree *tree);

View file

@ -49,6 +49,29 @@ describe("json", []() {
AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (value (number)) (value (number))))"));
});
describe("tracking the positions of AST nodes", [&]() {
  it("records the widths and offsets of nodes", [&]() {
    // The input starts with TWO spaces. A node's offset is the gap between
    // the end of the preceding token and the node's own start, so the
    // outermost nodes are expected to report an offset of 2 below.
    ts_document_set_input_string(doc, "  [12, 5]");
    const ts_tree *tree = ts_document_tree(doc);
    const ts_tree *array = ts_tree_children(tree)[0];
    const ts_tree *number1 = ts_tree_children(array)[0];
    const ts_tree *number2 = ts_tree_children(array)[1];

    // "12" directly follows "[" (no gap) and is two characters wide.
    AssertThat(number1->offset, Equals(0));
    AssertThat(number1->size, Equals(2));

    // "5" follows ", " - one space after the comma - and is one character wide.
    AssertThat(number2->offset, Equals(1));
    AssertThat(number2->size, Equals(1));

    // "[12, 5]" spans seven characters and begins after the two leading spaces.
    AssertThat(array->offset, Equals(2));
    AssertThat(array->size, Equals(7));

    // The root covers the same span as the array it wraps.
    AssertThat(tree->offset, Equals(2));
    AssertThat(tree->size, Equals(7));
  });
});
describe("errors", [&]() {
it("reports errors in the top-level node", [&]() {
ts_document_set_input_string(doc, "[");

View file

@ -17,7 +17,7 @@ describe("trees", []() {
// Builds the fixtures shared by the specs: a leaf (`cat`) and a parent (`dog`)
// that has that leaf as its only child.
before_each([&]() {
tree1 = ts_tree_make_leaf(cat, 0, 0);
// NOTE(review): this listing appears to be a diff view with +/- markers
// stripped; the three-argument call is presumably the pre-change line and the
// five-argument call (explicit size and offset of 0) its replacement.
parent1 = ts_tree_make_node(dog, 1, tree_array({ tree1 }));
parent1 = ts_tree_make_node(dog, 1, tree_array({ tree1 }), 0, 0);
});
after_each([&]() {
@ -30,7 +30,7 @@ describe("trees", []() {
ts_tree *tree2 = ts_tree_make_leaf(cat, 0, 0);
AssertThat(ts_tree_equals(tree1, tree2), Equals(1));
ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }));
ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }), 0, 0);
AssertThat(ts_tree_equals(parent1, parent2), Equals(1));
ts_tree_release(tree2);
@ -45,7 +45,7 @@ describe("trees", []() {
it("returns false for trees with different children", [&]() {
ts_tree *tree2 = ts_tree_make_leaf(pig, 0, 0);
ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }));
ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }), 0, 0);
AssertThat(ts_tree_equals(parent2, parent1), Equals(0));
AssertThat(ts_tree_equals(parent1, parent2), Equals(0));
ts_tree_release(tree2);

View file

@ -9,6 +9,8 @@ static ts_tree * ts_tree_make(ts_symbol symbol, size_t size, size_t offset) {
ts_tree *result = (ts_tree *)malloc(sizeof(ts_tree));
result->ref_count = 1;
result->symbol = symbol;
result->size = size;
result->offset = offset;
return result;
}
@ -18,10 +20,10 @@ ts_tree * ts_tree_make_leaf(ts_symbol symbol, size_t size, size_t offset) {
return result;
}
/*
 * Creates an interior tree node for `symbol` that refers to `children`.
 *
 * symbol:      symbol assigned to the new node.
 * child_count: number of entries in `children`.
 * children:    array of child trees; each child is retained, and the array
 *              pointer itself is stored in the node rather than copied.
 * size/offset: passed through to ts_tree_make for the new node.
 *
 * Returns the node produced by ts_tree_make with its children data filled in.
 *
 * NOTE(review): this listing appears to be a diff view with +/- markers
 * stripped. The three-argument signature and the ts_tree_make(symbol, 0, 0)
 * call are presumably the pre-change lines, each followed by its replacement;
 * confirm against the repository.
 */
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children) {
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children, size_t size, size_t offset) {
/* NOTE(review): `i` is an int compared against a size_t count. */
for (int i = 0; i < child_count; i++)
ts_tree_retain(children[i]);
ts_tree *result = ts_tree_make(symbol, 0, 0);
ts_tree *result = ts_tree_make(symbol, size, offset);
/* NOTE(review): assigning a braced list to an existing struct member is not
 * valid C without a compound literal - verify which language mode this file
 * is built in. */
result->data.children = { .count = child_count, .contents = children };
return result;
}