From 72a0c0e09c7fb814227ac5b2408cc219dd9d32eb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 1 Mar 2014 15:44:25 -0800 Subject: [PATCH] Keep track of AST node sizes and positions --- include/tree_sitter/parser.h | 20 ++++++++++++++++++-- include/tree_sitter/runtime.h | 4 +++- spec/runtime/json_spec.cpp | 23 +++++++++++++++++++++++ spec/runtime/tree_spec.cpp | 6 +++--- src/runtime/tree.cpp | 6 ++++-- 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 995ba7bf..4af24e7c 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -39,6 +39,8 @@ typedef struct { typedef struct { const char *input; size_t position; + size_t token_end_position; + size_t token_start_position; ts_tree *lookahead_node; ts_tree *prev_lookahead_node; ts_state lex_state; @@ -52,6 +54,8 @@ static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t * static ts_parser ts_parser_make(const char *input) { ts_parser result = { .input = input, + .token_start_position = 0, + .token_end_position = 0, .position = 0, .lookahead_node = NULL, .prev_lookahead_node = NULL, @@ -106,9 +110,17 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_ } int child_index = 0; + size_t size = 0, offset = 0; ts_tree **children = malloc(child_count * sizeof(ts_tree *)); for (int i = 0; i < immediate_child_count; i++) { ts_tree *child = parser->stack[new_stack_size + i].node; + if (i == 0) { + offset = child->offset; + size = child->size; + } else { + size += child->offset + child->size; + } + if (collapse_flags[i]) { size_t grandchild_count = ts_tree_child_count(child); memcpy(children + child_index, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *))); @@ -120,7 +132,7 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_ } parser->prev_lookahead_node = parser->lookahead_node; - parser->lookahead_node = ts_tree_make_node(symbol, child_count, children); + parser->lookahead_node = ts_tree_make_node(symbol, child_count, children, size, offset); ts_parser_shrink_stack(parser, new_stack_size); DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser)); } @@ -134,7 +146,10 @@ static void ts_parser_advance(ts_parser *parser, ts_state lex_state) { static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) { DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]); - parser->lookahead_node = ts_tree_make_leaf(symbol, 0, 0); + size_t size = parser->position - parser->token_start_position; + size_t offset = parser->token_start_position - parser->token_end_position; + parser->lookahead_node = ts_tree_make_leaf(symbol, size, offset); + parser->token_end_position = parser->position; } static ts_tree * ts_parser_tree(ts_parser *parser) { @@ -145,6 +160,7 @@ static ts_tree * ts_parser_tree(ts_parser *parser) { static void ts_parser_skip_whitespace(ts_parser *parser) { while (isspace(ts_parser_lookahead_char(parser))) parser->position++; + parser->token_start_position = parser->position; } static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) { diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 6e3923f9..79a560bf 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -14,6 +14,8 @@ static const ts_symbol ts_builtin_sym_end = -2; typedef struct ts_tree { ts_symbol symbol; size_t ref_count; + size_t offset; + size_t size; union { struct { size_t count; @@ -28,7 +30,7 @@ typedef struct ts_tree { } ts_tree; ts_tree * ts_tree_make_leaf(ts_symbol symbol, size_t size, size_t offset); -ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children); +ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children, size_t size, size_t offset); ts_tree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const ts_symbol *expected_inputs, size_t size, size_t offset); void ts_tree_retain(ts_tree *tree); void ts_tree_release(ts_tree *tree); diff --git a/spec/runtime/json_spec.cpp b/spec/runtime/json_spec.cpp index 93858134..1ed90279 100644 --- a/spec/runtime/json_spec.cpp +++ b/spec/runtime/json_spec.cpp @@ -49,6 +49,29 @@ describe("json", []() { AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (value (number)) (value (number))))")); }); + describe("tracking the positions of AST nodes", [&]() { + it("records the widths and offsets of nodes", [&]() { + ts_document_set_input_string(doc, " [12, 5]"); + + const ts_tree *tree = ts_document_tree(doc); + const ts_tree *array = ts_tree_children(tree)[0]; + const ts_tree *number1 = ts_tree_children(array)[0]; + const ts_tree *number2 = ts_tree_children(array)[1]; + + AssertThat(number1->offset, Equals(0)); + AssertThat(number1->size, Equals(2)); + + AssertThat(number2->offset, Equals(1)); + AssertThat(number2->size, Equals(1)); + + AssertThat(array->offset, Equals(2)); + AssertThat(array->size, Equals(7)); + + AssertThat(tree->offset, Equals(2)); + AssertThat(tree->size, Equals(7)); + }); + }); + describe("errors", [&]() { it("reports errors in the top-level node", [&]() { ts_document_set_input_string(doc, "["); diff --git a/spec/runtime/tree_spec.cpp b/spec/runtime/tree_spec.cpp index 9dd3a455..fa3fc24b 100644 --- a/spec/runtime/tree_spec.cpp +++ b/spec/runtime/tree_spec.cpp @@ -17,7 +17,7 @@ describe("trees", []() { before_each([&]() { tree1 = ts_tree_make_leaf(cat, 0, 0); - parent1 = ts_tree_make_node(dog, 1, tree_array({ tree1 })); + parent1 = ts_tree_make_node(dog, 1, tree_array({ tree1 }), 0, 0); }); after_each([&]() { @@ -30,7 +30,7 @@ describe("trees", []() { ts_tree *tree2 = ts_tree_make_leaf(cat, 0, 0); AssertThat(ts_tree_equals(tree1, tree2), Equals(1)); - ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 })); + ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }), 0, 0); AssertThat(ts_tree_equals(parent1, parent2), Equals(1)); ts_tree_release(tree2); @@ -45,7 +45,7 @@ describe("trees", []() { it("returns false for trees with different children", [&]() { ts_tree *tree2 = ts_tree_make_leaf(pig, 0, 0); - ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 })); + ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }), 0, 0); AssertThat(ts_tree_equals(parent2, parent1), Equals(0)); AssertThat(ts_tree_equals(parent1, parent2), Equals(0)); ts_tree_release(tree2); diff --git a/src/runtime/tree.cpp b/src/runtime/tree.cpp index dd4daf4c..44143283 100644 --- a/src/runtime/tree.cpp +++ b/src/runtime/tree.cpp @@ -9,6 +9,8 @@ static ts_tree * ts_tree_make(ts_symbol symbol, size_t size, size_t offset) { ts_tree *result = (ts_tree *)malloc(sizeof(ts_tree)); result->ref_count = 1; result->symbol = symbol; + result->size = size; + result->offset = offset; return result; } @@ -18,10 +20,10 @@ ts_tree * ts_tree_make_leaf(ts_symbol symbol, size_t size, size_t offset) { return result; } -ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children) { +ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children, size_t size, size_t offset) { for (int i = 0; i < child_count; i++) ts_tree_retain(children[i]); - ts_tree *result = ts_tree_make(symbol, 0, 0); + ts_tree *result = ts_tree_make(symbol, size, offset); result->data.children = { .count = child_count, .contents = children }; return result; }