Keep track of AST node sizes and positions
This commit is contained in:
parent
5045d5cccc
commit
72a0c0e09c
5 changed files with 51 additions and 8 deletions
|
|
@ -39,6 +39,8 @@ typedef struct {
|
|||
typedef struct {
|
||||
const char *input;
|
||||
size_t position;
|
||||
size_t token_end_position;
|
||||
size_t token_start_position;
|
||||
ts_tree *lookahead_node;
|
||||
ts_tree *prev_lookahead_node;
|
||||
ts_state lex_state;
|
||||
|
|
@ -52,6 +54,8 @@ static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *
|
|||
static ts_parser ts_parser_make(const char *input) {
|
||||
ts_parser result = {
|
||||
.input = input,
|
||||
.token_start_position = 0,
|
||||
.token_end_position = 0,
|
||||
.position = 0,
|
||||
.lookahead_node = NULL,
|
||||
.prev_lookahead_node = NULL,
|
||||
|
|
@ -106,9 +110,17 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
|
|||
}
|
||||
|
||||
int child_index = 0;
|
||||
size_t size = 0, offset = 0;
|
||||
ts_tree **children = malloc(child_count * sizeof(ts_tree *));
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
ts_tree *child = parser->stack[new_stack_size + i].node;
|
||||
if (i == 0) {
|
||||
offset = child->offset;
|
||||
size = child->size;
|
||||
} else {
|
||||
size += child->offset + child->size;
|
||||
}
|
||||
|
||||
if (collapse_flags[i]) {
|
||||
size_t grandchild_count = ts_tree_child_count(child);
|
||||
memcpy(children + child_index, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *)));
|
||||
|
|
@ -120,7 +132,7 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
|
|||
}
|
||||
|
||||
parser->prev_lookahead_node = parser->lookahead_node;
|
||||
parser->lookahead_node = ts_tree_make_node(symbol, child_count, children);
|
||||
parser->lookahead_node = ts_tree_make_node(symbol, child_count, children, size, offset);
|
||||
ts_parser_shrink_stack(parser, new_stack_size);
|
||||
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser));
|
||||
}
|
||||
|
|
@ -134,7 +146,10 @@ static void ts_parser_advance(ts_parser *parser, ts_state lex_state) {
|
|||
|
||||
/*
 * Create the lookahead leaf for the token that has just been lexed.
 *
 * The leaf records two measurements taken from the parser's positions:
 *   size   - current position minus the recorded start of the token
 *   offset - recorded start of the token minus the end of the previous token
 *
 * Afterwards token_end_position is moved to the current position, so the
 * next token's offset is measured from the end of this one.
 */
static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) {
  DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]);

  /*
   * The leaf is allocated exactly once, with its real extent. A placeholder
   * leaf created with (0, 0) and then overwritten would never be released.
   *
   * NOTE(review): both subtractions are unsigned; this assumes
   * token_start_position has been refreshed for the current token (it is
   * set by ts_parser_skip_whitespace) and is never behind
   * token_end_position -- confirm against the lexer's call order.
   */
  size_t size = parser->position - parser->token_start_position;
  size_t offset = parser->token_start_position - parser->token_end_position;
  parser->lookahead_node = ts_tree_make_leaf(symbol, size, offset);

  parser->token_end_position = parser->position;
}
|
||||
|
||||
static ts_tree * ts_parser_tree(ts_parser *parser) {
|
||||
|
|
@ -145,6 +160,7 @@ static ts_tree * ts_parser_tree(ts_parser *parser) {
|
|||
/*
 * Move the parser past any run of whitespace at the current position and
 * remember the resulting position as the start of the next token.
 */
static void ts_parser_skip_whitespace(ts_parser *parser) {
  /*
   * NOTE(review): isspace() requires a value representable as unsigned char
   * (or EOF); the return type of ts_parser_lookahead_char is not visible
   * here -- confirm it cannot yield a negative char.
   */
  for (; isspace(ts_parser_lookahead_char(parser)); parser->position++) {
    /* nothing to do: the loop header consumes one character per pass */
  }

  parser->token_start_position = parser->position;
}
|
||||
|
||||
static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) {
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ static const ts_symbol ts_builtin_sym_end = -2;
|
|||
typedef struct ts_tree {
|
||||
ts_symbol symbol;
|
||||
size_t ref_count;
|
||||
size_t offset;
|
||||
size_t size;
|
||||
union {
|
||||
struct {
|
||||
size_t count;
|
||||
|
|
@ -28,7 +30,7 @@ typedef struct ts_tree {
|
|||
} ts_tree;
|
||||
|
||||
ts_tree * ts_tree_make_leaf(ts_symbol symbol, size_t size, size_t offset);
|
||||
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children);
|
||||
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children, size_t size, size_t offset);
|
||||
ts_tree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const ts_symbol *expected_inputs, size_t size, size_t offset);
|
||||
void ts_tree_retain(ts_tree *tree);
|
||||
void ts_tree_release(ts_tree *tree);
|
||||
|
|
|
|||
|
|
@ -49,6 +49,29 @@ describe("json", []() {
|
|||
AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (value (number)) (value (number))))"));
|
||||
});
|
||||
|
||||
describe("tracking the positions of AST nodes", [&]() {
  it("records the widths and offsets of nodes", [&]() {
    // The input starts with TWO spaces: the outermost nodes are expected
    // at offset 2, and "[12, 5]" itself accounts for the size of 7.
    ts_document_set_input_string(doc, "  [12, 5]");

    const ts_tree *tree = ts_document_tree(doc);
    const ts_tree *array = ts_tree_children(tree)[0];
    const ts_tree *number1 = ts_tree_children(array)[0];
    const ts_tree *number2 = ts_tree_children(array)[1];

    // "12" follows "[" directly and is two characters wide.
    AssertThat(number1->offset, Equals(0));
    AssertThat(number1->size, Equals(2));

    // "5" is preceded by the single space after the comma.
    AssertThat(number2->offset, Equals(1));
    AssertThat(number2->size, Equals(1));

    AssertThat(array->offset, Equals(2));
    AssertThat(array->size, Equals(7));

    // The root has the same extent as the array it wraps.
    AssertThat(tree->offset, Equals(2));
    AssertThat(tree->size, Equals(7));
  });
});
|
||||
|
||||
describe("errors", [&]() {
|
||||
it("reports errors in the top-level node", [&]() {
|
||||
ts_document_set_input_string(doc, "[");
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ describe("trees", []() {
|
|||
|
||||
// Shared fixture: a `cat` leaf wrapped in a `dog` parent, both created with
// zero size and zero offset. The parent is built exactly once, using the
// five-argument ts_tree_make_node form.
before_each([&]() {
  tree1 = ts_tree_make_leaf(cat, 0, 0);
  parent1 = ts_tree_make_node(dog, 1, tree_array({ tree1 }), 0, 0);
});
|
||||
|
||||
after_each([&]() {
|
||||
|
|
@ -30,7 +30,7 @@ describe("trees", []() {
|
|||
ts_tree *tree2 = ts_tree_make_leaf(cat, 0, 0);
|
||||
AssertThat(ts_tree_equals(tree1, tree2), Equals(1));
|
||||
|
||||
ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }));
|
||||
ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }), 0, 0);
|
||||
AssertThat(ts_tree_equals(parent1, parent2), Equals(1));
|
||||
|
||||
ts_tree_release(tree2);
|
||||
|
|
@ -45,7 +45,7 @@ describe("trees", []() {
|
|||
|
||||
it("returns false for trees with different children", [&]() {
|
||||
ts_tree *tree2 = ts_tree_make_leaf(pig, 0, 0);
|
||||
ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }));
|
||||
ts_tree *parent2 = ts_tree_make_node(dog, 1, tree_array({ tree2 }), 0, 0);
|
||||
AssertThat(ts_tree_equals(parent2, parent1), Equals(0));
|
||||
AssertThat(ts_tree_equals(parent1, parent2), Equals(0));
|
||||
ts_tree_release(tree2);
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ static ts_tree * ts_tree_make(ts_symbol symbol, size_t size, size_t offset) {
|
|||
ts_tree *result = (ts_tree *)malloc(sizeof(ts_tree));
|
||||
result->ref_count = 1;
|
||||
result->symbol = symbol;
|
||||
result->size = size;
|
||||
result->offset = offset;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -18,10 +20,10 @@ ts_tree * ts_tree_make_leaf(ts_symbol symbol, size_t size, size_t offset) {
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
 * Create an interior tree node.
 *
 * symbol      - symbol stored on the new node
 * child_count - number of entries in `children`
 * children    - array of child pointers; the pointer itself is stored on
 *               the node (the array is not copied)
 * size        - size recorded on the node
 * offset      - offset recorded on the node
 *
 * ts_tree_retain is called once on every child before the node is built.
 * NOTE(review): who eventually frees the `children` array is not visible
 * from this function -- confirm against ts_tree_release.
 *
 * Returns the node produced by ts_tree_make.
 */
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children, size_t size, size_t offset) {
  /* size_t index: child_count is unsigned, avoid a signed/unsigned compare. */
  for (size_t i = 0; i < child_count; i++)
    ts_tree_retain(children[i]);

  /* Forward the caller's extent instead of hard-coding (0, 0). */
  ts_tree *result = ts_tree_make(symbol, size, offset);
  result->data.children = { .count = child_count, .contents = children };
  return result;
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue