diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 0e772da8..402d3980 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -16,12 +16,14 @@ typedef struct TSTree TSTree; typedef struct TSLexer { TSInput input; int debug; + const char *chunk; size_t chunk_start; size_t chunk_size; - size_t position_in_chunk; - size_t token_end_position; - size_t token_start_position; + + TSLength current_position; + TSLength token_end_position; + TSLength token_start_position; size_t lookahead_size; int32_t lookahead; @@ -30,16 +32,12 @@ typedef struct TSLexer { int (*advance_fn)(struct TSLexer *); } TSLexer; -static inline size_t ts_lexer_position(const TSLexer *lexer) { - return lexer->chunk_start + lexer->position_in_chunk; -} - static inline int32_t ts_lexer_lookahead_char(const TSLexer *lexer) { return lexer->lookahead; } static inline void ts_lexer_start_token(TSLexer *lexer) { - lexer->token_start_position = ts_lexer_position(lexer); + lexer->token_start_position = lexer->current_position; } static inline int ts_lexer_advance(TSLexer *lexer) { diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 502ffd4b..283707fe 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -7,10 +7,15 @@ extern "C" { #include +typedef struct { + size_t bytes; + size_t chars; +} TSLength; + typedef struct { void *data; const char *(*read_fn)(void *data, size_t *bytes_read); - int (*seek_fn)(void *data, size_t position); + int (*seek_fn)(void *data, TSLength position); void (*release_fn)(void *data); } TSInput; @@ -24,8 +29,8 @@ typedef unsigned short TSSymbol; typedef struct TSLanguage TSLanguage; typedef struct TSNode TSNode; -size_t ts_node_pos(const TSNode *); -size_t ts_node_size(const TSNode *); +TSLength ts_node_pos(const TSNode *); +TSLength ts_node_size(const TSNode *); TSSymbol ts_node_sym(const TSNode *); TSNode *ts_node_child(TSNode *, size_t); size_t ts_node_child_count(const TSNode *); diff --git a/spec/runtime/node_spec.cc b/spec/runtime/node_spec.cc index c0fa30d2..2506b479 100644 --- a/spec/runtime/node_spec.cc +++ b/spec/runtime/node_spec.cc @@ -11,7 +11,6 @@ describe("Node", []() { before_each([&]() { document = ts_document_make(); ts_document_set_language(document, ts_language_json()); - ts_document_set_input_string(document, " [123, false, {\"x\": null}]"); root = ts_document_root_node(document); AssertThat(ts_node_string(root), Equals( @@ -47,17 +46,17 @@ describe("Node", []() { AssertThat(ts_node_name(child2), Equals("false")); AssertThat(ts_node_name(child3), Equals("object")); - AssertThat(ts_node_pos(parent), Equals(2)); - AssertThat(ts_node_size(parent), Equals(25)); + AssertThat(ts_node_pos(parent).bytes, Equals(2)); + AssertThat(ts_node_size(parent).bytes, Equals(25)); - AssertThat(ts_node_pos(child1), Equals(3)); - AssertThat(ts_node_size(child1), Equals(3)); + AssertThat(ts_node_pos(child1).bytes, Equals(3)); + AssertThat(ts_node_size(child1).bytes, Equals(3)); - AssertThat(ts_node_pos(child2), Equals(8)); - AssertThat(ts_node_size(child2), Equals(5)); + AssertThat(ts_node_pos(child2).bytes, Equals(8)); + AssertThat(ts_node_size(child2).bytes, Equals(5)); - AssertThat(ts_node_pos(child3), Equals(15)); - AssertThat(ts_node_size(child3), Equals(11)); + AssertThat(ts_node_pos(child3).bytes, Equals(15)); + AssertThat(ts_node_size(child3).bytes, Equals(11)); ts_node_release(parent); ts_node_release(child1); @@ -110,8 +109,8 @@ describe("Node", []() { it("returns that leaf node", [&]() { TSNode *leaf = ts_node_find_for_range(root, 16, 18); AssertThat(ts_node_name(leaf), Equals("string")); - AssertThat(ts_node_size(leaf), Equals(3)); - AssertThat(ts_node_pos(leaf), Equals(16)); + AssertThat(ts_node_size(leaf).bytes, Equals(3)); + AssertThat(ts_node_pos(leaf).bytes, Equals(16)); ts_node_release(leaf); }); }); @@ -120,14 +119,14 @@ describe("Node", []() { it("returns that leaf node", [&]() { TSNode *leaf = ts_node_find_for_range(root, 16, 17); AssertThat(ts_node_name(leaf), Equals("string")); - AssertThat(ts_node_size(leaf), Equals(3)); - AssertThat(ts_node_pos(leaf), Equals(16)); + AssertThat(ts_node_size(leaf).bytes, Equals(3)); + AssertThat(ts_node_pos(leaf).bytes, Equals(16)); ts_node_release(leaf); leaf = ts_node_find_for_range(root, 17, 18); AssertThat(ts_node_name(leaf), Equals("string")); - AssertThat(ts_node_size(leaf), Equals(3)); - AssertThat(ts_node_pos(leaf), Equals(16)); + AssertThat(ts_node_size(leaf).bytes, Equals(3)); + AssertThat(ts_node_pos(leaf).bytes, Equals(16)); ts_node_release(leaf); }); }); @@ -136,8 +135,8 @@ describe("Node", []() { it("returns the smallest node that does span the range", [&]() { TSNode *node = ts_node_find_for_range(root, 16, 19); AssertThat(ts_node_name(node), Equals("object")); - AssertThat(ts_node_size(node), Equals(11)); - AssertThat(ts_node_pos(node), Equals(15)); + AssertThat(ts_node_size(node).bytes, Equals(11)); + AssertThat(ts_node_pos(node).bytes, Equals(15)); ts_node_release(node); }); }); @@ -147,8 +146,8 @@ describe("Node", []() { it("finds the smallest node that spans the given position", [&]() { TSNode *node = ts_node_find_for_pos(root, 10); AssertThat(ts_node_name(node), Equals("false")); - AssertThat(ts_node_pos(node), Equals(8)); - AssertThat(ts_node_size(node), Equals(5)); + AssertThat(ts_node_pos(node).bytes, Equals(8)); + AssertThat(ts_node_size(node).bytes, Equals(5)); ts_node_release(node); }); }); diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 05183b6a..9fbada6d 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -49,7 +49,7 @@ describe("Parser", [&]() { describe("when the error occurs at the beginning of a token", [&]() { it("computes the error node's size and position correctly", [&]() { - set_text(" [123, @@@@@, true]"); + set_text(" [123, @@@@@, true]"); AssertThat(ts_node_string(root), Equals( "(DOCUMENT (array (number) (ERROR '@') (true)))")); @@ -59,11 +59,11 @@ describe("Parser", [&]() { TSNode *last = ts_node_child(array, 2); AssertThat(ts_node_name(error), Equals("error")); - AssertThat(ts_node_pos(error), Equals(string(" [123, ").length())) - AssertThat(ts_node_size(error), Equals(string("@@@@@").length())) + AssertThat(ts_node_pos(error).bytes, Equals(strlen(" [123, "))) + AssertThat(ts_node_size(error).bytes, Equals(strlen("@@@@@"))) AssertThat(ts_node_name(last), Equals("true")); - AssertThat(ts_node_pos(last), Equals(string(" [123, @@@@@, ").length())) + AssertThat(ts_node_pos(last).bytes, Equals(strlen(" [123, @@@@@, "))) ts_node_release(last); ts_node_release(error); @@ -83,11 +83,11 @@ describe("Parser", [&]() { TSNode *last = ts_node_child(array, 2); AssertThat(ts_node_name(error), Equals("error")); - AssertThat(ts_node_pos(error), Equals(string(" [123, ").length())) - AssertThat(ts_node_size(error), Equals(string("faaaaalse").length())) + AssertThat(ts_node_pos(error).bytes, Equals(strlen(" [123, "))) + AssertThat(ts_node_size(error).bytes, Equals(strlen("faaaaalse"))) AssertThat(ts_node_name(last), Equals("true")); - AssertThat(ts_node_pos(last), Equals(string(" [123, faaaaalse, ").length())) + AssertThat(ts_node_pos(last).bytes, Equals(strlen(" [123, faaaaalse, "))); ts_node_release(last); ts_node_release(error); @@ -107,11 +107,11 @@ describe("Parser", [&]() { TSNode *last = ts_node_child(array, 2); AssertThat(ts_node_name(error), Equals("error")); - AssertThat(ts_node_pos(error), Equals(string(" [123, ").length())) - AssertThat(ts_node_size(error), Equals(string("true false").length())) + AssertThat(ts_node_pos(error).bytes, Equals(strlen(" [123, "))); + AssertThat(ts_node_size(error).bytes, Equals(strlen("true false"))); AssertThat(ts_node_name(last), Equals("true")); - AssertThat(ts_node_pos(last), Equals(string(" [123, true false, ").length())) + AssertThat(ts_node_pos(last).bytes, Equals(strlen(" [123, true false, "))); ts_node_release(last); ts_node_release(error); @@ -131,11 +131,11 @@ describe("Parser", [&]() { TSNode *last = ts_node_child(array, 2); AssertThat(ts_node_name(error), Equals("error")); - AssertThat(ts_node_pos(error), Equals(string(" [123, ").length())) - AssertThat(ts_node_size(error), Equals(0)) + AssertThat(ts_node_pos(error).bytes, Equals(strlen(" [123, "))); + AssertThat(ts_node_size(error).bytes, Equals(0)) AssertThat(ts_node_name(last), Equals("true")); - AssertThat(ts_node_pos(last), Equals(string(" [123, , ").length())) + AssertThat(ts_node_pos(last).bytes, Equals(strlen(" [123, , "))); ts_node_release(last); ts_node_release(error); @@ -236,7 +236,7 @@ describe("Parser", [&]() { TSNode *node = ts_node_find_for_pos(root, 1); AssertThat(ts_node_name(node), Equals("variable")); - AssertThat(ts_node_size(node), Equals(strlen("abXYZc"))); + AssertThat(ts_node_size(node).bytes, Equals(strlen("abXYZc"))); ts_node_release(node); }); }); @@ -257,7 +257,7 @@ describe("Parser", [&]() { TSNode *node = ts_node_find_for_pos(root, 1); AssertThat(ts_node_name(node), Equals("variable")); - AssertThat(ts_node_size(node), Equals(strlen("abcXYZ"))); + AssertThat(ts_node_size(node).bytes, Equals(strlen("abcXYZ"))); ts_node_release(node); }); }); @@ -338,7 +338,7 @@ describe("Parser", [&]() { TSNode *expression = ts_node_child(root, 0); TSNode *comment = ts_node_child(expression, 1); - AssertThat(ts_node_size(comment), Equals(strlen("# this is a comment"))); + AssertThat(ts_node_size(comment).bytes, Equals(strlen("# this is a comment"))); ts_node_release(expression); ts_node_release(comment); diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index d08e55ce..255d2203 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -27,7 +27,12 @@ describe("stacks", [&]() { TSTree *node1; before_each([&]() { - node1 = ts_tree_make_leaf(sym1, 5, 1, 0); + node1 = ts_tree_make_leaf( + sym1, + (TSLength) { 0, 0 }, + (TSLength) { 1, 1 }, + 0); + ts_stack_push(&stack, 5, node1); }); diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 1956e042..ca38cedf 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -15,9 +15,23 @@ describe("Tree", []() { TSTree *tree1, *tree2, *parent1; before_each([&]() { - tree1 = ts_tree_make_leaf(cat, 5, 2, 0); - tree2 = ts_tree_make_leaf(cat, 3, 1, 0); - parent1 = ts_tree_make_node(dog, 2, tree_array({ tree1, tree2, }), 0); + tree1 = ts_tree_make_leaf( + cat, + (TSLength) { .bytes = 5, .chars = 4 }, + (TSLength) { .bytes = 2, .chars = 1 }, + 0); + + tree2 = ts_tree_make_leaf( + cat, + (TSLength) { .bytes = 3, .chars = 3 }, + (TSLength) { .bytes = 1, .chars = 1 }, + 0); + + parent1 = ts_tree_make_node( + dog, + 2, + tree_array({ tree1, tree2, }), + 0); }); after_each([&]() { @@ -28,11 +42,15 @@ describe("Tree", []() { describe("building a parent node", [&]() { it("computes its size based on its child nodes", [&]() { - AssertThat(parent1->size, Equals(9)); + AssertThat(parent1->size.bytes, Equals( + tree1->size.bytes + + tree2->padding.bytes + tree2->size.bytes)); + AssertThat(parent1->size.chars, Equals( + tree1->size.chars + + tree2->padding.chars + tree2->size.chars)); }); it("computes its padding based on its first child", [&]() { - AssertThat(parent1->padding, Equals(2)); + AssertThat(parent1->padding.bytes, Equals(tree1->padding.bytes)); + AssertThat(parent1->padding.chars, Equals(tree1->padding.chars)); }); it("computes the offset of each child node", [&]() { @@ -40,11 +58,16 @@ describe("Tree", []() { TSTreeChild *children = ts_tree_visible_children(parent1, &count); AssertThat(count, Equals(2)); + AssertThat(children[0].tree, Equals(tree1)); - AssertThat(children[0].offset, Equals(0)); + AssertThat(children[0].offset.bytes, Equals(0)); + AssertThat(children[0].offset.chars, Equals(0)); + AssertThat(children[1].tree, Equals(tree2)); - AssertThat(children[1].offset, Equals( - tree1->size + tree2->padding)); + AssertThat(children[1].offset.bytes, Equals( + tree1->size.bytes + tree2->padding.bytes)); + AssertThat(children[1].offset.chars, Equals( + tree1->size.chars + tree2->padding.chars)); }); describe("when one of the child nodes is hidden", [&]() { @@ -52,7 +75,11 @@ describe("Tree", []() { before_each([&]() { parent1->options = TSTreeOptionsHidden; - tree3 = ts_tree_make_leaf(cat, 8, 5, 0); + tree3 = ts_tree_make_leaf( + cat, + (TSLength) { .bytes = 8, .chars = 6 }, + (TSLength) { .bytes = 5, .chars = 3 }, + 0); grandparent = ts_tree_make_node(pig, 2, tree_array({ parent1, tree3, @@ -70,27 +97,46 @@ describe("Tree", []() { AssertThat(count, Equals(3)); AssertThat(children[0].tree, Equals(tree1)); - AssertThat(children[0].offset, Equals(0)); + AssertThat(children[0].offset.bytes, Equals(0)); + AssertThat(children[0].offset.chars, Equals(0)); + AssertThat(children[1].tree, Equals(tree2)); - AssertThat(children[1].offset, Equals( - tree1->size + tree2->padding)); + AssertThat(children[1].offset.bytes, Equals( + tree1->size.bytes + tree2->padding.bytes)); + AssertThat(children[1].offset.chars, Equals( + tree1->size.chars + tree2->padding.chars)); + AssertThat(children[2].tree, Equals(tree3)); - AssertThat(children[2].offset, Equals( - tree1->size + tree2->padding + tree2->size + tree3->padding)); + AssertThat(children[2].offset.bytes, Equals( + tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes + tree3->padding.bytes)); + AssertThat(children[2].offset.chars, Equals( + tree1->size.chars + tree2->padding.chars + tree2->size.chars + tree3->padding.chars)); }); }); }); describe("equality", [&]() { it("returns true for identical trees", [&]() { - TSTree *tree1_copy = ts_tree_make_leaf(cat, 5, 2, 0); + TSTree *tree1_copy = ts_tree_make_leaf( + cat, + (TSLength) { .bytes = 5, .chars = 4 }, + (TSLength) { .bytes = 2, .chars = 1 }, + 0); + AssertThat(ts_tree_equals(tree1, tree1_copy), Equals(1)); - TSTree *tree2_copy = ts_tree_make_leaf(cat, 3, 1, 0); + + TSTree *tree2_copy = ts_tree_make_leaf( + cat, + (TSLength) { .bytes = 3, .chars = 3 }, + (TSLength) { .bytes = 1, .chars = 1 }, + 0); + AssertThat(ts_tree_equals(tree2, tree2_copy), Equals(1)); TSTree *parent2 = ts_tree_make_node(dog, 2, tree_array({ tree1_copy, tree2_copy, }), 0); + AssertThat(ts_tree_equals(parent1, parent2), Equals(1)); ts_tree_release(tree1_copy); @@ -99,13 +145,23 @@ describe("Tree", []() { }); it("returns false for trees with different symbols", [&]() { - TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0, 0); + TSTree *different_tree = ts_tree_make_leaf( + pig, + (TSLength) { .bytes = 5, .chars = 4 }, + (TSLength) { .bytes = 2, .chars = 1 }, + 0); + AssertThat(ts_tree_equals(tree1, different_tree), Equals(0)); ts_tree_release(different_tree); }); it("returns false for trees with different children", [&]() { - TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0, 0); + TSTree *different_tree = ts_tree_make_leaf( + pig, + (TSLength) { .bytes = 5, .chars = 4 }, + (TSLength) { .bytes = 2, .chars = 1 }, + 0); + TSTree *different_parent = ts_tree_make_node(dog, 2, tree_array({ different_tree, different_tree, }), 0); diff --git a/src/runtime/length.h b/src/runtime/length.h new file mode 100644 index 00000000..c5c11db7 --- /dev/null +++ b/src/runtime/length.h @@ -0,0 +1,29 @@ +#ifndef RUNTIME_LENGTH_H_ +#define RUNTIME_LENGTH_H_ + +#include "tree_sitter/runtime.h" +#include + +static inline TSLength ts_length_add(TSLength len1, TSLength len2) { + return (TSLength) { + .bytes = len1.bytes + len2.bytes, + .chars = len1.chars + len2.chars, + }; +} + +static inline TSLength ts_length_sub(TSLength len1, TSLength len2) { + return (TSLength) { + .bytes = len1.bytes - len2.bytes, + .chars = len1.chars - len2.chars, + }; +} + +static inline TSLength ts_length_zero() { + return (TSLength) { 0, 0 }; +} + +static inline bool ts_length_eq(TSLength len1, TSLength len2) { + return len1.bytes == len2.bytes && len1.chars == len2.chars; +} + +#endif diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index c5ab6089..b2544b83 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -1,6 +1,7 @@ #include "runtime/lexer.h" #include "tree_sitter/parser.h" #include "runtime/tree.h" +#include "runtime/length.h" #include "utf8proc.h" static int advance(TSLexer *lexer) { @@ -13,9 +14,12 @@ static int advance(TSLexer *lexer) { return 0; } - if (lexer->position_in_chunk + 1 >= lexer->chunk_size) { + if (lexer->chunk_start + lexer->chunk_size <= lexer->current_position.bytes + 1) { + if (lexer->lookahead_size) { + lexer->current_position.bytes += lexer->lookahead_size; + lexer->current_position.chars += 1; + } lexer->lookahead_size = 0; - lexer->position_in_chunk = 0; lexer->chunk_start += lexer->chunk_size; lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size); } @@ -24,10 +28,14 @@ static int advance(TSLexer *lexer) { lexer->lookahead_size = 0; lexer->chunk = empty_chunk; } else { - lexer->position_in_chunk += lexer->lookahead_size; + if (lexer->lookahead_size) { + lexer->current_position.bytes += lexer->lookahead_size; + lexer->current_position.chars += 1; + } + lexer->lookahead_size = utf8proc_iterate( - (const uint8_t *)lexer->chunk + lexer->position_in_chunk, - lexer->chunk_size - lexer->position_in_chunk, + (const uint8_t *)lexer->chunk + (lexer->current_position.bytes - lexer->chunk_start), + lexer->chunk_start + lexer->chunk_size - lexer->current_position.bytes + 1, &lexer->lookahead); } @@ -35,10 +43,9 @@ static int advance(TSLexer *lexer) { } static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) { - size_t current_position = ts_lexer_position(lexer); - size_t size = current_position - lexer->token_start_position; - size_t padding = lexer->token_start_position - lexer->token_end_position; - lexer->token_end_position = current_position; + TSLength size = ts_length_sub(lexer->current_position, lexer->token_start_position); + TSLength padding = ts_length_sub(lexer->token_start_position, lexer->token_end_position); + lexer->token_end_position = lexer->current_position; return (symbol == ts_builtin_sym_error) ? ts_tree_make_error(size, padding, ts_lexer_lookahead_char(lexer)) : ts_tree_make_leaf(symbol, size, padding, is_hidden); @@ -61,9 +68,9 @@ void ts_lexer_reset(TSLexer *lexer) { lexer->chunk = NULL; lexer->chunk_start = 0; lexer->chunk_size = 0; - lexer->position_in_chunk = 0; - lexer->token_start_position = 0; - lexer->token_end_position = 0; + lexer->current_position = (TSLength) {}; + lexer->token_start_position = (TSLength) {}; + lexer->token_end_position = (TSLength) {}; lexer->lookahead = 0; lexer->lookahead_size = 0; } diff --git a/src/runtime/node.c b/src/runtime/node.c index 6670fdcb..161b9469 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -1,8 +1,9 @@ #include "runtime/node.h" +#include "runtime/length.h" #include "runtime/tree.h" TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index, - size_t position, const char **names) { + TSLength position, const char **names) { if (parent) ts_node_retain(parent); TSNode *result = malloc(sizeof(TSNode)); @@ -30,9 +31,9 @@ void ts_node_release(TSNode *node) { } } -size_t ts_node_pos(const TSNode *node) { return node->position; } +TSLength ts_node_pos(const TSNode *node) { return node->position; } -size_t ts_node_size(const TSNode *node) { return node->content->size; } +TSLength ts_node_size(const TSNode *node) { return node->content->size; } int ts_node_eq(const TSNode *left, const TSNode *right) { return ts_tree_equals(left->content, right->content); @@ -73,7 +74,7 @@ TSNode *ts_node_child(TSNode *parent, size_t i) { TSTreeChild *children = ts_tree_visible_children(parent->content, &count); if (i >= count) return NULL; - size_t pos = parent->position + children[i].offset; + TSLength pos = ts_length_add(parent->position, children[i].offset); return ts_node_make(children[i].tree, parent, i, pos, parent->names); } @@ -82,10 +83,10 @@ TSNode *ts_node_find_for_range(TSNode *parent, size_t min, size_t max) { TSTreeChild *children = ts_tree_visible_children(parent->content, &count); for (size_t i = 0; i < count; i++) { TSTreeChild child = children[i]; - size_t pos = parent->position + child.offset; - if (pos > min) + TSLength pos = ts_length_add(parent->position, child.offset); + if (pos.chars > min) break; - if (pos + child.tree->size > max) { + if (pos.chars + child.tree->size.chars > max) { TSNode *node = ts_node_make(child.tree, parent, i, pos, parent->names); TSNode *result = ts_node_find_for_range(node, min, max); ts_node_release(node); diff --git a/src/runtime/node.h b/src/runtime/node.h index ffeaf166..467b89a3 100644 --- a/src/runtime/node.h +++ b/src/runtime/node.h @@ -6,15 +6,15 @@ struct TSNode { size_t ref_count; - size_t position; size_t index; + TSLength position; const TSTree *content; struct TSNode *parent; const char **names; }; TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index, - size_t position, const char **names); + TSLength position, const char **names); TSNode *ts_node_make_root(const TSTree *tree, const char **names); #endif diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 0acde664..001502b7 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -5,6 +5,7 @@ #include "runtime/lexer.h" #include "runtime/stack.h" #include "runtime/parser.h" +#include "runtime/length.h" #define DEBUG_PARSE(...) \ if (parser->debug) { \ @@ -17,14 +18,14 @@ static TSParseAction action_for(const TSLanguage *lang, TSStateId state, return (lang->parse_table + (state * lang->symbol_count))[sym]; } -static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { +static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) { if (!edit) { ts_stack_shrink(&parser->stack, 0); - return 0; + return (TSLength) {}; } TSStack *stack = &parser->stack; - size_t position = ts_stack_right_position(stack); + TSLength position = ts_stack_right_position(stack); for (;;) { TSTree *node = ts_stack_top_node(stack); @@ -33,23 +34,23 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { size_t child_count; TSTree **children = ts_tree_children(node, &child_count); - if (position < edit->position && !children) + if (position.chars < edit->position && !children) break; stack->size--; - position -= ts_tree_total_size(node); + position = ts_length_sub(position, ts_tree_total_size(node)); DEBUG_PARSE("POP %s %u", parser->language->symbol_names[node->symbol], ts_stack_top_state(stack)); - for (size_t i = 0; i < child_count && position < edit->position; i++) { + for (size_t i = 0; i < child_count && position.chars < edit->position; i++) { TSTree *child = children[i]; TSStateId state = ts_stack_top_state(stack); TSParseAction action = action_for(parser->language, state, child->symbol); TSStateId next_state = action.type == TSParseActionTypeShift ? action.data.to_state : state; ts_stack_push(stack, next_state, child); - position += ts_tree_total_size(child); + position = ts_length_add(position, ts_tree_total_size(child)); DEBUG_PARSE("PUT BACK %s %u", parser->language->symbol_names[child->symbol], next_state); @@ -58,7 +59,7 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { ts_tree_release(node); } - DEBUG_PARSE("RESUME %lu", position); + DEBUG_PARSE("RESUME %lu", position.bytes); return position; } @@ -129,7 +130,6 @@ static void lex(TSParser *parser, TSStateId lex_state) { static int handle_error(TSParser *parser) { TSTree *error = parser->lookahead; ts_tree_retain(error); - size_t last_token_end = parser->lexer.token_end_position; for (;;) { @@ -137,7 +137,6 @@ static int handle_error(TSParser *parser) { * Unwind the parse stack until a state is found in which an error is * expected and the current lookahead token is expected afterwards. */ - size_t error_start = last_token_end; TS_STACK_FROM_TOP(parser->stack, entry, i) { TSParseAction action_on_error = action_for(parser->language, entry->state, ts_builtin_sym_error); @@ -149,16 +148,17 @@ static int handle_error(TSParser *parser) { if (action_after_error.type != TSParseActionTypeError) { DEBUG_PARSE("RECOVER %u", state_after_error); - error->size += ts_lexer_position(&parser->lexer) - 1 - error_start; ts_stack_shrink(&parser->stack, i + 1); + error->size = ts_length_sub( + ts_length_sub( + parser->lexer.token_start_position, + ts_stack_right_position(&parser->stack)), + error->padding); ts_stack_push(&parser->stack, state_after_error, error); ts_tree_release(error); return 1; } } - - TSTree *removed_tree = entry->node; - error_start -= ts_tree_total_size(removed_tree); } /* @@ -166,11 +166,11 @@ static int handle_error(TSParser *parser) { * current lookahead token, advance to the next token. If no characters * were consumed, advance the lexer to the next character. */ - size_t prev_position = ts_lexer_position(&parser->lexer); + TSLength prev_position = parser->lexer.current_position; DEBUG_PARSE("LEX AGAIN"); lex(parser, ts_lex_state_error); - parser->lookahead->padding = 0; - if (ts_lexer_position(&parser->lexer) == prev_position) + parser->lookahead->padding = ts_length_zero(); + if (ts_length_eq(parser->lexer.current_position, prev_position)) if (!ts_lexer_advance(&parser->lexer)) { DEBUG_PARSE("FAIL TO RECOVER"); ts_stack_push(&parser->stack, 0, error); @@ -182,7 +182,7 @@ static int handle_error(TSParser *parser) { static TSTree *get_root(TSParser *parser) { if (parser->stack.size == 0) - ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0, 0)); + ts_stack_push(&parser->stack, 0, ts_tree_make_error(ts_length_zero(), ts_length_zero(), 0)); reduce(parser, ts_builtin_sym_document, parser->stack.size); parser->lookahead->options = 0; diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 4a66d06c..6795c00d 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,6 +1,7 @@ #include "tree_sitter/parser.h" #include "runtime/tree.h" #include "runtime/stack.h" +#include "runtime/length.h" static size_t INITIAL_SIZE = 100; static TSStateId INITIAL_STATE = 0; @@ -46,11 +47,11 @@ void ts_stack_shrink(TSStack *stack, size_t new_size) { stack->size = new_size; } -size_t ts_stack_right_position(const TSStack *stack) { - size_t result = 0; +TSLength ts_stack_right_position(const TSStack *stack) { + TSLength result = {}; for (size_t i = 0; i < stack->size; i++) { TSTree *node = stack->entries[i].node; - result += ts_tree_total_size(node); + result = ts_length_add(result, ts_tree_total_size(node)); } return result; } diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 8dc814ed..c485e6d6 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -24,7 +24,7 @@ void ts_stack_shrink(TSStack *stack, size_t new_size); void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node); TSStateId ts_stack_top_state(const TSStack *stack); TSTree *ts_stack_top_node(const TSStack *stack); -size_t ts_stack_right_position(const TSStack *stack); +TSLength ts_stack_right_position(const TSStack *stack); #define TS_STACK_FROM_TOP(stack, entry, index) \ size_t index = stack.size - 1; \ diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index f55ac36d..ac8d0ba9 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -19,10 +19,10 @@ const char *ts_string_input_read(void *d, size_t *bytes_read) { return data->string + previous_position; } -int ts_string_input_seek(void *d, size_t position) { +int ts_string_input_seek(void *d, TSLength position) { TSStringInput *data = (TSStringInput *)d; - data->position = position; - return (position < data->length); + data->position = position.bytes; + return (position.bytes < data->length); } TSInput ts_string_input_make(const char *string) { diff --git a/src/runtime/tree.c b/src/runtime/tree.c index ce07949b..50901cf8 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -3,8 +3,9 @@ #include #include "tree_sitter/parser.h" #include "runtime/tree.h" +#include "runtime/length.h" -TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, size_t padding, +TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength size, TSLength padding, bool is_hidden) { TSTree *result = malloc(sizeof(TSTree)); *result = (TSTree) { .ref_count = 1, @@ -18,7 +19,7 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, size_t padding, return result; } -TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char) { +TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char) { TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, padding, false); result->lookahead_char = lookahead_char; return result; @@ -31,7 +32,8 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, * Determine the new node's size, padding and visible child count based on * the given child nodes. */ - size_t size = 0, padding = 0, visible_child_count = 0; + TSLength size = ts_length_zero(), padding = ts_length_zero(); + size_t visible_child_count = 0; for (size_t i = 0; i < child_count; i++) { TSTree *child = children[i]; ts_tree_retain(child); @@ -40,7 +42,7 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, padding = child->padding; size = child->size; } else { - size += child->padding + child->size; + size = ts_length_add(ts_length_add(size, child->padding), child->size); } if (ts_tree_is_visible(child)) @@ -79,11 +81,12 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, * their positions can be queried without using the hidden child nodes. */ TSTreeChild *visible_children = ts_tree_visible_children(result, NULL); - for (size_t i = 0, vis_i = 0, offset = 0; i < child_count; i++) { + TSLength offset = ts_length_zero(); + for (size_t i = 0, vis_i = 0; i < child_count; i++) { TSTree *child = children[i]; if (i > 0) - offset += child->padding; + offset = ts_length_add(offset, child->padding); if (ts_tree_is_visible(child)) { visible_children[vis_i].tree = child; @@ -94,12 +97,12 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTreeChild *grandchildren = ts_tree_visible_children(child, &n); for (size_t j = 0; j < n; j++) { visible_children[vis_i].tree = grandchildren[j].tree; - visible_children[vis_i].offset = offset + grandchildren[j].offset; + visible_children[vis_i].offset = ts_length_add(offset, grandchildren[j].offset); vis_i++; } } - offset += child->size; + offset = ts_length_add(offset, child->size); } return result; @@ -119,8 +122,8 @@ void ts_tree_release(TSTree *tree) { } } -size_t ts_tree_total_size(const TSTree *tree) { - return tree->padding + tree->size; +TSLength ts_tree_total_size(const TSTree *tree) { + return ts_length_add(tree->padding, tree->size); } int ts_tree_equals(const TSTree *node1, const TSTree *node2) { diff --git a/src/runtime/tree.h b/src/runtime/tree.h index dac4ec9f..30a43063 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -6,7 +6,7 @@ extern "C" { #endif #include -#include "tree_sitter/runtime.h" +#include "tree_sitter/parser.h" typedef enum { TSTreeOptionsHidden = 1, @@ -18,8 +18,8 @@ struct TSTree { TSSymbol symbol; TSTreeOptions options; size_t ref_count; - size_t padding; - size_t size; + TSLength padding; + TSLength size; char lookahead_char; size_t child_count; size_t visible_child_count; @@ -28,7 +28,7 @@ struct TSTree { typedef struct { TSTree *tree; - size_t offset; + TSLength offset; } TSTreeChild; static inline int ts_tree_is_extra(const TSTree *tree) { @@ -47,9 +47,9 @@ static inline int ts_tree_is_wrapper(const TSTree *tree) { return (tree->options & TSTreeOptionsWrapper); } -TSTree *ts_tree_make_leaf(TSSymbol, size_t, size_t, bool); +TSTree *ts_tree_make_leaf(TSSymbol, TSLength, TSLength, bool); TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, bool); -TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char); +TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char); void ts_tree_retain(TSTree *tree); void ts_tree_release(TSTree *tree); int ts_tree_equals(const TSTree *tree1, const TSTree *tree2); @@ -57,7 +57,7 @@ char *ts_tree_string(const TSTree *tree, const char **names); char *ts_tree_error_string(const TSTree *tree, const char **names); TSTree **ts_tree_children(const TSTree *tree, size_t *count); TSTreeChild *ts_tree_visible_children(const TSTree *tree, size_t *count); -size_t ts_tree_total_size(const TSTree *tree); +TSLength ts_tree_total_size(const TSTree *tree); #ifdef __cplusplus }