diff --git a/README.md b/README.md index 2feacc25..330dcf86 100644 --- a/README.md +++ b/README.md @@ -179,25 +179,28 @@ tokens, like `(` and `+`. This is useful when analyzing the meaning of a documen TSLanguage *tree_sitter_arithmetic(); int main() { - TSDocument *document = ts_document_new(); - ts_document_set_language(document, tree_sitter_arithmetic()); - ts_document_set_input_string(document, "a + b * 5"); - ts_document_parse(document); + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, tree_sitter_arithmetic()); - TSNode root_node = ts_document_root_node(document); - assert(!strcmp(ts_node_type(root_node, document), "expression")); + const char *source_code = "a + b * 5"; + TSTree *tree = ts_parser_parse_string(parser, NULL, source_code, strlen(source_code)); + + TSNode root_node = ts_tree_root_node(tree); + assert(!strcmp(ts_node_type(root_node), "expression")); assert(ts_node_named_child_count(root_node) == 1); TSNode sum_node = ts_node_named_child(root_node, 0); - assert(!strcmp(ts_node_type(sum_node, document), "sum")); + assert(!strcmp(ts_node_type(sum_node), "sum")); assert(ts_node_named_child_count(sum_node) == 2); TSNode product_node = ts_node_child(ts_node_named_child(sum_node, 1), 0); - assert(!strcmp(ts_node_type(product_node, document), "product")); + assert(!strcmp(ts_node_type(product_node), "product")); assert(ts_node_named_child_count(product_node) == 2); - printf("Syntax tree: %s\n", ts_node_string(root_node, document)); - ts_document_free(document); + printf("Syntax tree: %s\n", ts_node_string(root_node)); + + ts_tree_delete(tree); + ts_parser_delete(parser); return 0; } ``` diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index d99655f8..35e77a87 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -9,13 +9,14 @@ extern "C" { #include #include -typedef uint16_t TSSymbol; -typedef uint16_t TSStateId; - #define ts_builtin_sym_error ((TSSymbol)-1) #define ts_builtin_sym_end 0 
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 +typedef uint16_t TSSymbol; + +typedef uint16_t TSStateId; + typedef struct { bool visible : 1; bool named : 1; @@ -129,6 +130,7 @@ typedef struct TSLanguage { */ #define STATE(id) id + #define ACTIONS(id) id #define SHIFT(state_value) \ diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index c3bef506..8f379718 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -5,6 +5,7 @@ extern "C" { #endif +#include #include #include #include @@ -13,7 +14,9 @@ extern "C" { typedef unsigned short TSSymbol; typedef struct TSLanguage TSLanguage; -typedef struct TSDocument TSDocument; +typedef struct TSParser TSParser; +typedef struct TSTree TSTree; +typedef struct TSTreeCursor TSTreeCursor; typedef enum { TSInputEncodingUTF8, @@ -31,6 +34,11 @@ typedef struct { uint32_t column; } TSPoint; +typedef struct { + TSPoint start; + TSPoint end; +} TSRange; + typedef struct { void *payload; const char *(*read)(void *payload, uint32_t *bytes_read); @@ -58,22 +66,38 @@ typedef struct { } TSInputEdit; typedef struct { - TSPoint start; - TSPoint end; -} TSRange; - -typedef struct { - const void *data; - uint32_t offset[2]; + const void *subtree; + const TSTree *tree; + TSPoint position; + uint32_t byte; + TSSymbol alias_symbol; } TSNode; +TSParser *ts_parser_new(); +void ts_parser_delete(TSParser *); +const TSLanguage *ts_parser_language(const TSParser *); +bool ts_parser_set_language(TSParser *, const TSLanguage *); +TSLogger ts_parser_logger(const TSParser *); +void ts_parser_set_logger(TSParser *, TSLogger); +void ts_parser_print_dot_graphs(TSParser *, FILE *); +void ts_parser_halt_on_error(TSParser *, bool); +TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput); +TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t); + +TSTree *ts_tree_copy(const TSTree *); +void ts_tree_delete(TSTree *); +TSNode ts_tree_root_node(const TSTree *); +void 
ts_tree_edit(TSTree *, const TSInputEdit *); +TSRange *ts_tree_get_changed_ranges(const TSTree *, const TSTree *, uint32_t *); +void ts_tree_print_dot_graph(const TSTree *, FILE *); + uint32_t ts_node_start_byte(TSNode); TSPoint ts_node_start_point(TSNode); uint32_t ts_node_end_byte(TSNode); TSPoint ts_node_end_point(TSNode); TSSymbol ts_node_symbol(TSNode); -const char *ts_node_type(TSNode, const TSDocument *); -char *ts_node_string(TSNode, const TSDocument *); +const char *ts_node_type(TSNode); +char *ts_node_string(TSNode); bool ts_node_eq(TSNode, TSNode); bool ts_node_is_named(TSNode); bool ts_node_is_missing(TSNode); @@ -84,7 +108,6 @@ TSNode ts_node_child(TSNode, uint32_t); TSNode ts_node_named_child(TSNode, uint32_t); uint32_t ts_node_child_count(TSNode); uint32_t ts_node_named_child_count(TSNode); -uint32_t ts_node_child_index(TSNode); TSNode ts_node_next_sibling(TSNode); TSNode ts_node_next_named_sibling(TSNode); TSNode ts_node_prev_sibling(TSNode); @@ -96,32 +119,12 @@ TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t); TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint); TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint); -TSDocument *ts_document_new(); -void ts_document_free(TSDocument *); -const TSLanguage *ts_document_language(TSDocument *); -void ts_document_set_language(TSDocument *, const TSLanguage *); -TSInput ts_document_input(TSDocument *); -void ts_document_set_input(TSDocument *, TSInput); -void ts_document_set_input_string(TSDocument *, const char *); -void ts_document_set_input_string_with_length(TSDocument *, const char *, uint32_t); -TSLogger ts_document_logger(const TSDocument *); -void ts_document_set_logger(TSDocument *, TSLogger); -void ts_document_print_debugging_graphs(TSDocument *, bool); -void ts_document_edit(TSDocument *, TSInputEdit); -void ts_document_parse(TSDocument *); -void ts_document_parse_and_get_changed_ranges(TSDocument *, TSRange **, uint32_t *); - 
-typedef struct { - TSRange **changed_ranges; - uint32_t *changed_range_count; - bool halt_on_error; -} TSParseOptions; - -void ts_document_parse_with_options(TSDocument *, TSParseOptions); - -void ts_document_invalidate(TSDocument *); -TSNode ts_document_root_node(const TSDocument *); -uint32_t ts_document_parse_count(const TSDocument *); +TSTreeCursor *ts_tree_cursor_new(const TSTree *); +void ts_tree_cursor_delete(TSTreeCursor *); +bool ts_tree_cursor_goto_first_child(TSTreeCursor *); +bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); +bool ts_tree_cursor_goto_parent(TSTreeCursor *); +TSNode ts_tree_cursor_current_node(TSTreeCursor *); uint32_t ts_language_symbol_count(const TSLanguage *); const char *ts_language_symbol_name(const TSLanguage *, TSSymbol); diff --git a/project.gyp b/project.gyp index b968a171..11174724 100644 --- a/project.gyp +++ b/project.gyp @@ -87,7 +87,6 @@ 'externals/utf8proc', ], 'sources': [ - 'src/runtime/document.c', 'src/runtime/get_changed_ranges.c', 'src/runtime/language.c', 'src/runtime/lexer.c', @@ -95,7 +94,9 @@ 'src/runtime/stack.c', 'src/runtime/parser.c', 'src/runtime/string_input.c', + 'src/runtime/subtree.c', 'src/runtime/tree.c', + 'src/runtime/tree_cursor.c', 'src/runtime/utf16.c', 'externals/utf8proc/utf8proc.c', ], diff --git a/src/compiler/build_tables/lex_item_transitions.cc b/src/compiler/build_tables/lex_item_transitions.cc index d30bf011..7b4eb611 100644 --- a/src/compiler/build_tables/lex_item_transitions.cc +++ b/src/compiler/build_tables/lex_item_transitions.cc @@ -80,7 +80,7 @@ class TransitionBuilder { public: void apply(const Rule &rule) { rule.match( - [this](const rules::Blank &) {}, + [](const rules::Blank &) {}, [this](const rules::CharacterSet &character_set) { PrecedenceRange precedence; diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 2e5cf1d9..93b06be2 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ 
b/src/compiler/prepare_grammar/extract_tokens.cc @@ -30,7 +30,7 @@ class SymbolReplacer { Rule apply(const Rule &rule) { return rule.match( - [this](const rules::Blank &blank) -> Rule { + [](const rules::Blank &blank) -> Rule { return blank; }, @@ -110,7 +110,7 @@ class TokenExtractor { public: Rule apply(const rules::Rule &rule) { return rule.match( - [this](const rules::Blank &blank) -> Rule { return blank; }, + [](const rules::Blank &blank) -> Rule { return blank; }, [this](const rules::Metadata &rule) -> Rule { if (rule.params.is_token) { diff --git a/src/runtime/array.h b/src/runtime/array.h index 4d2f478b..45b3adaa 100644 --- a/src/runtime/array.h +++ b/src/runtime/array.h @@ -58,6 +58,9 @@ extern "C" { #define array_pop(self) ((self)->contents[--(self)->size]) +#define array_assign(self, other) \ + array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self)) + // Private typedef Array(void) VoidArray; @@ -91,6 +94,12 @@ static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t } } +static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) { + array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + static inline void array__grow(VoidArray *self, size_t element_size) { if (self->size == self->capacity) { size_t new_capacity = self->capacity * 2; diff --git a/src/runtime/atomic.h b/src/runtime/atomic.h new file mode 100644 index 00000000..d1ab1f23 --- /dev/null +++ b/src/runtime/atomic.h @@ -0,0 +1,30 @@ +#ifndef RUNTIME_ATOMIC_H_ +#define RUNTIME_ATOMIC_H_ + +#include + +#ifdef _WIN32 + +#include + +static inline uint32_t atomic_inc(volatile uint32_t *p) { + return InterlockedIncrement(p); +} + +static inline uint32_t atomic_dec(volatile uint32_t *p) { + return InterlockedDecrement(p); +} + +#else + +static inline uint32_t atomic_inc(volatile uint32_t *p) { + return 
__sync_add_and_fetch(p, 1u); +} + +static inline uint32_t atomic_dec(volatile uint32_t *p) { + return __sync_sub_and_fetch(p, 1u); +} + +#endif + +#endif // RUNTIME_ATOMIC_H_ diff --git a/src/runtime/document.c b/src/runtime/document.c deleted file mode 100644 index d611d989..00000000 --- a/src/runtime/document.c +++ /dev/null @@ -1,179 +0,0 @@ -#include "runtime/alloc.h" -#include "runtime/node.h" -#include "runtime/tree.h" -#include "runtime/parser.h" -#include "runtime/string_input.h" -#include "runtime/document.h" -#include "runtime/get_changed_ranges.h" - -#define LOG(...) \ - snprintf(self->parser.lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ - self->parser.lexer.logger.log(self->parser.lexer.logger.payload, TSLogTypeLex, self->parser.lexer.debug_buffer); \ - -TSDocument *ts_document_new() { - TSDocument *self = ts_calloc(1, sizeof(TSDocument)); - parser_init(&self->parser); - array_init(&self->tree_path1); - array_init(&self->tree_path2); - return self; -} - -void ts_document_free(TSDocument *self) { - if (self->tree) ts_tree_release(&self->parser.tree_pool, self->tree); - if (self->tree_path1.contents) array_delete(&self->tree_path1); - if (self->tree_path2.contents) array_delete(&self->tree_path2); - parser_destroy(&self->parser); - ts_document_set_input(self, (TSInput){ - NULL, - NULL, - NULL, - TSInputEncodingUTF8, - }); - ts_free(self); -} - -const TSLanguage *ts_document_language(TSDocument *self) { - return self->parser.language; -} - -void ts_document_set_language(TSDocument *self, const TSLanguage *language) { - if (language->version != TREE_SITTER_LANGUAGE_VERSION) return; - ts_document_invalidate(self); - parser_set_language(&self->parser, language); - if (self->tree) { - ts_tree_release(&self->parser.tree_pool, self->tree); - self->tree = NULL; - } -} - -TSLogger ts_document_logger(const TSDocument *self) { - return self->parser.lexer.logger; -} - -void ts_document_set_logger(TSDocument *self, TSLogger logger) { - 
self->parser.lexer.logger = logger; -} - -void ts_document_print_debugging_graphs(TSDocument *self, bool should_print) { - self->parser.print_debugging_graphs = should_print; -} - -TSInput ts_document_input(TSDocument *self) { - return self->input; -} - -void ts_document_set_input(TSDocument *self, TSInput input) { - if (self->owns_input) - ts_free(self->input.payload); - self->input = input; - self->owns_input = false; -} - -void ts_document_set_input_string(TSDocument *self, const char *text) { - ts_document_invalidate(self); - TSInput input = ts_string_input_make(text); - ts_document_set_input(self, input); - if (input.payload) { - self->owns_input = true; - } -} - -void ts_document_set_input_string_with_length(TSDocument *self, const char *text, uint32_t length) { - ts_document_invalidate(self); - TSInput input = ts_string_input_make_with_length(text, length); - ts_document_set_input(self, input); - if (input.payload) { - self->owns_input = true; - } -} - -void ts_document_edit(TSDocument *self, TSInputEdit edit) { - if (!self->tree) - return; - - uint32_t max_bytes = ts_tree_total_bytes(self->tree); - if (edit.start_byte > max_bytes) - return; - if (edit.bytes_removed > max_bytes - edit.start_byte) - edit.bytes_removed = max_bytes - edit.start_byte; - - ts_tree_edit(self->tree, &edit); - - if (self->parser.print_debugging_graphs) { - ts_tree_print_dot_graph(self->tree, self->parser.language, stderr); - } -} - -void ts_document_parse(TSDocument *self) { - ts_document_parse_with_options(self, (TSParseOptions){ - .halt_on_error = false, - .changed_ranges = NULL, - .changed_range_count = NULL, - }); -} - -void ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, - uint32_t *range_count) { - ts_document_parse_with_options(self, (TSParseOptions){ - .halt_on_error = false, - .changed_ranges = ranges, - .changed_range_count = range_count, - }); -} - -void ts_document_parse_with_options(TSDocument *self, TSParseOptions options) { - if 
(options.changed_ranges && options.changed_range_count) { - *options.changed_ranges = NULL; - *options.changed_range_count = 0; - } - - if (!self->input.read || !self->parser.language) - return; - - Tree *reusable_tree = self->valid ? self->tree : NULL; - if (reusable_tree && !reusable_tree->has_changes) - return; - - Tree *tree = parser_parse(&self->parser, self->input, reusable_tree, options.halt_on_error); - - if (self->tree) { - Tree *old_tree = self->tree; - self->tree = tree; - - if (options.changed_ranges && options.changed_range_count) { - *options.changed_range_count = ts_tree_get_changed_ranges( - old_tree, tree, &self->tree_path1, &self->tree_path2, - self->parser.language, options.changed_ranges - ); - - if (self->parser.lexer.logger.log) { - for (unsigned i = 0; i < *options.changed_range_count; i++) { - TSRange range = (*options.changed_ranges)[i]; - LOG( - "changed_range start:[%u %u], end:[%u %u]", - range.start.row, range.start.column, - range.end.row, range.end.column - ); - } - } - } - - ts_tree_release(&self->parser.tree_pool, old_tree); - } - - self->tree = tree; - self->parse_count++; - self->valid = true; -} - -void ts_document_invalidate(TSDocument *self) { - self->valid = false; -} - -TSNode ts_document_root_node(const TSDocument *self) { - return ts_node_make(self->tree, 0, 0); -} - -uint32_t ts_document_parse_count(const TSDocument *self) { - return self->parse_count; -} diff --git a/src/runtime/document.h b/src/runtime/document.h deleted file mode 100644 index 0be05f14..00000000 --- a/src/runtime/document.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef RUNTIME_DOCUMENT_H_ -#define RUNTIME_DOCUMENT_H_ - -#include "runtime/parser.h" -#include "runtime/tree.h" -#include "runtime/get_changed_ranges.h" -#include - -struct TSDocument { - Parser parser; - TSInput input; - Tree *tree; - TreePath tree_path1; - TreePath tree_path2; - size_t parse_count; - bool valid; - bool owns_input; -}; - -#endif diff --git a/src/runtime/get_changed_ranges.c 
b/src/runtime/get_changed_ranges.c index 26211613..2b015d27 100644 --- a/src/runtime/get_changed_ranges.c +++ b/src/runtime/get_changed_ranges.c @@ -1,7 +1,8 @@ #include "runtime/get_changed_ranges.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" #include "runtime/language.h" #include "runtime/error_costs.h" +#include "runtime/tree_cursor.h" #include // #define DEBUG_GET_CHANGED_RANGES @@ -24,22 +25,22 @@ static void range_array_add(RangeArray *results, TSPoint start, TSPoint end) { } typedef struct { - TreePath path; + TSTreeCursor cursor; const TSLanguage *language; unsigned visible_depth; bool in_padding; } Iterator; -static Iterator iterator_new(TreePath *path, Tree *tree, const TSLanguage *language) { - array_clear(path); - array_push(path, ((TreePathEntry){ - .tree = tree, +static Iterator iterator_new(TSTreeCursor *cursor, const Subtree *tree, const TSLanguage *language) { + array_clear(&cursor->stack); + array_push(&cursor->stack, ((TreeCursorEntry){ + .subtree = tree, .position = length_zero(), .child_index = 0, .structural_child_index = 0, })); return (Iterator) { - .path = *path, + .cursor = *cursor, .language = language, .visible_depth = 1, .in_padding = false, @@ -47,42 +48,42 @@ static Iterator iterator_new(TreePath *path, Tree *tree, const TSLanguage *langu } static bool iterator_done(Iterator *self) { - return self->path.size == 0; + return self->cursor.stack.size == 0; } Length iterator_start_position(Iterator *self) { - TreePathEntry entry = *array_back(&self->path); + TreeCursorEntry entry = *array_back(&self->cursor.stack); if (self->in_padding) { return entry.position; } else { - return length_add(entry.position, entry.tree->padding); + return length_add(entry.position, entry.subtree->padding); } } Length iterator_end_position(Iterator *self) { - TreePathEntry entry = *array_back(&self->path); - Length result = length_add(entry.position, entry.tree->padding); + TreeCursorEntry entry = *array_back(&self->cursor.stack); + Length result 
= length_add(entry.position, entry.subtree->padding); if (self->in_padding) { return result; } else { - return length_add(result, entry.tree->size); + return length_add(result, entry.subtree->size); } } static bool iterator_tree_is_visible(const Iterator *self) { - TreePathEntry entry = *array_back(&self->path); - if (entry.tree->visible) return true; - if (self->path.size > 1) { - Tree *parent = self->path.contents[self->path.size - 2].tree; + TreeCursorEntry entry = *array_back(&self->cursor.stack); + if (entry.subtree->visible) return true; + if (self->cursor.stack.size > 1) { + const Subtree *parent = self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence(self->language, parent->alias_sequence_id); return alias_sequence && alias_sequence[entry.structural_child_index] != 0; } return false; } -static void iterator_get_visible_state(const Iterator *self, Tree **tree, +static void iterator_get_visible_state(const Iterator *self, const Subtree **tree, TSSymbol *alias_symbol, uint32_t *start_byte) { - uint32_t i = self->path.size - 1; + uint32_t i = self->cursor.stack.size - 1; if (self->in_padding) { if (i == 0) return; @@ -90,10 +91,10 @@ static void iterator_get_visible_state(const Iterator *self, Tree **tree, } for (; i + 1 > 0; i--) { - TreePathEntry entry = self->path.contents[i]; + TreeCursorEntry entry = self->cursor.stack.contents[i]; if (i > 0) { - Tree *parent = self->path.contents[i - 1].tree; + const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->language, parent->alias_sequence_id @@ -103,8 +104,8 @@ static void iterator_get_visible_state(const Iterator *self, Tree **tree, } } - if (entry.tree->visible || *alias_symbol) { - *tree = entry.tree; + if (entry.subtree->visible || *alias_symbol) { + *tree = entry.subtree; *start_byte = entry.position.bytes; break; } @@ -114,8 +115,8 @@ static void 
iterator_get_visible_state(const Iterator *self, Tree **tree, static void iterator_ascend(Iterator *self) { if (iterator_done(self)) return; if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; - if (array_back(&self->path)->child_index > 0) self->in_padding = false; - self->path.size--; + if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; + self->cursor.stack.size--; } static bool iterator_descend(Iterator *self, uint32_t goal_position) { @@ -124,17 +125,17 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { bool did_descend; do { did_descend = false; - TreePathEntry entry = *array_back(&self->path); + TreeCursorEntry entry = *array_back(&self->cursor.stack); Length position = entry.position; uint32_t structural_child_index = 0; - for (uint32_t i = 0; i < entry.tree->children.size; i++) { - Tree *child = entry.tree->children.contents[i]; + for (uint32_t i = 0; i < entry.subtree->children.size; i++) { + const Subtree *child = entry.subtree->children.contents[i]; Length child_left = length_add(position, child->padding); Length child_right = length_add(child_left, child->size); if (child_right.bytes > goal_position) { - array_push(&self->path, ((TreePathEntry){ - .tree = child, + array_push(&self->cursor.stack, ((TreeCursorEntry){ + .subtree = child, .position = position, .child_index = i, .structural_child_index = structural_child_index, @@ -174,19 +175,19 @@ static void iterator_advance(Iterator *self) { for (;;) { if (iterator_tree_is_visible(self)) self->visible_depth--; - TreePathEntry entry = array_pop(&self->path); + TreeCursorEntry entry = array_pop(&self->cursor.stack); if (iterator_done(self)) return; - Tree *parent = array_back(&self->path)->tree; + const Subtree *parent = array_back(&self->cursor.stack)->subtree; uint32_t child_index = entry.child_index + 1; if (parent->children.size > child_index) { - Length position = length_add(entry.position, ts_tree_total_size(entry.tree)); 
+ Length position = length_add(entry.position, ts_subtree_total_size(entry.subtree)); uint32_t structural_child_index = entry.structural_child_index; - if (!entry.tree->extra) structural_child_index++; - Tree *next_child = parent->children.contents[child_index]; + if (!entry.subtree->extra) structural_child_index++; + const Subtree *next_child = parent->children.contents[child_index]; - array_push(&self->path, ((TreePathEntry){ - .tree = next_child, + array_push(&self->cursor.stack, ((TreeCursorEntry){ + .subtree = next_child, .position = position, .child_index = child_index, .structural_child_index = structural_child_index, @@ -213,7 +214,7 @@ typedef enum { } IteratorComparison; IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter) { - Tree *old_tree = NULL, *new_tree = NULL; + const Subtree *old_tree = NULL, *new_tree = NULL; uint32_t old_start = 0, new_start = 0; TSSymbol old_alias_symbol = 0, new_alias_symbol = 0; iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); @@ -246,10 +247,10 @@ IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *ne #ifdef DEBUG_GET_CHANGED_RANGES static inline void iterator_print_state(Iterator *self) { - TreePathEntry entry = *array_back(&self->path); + TreeCursorEntry entry = *array_back(&self->cursor.stack); TSPoint start = iterator_start_position(self).extent; TSPoint end = iterator_end_position(self).extent; - const char *name = ts_language_symbol_name(self->language, entry.tree->symbol); + const char *name = ts_language_symbol_name(self->language, entry.subtree->symbol); printf( "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", name, self->in_padding ? 
"(p)" : " ", @@ -260,13 +261,13 @@ static inline void iterator_print_state(Iterator *self) { } #endif -unsigned ts_tree_get_changed_ranges(Tree *old_tree, Tree *new_tree, - TreePath *path1, TreePath *path2, - const TSLanguage *language, TSRange **ranges) { +unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree, + TSTreeCursor *cursor1, TSTreeCursor *cursor2, + const TSLanguage *language, TSRange **ranges) { RangeArray results = array_new(); - Iterator old_iter = iterator_new(path1, old_tree, language); - Iterator new_iter = iterator_new(path2, new_tree, language); + Iterator old_iter = iterator_new(cursor1, old_tree, language); + Iterator new_iter = iterator_new(cursor2, new_tree, language); Length position = iterator_start_position(&old_iter); Length next_position = iterator_start_position(&new_iter); @@ -348,8 +349,8 @@ unsigned ts_tree_get_changed_ranges(Tree *old_tree, Tree *new_tree, position = next_position; } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); - *path1 = old_iter.path; - *path2 = new_iter.path; + *cursor1 = old_iter.cursor; + *cursor2 = new_iter.cursor; *ranges = results.contents; return results.size; } diff --git a/src/runtime/get_changed_ranges.h b/src/runtime/get_changed_ranges.h index 360cdbd4..fbe9cc23 100644 --- a/src/runtime/get_changed_ranges.h +++ b/src/runtime/get_changed_ranges.h @@ -1,19 +1,11 @@ #ifndef RUNTIME_GET_CHANGED_RANGES_H_ #define RUNTIME_GET_CHANGED_RANGES_H_ -#include "runtime/tree.h" +#include "runtime/subtree.h" -typedef struct { - Tree *tree; - Length position; - uint32_t child_index; - uint32_t structural_child_index; -} TreePathEntry; - -typedef Array(TreePathEntry) TreePath; - -unsigned ts_tree_get_changed_ranges( - Tree *old_tree, Tree *new_tree, TreePath *path1, TreePath *path2, +unsigned ts_subtree_get_changed_ranges( + const Subtree *old_tree, const Subtree *new_tree, + TSTreeCursor *cursor1, TSTreeCursor *cursor2, const TSLanguage *language, TSRange **ranges ); 
diff --git a/src/runtime/language.c b/src/runtime/language.c index 9bf1fc63..75e7da8b 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -1,5 +1,5 @@ #include "runtime/language.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" #include "runtime/error_costs.h" void ts_language_table_entry(const TSLanguage *self, TSStateId state, diff --git a/src/runtime/language.h b/src/runtime/language.h index 64733242..879a5b5b 100644 --- a/src/runtime/language.h +++ b/src/runtime/language.h @@ -6,7 +6,7 @@ extern "C" { #endif #include "tree_sitter/parser.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 157276a1..e6e5aa58 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -1,6 +1,6 @@ #include #include "runtime/lexer.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" #include "runtime/length.h" #include "runtime/utf16.h" #include "utf8proc.h" diff --git a/src/runtime/lexer.h b/src/runtime/lexer.h index 08a71a11..90be55a7 100644 --- a/src/runtime/lexer.h +++ b/src/runtime/lexer.h @@ -8,7 +8,7 @@ extern "C" { #include "tree_sitter/parser.h" #include "tree_sitter/runtime.h" #include "runtime/length.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" typedef struct { TSLexer data; diff --git a/src/runtime/node.c b/src/runtime/node.c index c825a104..1f221d12 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -1,44 +1,92 @@ #include -#include "runtime/node.h" +#include "runtime/subtree.h" #include "runtime/tree.h" -#include "runtime/document.h" +#include "runtime/language.h" -TSNode ts_node_make(const Tree *tree, uint32_t byte, uint32_t row) { - return (TSNode){.data = tree, .offset = { byte, row } }; -} +// NodeChildIterator -/* - * Private - */ +typedef struct { + const Subtree *parent; + const TSTree *tree; + Length position; + uint32_t child_index; + uint32_t structural_child_index; + 
const TSSymbol *alias_sequence; +} NodeChildIterator; + +// TSNode - Private static inline TSNode ts_node__null() { - return ts_node_make(NULL, 0, 0); + return (TSNode) { + .subtree = NULL, + .tree = NULL, + .position = {0, 0}, + .byte = 0, + }; } -static inline const Tree *ts_node__tree(TSNode self) { - return self.data; +static inline const Subtree *ts_node__tree(TSNode self) { + return self.subtree; } -static inline uint32_t ts_node__offset_byte(TSNode self) { - return self.offset[0]; +static inline NodeChildIterator ts_node_child_iterator_begin(const TSNode *node) { + const Subtree *tree = ts_node__tree(*node); + const TSSymbol *alias_sequence = ts_language_alias_sequence( + node->tree->language, + tree->alias_sequence_id + ); + return (NodeChildIterator) { + .parent = tree, + .tree = node->tree, + .position = {node->byte, node->position}, + .child_index = 0, + .structural_child_index = 0, + .alias_sequence = alias_sequence, + }; } -static inline uint32_t ts_node__offset_row(TSNode self) { - return self.offset[1]; +static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) { + if (self->child_index == self->parent->children.size) return false; + const Subtree *child = self->parent->children.contents[self->child_index]; + TSSymbol alias_symbol = 0; + if (!child->extra) { + if (self->alias_sequence) { + alias_symbol = self->alias_sequence[self->structural_child_index]; + } + self->structural_child_index++; + } + *result = (TSNode) { + .subtree = child, + .tree = self->tree, + .position = self->position.extent, + .byte = self->position.bytes, + .alias_symbol = alias_symbol, + }; + self->position = length_add(self->position, ts_subtree_total_size(child)); + self->child_index++; + return true; } static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { - const Tree *tree = ts_node__tree(self); + const Subtree *tree = ts_node__tree(self); if (include_anonymous) { - return tree->context.alias_symbol || tree->visible; + 
return tree->visible || self.alias_symbol; } else { - return tree->context.alias_is_named || (tree->visible && tree->named); + return ( + (tree->visible && tree->named) || + ( + self.alias_symbol && + ts_language_symbol_metadata( + self.tree->language, + self.alias_symbol + ).named + ) + ); } } -static inline uint32_t ts_node__relevant_child_count(TSNode self, - bool include_anonymous) { - const Tree *tree = ts_node__tree(self); +static inline uint32_t ts_node__relevant_child_count(TSNode self, bool include_anonymous) { + const Subtree *tree = ts_node__tree(self); if (tree->children.size > 0) { if (include_anonymous) { return tree->visible_child_count; @@ -50,44 +98,23 @@ static inline uint32_t ts_node__relevant_child_count(TSNode self, } } -static inline TSNode ts_node__direct_parent(TSNode self, uint32_t *index) { - const Tree *tree = ts_node__tree(self); - *index = tree->context.index; - return ts_node_make( - tree->context.parent, - ts_node__offset_byte(self) - tree->context.offset.bytes, - ts_node__offset_row(self) - tree->context.offset.extent.row - ); -} - -static inline TSNode ts_node__direct_child(TSNode self, uint32_t i) { - const Tree *child_tree = ts_node__tree(self)->children.contents[i]; - return ts_node_make( - child_tree, - ts_node__offset_byte(self) + child_tree->context.offset.bytes, - ts_node__offset_row(self) + child_tree->context.offset.extent.row - ); -} - -static inline TSNode ts_node__child(TSNode self, uint32_t child_index, - bool include_anonymous) { +static inline TSNode ts_node__child(TSNode self, uint32_t child_index, bool include_anonymous) { TSNode result = self; bool did_descend = true; while (did_descend) { did_descend = false; + TSNode child; uint32_t index = 0; - for (uint32_t i = 0; i < ts_node__tree(result)->children.size; i++) { - TSNode child = ts_node__direct_child(result, i); + NodeChildIterator iterator = ts_node_child_iterator_begin(&result); + while (ts_node_child_iterator_next(&iterator, &child)) { if 
(ts_node__is_relevant(child, include_anonymous)) { - if (index == child_index) - return child; + if (index == child_index) return child; index++; } else { uint32_t grandchild_index = child_index - index; - uint32_t grandchild_count = - ts_node__relevant_child_count(child, include_anonymous); + uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous); if (grandchild_index < grandchild_count) { did_descend = true; result = child; @@ -103,47 +130,101 @@ static inline TSNode ts_node__child(TSNode self, uint32_t child_index, } static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) { - TSNode result = self; + uint32_t target_end_byte = ts_node_end_byte(self); - do { - uint32_t index; - result = ts_node__direct_parent(result, &index); - if (!result.data) - break; + TSNode node = ts_node_parent(self); + TSNode earlier_node = ts_node__null(); + bool earlier_node_is_relevant = false; - for (uint32_t i = index - 1; i + 1 > 0; i--) { - TSNode child = ts_node__direct_child(result, i); - if (ts_node__is_relevant(child, include_anonymous)) - return child; - uint32_t grandchild_count = - ts_node__relevant_child_count(child, include_anonymous); - if (grandchild_count > 0) - return ts_node__child(child, grandchild_count - 1, include_anonymous); + while (node.subtree) { + TSNode earlier_child = ts_node__null(); + bool earlier_child_is_relevant = false; + bool found_child_containing_target = false; + + TSNode child; + NodeChildIterator iterator = ts_node_child_iterator_begin(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (iterator.position.bytes >= target_end_byte) { + found_child_containing_target = child.subtree != self.subtree; + break; + } + + if (ts_node__is_relevant(child, include_anonymous)) { + earlier_child = child; + earlier_child_is_relevant = true; + } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { + earlier_child = child; + earlier_child_is_relevant = false; + } } - } 
while (!ts_node__tree(result)->visible); + + if (found_child_containing_target) { + if (earlier_child.subtree) { + earlier_node = earlier_child; + earlier_node_is_relevant = earlier_child_is_relevant; + } + node = child; + } else if (earlier_child_is_relevant) { + return earlier_child; + } else if (earlier_child.subtree) { + node = earlier_child; + } else if (earlier_node_is_relevant) { + return earlier_node; + } else { + node = earlier_node; + } + } return ts_node__null(); } static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) { - TSNode result = self; + uint32_t target_end_byte = ts_node_end_byte(self); - do { - uint32_t index; - result = ts_node__direct_parent(result, &index); - if (!result.data) - break; + TSNode node = ts_node_parent(self); + TSNode later_node = ts_node__null(); + bool later_node_is_relevant = false; - for (uint32_t i = index + 1; i < ts_node__tree(result)->children.size; i++) { - TSNode child = ts_node__direct_child(result, i); - if (ts_node__is_relevant(child, include_anonymous)) - return child; - uint32_t grandchild_count = - ts_node__relevant_child_count(child, include_anonymous); - if (grandchild_count > 0) - return ts_node__child(child, 0, include_anonymous); + while (node.subtree) { + TSNode later_child = ts_node__null(); + bool later_child_is_relevant = false; + TSNode child_containing_target = ts_node__null(); + + TSNode child; + NodeChildIterator iterator = ts_node_child_iterator_begin(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (iterator.position.bytes < target_end_byte) continue; + if (child.byte <= self.byte) { + if (child.subtree != self.subtree) { + child_containing_target = child; + } + } else if (ts_node__is_relevant(child, include_anonymous)) { + later_child = child; + later_child_is_relevant = true; + break; + } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { + later_child = child; + later_child_is_relevant = false; + break; + } } - } while 
(!ts_node__tree(result)->visible); + + if (child_containing_target.subtree) { + if (later_child.subtree) { + later_node = later_child; + later_node_is_relevant = later_child_is_relevant; + } + node = child_containing_target; + } else if (later_child_is_relevant) { + return later_child; + } else if (later_child.subtree) { + node = later_child; + } else if (later_node_is_relevant) { + return later_node; + } else { + node = later_node; + } + } return ts_node__null(); } @@ -160,8 +241,9 @@ static inline TSNode ts_node__first_child_for_byte(TSNode self, uint32_t goal, while (did_descend) { did_descend = false; - for (uint32_t i = 0; i < ts_node__tree(node)->children.size; i++) { - TSNode child = ts_node__direct_child(node, i); + TSNode child; + NodeChildIterator iterator = ts_node_child_iterator_begin(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { if (ts_node_end_byte(child) > goal) { if (ts_node__is_relevant(child, include_anonymous)) { return child; @@ -187,10 +269,11 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t mi while (did_descend) { did_descend = false; - for (uint32_t i = 0, n = ts_node__tree(node)->children.size; i < n; i++) { - TSNode child = ts_node__direct_child(node, i); - if (ts_node_end_byte(child) > max) { - if (ts_node_start_byte(child) > min) break; + TSNode child; + NodeChildIterator iterator = ts_node_child_iterator_begin(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (iterator.position.bytes > max) { + if (child.byte > min) break; node = child; if (ts_node__is_relevant(node, include_anonymous)) last_visible_node = node; did_descend = true; @@ -214,10 +297,13 @@ static inline TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint mi while (did_descend) { did_descend = false; - for (uint32_t i = 0, n = ts_node__tree(node)->children.size; i < n; i++) { - TSNode child = ts_node__direct_child(node, i); - const Tree *child_tree = ts_node__tree(child); - if (i > 0) 
start_position = point_add(start_position, child_tree->padding.extent); + TSNode child; + NodeChildIterator iterator = ts_node_child_iterator_begin(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + const Subtree *child_tree = ts_node__tree(child); + if (iterator.child_index != 1) { + start_position = point_add(start_position, child_tree->padding.extent); + } end_position = point_add(start_position, child_tree->size.extent); if (point_gt(end_position, max)) { if (point_gt(start_position, min)) break; @@ -233,12 +319,10 @@ static inline TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint mi return last_visible_node; } -/* - * Public - */ +// TSNode - Public uint32_t ts_node_start_byte(TSNode self) { - return ts_node__offset_byte(self) + ts_node__tree(self)->padding.bytes; + return self.byte + ts_node__tree(self)->padding.bytes; } uint32_t ts_node_end_byte(TSNode self) { @@ -246,45 +330,42 @@ uint32_t ts_node_end_byte(TSNode self) { } TSPoint ts_node_start_point(TSNode self) { - const Tree *tree = ts_node__tree(self); - return (TSPoint){ ts_node__offset_row(self) + tree->padding.extent.row, - ts_tree_start_column(tree) }; + return point_add(self.position, ts_node__tree(self)->padding.extent); } TSPoint ts_node_end_point(TSNode self) { - const Tree *tree = ts_node__tree(self); - return (TSPoint){ ts_node__offset_row(self) + tree->padding.extent.row + - tree->size.extent.row, - ts_tree_end_column(tree) }; + return point_add(ts_node_start_point(self), ts_node__tree(self)->size.extent); } TSSymbol ts_node_symbol(TSNode self) { - const Tree *tree = ts_node__tree(self); - return tree->context.alias_symbol ? tree->context.alias_symbol : tree->symbol; + const Subtree *tree = ts_node__tree(self); + return self.alias_symbol ? 
self.alias_symbol : tree->symbol; } -const char *ts_node_type(TSNode self, const TSDocument *document) { - return ts_language_symbol_name(document->parser.language, ts_node_symbol(self)); +const char *ts_node_type(TSNode self) { + return ts_language_symbol_name(self.tree->language, ts_node_symbol(self)); } -char *ts_node_string(TSNode self, const TSDocument *document) { - return ts_tree_string(ts_node__tree(self), document->parser.language, false); +char *ts_node_string(TSNode self) { + return ts_subtree_string(ts_node__tree(self), self.tree->language, false); } bool ts_node_eq(TSNode self, TSNode other) { - return - ts_tree_eq(ts_node__tree(self), ts_node__tree(other)) && - self.offset[0] == other.offset[0] && - self.offset[1] == other.offset[1]; + return ( + ts_subtree_eq(ts_node__tree(self), ts_node__tree(other)) && + self.byte == other.byte + ); } bool ts_node_is_named(TSNode self) { - const Tree *tree = ts_node__tree(self); - return tree->context.alias_symbol ? tree->context.alias_is_named : tree->named; + const Subtree *tree = ts_node__tree(self); + return self.alias_symbol + ? 
ts_language_symbol_metadata(self.tree->language, self.alias_symbol).named + : tree->named; } bool ts_node_is_missing(TSNode self) { - const Tree *tree = ts_node__tree(self); + const Subtree *tree = ts_node__tree(self); return tree->is_missing; } @@ -297,35 +378,31 @@ bool ts_node_has_error(TSNode self) { } TSNode ts_node_parent(TSNode self) { - TSNode result = self; - uint32_t index; + TSNode node = ts_tree_root_node(self.tree); + uint32_t end_byte = ts_node_end_byte(self); + if (node.subtree == self.subtree) return ts_node__null(); - do { - result = ts_node__direct_parent(result, &index); - if (!result.data) - return ts_node__null(); - } while (!ts_node__tree(result)->visible); + TSNode last_visible_node = node; + bool did_descend = true; + while (did_descend) { + did_descend = false; - return result; -} - -uint32_t ts_node_child_index(TSNode self) { - const Tree *tree = ts_node__tree(self); - uint32_t result = 0; - - for (;;) { - const Tree *parent = tree->context.parent; - uint32_t index = tree->context.index; - if (!parent) return UINT32_MAX; - for (uint32_t i = 0; i < index; i++) { - Tree *child = parent->children.contents[i]; - result += child->visible ? 
1 : child->visible_child_count; + TSNode child; + NodeChildIterator iterator = ts_node_child_iterator_begin(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (child.byte > self.byte || child.subtree == self.subtree) break; + if (iterator.position.bytes >= end_byte) { + node = child; + if (ts_node__is_relevant(child, true)) { + last_visible_node = node; + } + did_descend = true; + break; + } } - if (parent->visible) break; - tree = parent; } - return result; + return last_visible_node; } TSNode ts_node_child(TSNode self, uint32_t child_index) { @@ -337,7 +414,7 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) { } uint32_t ts_node_child_count(TSNode self) { - const Tree *tree = ts_node__tree(self); + const Subtree *tree = ts_node__tree(self); if (tree->children.size > 0) { return tree->visible_child_count; } else { @@ -346,7 +423,7 @@ uint32_t ts_node_child_count(TSNode self) { } uint32_t ts_node_named_child_count(TSNode self) { - const Tree *tree = ts_node__tree(self); + const Subtree *tree = ts_node__tree(self); if (tree->children.size > 0) { return tree->named_child_count; } else { diff --git a/src/runtime/node.h b/src/runtime/node.h deleted file mode 100644 index ee184c9a..00000000 --- a/src/runtime/node.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef RUNTIME_NODE_H_ -#define RUNTIME_NODE_H_ - -#include "runtime/tree.h" - -TSNode ts_node_make(const Tree *, uint32_t byte, uint32_t row); - -#endif diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 265354a2..dde46f80 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1,34 +1,37 @@ -#include "runtime/parser.h" #include #include #include #include #include "tree_sitter/runtime.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" #include "runtime/lexer.h" #include "runtime/length.h" #include "runtime/array.h" #include "runtime/language.h" #include "runtime/alloc.h" +#include "runtime/stack.h" +#include "runtime/reusable_node.h" #include 
"runtime/reduce_action.h" #include "runtime/error_costs.h" +#include "runtime/string_input.h" +#include "runtime/tree.h" #define LOG(...) \ - if (self->lexer.logger.log || self->print_debugging_graphs) { \ + if (self->lexer.logger.log || self->dot_graph_file) { \ snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ - parser__log(self); \ + ts_parser__log(self); \ } -#define LOG_STACK() \ - if (self->print_debugging_graphs) { \ - ts_stack_print_dot_graph(self->stack, self->language, stderr); \ - fputs("\n\n", stderr); \ +#define LOG_STACK() \ + if (self->dot_graph_file) { \ + ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \ + fputs("\n\n", self->dot_graph_file); \ } -#define LOG_TREE() \ - if (self->print_debugging_graphs) { \ - ts_tree_print_dot_graph(self->finished_tree, self->language, stderr); \ - fputs("\n", stderr); \ +#define LOG_TREE() \ + if (self->dot_graph_file) { \ + ts_subtree_print_dot_graph(self->finished_tree, self->language, self->dot_graph_file); \ + fputs("\n", self->dot_graph_file); \ } #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) @@ -37,6 +40,29 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; +typedef struct { + const Subtree *token; + const Subtree *last_external_token; + uint32_t byte_index; +} TokenCache; + +struct TSParser { + Lexer lexer; + Stack *stack; + SubtreePool tree_pool; + const TSLanguage *language; + ReduceActionSet reduce_actions; + const Subtree *finished_tree; + Subtree scratch_tree; + TokenCache token_cache; + ReusableNode reusable_node; + void *external_scanner_payload; + bool in_ambiguity; + FILE *dot_graph_file; + bool halt_on_error; + unsigned accept_count; +}; + typedef struct { unsigned cost; unsigned node_count; @@ -52,7 +78,9 @@ typedef enum { ErrorComparisonTakeRight, } ErrorComparison; -static void 
parser__log(Parser *self) { +// Parser - Private + +static void ts_parser__log(TSParser *self) { if (self->lexer.logger.log) { self->lexer.logger.log( self->lexer.logger.payload, @@ -61,17 +89,17 @@ static void parser__log(Parser *self) { ); } - if (self->print_debugging_graphs) { - fprintf(stderr, "graph {\nlabel=\""); + if (self->dot_graph_file) { + fprintf(self->dot_graph_file, "graph {\nlabel=\""); for (char *c = &self->lexer.debug_buffer[0]; *c != 0; c++) { - if (*c == '"') fputc('\\', stderr); - fputc(*c, stderr); + if (*c == '"') fputc('\\', self->dot_graph_file); + fputc(*c, self->dot_graph_file); } - fprintf(stderr, "\"\n}\n\n"); + fprintf(self->dot_graph_file, "\"\n}\n\n"); } } -static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { +static bool ts_parser__breakdown_top_of_stack(TSParser *self, StackVersion version) { bool did_break_down = false; bool pending = false; @@ -84,10 +112,10 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { for (uint32_t i = 0; i < pop.size; i++) { StackSlice slice = pop.contents[i]; TSStateId state = ts_stack_state(self->stack, slice.version); - Tree *parent = *array_front(&slice.trees); + const Subtree *parent = *array_front(&slice.subtrees); for (uint32_t j = 0; j < parent->children.size; j++) { - Tree *child = parent->children.contents[j]; + const Subtree *child = parent->children.contents[j]; pending = child->children.size > 0; if (child->symbol == ts_builtin_sym_error) { @@ -96,17 +124,17 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { state = ts_language_next_state(self->language, state, child->symbol); } - ts_tree_retain(child); + ts_subtree_retain(child); ts_stack_push(self->stack, slice.version, child, pending, state); } - for (uint32_t j = 1; j < slice.trees.size; j++) { - Tree *tree = slice.trees.contents[j]; + for (uint32_t j = 1; j < slice.subtrees.size; j++) { + const Subtree *tree = slice.subtrees.contents[j]; 
ts_stack_push(self->stack, slice.version, tree, false, state); } - ts_tree_release(&self->tree_pool, parent); - array_delete(&slice.trees); + ts_subtree_release(&self->tree_pool, parent); + array_delete(&slice.subtrees); LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); LOG_STACK(); @@ -116,23 +144,25 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { return did_break_down; } -static void parser__breakdown_lookahead(Parser *self, Tree **lookahead, - TSStateId state, - ReusableNode *reusable_node) { - bool did_break_down = false; - while (reusable_node->tree->children.size > 0 && reusable_node->tree->parse_state != state) { - LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol)); - reusable_node_breakdown(reusable_node); - did_break_down = true; +static void ts_parser__breakdown_lookahead(TSParser *self, const Subtree **lookahead, + TSStateId state, ReusableNode *reusable_node) { + bool did_descend = false; + const Subtree *tree = reusable_node_tree(reusable_node); + while (tree->children.size > 0 && tree->parse_state != state) { + LOG("state_mismatch sym:%s", SYM_NAME(tree->symbol)); + reusable_node_descend(reusable_node); + tree = reusable_node_tree(reusable_node); + did_descend = true; } - if (did_break_down) { - ts_tree_release(&self->tree_pool, *lookahead); - ts_tree_retain(*lookahead = reusable_node->tree); + if (did_descend) { + ts_subtree_release(&self->tree_pool, *lookahead); + *lookahead = tree; + ts_subtree_retain(*lookahead); } } -static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, ErrorStatus b) { +static ErrorComparison ts_parser__compare_versions(TSParser *self, ErrorStatus a, ErrorStatus b) { if (!a.is_in_error && b.is_in_error) { if (a.cost < b.cost) { return ErrorComparisonTakeLeft; @@ -170,7 +200,7 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err return ErrorComparisonNone; } -static ErrorStatus parser__version_status(Parser 
*self, StackVersion version) { +static ErrorStatus ts_parser__version_status(TSParser *self, StackVersion version) { unsigned cost = ts_stack_error_cost(self->stack, version); bool is_paused = ts_stack_is_paused(self->stack, version); if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; @@ -182,7 +212,7 @@ static ErrorStatus parser__version_status(Parser *self, StackVersion version) { }; } -static bool parser__better_version_exists(Parser *self, StackVersion version, +static bool ts_parser__better_version_exists(TSParser *self, StackVersion version, bool is_in_error, unsigned cost) { if (self->finished_tree && self->finished_tree->error_cost <= cost) return true; @@ -198,8 +228,8 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, if (i == version || !ts_stack_is_active(self->stack, i) || ts_stack_position(self->stack, i).bytes < position.bytes) continue; - ErrorStatus status_i = parser__version_status(self, i); - switch (parser__compare_versions(self, status, status_i)) { + ErrorStatus status_i = ts_parser__version_status(self, i); + switch (ts_parser__compare_versions(self, status, status_i)) { case ErrorComparisonTakeRight: return true; case ErrorComparisonPreferRight: @@ -212,21 +242,21 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, return false; } -static void parser__restore_external_scanner(Parser *self, Tree *external_token) { +static void ts_parser__restore_external_scanner(TSParser *self, const Subtree *external_token) { if (external_token) { self->language->external_scanner.deserialize( self->external_scanner_payload, - ts_external_token_state_data(&external_token->external_token_state), - external_token->external_token_state.length + ts_external_scanner_state_data(&external_token->external_scanner_state), + external_token->external_scanner_state.length ); } else { self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0); } } -static Tree *parser__lex(Parser *self, 
StackVersion version, TSStateId parse_state) { +static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSStateId parse_state) { Length start_position = ts_stack_position(self->stack, version); - Tree *external_token = ts_stack_last_external_token(self->stack, version); + const Subtree *external_token = ts_stack_last_external_token(self->stack, version); TSLexMode lex_mode = self->language->lex_modes[parse_state]; const bool *valid_external_tokens = ts_language_enabled_external_tokens( self->language, @@ -253,7 +283,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta current_position.extent.column ); ts_lexer_start(&self->lexer); - parser__restore_external_scanner(self, external_token); + ts_parser__restore_external_scanner(self, external_token); if (self->language->external_scanner.scan( self->external_scanner_payload, &self->lexer.data, @@ -323,11 +353,11 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta last_byte_scanned = self->lexer.current_position.bytes; } - Tree *result; + Subtree *result; if (skipped_error) { Length padding = length_sub(error_start_position, start_position); Length size = length_sub(error_end_position, error_start_position); - result = ts_tree_make_error(&self->tree_pool, size, padding, first_error_character, self->language); + result = ts_subtree_new_error(&self->tree_pool, size, padding, first_error_character, self->language); } else { if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) { self->lexer.token_start_position = self->lexer.token_end_position; @@ -352,7 +382,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta } } - result = ts_tree_make_leaf(&self->tree_pool, symbol, padding, size, self->language); + result = ts_subtree_new_leaf(&self->tree_pool, symbol, padding, size, self->language); if (found_external_token) { result->has_external_tokens = true; @@ -360,7 +390,7 @@ static 
Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta self->external_scanner_payload, self->lexer.debug_buffer ); - ts_external_token_state_init(&result->external_token_state, self->lexer.debug_buffer, length); + ts_external_scanner_state_init(&result->external_scanner_state, self->lexer.debug_buffer, length); } } @@ -372,32 +402,32 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta return result; } -static Tree *parser__get_cached_token(Parser *self, size_t byte_index, Tree *last_external_token) { +static const Subtree *ts_parser__get_cached_token(TSParser *self, size_t byte_index, + const Subtree *last_external_token) { TokenCache *cache = &self->token_cache; if (cache->token && cache->byte_index == byte_index && - ts_tree_external_token_state_eq(cache->last_external_token, last_external_token)) { + ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)) { return cache->token; } else { return NULL; } } -static void parser__set_cached_token(Parser *self, size_t byte_index, Tree *last_external_token, - Tree *token) { +static void ts_parser__set_cached_token(TSParser *self, size_t byte_index, + const Subtree *last_external_token, const Subtree *token) { TokenCache *cache = &self->token_cache; - if (token) ts_tree_retain(token); - if (last_external_token) ts_tree_retain(last_external_token); - if (cache->token) ts_tree_release(&self->tree_pool, cache->token); - if (cache->last_external_token) ts_tree_release(&self->tree_pool, cache->last_external_token); + if (token) ts_subtree_retain(token); + if (last_external_token) ts_subtree_retain(last_external_token); + if (cache->token) ts_subtree_release(&self->tree_pool, cache->token); + if (cache->last_external_token) ts_subtree_release(&self->tree_pool, cache->last_external_token); cache->token = token; cache->byte_index = byte_index; cache->last_external_token = last_external_token; } -static bool parser__can_reuse_first_leaf(Parser 
*self, TSStateId state, Tree *tree, - TableEntry *table_entry, - ReusableNode *next_reusable_node) { +static bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, const Subtree *tree, + TableEntry *table_entry) { TSLexMode current_lex_mode = self->language->lex_modes[state]; // If the token was created in a state with the same set of lookaheads, it is reusable. @@ -414,27 +444,29 @@ static bool parser__can_reuse_first_leaf(Parser *self, TSStateId state, Tree *tr return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; } -static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId *state, - ReusableNode *reusable_node, TableEntry *table_entry) { +static const Subtree *ts_parser__get_lookahead(TSParser *self, StackVersion version, + TSStateId *state, ReusableNode *reusable_node, + TableEntry *table_entry) { Length position = ts_stack_position(self->stack, version); - Tree *last_external_token = ts_stack_last_external_token(self->stack, version); + const Subtree *last_external_token = ts_stack_last_external_token(self->stack, version); - Tree *result; - while ((result = reusable_node->tree)) { - if (reusable_node->byte_index > position.bytes) { + const Subtree *result; + while ((result = reusable_node_tree(reusable_node))) { + uint32_t byte_offset = reusable_node_byte_offset(reusable_node); + if (byte_offset > position.bytes) { LOG("before_reusable_node symbol:%s", SYM_NAME(result->symbol)); break; } - if (reusable_node->byte_index < position.bytes) { + if (byte_offset < position.bytes) { LOG("past_reusable_node symbol:%s", SYM_NAME(result->symbol)); - reusable_node_pop(reusable_node); + reusable_node_advance(reusable_node); continue; } - if (!ts_tree_external_token_state_eq(reusable_node->last_external_token, last_external_token)) { + if (!ts_subtree_external_scanner_state_eq(reusable_node->last_external_token, last_external_token)) { LOG("reusable_node_has_different_external_scanner_state symbol:%s", 
SYM_NAME(result->symbol)); - reusable_node_pop(reusable_node); + reusable_node_advance(reusable_node); continue; } @@ -453,46 +485,45 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId if (reason) { LOG("cant_reuse_node_%s tree:%s", reason, SYM_NAME(result->symbol)); - if (!reusable_node_breakdown(reusable_node)) { - reusable_node_pop(reusable_node); - parser__breakdown_top_of_stack(self, version); + if (!reusable_node_descend(reusable_node)) { + reusable_node_advance(reusable_node); + ts_parser__breakdown_top_of_stack(self, version); *state = ts_stack_state(self->stack, version); } continue; } ts_language_table_entry(self->language, *state, result->first_leaf.symbol, table_entry); - ReusableNode next_reusable_node = reusable_node_after_leaf(reusable_node); - if (!parser__can_reuse_first_leaf(self, *state, result, table_entry, &next_reusable_node)) { + if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { LOG( "cant_reuse_node symbol:%s, first_leaf_symbol:%s", SYM_NAME(result->symbol), SYM_NAME(result->first_leaf.symbol) ); - *reusable_node = next_reusable_node; + reusable_node_advance_past_leaf(reusable_node); break; } LOG("reuse_node symbol:%s", SYM_NAME(result->symbol)); - ts_tree_retain(result); + ts_subtree_retain(result); return result; } - if ((result = parser__get_cached_token(self, position.bytes, last_external_token))) { + if ((result = ts_parser__get_cached_token(self, position.bytes, last_external_token))) { ts_language_table_entry(self->language, *state, result->first_leaf.symbol, table_entry); - if (parser__can_reuse_first_leaf(self, *state, result, table_entry, NULL)) { - ts_tree_retain(result); + if (ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { + ts_subtree_retain(result); return result; } } - result = parser__lex(self, version, *state); - parser__set_cached_token(self, position.bytes, last_external_token, result); + result = ts_parser__lex(self, version, *state); + 
ts_parser__set_cached_token(self, position.bytes, last_external_token, result); ts_language_table_entry(self->language, *state, result->symbol, table_entry); return result; } -static bool parser__select_tree(Parser *self, Tree *left, Tree *right) { +static bool ts_parser__select_tree(TSParser *self, const Subtree *left, const Subtree *right) { if (!left) return true; if (!right) return false; @@ -524,7 +555,7 @@ static bool parser__select_tree(Parser *self, Tree *left, Tree *right) { if (left->error_cost > 0) return true; - int comparison = ts_tree_compare(left, right); + int comparison = ts_subtree_compare(left, right); switch (comparison) { case -1: LOG("select_earlier symbol:%s, over_symbol:%s", SYM_NAME(left->symbol), @@ -542,33 +573,31 @@ static bool parser__select_tree(Parser *self, Tree *left, Tree *right) { } } -static void parser__shift(Parser *self, StackVersion version, TSStateId state, - Tree *lookahead, bool extra) { +static void ts_parser__shift(TSParser *self, StackVersion version, TSStateId state, + const Subtree *lookahead, bool extra) { + const Subtree *subtree_to_push; if (extra != lookahead->extra) { - if (ts_stack_version_count(self->stack) > 1) { - lookahead = ts_tree_make_copy(&self->tree_pool, lookahead); - } else { - ts_tree_retain(lookahead); - } - lookahead->extra = extra; + Subtree *result = ts_subtree_make_mut(&self->tree_pool, lookahead); + result->extra = extra; + subtree_to_push = result; } else { - ts_tree_retain(lookahead); + subtree_to_push = lookahead; } - bool is_pending = lookahead->children.size > 0; - ts_stack_push(self->stack, version, lookahead, is_pending, state); - if (lookahead->has_external_tokens) { + bool is_pending = subtree_to_push->children.size > 0; + ts_stack_push(self->stack, version, subtree_to_push, is_pending, state); + if (subtree_to_push->has_external_tokens) { ts_stack_set_last_external_token( - self->stack, version, ts_tree_last_external_token(lookahead) + self->stack, version, 
ts_subtree_last_external_token(subtree_to_push) ); } } -static bool parser__replace_children(Parser *self, Tree *tree, TreeArray *children) { +static bool ts_parser__replace_children(TSParser *self, Subtree *tree, SubtreeArray *children) { self->scratch_tree = *tree; self->scratch_tree.children.size = 0; - ts_tree_set_children(&self->scratch_tree, children, self->language); - if (parser__select_tree(self, tree, &self->scratch_tree)) { + ts_subtree_set_children(&self->scratch_tree, children, self->language); + if (ts_parser__select_tree(self, tree, &self->scratch_tree)) { *tree = self->scratch_tree; return true; } else { @@ -576,7 +605,7 @@ static bool parser__replace_children(Parser *self, Tree *tree, TreeArray *childr } } -static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymbol symbol, +static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, uint16_t alias_sequence_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); @@ -589,12 +618,12 @@ static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymb // Extra tokens on top of the stack should not be included in this new parent // node. They will be re-pushed onto the stack after the parent node is // created and pushed. 
- TreeArray children = slice.trees; + SubtreeArray children = slice.subtrees; while (children.size > 0 && children.contents[children.size - 1]->extra) { children.size--; } - Tree *parent = ts_tree_make_node(&self->tree_pool, + Subtree *parent = ts_subtree_new_node(&self->tree_pool, symbol, &children, alias_sequence_id, self->language ); @@ -607,16 +636,16 @@ static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymb if (next_slice.version != slice.version) break; i++; - TreeArray children = next_slice.trees; + SubtreeArray children = next_slice.subtrees; while (children.size > 0 && children.contents[children.size - 1]->extra) { children.size--; } - if (parser__replace_children(self, parent, &children)) { - ts_tree_array_delete(&self->tree_pool, &slice.trees); + if (ts_parser__replace_children(self, parent, &children)) { + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); slice = next_slice; } else { - ts_tree_array_delete(&self->tree_pool, &next_slice.trees); + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); } } @@ -636,15 +665,15 @@ static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymb // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. 
ts_stack_push(self->stack, slice.version, parent, false, next_state); - for (uint32_t j = parent->children.size; j < slice.trees.size; j++) { - ts_stack_push(self->stack, slice.version, slice.trees.contents[j], false, next_state); + for (uint32_t j = parent->children.size; j < slice.subtrees.size; j++) { + ts_stack_push(self->stack, slice.version, slice.subtrees.contents[j], false, next_state); } if (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { i++; while (i < pop.size) { StackSlice slice = pop.contents[i]; - ts_tree_array_delete(&self->tree_pool, &slice.trees); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); ts_stack_halt(self->stack, slice.version); i++; } @@ -667,7 +696,7 @@ static StackSliceArray parser__reduce(Parser *self, StackVersion version, TSSymb return pop; } -static void parser__start(Parser *self, TSInput input, Tree *previous_tree) { +static void ts_parser__start(TSParser *self, TSInput input, const Subtree *previous_tree) { if (previous_tree) { LOG("parse_after_edit"); } else { @@ -680,35 +709,33 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) { ts_lexer_set_input(&self->lexer, input); ts_stack_clear(self->stack); - self->reusable_node = reusable_node_new(previous_tree); + reusable_node_reset(&self->reusable_node, previous_tree); self->finished_tree = NULL; self->accept_count = 0; self->in_ambiguity = false; } -static void parser__accept(Parser *self, StackVersion version, Tree *lookahead) { - lookahead->extra = true; +static void ts_parser__accept(TSParser *self, StackVersion version, const Subtree *lookahead) { assert(lookahead->symbol == ts_builtin_sym_end); - ts_tree_retain(lookahead); ts_stack_push(self->stack, version, lookahead, false, 1); StackSliceArray pop = ts_stack_pop_all(self->stack, version); for (uint32_t i = 0; i < pop.size; i++) { - TreeArray trees = pop.contents[i].trees; + SubtreeArray trees = pop.contents[i].subtrees; - Tree *root = NULL; + const Subtree *root = NULL; 
for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { - Tree *child = trees.contents[j]; + const Subtree *child = trees.contents[j]; if (!child->extra) { for (uint32_t k = 0; k < child->children.size; k++) { - ts_tree_retain(child->children.contents[k]); + ts_subtree_retain(child->children.contents[k]); } array_splice(&trees, j, 1, &child->children); - root = ts_tree_make_node( + root = ts_subtree_new_node( &self->tree_pool, child->symbol, &trees, child->alias_sequence_id, self->language ); - ts_tree_release(&self->tree_pool, child); + ts_subtree_release(&self->tree_pool, child); break; } } @@ -717,11 +744,11 @@ static void parser__accept(Parser *self, StackVersion version, Tree *lookahead) self->accept_count++; if (self->finished_tree) { - if (parser__select_tree(self, self->finished_tree, root)) { - ts_tree_release(&self->tree_pool, self->finished_tree); + if (ts_parser__select_tree(self, self->finished_tree, root)) { + ts_subtree_release(&self->tree_pool, self->finished_tree); self->finished_tree = root; } else { - ts_tree_release(&self->tree_pool, root); + ts_subtree_release(&self->tree_pool, root); } } else { self->finished_tree = root; @@ -732,7 +759,7 @@ static void parser__accept(Parser *self, StackVersion version, Tree *lookahead) ts_stack_halt(self->stack, version); } -static bool parser__do_all_potential_reductions(Parser *self, StackVersion starting_version, +static bool ts_parser__do_all_potential_reductions(TSParser *self, StackVersion starting_version, TSSymbol lookahead_symbol) { uint32_t initial_version_count = ts_stack_version_count(self->stack); @@ -791,7 +818,7 @@ static bool parser__do_all_potential_reductions(Parser *self, StackVersion start for (uint32_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; - parser__reduce( + ts_parser__reduce( self, version, action.symbol, action.count, action.dynamic_precedence, action.alias_sequence_id, true @@ -817,10 +844,11 @@ static bool 
parser__do_all_potential_reductions(Parser *self, StackVersion start return can_shift_lookahead_symbol; } -static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) { +static void ts_parser__handle_error(TSParser *self, StackVersion version, + TSSymbol lookahead_symbol) { // Perform any reductions that could have happened in this state, regardless of the lookahead. uint32_t previous_version_count = ts_stack_version_count(self->stack); - parser__do_all_potential_reductions(self, version, 0); + ts_parser__do_all_potential_reductions(self, version, 0); uint32_t version_count = ts_stack_version_count(self->stack); // Push a discontinuity onto the stack. Merge all of the stack versions that @@ -843,14 +871,16 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo lookahead_symbol )) { StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); - Tree *missing_tree = ts_tree_make_missing_leaf(&self->tree_pool, missing_symbol, self->language); + const Subtree *missing_tree = ts_subtree_new_missing_leaf( + &self->tree_pool, missing_symbol, self->language + ); ts_stack_push( self->stack, version_with_missing_tree, missing_tree, false, state_after_missing_symbol ); - if (parser__do_all_potential_reductions( + if (ts_parser__do_all_potential_reductions( self, version_with_missing_tree, lookahead_symbol )) { @@ -878,7 +908,7 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo LOG_STACK(); } -static void parser__halt_parse(Parser *self) { +static void ts_parser__halt_parse(TSParser *self) { LOG("halting_parse"); LOG_STACK(); @@ -888,21 +918,20 @@ static void parser__halt_parse(Parser *self) { ts_stack_position(self->stack, 0) ); - Tree *filler_node = ts_tree_make_error(&self->tree_pool, remaining_length, length_zero(), 0, self->language); + Subtree *filler_node = ts_subtree_new_error(&self->tree_pool, remaining_length, length_zero(), 0, self->language); 
filler_node->visible = false; ts_stack_push(self->stack, 0, filler_node, false, 0); - TreeArray children = array_new(); - Tree *root_error = ts_tree_make_error_node(&self->tree_pool, &children, self->language); + SubtreeArray children = array_new(); + Subtree *root_error = ts_subtree_new_error_node(&self->tree_pool, &children, self->language); ts_stack_push(self->stack, 0, root_error, false, 0); - Tree *eof = ts_tree_make_leaf(&self->tree_pool, ts_builtin_sym_end, length_zero(), length_zero(), self->language); - parser__accept(self, 0, eof); - ts_tree_release(&self->tree_pool, eof); + Subtree *eof = ts_subtree_new_leaf(&self->tree_pool, ts_builtin_sym_end, length_zero(), length_zero(), self->language); + ts_parser__accept(self, 0, eof); } -static bool parser__recover_to_state(Parser *self, StackVersion version, unsigned depth, - TSStateId goal_state) { +static bool ts_parser__recover_to_state(TSParser *self, StackVersion version, unsigned depth, + TSStateId goal_state) { StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); StackVersion previous_version = STACK_VERSION_NONE; @@ -910,40 +939,40 @@ static bool parser__recover_to_state(Parser *self, StackVersion version, unsigne StackSlice slice = pop.contents[i]; if (slice.version == previous_version) { - ts_tree_array_delete(&self->tree_pool, &slice.trees); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); array_erase(&pop, i--); continue; } if (ts_stack_state(self->stack, slice.version) != goal_state) { ts_stack_halt(self->stack, slice.version); - ts_tree_array_delete(&self->tree_pool, &slice.trees); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); array_erase(&pop, i--); continue; } - TreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); + SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); if (error_trees.size > 0) { assert(error_trees.size == 1); - array_splice(&slice.trees, 0, 0, &error_trees.contents[0]->children); + 
array_splice(&slice.subtrees, 0, 0, &error_trees.contents[0]->children); for (unsigned j = 0; j < error_trees.contents[0]->children.size; j++) { - ts_tree_retain(slice.trees.contents[j]); + ts_subtree_retain(slice.subtrees.contents[j]); } - ts_tree_array_delete(&self->tree_pool, &error_trees); + ts_subtree_array_delete(&self->tree_pool, &error_trees); } - TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&slice.trees); + SubtreeArray trailing_extras = ts_subtree_array_remove_trailing_extras(&slice.subtrees); - if (slice.trees.size > 0) { - Tree *error = ts_tree_make_error_node(&self->tree_pool, &slice.trees, self->language); + if (slice.subtrees.size > 0) { + Subtree *error = ts_subtree_new_error_node(&self->tree_pool, &slice.subtrees, self->language); error->extra = true; ts_stack_push(self->stack, slice.version, error, false, goal_state); } else { - array_delete(&slice.trees); + array_delete(&slice.subtrees); } for (unsigned j = 0; j < trailing_extras.size; j++) { - Tree *tree = trailing_extras.contents[j]; + const Subtree *tree = trailing_extras.contents[j]; ts_stack_push(self->stack, slice.version, tree, false, goal_state); } @@ -954,7 +983,7 @@ static bool parser__recover_to_state(Parser *self, StackVersion version, unsigne return previous_version != STACK_VERSION_NONE; } -static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) { +static void ts_parser__recover(TSParser *self, StackVersion version, const Subtree *lookahead) { bool did_recover = false; unsigned previous_version_count = ts_stack_version_count(self->stack); Length position = ts_stack_position(self->stack, version); @@ -989,10 +1018,10 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) entry.depth * ERROR_COST_PER_SKIPPED_TREE + (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; - if (parser__better_version_exists(self, 
version, false, new_cost)) break; + if (ts_parser__better_version_exists(self, version, false, new_cost)) break; if (ts_language_has_actions(self->language, entry.state, lookahead->symbol)) { - if (parser__recover_to_state(self, version, depth, entry.state)) { + if (ts_parser__recover_to_state(self, version, depth, entry.state)) { did_recover = true; LOG("recover_to_previous state:%u, depth:%u", entry.state, depth); LOG_STACK(); @@ -1010,40 +1039,43 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); return; } if (lookahead->symbol == ts_builtin_sym_end) { LOG("recover_eof"); - TreeArray children = array_new(); - Tree *parent = ts_tree_make_error_node(&self->tree_pool, &children, self->language); + SubtreeArray children = array_new(); + const Subtree *parent = ts_subtree_new_error_node(&self->tree_pool, &children, self->language); ts_stack_push(self->stack, version, parent, false, 1); - parser__accept(self, version, lookahead); + ts_parser__accept(self, version, lookahead); return; } unsigned new_cost = current_error_cost + ERROR_COST_PER_SKIPPED_TREE + - ts_tree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + - ts_tree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; + ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + + ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; - if (parser__better_version_exists(self, version, false, new_cost)) { + if (ts_parser__better_version_exists(self, version, false, new_cost)) { ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); return; } unsigned n; const TSParseAction *actions = ts_language_actions(self->language, 1, lookahead->symbol, &n); if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra) { - 
lookahead->extra = true; + Subtree *mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); + mutable_lookahead->extra = true; + lookahead = mutable_lookahead; } LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol)); - ts_tree_retain(lookahead); - TreeArray children = array_new(); + SubtreeArray children = array_new(); array_reserve(&children, 1); array_push(&children, lookahead); - Tree *error_repeat = ts_tree_make_node( + const Subtree *error_repeat = ts_subtree_new_node( &self->tree_pool, ts_builtin_sym_error_repeat, &children, @@ -1054,13 +1086,13 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) if (node_count_since_error > 0) { StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); assert(pop.size == 1); - assert(pop.contents[0].trees.size == 1); + assert(pop.contents[0].subtrees.size == 1); ts_stack_renumber_version(self->stack, pop.contents[0].version, version); - array_push(&pop.contents[0].trees, error_repeat); - error_repeat = ts_tree_make_node( + array_push(&pop.contents[0].subtrees, error_repeat); + error_repeat = ts_subtree_new_node( &self->tree_pool, ts_builtin_sym_error_repeat, - &pop.contents[0].trees, + &pop.contents[0].subtrees, 0, self->language ); @@ -1070,15 +1102,17 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) if (lookahead->has_external_tokens) { ts_stack_set_last_external_token( - self->stack, version, ts_tree_last_external_token(lookahead) + self->stack, version, ts_subtree_last_external_token(lookahead) ); } } -static void parser__advance(Parser *self, StackVersion version, ReusableNode *reusable_node) { +static void ts_parser__advance(TSParser *self, StackVersion version, ReusableNode *reusable_node) { TSStateId state = ts_stack_state(self->stack, version); TableEntry table_entry; - Tree *lookahead = parser__get_lookahead(self, version, &state, reusable_node, &table_entry); + const Subtree *lookahead = ts_parser__get_lookahead( + self, 
version, &state, reusable_node, &table_entry + ); for (;;) { StackVersion last_reduction_version = STACK_VERSION_NONE; @@ -1103,20 +1137,21 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re } if (lookahead->children.size > 0) { - parser__breakdown_lookahead(self, &lookahead, state, reusable_node); + ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node); next_state = ts_language_next_state(self->language, state, lookahead->symbol); } - parser__shift(self, version, next_state, lookahead, action.params.extra); - if (lookahead == reusable_node->tree) reusable_node_pop(reusable_node); - ts_tree_release(&self->tree_pool, lookahead); + ts_parser__shift(self, version, next_state, lookahead, action.params.extra); + if (lookahead == reusable_node_tree(reusable_node)) { + reusable_node_advance(reusable_node); + } return; } case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); - StackSliceArray reduction = parser__reduce( + StackSliceArray reduction = ts_parser__reduce( self, version, action.params.symbol, action.params.child_count, action.params.dynamic_precedence, action.params.alias_sequence_id, is_fragile @@ -1128,18 +1163,18 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re case TSParseActionTypeAccept: { LOG("accept"); - parser__accept(self, version, lookahead); - ts_tree_release(&self->tree_pool, lookahead); + ts_parser__accept(self, version, lookahead); return; } case TSParseActionTypeRecover: { while (lookahead->children.size > 0) { - parser__breakdown_lookahead(self, &lookahead, state, reusable_node); + ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node); + } + ts_parser__recover(self, version, lookahead); + if (lookahead == reusable_node_tree(reusable_node)) { + reusable_node_advance(reusable_node); } - parser__recover(self, version, 
lookahead); - if (lookahead == reusable_node->tree) reusable_node_pop(reusable_node); - ts_tree_release(&self->tree_pool, lookahead); return; } } @@ -1149,13 +1184,12 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re ts_stack_renumber_version(self->stack, last_reduction_version, version); LOG_STACK(); } else if (state == ERROR_STATE) { - parser__recover(self, version, lookahead); - ts_tree_release(&self->tree_pool, lookahead); + ts_parser__recover(self, version, lookahead); return; - } else if (!parser__breakdown_top_of_stack(self, version)) { + } else if (!ts_parser__breakdown_top_of_stack(self, version)) { LOG("detect_error"); ts_stack_pause(self->stack, version, lookahead->first_leaf.symbol); - ts_tree_release(&self->tree_pool, lookahead); + ts_subtree_release(&self->tree_pool, lookahead); return; } @@ -1164,7 +1198,7 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re } } -static unsigned parser__condense_stack(Parser *self) { +static unsigned ts_parser__condense_stack(TSParser *self) { bool made_changes = false; unsigned min_error_cost = UINT_MAX; for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { @@ -1174,15 +1208,15 @@ static unsigned parser__condense_stack(Parser *self) { continue; } - ErrorStatus status_i = parser__version_status(self, i); + ErrorStatus status_i = ts_parser__version_status(self, i); if (!status_i.is_in_error && status_i.cost < min_error_cost) { min_error_cost = status_i.cost; } for (StackVersion j = 0; j < i; j++) { - ErrorStatus status_j = parser__version_status(self, j); + ErrorStatus status_j = ts_parser__version_status(self, j); - switch (parser__compare_versions(self, status_j, status_i)) { + switch (ts_parser__compare_versions(self, status_j, status_i)) { case ErrorComparisonTakeLeft: made_changes = true; ts_stack_remove_version(self->stack, i); @@ -1229,7 +1263,7 @@ static unsigned parser__condense_stack(Parser *self) { LOG("resume version:%u", 
i); min_error_cost = ts_stack_error_cost(self->stack, i); TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i); - parser__handle_error(self, i, lookahead_symbol); + ts_parser__handle_error(self, i, lookahead_symbol); has_unpaused_version = true; } else { ts_stack_remove_version(self->stack, i); @@ -1250,48 +1284,85 @@ static unsigned parser__condense_stack(Parser *self) { return min_error_cost; } -bool parser_init(Parser *self) { +// Parser - Public + +TSParser *ts_parser_new() { + TSParser *self = ts_calloc(1, sizeof(TSParser)); ts_lexer_init(&self->lexer); array_init(&self->reduce_actions); array_reserve(&self->reduce_actions, 4); - ts_tree_pool_init(&self->tree_pool); + self->tree_pool = ts_subtree_pool_new(32); self->stack = ts_stack_new(&self->tree_pool); self->finished_tree = NULL; - parser__set_cached_token(self, 0, NULL, NULL); + self->reusable_node = reusable_node_new(); + self->dot_graph_file = NULL; + self->halt_on_error = false; + ts_parser__set_cached_token(self, 0, NULL, NULL); + return self; +} + +void ts_parser_delete(TSParser *self) { + if (self->stack) { + ts_stack_delete(self->stack); + } + if (self->reduce_actions.contents) { + array_delete(&self->reduce_actions); + } + ts_subtree_pool_delete(&self->tree_pool); + reusable_node_delete(&self->reusable_node); + ts_parser_set_language(self, NULL); + ts_free(self); +} + +const TSLanguage *ts_parser_language(const TSParser *self) { + return self->language; +} + +bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { + if (language && language->version != TREE_SITTER_LANGUAGE_VERSION) return false; + + if (self->external_scanner_payload && self->language->external_scanner.destroy) { + self->language->external_scanner.destroy(self->external_scanner_payload); + } + + if (language && language->external_scanner.create) { + self->external_scanner_payload = language->external_scanner.create(); + } else { + self->external_scanner_payload = NULL; + } + + self->language = language; 
return true; } -void parser_set_language(Parser *self, const TSLanguage *language) { - if (self->external_scanner_payload && self->language->external_scanner.destroy) - self->language->external_scanner.destroy(self->external_scanner_payload); - - if (language && language->external_scanner.create) - self->external_scanner_payload = language->external_scanner.create(); - else - self->external_scanner_payload = NULL; - - self->language = language; +TSLogger ts_parser_logger(const TSParser *self) { + return self->lexer.logger; } -void parser_destroy(Parser *self) { - if (self->stack) - ts_stack_delete(self->stack); - if (self->reduce_actions.contents) - array_delete(&self->reduce_actions); - ts_tree_pool_delete(&self->tree_pool); - parser_set_language(self, NULL); +void ts_parser_set_logger(TSParser *self, TSLogger logger) { + self->lexer.logger = logger; } -Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_error) { - parser__start(self, input, old_tree); +void ts_parser_print_dot_graphs(TSParser *self, FILE *file) { + self->dot_graph_file = file; +} + +void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) { + self->halt_on_error = should_halt_on_error; +} + +TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { + if (!self->language) return NULL; + ts_parser__start(self, input, old_tree ? 
old_tree->root : NULL); StackVersion version = STACK_VERSION_NONE; uint32_t position = 0, last_position = 0; - ReusableNode reusable_node; + ReusableNode reusable_node = reusable_node_new(); + reusable_node_assign(&reusable_node, &self->reusable_node); do { for (version = 0; version < ts_stack_version_count(self->stack); version++) { - reusable_node = self->reusable_node; + reusable_node_assign(&reusable_node, &self->reusable_node); while (ts_stack_is_active(self->stack, version)) { LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", @@ -1300,7 +1371,7 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err ts_stack_position(self->stack, version).extent.row, ts_stack_position(self->stack, version).extent.column); - parser__advance(self, version, &reusable_node); + ts_parser__advance(self, version, &reusable_node); LOG_STACK(); position = ts_stack_position(self->stack, version).bytes; @@ -1311,24 +1382,33 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err } } - self->reusable_node = reusable_node; + reusable_node_assign(&self->reusable_node, &reusable_node); - unsigned min_error_cost = parser__condense_stack(self); + unsigned min_error_cost = ts_parser__condense_stack(self); if (self->finished_tree && self->finished_tree->error_cost < min_error_cost) { break; - } else if (halt_on_error && min_error_cost > 0) { - parser__halt_parse(self); + } else if (self->halt_on_error && min_error_cost > 0) { + ts_parser__halt_parse(self); break; } self->in_ambiguity = version > 1; } while (version != 0); + reusable_node_delete(&reusable_node); ts_stack_clear(self->stack); - parser__set_cached_token(self, 0, NULL, NULL); - ts_tree_assign_parents(self->finished_tree, &self->tree_pool, self->language); + ts_parser__set_cached_token(self, 0, NULL, NULL); + ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); LOG("done"); LOG_TREE(); - return self->finished_tree; + + return 
ts_tree_new(self->finished_tree, self->language); +} + +TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree, + const char *string, uint32_t length) { + TSStringInput input; + ts_string_input_init(&input, string, length); + return ts_parser_parse(self, old_tree, input.input); } diff --git a/src/runtime/parser.h b/src/runtime/parser.h deleted file mode 100644 index dab58dc9..00000000 --- a/src/runtime/parser.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef RUNTIME_PARSER_H_ -#define RUNTIME_PARSER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "runtime/stack.h" -#include "runtime/array.h" -#include "runtime/lexer.h" -#include "runtime/reusable_node.h" -#include "runtime/reduce_action.h" -#include "runtime/tree.h" - -typedef struct { - Tree *token; - Tree *last_external_token; - uint32_t byte_index; -} TokenCache; - -typedef struct { - Lexer lexer; - Stack *stack; - TreePool tree_pool; - const TSLanguage *language; - ReduceActionSet reduce_actions; - Tree *finished_tree; - Tree scratch_tree; - TokenCache token_cache; - ReusableNode reusable_node; - void *external_scanner_payload; - bool in_ambiguity; - bool print_debugging_graphs; - unsigned accept_count; -} Parser; - -bool parser_init(Parser *); -void parser_destroy(Parser *); -Tree *parser_parse(Parser *, TSInput, Tree *, bool halt_on_error); -void parser_set_language(Parser *, const TSLanguage *); - -#ifdef __cplusplus -} -#endif - -#endif // RUNTIME_PARSER_H_ diff --git a/src/runtime/reusable_node.h b/src/runtime/reusable_node.h index 04b9af7e..42ae6f1e 100644 --- a/src/runtime/reusable_node.h +++ b/src/runtime/reusable_node.h @@ -1,46 +1,87 @@ -#include "runtime/tree.h" +#include "runtime/subtree.h" typedef struct { - Tree *tree; - uint32_t byte_index; - Tree *last_external_token; + const Subtree *tree; + uint32_t child_index; + uint32_t byte_offset; +} StackEntry; + +typedef struct { + Array(StackEntry) stack; + const Subtree *last_external_token; } ReusableNode; -static inline 
ReusableNode reusable_node_new(Tree *tree) { - ReusableNode result = {tree, 0, NULL}; - return result; +static inline ReusableNode reusable_node_new() { + return (ReusableNode) {array_new(), NULL}; } -static inline void reusable_node_pop(ReusableNode *self) { - self->byte_index += ts_tree_total_bytes(self->tree); - if (self->tree->has_external_tokens) { - self->last_external_token = ts_tree_last_external_token(self->tree); +static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree) { + array_clear(&self->stack); + array_push(&self->stack, ((StackEntry) { + .tree = tree, + .child_index = 0, + .byte_offset = 0, + })); +} + +static inline const Subtree *reusable_node_tree(ReusableNode *self) { + return self->stack.size > 0 + ? self->stack.contents[self->stack.size - 1].tree + : NULL; +} + +static inline uint32_t reusable_node_byte_offset(ReusableNode *self) { + return self->stack.size > 0 + ? self->stack.contents[self->stack.size - 1].byte_offset + : UINT32_MAX; +} + +static inline void reusable_node_delete(ReusableNode *self) { + array_delete(&self->stack); +} + +static inline void reusable_node_assign(ReusableNode *self, const ReusableNode *other) { + array_assign(&self->stack, &other->stack); +} + +static inline void reusable_node_advance(ReusableNode *self) { + StackEntry last_entry = *array_back(&self->stack); + uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); + if (last_entry.tree->has_external_tokens) { + self->last_external_token = ts_subtree_last_external_token(last_entry.tree); } - while (self->tree) { - Tree *parent = self->tree->context.parent; - uint32_t next_index = self->tree->context.index + 1; - if (parent && parent->children.size > next_index) { - self->tree = parent->children.contents[next_index]; - return; - } - self->tree = parent; - } + const Subtree *tree; + uint32_t next_index; + do { + StackEntry popped_entry = array_pop(&self->stack); + next_index = popped_entry.child_index + 1; + 
if (self->stack.size == 0) return; + tree = array_back(&self->stack)->tree; + } while (tree->children.size <= next_index); + + array_push(&self->stack, ((StackEntry) { + .tree = tree->children.contents[next_index], + .child_index = next_index, + .byte_offset = byte_offset, + })); } -static inline ReusableNode reusable_node_after_leaf(const ReusableNode *self) { - ReusableNode result = *self; - while (result.tree->children.size > 0) - result.tree = result.tree->children.contents[0]; - reusable_node_pop(&result); - return result; -} - -static inline bool reusable_node_breakdown(ReusableNode *self) { - if (self->tree->children.size == 0) { - return false; - } else { - self->tree = self->tree->children.contents[0]; +static inline bool reusable_node_descend(ReusableNode *self) { + StackEntry last_entry = *array_back(&self->stack); + if (last_entry.tree->children.size > 0) { + array_push(&self->stack, ((StackEntry) { + .tree = last_entry.tree->children.contents[0], + .child_index = 0, + .byte_offset = last_entry.byte_offset, + })); return true; + } else { + return false; } } + +static inline void reusable_node_advance_past_leaf(ReusableNode *self) { + while (reusable_node_descend(self)) {} + reusable_node_advance(self); +} diff --git a/src/runtime/stack.c b/src/runtime/stack.c index f7383846..8921d559 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,6 +1,6 @@ #include "runtime/alloc.h" #include "runtime/language.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" #include "runtime/array.h" #include "runtime/stack.h" #include "runtime/length.h" @@ -21,7 +21,7 @@ typedef struct StackNode StackNode; typedef struct { StackNode *node; - Tree *tree; + const Subtree *subtree; bool is_pending; } StackLink; @@ -38,8 +38,8 @@ struct StackNode { typedef struct { StackNode *node; - TreeArray trees; - uint32_t tree_count; + SubtreeArray subtrees; + uint32_t subtree_count; bool is_pending; } Iterator; @@ -58,7 +58,7 @@ typedef enum { typedef struct { StackNode 
*node; - Tree *last_external_token; + const Subtree *last_external_token; StackSummary *summary; unsigned node_count_at_last_error; TSSymbol lookahead_when_paused; @@ -71,7 +71,7 @@ struct Stack { Array(Iterator) iterators; StackNodeArray node_pool; StackNode *base_node; - TreePool *tree_pool; + SubtreePool *subtree_pool; }; typedef unsigned StackAction; @@ -91,7 +91,7 @@ static void stack_node_retain(StackNode *self) { assert(self->ref_count != 0); } -static void stack_node_release(StackNode *self, StackNodeArray *pool, TreePool *tree_pool) { +static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) { recur: assert(self->ref_count != 0); self->ref_count--; @@ -100,10 +100,10 @@ recur: StackNode *first_predecessor = NULL; if (self->link_count > 0) { for (unsigned i = self->link_count - 1; i > 0; i--) { - if (self->links[i].tree) ts_tree_release(tree_pool, self->links[i].tree); - stack_node_release(self->links[i].node, pool, tree_pool); + if (self->links[i].subtree) ts_subtree_release(subtree_pool, self->links[i].subtree); + stack_node_release(self->links[i].node, pool, subtree_pool); } - if (self->links[0].tree) ts_tree_release(tree_pool, self->links[0].tree); + if (self->links[0].subtree) ts_subtree_release(subtree_pool, self->links[0].subtree); first_predecessor = self->links[0].node; } @@ -119,8 +119,8 @@ recur: } } -static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_pending, - TSStateId state, StackNodeArray *pool) { +static StackNode *stack_node_new(StackNode *previous_node, const Subtree *subtree, + bool is_pending, TSStateId state, StackNodeArray *pool) { StackNode *node = pool->size > 0 ? 
array_pop(pool) : ts_malloc(sizeof(StackNode)); @@ -130,7 +130,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p node->link_count = 1; node->links[0] = (StackLink){ .node = previous_node, - .tree = tree, + .subtree = subtree, .is_pending = is_pending, }; @@ -139,11 +139,11 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p node->dynamic_precedence = previous_node->dynamic_precedence; node->node_count = previous_node->node_count; - if (tree) { - node->error_cost += tree->error_cost; - node->position = length_add(node->position, ts_tree_total_size(tree)); - node->dynamic_precedence += tree->dynamic_precedence; - if (!tree->extra) node->node_count += tree->node_count; + if (subtree) { + node->error_cost += subtree->error_cost; + node->position = length_add(node->position, ts_subtree_total_size(subtree)); + node->dynamic_precedence += subtree->dynamic_precedence; + if (!subtree->extra) node->node_count += subtree->node_count; } } else { node->position = length_zero(); @@ -153,7 +153,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p return node; } -static bool stack__tree_is_equivalent(const Tree *left, const Tree *right) { +static bool stack__subtree_is_equivalent(const Subtree *left, const Subtree *right) { return left == right || (left && @@ -164,7 +164,7 @@ static bool stack__tree_is_equivalent(const Tree *left, const Tree *right) { left->padding.bytes == right->padding.bytes && left->size.bytes == right->size.bytes && left->extra == right->extra && - ts_tree_external_token_state_eq(left, right)))); + ts_subtree_external_scanner_state_eq(left, right)))); } static void stack_node_add_link(StackNode *self, StackLink link) { @@ -172,7 +172,7 @@ static void stack_node_add_link(StackNode *self, StackLink link) { for (int i = 0; i < self->link_count; i++) { StackLink existing_link = self->links[i]; - if (stack__tree_is_equivalent(existing_link.tree, link.tree)) { + if 
(stack__subtree_is_equivalent(existing_link.subtree, link.subtree)) { if (existing_link.node == link.node) return; if (existing_link.node->state == link.node->state && existing_link.node->position.bytes == link.node->position.bytes) { @@ -187,24 +187,24 @@ static void stack_node_add_link(StackNode *self, StackLink link) { if (self->link_count == MAX_LINK_COUNT) return; stack_node_retain(link.node); - if (link.tree) ts_tree_retain(link.tree); + if (link.subtree) ts_subtree_retain(link.subtree); self->links[self->link_count++] = link; unsigned node_count = link.node->node_count; - if (link.tree) node_count += link.tree->node_count; + if (link.subtree) node_count += link.subtree->node_count; if (node_count > self->node_count) self->node_count = node_count; } -static void stack_head_delete(StackHead *self, StackNodeArray *pool, TreePool *tree_pool) { +static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) { if (self->node) { if (self->last_external_token) { - ts_tree_release(tree_pool, self->last_external_token); + ts_subtree_release(subtree_pool, self->last_external_token); } if (self->summary) { array_delete(self->summary); ts_free(self->summary); } - stack_node_release(self->node, pool, tree_pool); + stack_node_release(self->node, pool, subtree_pool); } } @@ -219,44 +219,44 @@ static StackVersion ts_stack__add_version(Stack *self, StackVersion original_ver }; array_push(&self->heads, head); stack_node_retain(node); - if (head.last_external_token) ts_tree_retain(head.last_external_token); + if (head.last_external_token) ts_subtree_retain(head.last_external_token); return (StackVersion)(self->heads.size - 1); } static void ts_stack__add_slice(Stack *self, StackVersion original_version, - StackNode *node, TreeArray *trees) { + StackNode *node, SubtreeArray *subtrees) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == 
node) { - StackSlice slice = {*trees, version}; + StackSlice slice = {*subtrees, version}; array_insert(&self->slices, i + 1, slice); return; } } StackVersion version = ts_stack__add_version(self, original_version, node); - StackSlice slice = { *trees, version }; + StackSlice slice = { *subtrees, version }; array_push(&self->slices, slice); } inline StackSliceArray stack__iter(Stack *self, StackVersion version, StackCallback callback, void *payload, - int goal_tree_count) { + int goal_subtree_count) { array_clear(&self->slices); array_clear(&self->iterators); StackHead *head = array_get(&self->heads, version); Iterator iterator = { .node = head->node, - .trees = array_new(), - .tree_count = 0, + .subtrees = array_new(), + .subtree_count = 0, .is_pending = true, }; - bool include_trees = false; - if (goal_tree_count >= 0) { - include_trees = true; - array_reserve(&iterator.trees, goal_tree_count); + bool include_subtrees = false; + if (goal_subtree_count >= 0) { + include_subtrees = true; + array_reserve(&iterator.subtrees, goal_subtree_count); } array_push(&self->iterators, iterator); @@ -271,21 +271,21 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, bool should_stop = action & StackActionStop || node->link_count == 0; if (should_pop) { - TreeArray trees = iterator->trees; + SubtreeArray subtrees = iterator->subtrees; if (!should_stop) - ts_tree_array_copy(trees, &trees); - ts_tree_array_reverse(&trees); + ts_subtree_array_copy(subtrees, &subtrees); + ts_subtree_array_reverse(&subtrees); ts_stack__add_slice( self, version, node, - &trees + &subtrees ); } if (should_stop) { if (!should_pop) - ts_tree_array_delete(self->tree_pool, &iterator->trees); + ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); array_erase(&self->iterators, i); i--, size--; continue; @@ -303,24 +303,24 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, Iterator current_iterator = self->iterators.contents[i]; 
array_push(&self->iterators, current_iterator); next_iterator = array_back(&self->iterators); - ts_tree_array_copy(next_iterator->trees, &next_iterator->trees); + ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); } next_iterator->node = link.node; - if (link.tree) { - if (include_trees) { - array_push(&next_iterator->trees, link.tree); - ts_tree_retain(link.tree); + if (link.subtree) { + if (include_subtrees) { + array_push(&next_iterator->subtrees, link.subtree); + ts_subtree_retain(link.subtree); } - if (!link.tree->extra) { - next_iterator->tree_count++; + if (!link.subtree->extra) { + next_iterator->subtree_count++; if (!link.is_pending) { next_iterator->is_pending = false; } } } else { - next_iterator->tree_count++; + next_iterator->subtree_count++; next_iterator->is_pending = false; } } @@ -330,7 +330,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, return self->slices; } -Stack *ts_stack_new(TreePool *tree_pool) { +Stack *ts_stack_new(SubtreePool *subtree_pool) { Stack *self = ts_calloc(1, sizeof(Stack)); array_init(&self->heads); @@ -342,7 +342,7 @@ Stack *ts_stack_new(TreePool *tree_pool) { array_reserve(&self->iterators, 4); array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); - self->tree_pool = tree_pool; + self->subtree_pool = subtree_pool; self->base_node = stack_node_new(NULL, NULL, false, 1, &self->node_pool); ts_stack_clear(self); @@ -354,9 +354,9 @@ void ts_stack_delete(Stack *self) { array_delete(&self->slices); if (self->iterators.contents) array_delete(&self->iterators); - stack_node_release(self->base_node, &self->node_pool, self->tree_pool); + stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); for (uint32_t i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i], &self->node_pool, self->tree_pool); + stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); } array_clear(&self->heads); if (self->node_pool.contents) { @@ 
-380,14 +380,14 @@ Length ts_stack_position(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->position; } -Tree *ts_stack_last_external_token(const Stack *self, StackVersion version) { +const Subtree *ts_stack_last_external_token(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->last_external_token; } -void ts_stack_set_last_external_token(Stack *self, StackVersion version, Tree *token) { +void ts_stack_set_last_external_token(Stack *self, StackVersion version, const Subtree *token) { StackHead *head = array_get(&self->heads, version); - if (token) ts_tree_retain(token); - if (head->last_external_token) ts_tree_release(self->tree_pool, head->last_external_token); + if (token) ts_subtree_retain(token); + if (head->last_external_token) ts_subtree_release(self->subtree_pool, head->last_external_token); head->last_external_token = token; } @@ -396,7 +396,7 @@ unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { unsigned result = head->node->error_cost; if ( head->status == StackStatusPaused || - (head->node->state == ERROR_STATE && !head->node->links[0].tree)) { + (head->node->state == ERROR_STATE && !head->node->links[0].subtree)) { result += ERROR_COST_PER_RECOVERY; } return result; @@ -410,10 +410,11 @@ unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version return head->node->node_count - head->node_count_at_last_error; } -void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) { +void ts_stack_push(Stack *self, StackVersion version, const Subtree *subtree, + bool pending, TSStateId state) { StackHead *head = array_get(&self->heads, version); - StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool); - if (!tree) head->node_count_at_last_error = new_node->node_count; + StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); + if (!subtree) 
head->node_count_at_last_error = new_node->node_count; head->node = new_node; } @@ -422,7 +423,7 @@ inline StackAction iterate_callback(void *payload, const Iterator *iterator) { session->callback( session->payload, iterator->node->state, - iterator->tree_count + iterator->subtree_count ); return StackActionNone; } @@ -434,8 +435,8 @@ void ts_stack_iterate(Stack *self, StackVersion version, } inline StackAction pop_count_callback(void *payload, const Iterator *iterator) { - unsigned *goal_tree_count = payload; - if (iterator->tree_count == *goal_tree_count) { + unsigned *goal_subtree_count = payload; + if (iterator->subtree_count == *goal_subtree_count) { return StackActionPop | StackActionStop; } else { return StackActionNone; @@ -447,7 +448,7 @@ StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t c } inline StackAction pop_pending_callback(void *payload, const Iterator *iterator) { - if (iterator->tree_count >= 1) { + if (iterator->subtree_count >= 1) { if (iterator->is_pending) { return StackActionPop | StackActionStop; } else { @@ -468,9 +469,9 @@ StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { } inline StackAction pop_error_callback(void *payload, const Iterator *iterator) { - if (iterator->trees.size > 0) { + if (iterator->subtrees.size > 0) { bool *found_error = payload; - if (!*found_error && iterator->trees.contents[0]->symbol == ts_builtin_sym_error) { + if (!*found_error && iterator->subtrees.contents[0]->symbol == ts_builtin_sym_error) { *found_error = true; return StackActionPop | StackActionStop; } else { @@ -481,21 +482,21 @@ inline StackAction pop_error_callback(void *payload, const Iterator *iterator) { } } -TreeArray ts_stack_pop_error(Stack *self, StackVersion version) { +SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) { StackNode *node = array_get(&self->heads, version)->node; for (unsigned i = 0; i < node->link_count; i++) { - if (node->links[i].tree && 
node->links[i].tree->symbol == ts_builtin_sym_error) { + if (node->links[i].subtree && node->links[i].subtree->symbol == ts_builtin_sym_error) { bool found_error = false; StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1); if (pop.size > 0) { assert(pop.size == 1); ts_stack_renumber_version(self, pop.contents[0].version, version); - return pop.contents[0].trees; + return pop.contents[0].subtrees; } break; } } - return (TreeArray){.size = 0}; + return (SubtreeArray){.size = 0}; } inline StackAction pop_all_callback(void *payload, const Iterator *iterator) { @@ -514,7 +515,7 @@ typedef struct { inline StackAction summarize_stack_callback(void *payload, const Iterator *iterator) { SummarizeStackSession *session = payload; TSStateId state = iterator->node->state; - unsigned depth = iterator->tree_count; + unsigned depth = iterator->subtree_count; if (depth > session->max_depth) return StackActionStop; for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { StackSummaryEntry entry = session->summary->contents[i]; @@ -548,7 +549,7 @@ int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { } void ts_stack_remove_version(Stack *self, StackVersion version) { - stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->tree_pool); + stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); array_erase(&self->heads, version); } @@ -561,7 +562,7 @@ void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { source_head->summary = target_head->summary; target_head->summary = NULL; } - stack_head_delete(target_head, &self->node_pool, self->tree_pool); + stack_head_delete(target_head, &self->node_pool, self->subtree_pool); *target_head = *source_head; array_erase(&self->heads, v1); } @@ -577,7 +578,7 @@ StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { array_push(&self->heads, self->heads.contents[version]); StackHead *head = 
array_back(&self->heads); stack_node_retain(head->node); - if (head->last_external_token) ts_tree_retain(head->last_external_token); + if (head->last_external_token) ts_subtree_retain(head->last_external_token); head->summary = NULL; return self->heads.size - 1; } @@ -605,7 +606,7 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version head1->node->state == head2->node->state && head1->node->position.bytes == head2->node->position.bytes && head1->node->error_cost == head2->node->error_cost && - ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); + ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token); } void ts_stack_halt(Stack *self, StackVersion version) { @@ -643,7 +644,7 @@ TSSymbol ts_stack_resume(Stack *self, StackVersion version) { void ts_stack_clear(Stack *self) { stack_node_retain(self->base_node); for (uint32_t i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i], &self->node_pool, self->tree_pool); + stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); } array_clear(&self->heads); array_push(&self->heads, ((StackHead){ @@ -684,9 +685,9 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) ); if (head->last_external_token) { - TSExternalTokenState *state = &head->last_external_token->external_token_state; - const char *data = ts_external_token_state_data(state); - fprintf(f, "\nexternal_token_state:"); + const ExternalScannerState *state = &head->last_external_token->external_scanner_state; + const char *data = ts_external_scanner_state_data(state); + fprintf(f, "\nexternal_scanner_state:"); for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); } @@ -715,8 +716,8 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) fprintf(f, "node_%p [", node); if (node->state == ERROR_STATE) fprintf(f, "label=\"?\""); - else if 
(node->link_count == 1 && node->links[0].tree && - node->links[0].tree->extra) + else if (node->link_count == 1 && node->links[0].subtree && + node->links[0].subtree->extra) fprintf(f, "shape=point margin=0 label=\"\""); else fprintf(f, "label=\"%d\"", node->state); @@ -736,24 +737,24 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) fprintf(f, "node_%p -> node_%p [", node, link.node); if (link.is_pending) fprintf(f, "style=dashed "); - if (link.tree && link.tree->extra) + if (link.subtree && link.subtree->extra) fprintf(f, "fontcolor=gray "); - if (!link.tree) { + if (!link.subtree) { fprintf(f, "color=red"); } else { fprintf(f, "label=\""); - if (link.tree->visible && !link.tree->named) fprintf(f, "'"); - const char *name = ts_language_symbol_name(language, link.tree->symbol); + if (link.subtree->visible && !link.subtree->named) fprintf(f, "'"); + const char *name = ts_language_symbol_name(language, link.subtree->symbol); for (const char *c = name; *c; c++) { if (*c == '\"' || *c == '\\') fprintf(f, "\\"); fprintf(f, "%c", *c); } - if (link.tree->visible && !link.tree->named) fprintf(f, "'"); + if (link.subtree->visible && !link.subtree->named) fprintf(f, "'"); fprintf(f, "\""); fprintf(f, "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"", - link.tree->error_cost, - link.tree->dynamic_precedence); + link.subtree->error_cost, + link.subtree->dynamic_precedence); } fprintf(f, "];\n"); diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 4a552323..b7dba342 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -6,7 +6,7 @@ extern "C" { #endif #include "runtime/array.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" #include "runtime/error_costs.h" #include @@ -16,7 +16,7 @@ typedef unsigned StackVersion; #define STACK_VERSION_NONE ((StackVersion)-1) typedef struct { - TreeArray trees; + SubtreeArray subtrees; StackVersion version; } StackSlice; typedef Array(StackSlice) StackSliceArray; @@ -29,7 +29,7 
@@ typedef struct { typedef Array(StackSummaryEntry) StackSummary; // Create a stack. -Stack *ts_stack_new(TreePool *); +Stack *ts_stack_new(SubtreePool *); // Release the memory reserved for a given stack. void ts_stack_delete(Stack *); @@ -42,10 +42,10 @@ uint32_t ts_stack_version_count(const Stack *); TSStateId ts_stack_state(const Stack *, StackVersion); // Get the last external token associated with a given version of the stack. -Tree *ts_stack_last_external_token(const Stack *, StackVersion); +const Subtree *ts_stack_last_external_token(const Stack *, StackVersion); // Set the last external token associated with a given version of the stack. -void ts_stack_set_last_external_token(Stack *, StackVersion, Tree *); +void ts_stack_set_last_external_token(Stack *, StackVersion, const Subtree *); // Get the position of the given version of the stack within the document. Length ts_stack_position(const Stack *, StackVersion); @@ -55,7 +55,7 @@ Length ts_stack_position(const Stack *, StackVersion); // This transfers ownership of the tree to the Stack. Callers that // need to retain ownership of the tree for their own purposes should // first retain the tree. -void ts_stack_push(Stack *, StackVersion, Tree *, bool, TSStateId); +void ts_stack_push(Stack *, StackVersion, const Subtree *, bool, TSStateId); // Pop the given number of entries from the given version of the stack. This // operation can increase the number of stack versions by revealing multiple @@ -65,7 +65,7 @@ void ts_stack_push(Stack *, StackVersion, Tree *, bool, TSStateId); StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count); // Remove an error at the top of the given version of the stack. -TreeArray ts_stack_pop_error(Stack *, StackVersion); +SubtreeArray ts_stack_pop_error(Stack *, StackVersion); // Remove any pending trees from the top of the given version of the stack. 
StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index 53f69ee5..c4e13e0c 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -1,13 +1,7 @@ +#include "tree_sitter/runtime.h" #include "runtime/string_input.h" -#include "runtime/alloc.h" #include -typedef struct { - const char *string; - uint32_t position; - uint32_t length; -} TSStringInput; - static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) { TSStringInput *input = (TSStringInput *)payload; if (input->position >= input->length) { @@ -26,17 +20,12 @@ static int ts_string_input__seek(void *payload, uint32_t byte, TSPoint _) { return (byte < input->length); } -TSInput ts_string_input_make(const char *string) { - return ts_string_input_make_with_length(string, strlen(string)); -} - -TSInput ts_string_input_make_with_length(const char *string, uint32_t length) { - TSStringInput *input = ts_malloc(sizeof(TSStringInput)); - input->string = string; - input->position = 0; - input->length = length; - return (TSInput){ - .payload = input, +void ts_string_input_init(TSStringInput *self, const char *string, uint32_t length) { + self->string = string; + self->position = 0; + self->length = length; + self->input = (TSInput) { + .payload = self, .read = ts_string_input__read, .seek = ts_string_input__seek, .encoding = TSInputEncodingUTF8, diff --git a/src/runtime/string_input.h b/src/runtime/string_input.h index c96cd416..19171e4f 100644 --- a/src/runtime/string_input.h +++ b/src/runtime/string_input.h @@ -7,8 +7,14 @@ extern "C" { #include "tree_sitter/runtime.h" -TSInput ts_string_input_make(const char *); -TSInput ts_string_input_make_with_length(const char *, uint32_t); +typedef struct { + const char *string; + uint32_t position; + uint32_t length; + TSInput input; +} TSStringInput; + +void ts_string_input_init(TSStringInput *, const char *, uint32_t); #ifdef __cplusplus } diff --git 
a/src/runtime/subtree.c b/src/runtime/subtree.c new file mode 100644 index 00000000..0991b67c --- /dev/null +++ b/src/runtime/subtree.c @@ -0,0 +1,713 @@ +#include +#include +#include +#include +#include +#include +#include "runtime/alloc.h" +#include "runtime/atomic.h" +#include "runtime/subtree.h" +#include "runtime/length.h" +#include "runtime/language.h" +#include "runtime/error_costs.h" + +typedef struct { + Length start; + Length added; + Length removed; +} Edit; + +TSStateId TS_TREE_STATE_NONE = USHRT_MAX; + +static const uint32_t MAX_TREE_POOL_SIZE = 1024; + +static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}}; + +// ExternalScannerState + +void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) { + self->length = length; + if (length > sizeof(self->short_data)) { + self->long_data = ts_malloc(length); + memcpy(self->long_data, data, length); + } else { + memcpy(self->short_data, data, length); + } +} + +void ts_external_scanner_state_delete(ExternalScannerState *self) { + if (self->length > sizeof(self->short_data)) { + ts_free(self->long_data); + } +} + +const char *ts_external_scanner_state_data(const ExternalScannerState *self) { + if (self->length > sizeof(self->short_data)) { + return self->long_data; + } else { + return self->short_data; + } +} + +bool ts_external_scanner_state_eq(const ExternalScannerState *a, const ExternalScannerState *b) { + return a == b || ( + a->length == b->length && + !memcmp(ts_external_scanner_state_data(a), ts_external_scanner_state_data(b), a->length) + ); +} + +// SubtreeArray + +bool ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { + const Subtree **contents = NULL; + if (self.capacity > 0) { + contents = ts_calloc(self.capacity, sizeof(Subtree *)); + memcpy(contents, self.contents, self.size * sizeof(Subtree *)); + for (uint32_t i = 0; i < self.size; i++) { + ts_subtree_retain(contents[i]); + } + } + + dest->size = self.size; + 
dest->capacity = self.capacity; + dest->contents = contents; + return true; +} + +void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { + for (uint32_t i = 0; i < self->size; i++) { + ts_subtree_release(pool, self->contents[i]); + } + array_delete(self); +} + +SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) { + SubtreeArray result = array_new(); + + uint32_t i = self->size - 1; + for (; i + 1 > 0; i--) { + const Subtree *child = self->contents[i]; + if (!child->extra) break; + array_push(&result, child); + } + + self->size = i + 1; + ts_subtree_array_reverse(&result); + return result; +} + +void ts_subtree_array_reverse(SubtreeArray *self) { + for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) { + size_t reverse_index = self->size - 1 - i; + const Subtree *swap = self->contents[i]; + self->contents[i] = self->contents[reverse_index]; + self->contents[reverse_index] = swap; + } +} + +// SubtreePool + +SubtreePool ts_subtree_pool_new(uint32_t capacity) { + SubtreePool self = {array_new(), array_new()}; + array_reserve(&self.free_trees, capacity); + return self; +} + +void ts_subtree_pool_delete(SubtreePool *self) { + if (self->free_trees.contents) { + for (unsigned i = 0; i < self->free_trees.size; i++) { + ts_free(self->free_trees.contents[i]); + } + array_delete(&self->free_trees); + } + if (self->tree_stack.contents) array_delete(&self->tree_stack); +} + +Subtree *ts_subtree_pool_allocate(SubtreePool *self) { + if (self->free_trees.size > 0) { + return array_pop(&self->free_trees); + } else { + return ts_malloc(sizeof(Subtree)); + } +} + +void ts_subtree_pool_free(SubtreePool *self, Subtree *tree) { + if (self->free_trees.capacity > 0 && self->free_trees.size < MAX_TREE_POOL_SIZE) { + array_push(&self->free_trees, tree); + } else { + ts_free(tree); + } +} + +// Subtree + +Subtree *ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size, + const TSLanguage *language) { + TSSymbolMetadata 
metadata = ts_language_symbol_metadata(language, symbol); + Subtree *result = ts_subtree_pool_allocate(pool); + *result = (Subtree){ + .ref_count = 1, + .symbol = symbol, + .size = size, + .visible_child_count = 0, + .named_child_count = 0, + .alias_sequence_id = 0, + .padding = padding, + .visible = metadata.visible, + .named = metadata.named, + .node_count = 1, + .has_changes = false, + .first_leaf = { + .symbol = symbol, + .lex_mode = {0, 0}, + }, + .has_external_tokens = false, + }; + if (symbol == ts_builtin_sym_end) result->extra = true; + return result; +} + +Subtree *ts_subtree_new_error(SubtreePool *pool, Length size, Length padding, + int32_t lookahead_char, const TSLanguage *language) { + Subtree *result = ts_subtree_new_leaf(pool, ts_builtin_sym_error, padding, size, language); + result->fragile_left = true; + result->fragile_right = true; + result->lookahead_char = lookahead_char; + return result; +} + +Subtree *ts_subtree_new_copy(SubtreePool *pool, const Subtree *self) { + Subtree *result = ts_subtree_pool_allocate(pool); + *result = *self; + if (result->children.size > 0) { + ts_subtree_array_copy(self->children, &result->children); + } + result->ref_count = 1; + return result; +} + +Subtree *ts_subtree_make_mut(SubtreePool *pool, const Subtree *self) { + if (self->ref_count == 1) { + return (Subtree *)self; + } else { + Subtree *result = ts_subtree_new_copy(pool, self); + ts_subtree_release(pool, self); + return result; + } +} + +static void ts_subtree__compress(Subtree *self, unsigned count, const TSLanguage *language, + MutableSubtreeArray *stack) { + unsigned initial_stack_size = stack->size; + + Subtree *tree = self; + for (unsigned i = 0; i < count; i++) { + if (tree->ref_count > 1 || tree->children.size != 2) break; + + Subtree *child = (Subtree *)tree->children.contents[0]; + if ( + child->ref_count > 1 || + child->children.size != 2 || + child->symbol != tree->symbol + ) break; + + Subtree *grandchild = (Subtree 
*)child->children.contents[0]; + if ( + grandchild->ref_count > 1 || + grandchild->children.size != 2 || + grandchild->symbol != tree->symbol + ) break; + + tree->children.contents[0] = grandchild; + child->children.contents[0] = grandchild->children.contents[1]; + grandchild->children.contents[1] = child; + array_push(stack, tree); + tree = grandchild; + } + + while (stack->size > initial_stack_size) { + tree = array_pop(stack); + assert(tree); + Subtree *child = (Subtree *)tree->children.contents[0]; + Subtree *grandchild = (Subtree *)child->children.contents[1]; + ts_subtree_set_children(grandchild, &grandchild->children, language); + ts_subtree_set_children(child, &child->children, language); + ts_subtree_set_children(tree, &tree->children, language); + } +} + +void ts_subtree_balance(const Subtree *self, SubtreePool *pool, const TSLanguage *language) { + array_clear(&pool->tree_stack); + + if (self->ref_count == 1) { + array_push(&pool->tree_stack, (Subtree *)self); + } + + while (pool->tree_stack.size > 0) { + Subtree *tree = array_pop(&pool->tree_stack); + assert(tree); + + if (tree->repeat_depth > 0 && + tree->children.contents[0]->repeat_depth > tree->children.contents[1]->repeat_depth) { + unsigned n = ( + tree->children.contents[0]->repeat_depth - + tree->children.contents[1]->repeat_depth + ); + for (unsigned i = n / 2; i > 0; i /= 2) { + ts_subtree__compress(tree, i, language, &pool->tree_stack); + n -= i; + } + } + + for (uint32_t i = 0; i < tree->children.size; i++) { + const Subtree *child = tree->children.contents[i]; + if (child->ref_count == 1) { + array_push(&pool->tree_stack, (Subtree *)child); + } + } + } +} + +void ts_subtree_set_children(Subtree *self, SubtreeArray *children, const TSLanguage *language) { + if (self->children.size > 0 && children->contents != self->children.contents) { + array_delete(&self->children); + } + + self->children = *children; + self->named_child_count = 0; + self->visible_child_count = 0; + self->error_cost = 0; + 
self->repeat_depth = 0; + self->node_count = 1; + self->has_external_tokens = false; + self->dynamic_precedence = 0; + + uint32_t non_extra_index = 0; + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id); + + for (uint32_t i = 0; i < self->children.size; i++) { + const Subtree *child = self->children.contents[i]; + + if (i == 0) { + self->padding = child->padding; + self->size = child->size; + self->bytes_scanned = child->bytes_scanned; + } else { + uint32_t bytes_scanned = ts_subtree_total_bytes(self) + child->bytes_scanned; + if (bytes_scanned > self->bytes_scanned) self->bytes_scanned = bytes_scanned; + self->size = length_add(self->size, ts_subtree_total_size(child)); + } + + if (child->symbol != ts_builtin_sym_error_repeat) { + self->error_cost += child->error_cost; + } + self->dynamic_precedence += child->dynamic_precedence; + self->node_count += child->node_count; + + if (alias_sequence && alias_sequence[non_extra_index] != 0 && !child->extra) { + self->visible_child_count++; + if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) { + self->named_child_count++; + } + } else if (child->visible) { + self->visible_child_count++; + if (child->named) self->named_child_count++; + } else if (child->children.size > 0) { + self->visible_child_count += child->visible_child_count; + self->named_child_count += child->named_child_count; + } + + if (child->has_external_tokens) self->has_external_tokens = true; + + if (child->symbol == ts_builtin_sym_error) { + self->fragile_left = self->fragile_right = true; + self->parse_state = TS_TREE_STATE_NONE; + } + + if (!child->extra) non_extra_index++; + } + + if (self->symbol == ts_builtin_sym_error || self->symbol == ts_builtin_sym_error_repeat) { + self->error_cost += ERROR_COST_PER_RECOVERY + + ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes + + ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row; + for (uint32_t i = 0; i < self->children.size; i++) { + 
const Subtree *child = self->children.contents[i]; + /* Extras and childless error nodes add no skipped-tree cost. */ if (child->extra) continue; + if (child->symbol == ts_builtin_sym_error && child->children.size == 0) continue; + if (child->visible) { + self->error_cost += ERROR_COST_PER_SKIPPED_TREE; + } else { + self->error_cost += ERROR_COST_PER_SKIPPED_TREE * child->visible_child_count; + } + } + } + + /* first_leaf comes from the first child; fragility is inherited from the first child's left edge and the last child's right edge. */ if (self->children.size > 0) { + const Subtree *first_child = self->children.contents[0]; + const Subtree *last_child = self->children.contents[self->children.size - 1]; + self->first_leaf = first_child->first_leaf; + if (first_child->fragile_left) self->fragile_left = true; + if (last_child->fragile_right) self->fragile_right = true; + /* An invisible, unnamed node with exactly two children that both carry its own symbol gets repeat_depth = 1 + the larger of the two child depths. */ if ( + self->children.size == 2 && + !self->visible && !self->named && + first_child->symbol == self->symbol && + last_child->symbol == self->symbol + ) { + if (first_child->repeat_depth > last_child->repeat_depth) { + self->repeat_depth = first_child->repeat_depth + 1; + } else { + self->repeat_depth = last_child->repeat_depth + 1; + } + } + } +} + +/* Creates a parent node: starts from a zero-length leaf for `symbol`, records alias_sequence_id, marks error and error-repeat symbols fragile on both sides, then derives the remaining fields with ts_subtree_set_children. */ Subtree *ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, SubtreeArray *children, + unsigned alias_sequence_id, const TSLanguage *language) { + Subtree *result = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language); + result->alias_sequence_id = alias_sequence_id; + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { + result->fragile_left = true; + result->fragile_right = true; + } + ts_subtree_set_children(result, children, language); + return result; +} + +/* Shorthand for ts_subtree_new_node with ts_builtin_sym_error and alias sequence id 0. */ Subtree *ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children, + const TSLanguage *language) { + return ts_subtree_new_node(pool, ts_builtin_sym_error, children, 0, language); +} + +/* Creates a zero-length leaf for `symbol` flagged is_missing, with error_cost set to ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY. */ Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, + const TSLanguage *language) { + Subtree *result = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language); + result->is_missing = true; +
result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; + return result; +} + +/* Adds one reference to `self` with atomic_inc. The asserts require a positive count before the increment and a non-zero count after it (presumably to catch wrap-around). */ void ts_subtree_retain(const Subtree *self) { + assert(self->ref_count > 0); + atomic_inc((volatile uint32_t *)&self->ref_count); + assert(self->ref_count != 0); +} + +/* Drops one reference to `self`. Every node for which atomic_dec yields 0 is queued on pool->tree_stack; each queued node then drops one reference on each of its children (queueing those that reach 0), deletes its child array - or, for a childless node with has_external_tokens, its external scanner state - and is handed to ts_subtree_pool_free. The traversal is iterative, not recursive. */ void ts_subtree_release(SubtreePool *pool, const Subtree *self) { + array_clear(&pool->tree_stack); + + assert(self->ref_count > 0); + if (atomic_dec((volatile uint32_t *)&self->ref_count) == 0) { + array_push(&pool->tree_stack, (Subtree *)self); + } + + while (pool->tree_stack.size > 0) { + Subtree *tree = array_pop(&pool->tree_stack); + if (tree->children.size > 0) { + for (uint32_t i = 0; i < tree->children.size; i++) { + const Subtree *child = tree->children.contents[i]; + if (atomic_dec((volatile uint32_t *)&child->ref_count) == 0) { + array_push(&pool->tree_stack, (Subtree *)child); + } + } + array_delete(&tree->children); + } else if (tree->has_external_tokens) { + ts_external_scanner_state_delete(&tree->external_scanner_state); + } + ts_subtree_pool_free(pool, tree); + } +} + +/* Structural equality. Two NULL pointers are equal and NULL never equals non-NULL. Otherwise symbol, visible, named, padding bytes and size bytes must match; for ts_builtin_sym_error nodes the result is then decided by lookahead_char alone; for all other nodes the child counts must match and every pair of children must be recursively equal. */ bool ts_subtree_eq(const Subtree *self, const Subtree *other) { + if (self) { + if (!other) return false; + } else { + return !other; + } + + if (self->symbol != other->symbol) return false; + if (self->visible != other->visible) return false; + if (self->named != other->named) return false; + if (self->padding.bytes != other->padding.bytes) return false; + if (self->size.bytes != other->size.bytes) return false; + if (self->symbol == ts_builtin_sym_error) return self->lookahead_char == other->lookahead_char; + if (self->children.size != other->children.size) return false; + if (self->visible_child_count != other->visible_child_count) return false; + if (self->named_child_count != other->named_child_count) return false; + + for (uint32_t i = 0; i < self->children.size; i++) { + if (!ts_subtree_eq(self->children.contents[i], other->children.contents[i])) { + return false; + } + } + return true; +} + +int
ts_subtree_compare(const Subtree *left, const Subtree *right) { + /* Three-way ordering: first by symbol, then by number of children, then by the children compared left to right. Returns -1, 0 or 1. */ if (left->symbol < right->symbol) + return -1; + if (right->symbol < left->symbol) + return 1; + if (left->children.size < right->children.size) + return -1; + if (right->children.size < left->children.size) + return 1; + for (uint32_t i = 0; i < left->children.size; i++) { + const Subtree *left_child = left->children.contents[i]; + const Subtree *right_child = right->children.contents[i]; + switch (ts_subtree_compare(left_child, right_child)) { + case -1: + return -1; + case 1: + return 1; + default: + break; + } + } + return 0; +} + +/* Returns `self` unchanged when edit_byte_offset >= self->bytes_scanned. Otherwise obtains a mutable version through ts_subtree_make_mut, sets has_changes, and recurses into every child whose start byte (relative to this node) is not past the offset, passing the offset translated into the child's coordinates. Returns the mutable version. */ const Subtree *ts_subtree_invalidate_lookahead(const Subtree *self, uint32_t edit_byte_offset, + SubtreePool *pool) { + if (edit_byte_offset >= self->bytes_scanned) return self; + + Subtree *result = ts_subtree_make_mut(pool, self); + result->has_changes = true; + + if (result->children.size > 0) { + uint32_t child_start_byte = 0; + for (uint32_t i = 0; i < result->children.size; i++) { + const Subtree **child = &result->children.contents[i]; + if (child_start_byte > edit_byte_offset) break; + *child = ts_subtree_invalidate_lookahead(*child, edit_byte_offset - child_start_byte, pool); + child_start_byte += ts_subtree_total_bytes(*child); + } + } + + return result; +} + +/* Applies `edit`, whose positions are measured from the start of this subtree (padding included), to a mutable version of `self`: first adjusts this node's own padding and size, then forwards the relevant part of the edit to each affected child. Returns the mutable version. */ const Subtree *ts_subtree__edit(const Subtree *self, Edit edit, SubtreePool *pool) { + Length new_end = length_add(edit.start, edit.added); + Length old_end = length_add(edit.start, edit.removed); + + Subtree *result = ts_subtree_make_mut(pool, self); + result->has_changes = true; + + if (old_end.bytes <= result->padding.bytes) { + /* The removed range ends within the padding: only the padding is resized. */ result->padding = length_add(new_end, length_sub(result->padding, old_end)); + } else if (edit.start.bytes < result->padding.bytes) { + /* The edit starts in the padding and reaches into the content: the content shrinks and the padding now ends at new_end. */ result->size = length_sub(result->size, length_sub(old_end, result->padding)); + result->padding = new_end; + } else if (edit.start.bytes == result->padding.bytes && edit.removed.bytes == 0) { + /* Pure insertion exactly at the end of the padding: the added text is counted as padding. */ result->padding = length_add(result->padding,
edit.added); + } else { + /* The edit starts at or after the end of the padding: only size changes. */ Length new_total_size = length_add(new_end, length_sub(ts_subtree_total_size(result), old_end)); + result->size = length_sub(new_total_size, result->padding); + } + + Length child_left, child_right = length_zero(); + for (uint32_t i = 0; i < result->children.size; i++) { + const Subtree **child = &result->children.contents[i]; + Length child_size = ts_subtree_total_size(*child); + child_left = child_right; + child_right = length_add(child_left, child_size); + + /* Stop at the first child that starts after the removed range, or exactly at its end when that child is non-empty and not the first child. */ if (child_left.bytes > old_end.bytes || + (child_left.bytes == old_end.bytes && child_size.bytes > 0 && i > 0)) break; + + /* The child extends past the edit start (or ends exactly at a pure insertion point): hand it its share of the edit. */ if (child_right.bytes > edit.start.bytes || + (child_right.bytes == edit.start.bytes && edit.removed.bytes == 0)) { + Edit child_edit = { + .start = length_sub(edit.start, child_left), + .added = edit.added, + .removed = edit.removed, + }; + + if (edit.start.bytes < child_left.bytes) { + child_edit.start = length_zero(); + } + + if (old_end.bytes > child_right.bytes) { + child_edit.removed = length_sub(child_size, child_edit.start); + } + + /* Only the first affected child receives the added text; whatever removal it did not absorb is carried over to the following children. */ edit.added = length_zero(); + edit.removed = length_sub(edit.removed, child_edit.removed); + + *child = ts_subtree__edit(*child, child_edit, pool); + } else if (child_left.bytes <= edit.start.bytes) { + /* The child ends before the edit: it is not resized, but it is invalidated if its bytes_scanned reaches the edit position. */ *child = ts_subtree_invalidate_lookahead(*child, edit.start.bytes - child_left.bytes, pool); + } + } + + return result; +} + +/* Converts a TSInputEdit into the internal Edit form (start, added and removed, each a byte count paired with a point) and applies it with ts_subtree__edit. */ const Subtree *ts_subtree_edit(const Subtree *self, const TSInputEdit *edit, SubtreePool *pool) { + return ts_subtree__edit(self, (Edit) { + .start = {edit->start_byte, edit->start_point}, + .added = {edit->bytes_added, edit->extent_added}, + .removed = {edit->bytes_removed, edit->extent_removed}, + }, pool); +} + +/* Returns NULL when `tree` has no external tokens; otherwise repeatedly descends into the last child flagged has_external_tokens until it reaches a node without children, and returns that node. NOTE(review): if a node with children had the flag set while none of its children did, the while loop would never advance; ts_subtree_set_children derives the flag from the children, which presumably rules this out - confirm for every path that sets the flag. */ const Subtree *ts_subtree_last_external_token(const Subtree *tree) { + if (!tree->has_external_tokens) return NULL; + while (tree->children.size > 0) { + /* `i + 1 > 0` ends the descending loop once the unsigned index wraps below zero. */ for (uint32_t i = tree->children.size - 1; i + 1 > 0; i--) { + const Subtree *child = tree->children.contents[i]; + if
(child->has_external_tokens) { + tree = child; + break; + } + } + } + return tree; +} + +/* Formats the character code `c` into `s` with snprintf, bounded by `n`: 0 is written as EOF, -1 as INVALID, newline, tab and carriage return as quoted escape sequences, printable codes below 128 as a quoted character, and anything else as a decimal number. Returns the value reported by snprintf. */ static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) { + if (c == 0) + return snprintf(s, n, "EOF"); + if (c == -1) + return snprintf(s, n, "INVALID"); + else if (c == '\n') + return snprintf(s, n, "'\\n'"); + else if (c == '\t') + return snprintf(s, n, "'\\t'"); + else if (c == '\r') + return snprintf(s, n, "'\\r'"); + else if (0 < c && c < 128 && isprint(c)) + return snprintf(s, n, "'%c'", c); + else + return snprintf(s, n, "%d", c); +} + +/* Writes the parenthesised text form of `self` and its descendants and returns the number of characters produced. With limit == 0 every snprintf call targets `string` with size 0, so nothing is stored and the call only measures the length. NOTE(review): the same `limit` is passed to every snprintf even as `cursor` advances, so it is not a true remaining-space bound; this relies on the buffer having been sized by a prior measuring pass, as ts_subtree_string does. */ static size_t ts_subtree__write_to_string(const Subtree *self, char *string, size_t limit, + const TSLanguage *language, bool is_root, + bool include_all, TSSymbol alias_symbol, + bool alias_is_named) { + if (!self) return snprintf(string, limit, "(NULL)"); + + char *cursor = string; + char **writer = (limit > 0) ? &cursor : &string; + /* A node is printed when include_all is set, when it is the root, when it is missing, when it is both visible and named, or when its alias is named. Children are visited either way. */ bool visible = + include_all || + is_root || + self->is_missing || + (self->visible && self->named) || + alias_is_named; + + if (visible && !is_root) { + cursor += snprintf(*writer, limit, " "); + } + + if (visible) { + /* A childless, non-empty error node is shown as the unexpected lookahead character. */ if (self->symbol == ts_builtin_sym_error && self->children.size == 0 && self->size.bytes > 0) { + cursor += snprintf(*writer, limit, "(UNEXPECTED "); + cursor += ts_subtree__write_char_to_string(*writer, limit, self->lookahead_char); + } else if (self->is_missing) { + cursor += snprintf(*writer, limit, "(MISSING"); + } else { + TSSymbol symbol = alias_symbol ?
alias_symbol : self->symbol; + const char *symbol_name = ts_language_symbol_name(language, symbol); + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } + } + + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id); + uint32_t structural_child_index = 0; + for (uint32_t i = 0; i < self->children.size; i++) { + const Subtree *child = self->children.contents[i]; + /* Extra children are written without an alias; non-extra children look theirs up by structural index. */ if (child->extra) { + cursor += ts_subtree__write_to_string( + child, *writer, limit, + language, false, include_all, + 0, false + ); + } else { + TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; + cursor += ts_subtree__write_to_string( + child, *writer, limit, + language, false, include_all, + alias_symbol, + alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false + ); + structural_child_index++; + } + } + + if (visible) cursor += snprintf(*writer, limit, ")"); + + return cursor - string; +} + +/* Returns a buffer obtained from ts_malloc that holds the text form of `self`. A first pass with limit 0 measures the length (plus one for the terminator); a second pass fills the buffer. Nothing here frees the result, so presumably the caller does. */ char *ts_subtree_string(const Subtree *self, const TSLanguage *language, bool include_all) { + char scratch_string[1]; + size_t size = ts_subtree__write_to_string( + self, scratch_string, 0, + language, true, + include_all, 0, false + ) + 1; + char *result = ts_malloc(size * sizeof(char)); + ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false); + return result; +} + +/* Writes one node statement for `self` to `f`, labelled with the name of its alias symbol (or its own symbol when the alias is 0) and carrying a tooltip of debugging fields, then recurses into the children and writes an edge to each. byte_offset is the start byte reported for `self`. */ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t byte_offset, + const TSLanguage *language, TSSymbol alias_symbol, FILE *f) { + TSSymbol symbol = alias_symbol ?
alias_symbol : self->symbol; + fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, symbol)); + + /* Childless nodes are drawn as plain text and extras in gray. */ if (self->children.size == 0) + fprintf(f, ", shape=plaintext"); + if (self->extra) + fprintf(f, ", fontcolor=gray"); + + fprintf(f, ", tooltip=\"" + "address:%p\n" + "range:%u - %u\n" + "state:%d\n" + "error-cost:%u\n" + "repeat-depth:%u\n" + "bytes-scanned:%u\"]\n", + self, + byte_offset, byte_offset + ts_subtree_total_bytes(self), + self->parse_state, + self->error_cost, + self->repeat_depth, + self->bytes_scanned + ); + + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id); + uint32_t structural_child_index = 0; + for (uint32_t i = 0; i < self->children.size; i++) { + const Subtree *child = self->children.contents[i]; + if (child->extra) { + ts_subtree__print_dot_graph(child, byte_offset, language, 0, f); + } else { + TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; + ts_subtree__print_dot_graph(child, byte_offset, language, alias_symbol, f); + structural_child_index++; + } + fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i); + /* Each child starts where the previous one ended. */ byte_offset += ts_subtree_total_bytes(child); + } +} + +/* Writes a complete `digraph tree` document for `self` to `f`, starting at byte offset 0 with no alias. */ void ts_subtree_print_dot_graph(const Subtree *self, const TSLanguage *language, FILE *f) { + fprintf(f, "digraph tree {\n"); + fprintf(f, "edge [arrowhead=none]\n"); + ts_subtree__print_dot_graph(self, 0, language, 0, f); + fprintf(f, "}\n"); +} + +/* Compares the external scanner states of two subtrees with ts_external_scanner_state_eq, substituting empty_state for a NULL subtree or for one without has_external_tokens. */ bool ts_subtree_external_scanner_state_eq(const Subtree *self, const Subtree *other) { + const ExternalScannerState *state1 = &empty_state; + const ExternalScannerState *state2 = &empty_state; + if (self && self->has_external_tokens) state1 = &self->external_scanner_state; + if (other && other->has_external_tokens) state2 = &other->external_scanner_state; + return ts_external_scanner_state_eq(state1, state2); +} diff --git a/src/runtime/subtree.h b/src/runtime/subtree.h new file mode 100644 index 00000000..cf59db43 ---
/dev/null +++ b/src/runtime/subtree.h @@ -0,0 +1,123 @@ +#ifndef RUNTIME_SUBTREE_H_ +#define RUNTIME_SUBTREE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "tree_sitter/parser.h" +#include "tree_sitter/runtime.h" +#include "runtime/length.h" +#include "runtime/array.h" +#include + +extern TSStateId TS_TREE_STATE_NONE; + +typedef struct { + union { + char *long_data; + char short_data[sizeof(char *) + sizeof(uint32_t)]; + }; + uint32_t length; +} ExternalScannerState; + +typedef struct Subtree Subtree; + +typedef Array(const Subtree *) SubtreeArray; +typedef Array(Subtree *) MutableSubtreeArray; + +struct Subtree { + Length padding; + Length size; + volatile uint32_t ref_count; + uint32_t bytes_scanned; + uint32_t error_cost; + uint32_t node_count; + uint32_t repeat_depth; + uint32_t child_count; + int32_t dynamic_precedence; + + bool visible : 1; + bool named : 1; + bool extra : 1; + bool fragile_left : 1; + bool fragile_right : 1; + bool has_changes : 1; + bool has_external_tokens : 1; + bool is_missing : 1; + TSSymbol symbol; + TSStateId parse_state; + struct { + TSSymbol symbol; + TSLexMode lex_mode; + } first_leaf; + + union { + struct { + SubtreeArray children; + uint32_t visible_child_count; + uint32_t named_child_count; + uint16_t alias_sequence_id; + }; + struct { + uint32_t _2; + ExternalScannerState external_scanner_state; + }; + struct { + uint32_t _1; + int32_t lookahead_char; + }; + }; +}; + +typedef struct { + MutableSubtreeArray free_trees; + MutableSubtreeArray tree_stack; +} SubtreePool; + +void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned); +const char *ts_external_scanner_state_data(const ExternalScannerState *); + +bool ts_subtree_array_copy(SubtreeArray, SubtreeArray *); +void ts_subtree_array_delete(SubtreePool *, SubtreeArray *); +SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *); +void ts_subtree_array_reverse(SubtreeArray *); + +SubtreePool 
ts_subtree_pool_new(uint32_t capacity); +void ts_subtree_pool_delete(SubtreePool *); +Subtree *ts_subtree_pool_allocate(SubtreePool *); +void ts_subtree_pool_free(SubtreePool *, Subtree *); + +/* Node construction. ts_subtree_make_mut returns a Subtree that may be modified. */ Subtree *ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, const TSLanguage *); +Subtree *ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *); +Subtree *ts_subtree_new_copy(SubtreePool *, const Subtree *); +Subtree *ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, const TSLanguage *); +Subtree *ts_subtree_new_error(SubtreePool *, Length, Length, int32_t, const TSLanguage *); +Subtree *ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, const TSLanguage *); +Subtree *ts_subtree_make_mut(SubtreePool *, const Subtree *); +/* Reference counting. */ void ts_subtree_retain(const Subtree *tree); +void ts_subtree_release(SubtreePool *, const Subtree *tree); +/* Comparison, editing and debugging output. */ bool ts_subtree_eq(const Subtree *tree1, const Subtree *tree2); +int ts_subtree_compare(const Subtree *tree1, const Subtree *tree2); +void ts_subtree_set_children(Subtree *, SubtreeArray *, const TSLanguage *); +void ts_subtree_balance(const Subtree *, SubtreePool *, const TSLanguage *); +const Subtree *ts_subtree_edit(const Subtree *, const TSInputEdit *edit, SubtreePool *); +char *ts_subtree_string(const Subtree *, const TSLanguage *, bool include_all); +void ts_subtree_print_dot_graph(const Subtree *, const TSLanguage *, FILE *); +const Subtree *ts_subtree_last_external_token(const Subtree *); +bool ts_subtree_external_scanner_state_eq(const Subtree *, const Subtree *); + +/* Number of bytes covered by the node: padding bytes plus size bytes. */ static inline uint32_t ts_subtree_total_bytes(const Subtree *self) { + return self->padding.bytes + self->size.bytes; +} + +/* Full Length of the node: padding and size combined with length_add. */ static inline Length ts_subtree_total_size(const Subtree *self) { + return length_add(self->padding, self->size); +} + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_SUBTREE_H_ diff --git a/src/runtime/tree.c b/src/runtime/tree.c index c1ada974..ce1e2b98 100644 --- a/src/runtime/tree.c +++
b/src/runtime/tree.c @@ -1,700 +1,59 @@ -#include -#include -#include -#include -#include -#include -#include "runtime/alloc.h" +#include "tree_sitter/runtime.h" +#include "runtime/array.h" +#include "runtime/get_changed_ranges.h" +#include "runtime/subtree.h" +#include "runtime/tree_cursor.h" #include "runtime/tree.h" -#include "runtime/length.h" -#include "runtime/language.h" -#include "runtime/error_costs.h" -TSStateId TS_TREE_STATE_NONE = USHRT_MAX; - -// ExternalTokenState - -void ts_external_token_state_init(TSExternalTokenState *self, const char *content, unsigned length) { - self->length = length; - if (length > sizeof(self->short_data)) { - self->long_data = ts_malloc(length); - memcpy(self->long_data, content, length); - } else { - memcpy(self->short_data, content, length); - } -} - -void ts_external_token_state_delete(TSExternalTokenState *self) { - if (self->length > sizeof(self->short_data)) { - ts_free(self->long_data); - } -} - -const char *ts_external_token_state_data(const TSExternalTokenState *self) { - if (self->length > sizeof(self->short_data)) { - return self->long_data; - } else { - return self->short_data; - } -} - -bool ts_external_token_state_eq(const TSExternalTokenState *a, const TSExternalTokenState *b) { - return a == b || - (a->length == b->length && - memcmp(ts_external_token_state_data(a), ts_external_token_state_data(b), a->length) == 0); -} - -// TreeArray - -bool ts_tree_array_copy(TreeArray self, TreeArray *dest) { - Tree **contents = NULL; - if (self.capacity > 0) { - contents = ts_calloc(self.capacity, sizeof(Tree *)); - memcpy(contents, self.contents, self.size * sizeof(Tree *)); - for (uint32_t i = 0; i < self.size; i++) - ts_tree_retain(contents[i]); - } - - dest->size = self.size; - dest->capacity = self.capacity; - dest->contents = contents; - return true; -} - -void ts_tree_array_delete(TreePool *pool, TreeArray *self) { - for (uint32_t i = 0; i < self->size; i++) { - ts_tree_release(pool, self->contents[i]); - } - 
array_delete(self); -} - -TreeArray ts_tree_array_remove_trailing_extras(TreeArray *self) { - TreeArray result = array_new(); - - uint32_t i = self->size - 1; - for (; i + 1 > 0; i--) { - Tree *child = self->contents[i]; - if (!child->extra) break; - array_push(&result, child); - } - - self->size = i + 1; - ts_tree_array_reverse(&result); +/* Allocates a TSTree with ts_malloc and stores `root` and `language` in it. No reference is taken on `root` here. */ TSTree *ts_tree_new(const Subtree *root, const TSLanguage *language) { + TSTree *result = ts_malloc(sizeof(TSTree)); + result->root = root; + result->language = language; return result; } -void ts_tree_array_reverse(TreeArray *self) { - for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) { - size_t reverse_index = self->size - 1 - i; - Tree *swap = self->contents[i]; - self->contents[i] = self->contents[reverse_index]; - self->contents[reverse_index] = swap; - } +/* Returns a new TSTree that shares the root subtree of `self`, after taking one extra reference on that root. */ TSTree *ts_tree_copy(const TSTree *self) { + ts_subtree_retain(self->root); + return ts_tree_new(self->root, self->language); } -// TreePool - -static const uint32_t MAX_TREE_POOL_SIZE = 1024; - -void ts_tree_pool_init(TreePool *self) { - array_init(&self->free_trees); - array_init(&self->tree_stack); +/* Releases this tree's reference on its root subtree through a temporary pool, then frees the TSTree itself. */ void ts_tree_delete(TSTree *self) { + SubtreePool pool = ts_subtree_pool_new(0); + ts_subtree_release(&pool, self->root); + ts_subtree_pool_delete(&pool); + ts_free(self); } -void ts_tree_pool_delete(TreePool *self) { - if (self->free_trees.contents) { - for (unsigned i = 0; i < self->free_trees.size; i++) { - ts_free(self->free_trees.contents[i]); - } - array_delete(&self->free_trees); - } - if (self->tree_stack.contents) array_delete(&self->tree_stack); -} - -Tree *ts_tree_pool_allocate(TreePool *self) { - if (self->free_trees.size > 0) { - return array_pop(&self->free_trees); - } else { - return ts_malloc(sizeof(Tree)); - } -} - -void ts_tree_pool_free(TreePool *self, Tree *tree) { - if (self->free_trees.size < MAX_TREE_POOL_SIZE) { - array_push(&self->free_trees, tree); - } else { - ts_free(tree); - } -} - -// Tree - -Tree *ts_tree_make_leaf(TreePool *pool,
TSSymbol symbol, Length padding, Length size, const TSLanguage *language) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - Tree *result = ts_tree_pool_allocate(pool); - *result = (Tree){ - .ref_count = 1, - .symbol = symbol, - .size = size, - .visible_child_count = 0, - .named_child_count = 0, - .alias_sequence_id = 0, - .padding = padding, - .visible = metadata.visible, - .named = metadata.named, - .node_count = 1, - .has_changes = false, - .first_leaf = { - .symbol = symbol, - .lex_mode = {0, 0}, - }, - .has_external_tokens = false, +/* Returns a TSNode for the root subtree of `self`, positioned at byte 0, point {0, 0}, with no alias. */ TSNode ts_tree_root_node(const TSTree *self) { + return (TSNode) { + .subtree = self->root, + .tree = self, + .position = {0, 0}, + .byte = 0, + .alias_symbol = 0, }; +} + +/* Applies `edit` to the tree by replacing the root with the result of ts_subtree_edit, using a temporary pool. */ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { + SubtreePool pool = ts_subtree_pool_new(0); + self->root = ts_subtree_edit(self->root, edit, &pool); + ts_subtree_pool_delete(&pool); +} + +/* Computes the ranges that differ between `self` and `other` with ts_subtree_get_changed_ranges. Stores the number of ranges in *count and returns the array that call produced. Each tree gets its own cursor. */ TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) { + TSRange *result; + TSTreeCursor cursor1, cursor2; + ts_tree_cursor_init(&cursor1, self); + /* cursor2 is paired with other->root below, so it is initialised from `other` rather than from `self`. */ ts_tree_cursor_init(&cursor2, other); + *count = ts_subtree_get_changed_ranges( + self->root, other->root, &cursor1, &cursor2, + self->language, &result + ); + array_delete(&cursor1.stack); + array_delete(&cursor2.stack); return result; } -Tree *ts_tree_make_error(TreePool *pool, Length size, Length padding, int32_t lookahead_char, - const TSLanguage *language) { - Tree *result = ts_tree_make_leaf(pool, ts_builtin_sym_error, padding, size, language); - result->fragile_left = true; - result->fragile_right = true; - result->lookahead_char = lookahead_char; - return result; -} - -Tree *ts_tree_make_copy(TreePool *pool, Tree *self) { - Tree *result = ts_tree_pool_allocate(pool); - *result = *self; - result->ref_count = 1; - return result; -} - -static void ts_tree__compress(Tree *self, unsigned count, const TSLanguage *language) { - Tree *tree = self; -
for (unsigned i = 0; i < count; i++) { - if (tree->ref_count > 1 || tree->children.size != 2) break; - - Tree *child = tree->children.contents[0]; - if ( - child->ref_count > 1 || - child->children.size != 2 || - child->symbol != tree->symbol - ) break; - - Tree *grandchild = child->children.contents[0]; - if ( - grandchild->ref_count > 1 || - grandchild->children.size != 2 || - grandchild->symbol != tree->symbol - ) break; - - tree->children.contents[0] = grandchild; - grandchild->context.parent = tree; - grandchild->context.index = -1; - - child->children.contents[0] = grandchild->children.contents[1]; - child->children.contents[0]->context.parent = child; - child->children.contents[0]->context.index = -1; - - grandchild->children.contents[1] = child; - grandchild->children.contents[1]->context.parent = grandchild; - grandchild->children.contents[1]->context.index = -1; - - tree = grandchild; - } - - while (tree != self) { - tree = tree->context.parent; - Tree *child = tree->children.contents[0]; - Tree *grandchild = child->children.contents[1]; - ts_tree_set_children(grandchild, &grandchild->children, language); - ts_tree_set_children(child, &child->children, language); - ts_tree_set_children(tree, &tree->children, language); - } -} - -void ts_tree__balance(Tree *self, const TSLanguage *language) { - if (self->children.contents[0]->repeat_depth > self->children.contents[1]->repeat_depth) { - unsigned n = self->children.contents[0]->repeat_depth - self->children.contents[1]->repeat_depth; - for (unsigned i = n / 2; i > 0; i /= 2) { - ts_tree__compress(self, i, language); - n -= i; - } - } -} - -void ts_tree_assign_parents(Tree *self, TreePool *pool, const TSLanguage *language) { - self->context.parent = NULL; - array_clear(&pool->tree_stack); - array_push(&pool->tree_stack, self); - while (pool->tree_stack.size > 0) { - Tree *tree = array_pop(&pool->tree_stack); - - if (tree->repeat_depth > 0) { - ts_tree__balance(tree, language); - } - - Length offset = 
length_zero(); - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, tree->alias_sequence_id); - uint32_t non_extra_index = 0; - bool earlier_child_was_changed = false; - for (uint32_t i = 0; i < tree->children.size; i++) { - Tree *child = tree->children.contents[i]; - if (earlier_child_was_changed || child->context.parent != tree || child->context.index != i) { - earlier_child_was_changed = true; - child->context.parent = tree; - child->context.index = i; - child->context.offset = offset; - if (!child->extra && alias_sequence && alias_sequence[non_extra_index] != 0) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(language, alias_sequence[non_extra_index]); - child->context.alias_symbol = alias_sequence[non_extra_index]; - child->context.alias_is_named = metadata.named; - } else { - child->context.alias_symbol = 0; - child->context.alias_is_named = false; - } - array_push(&pool->tree_stack, child); - } - offset = length_add(offset, ts_tree_total_size(child)); - if (!child->extra) non_extra_index++; - } - } -} - -void ts_tree_set_children(Tree *self, TreeArray *children, const TSLanguage *language) { - if (self->children.size > 0 && children->contents != self->children.contents) { - array_delete(&self->children); - } - - self->children = *children; - self->named_child_count = 0; - self->visible_child_count = 0; - self->error_cost = 0; - self->repeat_depth = 0; - self->node_count = 1; - self->has_external_tokens = false; - self->dynamic_precedence = 0; - - uint32_t non_extra_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id); - - for (uint32_t i = 0; i < self->children.size; i++) { - Tree *child = self->children.contents[i]; - - if (i == 0) { - self->padding = child->padding; - self->size = child->size; - self->bytes_scanned = child->bytes_scanned; - } else { - uint32_t bytes_scanned = ts_tree_total_bytes(self) + child->bytes_scanned; - if (bytes_scanned > self->bytes_scanned) 
self->bytes_scanned = bytes_scanned; - self->size = length_add(self->size, ts_tree_total_size(child)); - } - - if (child->symbol != ts_builtin_sym_error_repeat) { - self->error_cost += child->error_cost; - } - self->dynamic_precedence += child->dynamic_precedence; - self->node_count += child->node_count; - - if (alias_sequence && alias_sequence[non_extra_index] != 0 && !child->extra) { - self->visible_child_count++; - if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) { - self->named_child_count++; - } - } else if (child->visible) { - self->visible_child_count++; - if (child->named) self->named_child_count++; - } else if (child->children.size > 0) { - self->visible_child_count += child->visible_child_count; - self->named_child_count += child->named_child_count; - } - - if (child->has_external_tokens) self->has_external_tokens = true; - - if (child->symbol == ts_builtin_sym_error) { - self->fragile_left = self->fragile_right = true; - self->parse_state = TS_TREE_STATE_NONE; - } - - if (!child->extra) non_extra_index++; - } - - if (self->symbol == ts_builtin_sym_error || self->symbol == ts_builtin_sym_error_repeat) { - self->error_cost += ERROR_COST_PER_RECOVERY + - ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes + - ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row; - for (uint32_t i = 0; i < self->children.size; i++) { - Tree *child = self->children.contents[i]; - if (child->extra) continue; - if (child->symbol == ts_builtin_sym_error && child->children.size == 0) continue; - if (child->visible) { - self->error_cost += ERROR_COST_PER_SKIPPED_TREE; - } else { - self->error_cost += ERROR_COST_PER_SKIPPED_TREE * child->visible_child_count; - } - } - } - - if (self->children.size > 0) { - Tree *first_child = self->children.contents[0]; - Tree *last_child = self->children.contents[self->children.size - 1]; - self->first_leaf = first_child->first_leaf; - if (first_child->fragile_left) self->fragile_left = true; - if 
(last_child->fragile_right) self->fragile_right = true; - if ( - self->children.size == 2 && - !self->visible && !self->named && - first_child->symbol == self->symbol && - last_child->symbol == self->symbol - ) { - if (first_child->repeat_depth > last_child->repeat_depth) { - self->repeat_depth = first_child->repeat_depth + 1; - } else { - self->repeat_depth = last_child->repeat_depth + 1; - } - } - } -} - -Tree *ts_tree_make_node(TreePool *pool, TSSymbol symbol, TreeArray *children, - unsigned alias_sequence_id, const TSLanguage *language) { - Tree *result = ts_tree_make_leaf(pool, symbol, length_zero(), length_zero(), language); - result->alias_sequence_id = alias_sequence_id; - if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - result->fragile_left = true; - result->fragile_right = true; - } - ts_tree_set_children(result, children, language); - return result; -} - -Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLanguage *language) { - return ts_tree_make_node(pool, ts_builtin_sym_error, children, 0, language); -} - -Tree *ts_tree_make_missing_leaf(TreePool *pool, TSSymbol symbol, const TSLanguage *language) { - Tree *result = ts_tree_make_leaf(pool, symbol, length_zero(), length_zero(), language); - result->is_missing = true; - result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; - return result; -} - -void ts_tree_retain(Tree *self) { - assert(self->ref_count > 0); - self->ref_count++; - assert(self->ref_count != 0); -} - -void ts_tree_release(TreePool *pool, Tree *self) { - array_clear(&pool->tree_stack); - array_push(&pool->tree_stack, self); - while (pool->tree_stack.size > 0) { - Tree *tree = array_pop(&pool->tree_stack); - assert(tree->ref_count > 0); - tree->ref_count--; - if (tree->ref_count == 0) { - if (tree->children.size > 0) { - for (uint32_t i = 0; i < tree->children.size; i++) { - array_push(&pool->tree_stack, tree->children.contents[i]); - } - 
array_delete(&tree->children); - } else if (tree->has_external_tokens) { - ts_external_token_state_delete(&tree->external_token_state); - } - ts_tree_pool_free(pool, tree); - } - } -} - -uint32_t ts_tree_start_column(const Tree *self) { - uint32_t column = self->padding.extent.column; - if (self->padding.extent.row > 0) - return column; - for (const Tree *tree = self; tree != NULL; tree = tree->context.parent) { - column += tree->context.offset.extent.column; - if (tree->context.offset.extent.row > 0) - break; - } - return column; -} - -uint32_t ts_tree_end_column(const Tree *self) { - uint32_t result = self->size.extent.column; - if (self->size.extent.row == 0) - result += ts_tree_start_column(self); - return result; -} - -bool ts_tree_eq(const Tree *self, const Tree *other) { - if (self) { - if (!other) return false; - } else { - return !other; - } - - if (self->symbol != other->symbol) return false; - if (self->visible != other->visible) return false; - if (self->named != other->named) return false; - if (self->padding.bytes != other->padding.bytes) return false; - if (self->size.bytes != other->size.bytes) return false; - if (self->symbol == ts_builtin_sym_error) return self->lookahead_char == other->lookahead_char; - if (self->children.size != other->children.size) return false; - if (self->visible_child_count != other->visible_child_count) return false; - if (self->named_child_count != other->named_child_count) return false; - - for (uint32_t i = 0; i < self->children.size; i++) { - if (!ts_tree_eq(self->children.contents[i], other->children.contents[i])) { - return false; - } - } - return true; -} - -int ts_tree_compare(const Tree *left, const Tree *right) { - if (left->symbol < right->symbol) - return -1; - if (right->symbol < left->symbol) - return 1; - if (left->children.size < right->children.size) - return -1; - if (right->children.size < left->children.size) - return 1; - for (uint32_t i = 0; i < left->children.size; i++) { - Tree *left_child = 
left->children.contents[i]; - Tree *right_child = right->children.contents[i]; - switch (ts_tree_compare(left_child, right_child)) { - case -1: - return -1; - case 1: - return 1; - default: - break; - } - } - return 0; -} - -static inline long min_byte(long a, long b) { - return a <= b ? a : b; -} - -bool ts_tree_invalidate_lookahead(Tree *self, uint32_t edit_byte_offset) { - if (edit_byte_offset >= self->bytes_scanned) return false; - self->has_changes = true; - if (self->children.size > 0) { - uint32_t child_start_byte = 0; - for (uint32_t i = 0; i < self->children.size; i++) { - Tree *child = self->children.contents[i]; - if (child_start_byte > edit_byte_offset) break; - ts_tree_invalidate_lookahead(child, edit_byte_offset - child_start_byte); - child_start_byte += ts_tree_total_bytes(child); - } - } - return true; -} - -static inline TSPoint ts_tree_total_extent(const Tree *self) { - return point_add(self->padding.extent, self->size.extent); -} - -void ts_tree_edit(Tree *self, const TSInputEdit *edit) { - uint32_t old_end_byte = edit->start_byte + edit->bytes_removed; - uint32_t new_end_byte = edit->start_byte + edit->bytes_added; - TSPoint old_end_point = point_add(edit->start_point, edit->extent_removed); - TSPoint new_end_point = point_add(edit->start_point, edit->extent_added); - - assert(old_end_byte <= ts_tree_total_bytes(self)); - - self->has_changes = true; - - if (edit->start_byte < self->padding.bytes) { - if (self->padding.bytes >= old_end_byte) { - uint32_t trailing_padding_bytes = self->padding.bytes - old_end_byte; - TSPoint trailing_padding_extent = point_sub(self->padding.extent, old_end_point); - self->padding.bytes = new_end_byte + trailing_padding_bytes; - self->padding.extent = point_add(new_end_point, trailing_padding_extent); - } else { - uint32_t removed_content_bytes = old_end_byte - self->padding.bytes; - TSPoint removed_content_extent = point_sub(old_end_point, self->padding.extent); - self->size.bytes = self->size.bytes - 
removed_content_bytes; - self->size.extent = point_sub(self->size.extent, removed_content_extent); - self->padding.bytes = new_end_byte; - self->padding.extent = new_end_point; - } - } else if (edit->start_byte == self->padding.bytes && edit->bytes_removed == 0) { - self->padding.bytes = self->padding.bytes + edit->bytes_added; - self->padding.extent = point_add(self->padding.extent, edit->extent_added); - } else { - uint32_t trailing_content_bytes = ts_tree_total_bytes(self) - old_end_byte; - TSPoint trailing_content_extent = point_sub(ts_tree_total_extent(self), old_end_point); - self->size.bytes = new_end_byte + trailing_content_bytes - self->padding.bytes; - self->size.extent = point_sub(point_add(new_end_point, trailing_content_extent), self->padding.extent); - } - - bool found_first_child = false; - long remaining_bytes_to_delete = 0; - TSPoint remaining_extent_to_delete = {0, 0}; - Length child_left, child_right = length_zero(); - for (uint32_t i = 0; i < self->children.size; i++) { - Tree *child = self->children.contents[i]; - child_left = child_right; - child_right = length_add(child_left, ts_tree_total_size(child)); - - if (!found_first_child && child_right.bytes >= edit->start_byte) { - found_first_child = true; - TSInputEdit child_edit = { - .start_byte = edit->start_byte - child_left.bytes, - .bytes_added = edit->bytes_added, - .bytes_removed = edit->bytes_removed, - .start_point = point_sub(edit->start_point, child_left.extent), - .extent_added = edit->extent_added, - .extent_removed = edit->extent_removed, - }; - - if (old_end_byte > child_right.bytes) { - child_edit.bytes_removed = child_right.bytes - edit->start_byte; - child_edit.extent_removed = point_sub(child_right.extent, edit->start_point); - remaining_bytes_to_delete = old_end_byte - child_right.bytes; - remaining_extent_to_delete = point_sub(old_end_point, child_right.extent); - } - - ts_tree_edit(child, &child_edit); - } else if (remaining_bytes_to_delete > 0) { - TSInputEdit child_edit = 
{ - .start_byte = 0, - .bytes_added = 0, - .bytes_removed = min_byte(remaining_bytes_to_delete, ts_tree_total_bytes(child)), - .start_point = {0, 0}, - .extent_added = {0, 0}, - .extent_removed = point_min(remaining_extent_to_delete, ts_tree_total_size(child).extent), - }; - remaining_bytes_to_delete -= child_edit.bytes_removed; - remaining_extent_to_delete = point_sub(remaining_extent_to_delete, child_edit.extent_removed); - ts_tree_edit(child, &child_edit); - } else { - ts_tree_invalidate_lookahead(child, edit->start_byte - child_left.bytes); - } - - child_right = length_add(child_left, ts_tree_total_size(child)); - child->context.offset = child_left; - } -} - -Tree *ts_tree_last_external_token(Tree *tree) { - if (!tree->has_external_tokens) return NULL; - while (tree->children.size > 0) { - for (uint32_t i = tree->children.size - 1; i + 1 > 0; i--) { - Tree *child = tree->children.contents[i]; - if (child->has_external_tokens) { - tree = child; - break; - } - } - } - return tree; -} - -static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) { - if (c == 0) - return snprintf(s, n, "EOF"); - if (c == -1) - return snprintf(s, n, "INVALID"); - else if (c == '\n') - return snprintf(s, n, "'\\n'"); - else if (c == '\t') - return snprintf(s, n, "'\\t'"); - else if (c == '\r') - return snprintf(s, n, "'\\r'"); - else if (0 < c && c < 128 && isprint(c)) - return snprintf(s, n, "'%c'", c); - else - return snprintf(s, n, "%d", c); -} - -static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *language, - char *string, size_t limit, bool is_root, - bool include_all) { - if (!self) return snprintf(string, limit, "(NULL)"); - - char *cursor = string; - char **writer = (limit > 0) ? 
&cursor : &string; - bool visible = - include_all || - is_root || - self->is_missing || - (self->visible && self->named) || - self->context.alias_is_named; - - if (visible && !is_root) { - cursor += snprintf(*writer, limit, " "); - } - - if (visible) { - if (self->symbol == ts_builtin_sym_error && self->children.size == 0 && self->size.bytes > 0) { - cursor += snprintf(*writer, limit, "(UNEXPECTED "); - cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char); - } else if (self->is_missing) { - cursor += snprintf(*writer, limit, "(MISSING"); - } else { - TSSymbol symbol = self->context.alias_symbol ? self->context.alias_symbol : self->symbol; - const char *symbol_name = ts_language_symbol_name(language, symbol); - cursor += snprintf(*writer, limit, "(%s", symbol_name); - } - } - - for (uint32_t i = 0; i < self->children.size; i++) { - Tree *child = self->children.contents[i]; - cursor += ts_tree__write_to_string(child, language, *writer, limit, false, include_all); - } - - if (visible) cursor += snprintf(*writer, limit, ")"); - - return cursor - string; -} - -char *ts_tree_string(const Tree *self, const TSLanguage *language, bool include_all) { - char scratch_string[1]; - size_t size = ts_tree__write_to_string(self, language, scratch_string, 0, true, include_all) + 1; - char *result = ts_malloc(size * sizeof(char)); - ts_tree__write_to_string(self, language, result, size, true, include_all); - return result; -} - -void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset, - const TSLanguage *language, FILE *f) { - TSSymbol symbol = self->context.alias_symbol ? 
self->context.alias_symbol : self->symbol; - fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, symbol)); - - if (self->children.size == 0) - fprintf(f, ", shape=plaintext"); - if (self->extra) - fprintf(f, ", fontcolor=gray"); - - fprintf(f, ", tooltip=\"address:%p\nrange:%u - %u\nstate:%d\nerror-cost:%u\nrepeat-depth:%u\"]\n", - self, byte_offset, byte_offset + ts_tree_total_bytes(self), self->parse_state, - self->error_cost, self->repeat_depth); - for (uint32_t i = 0; i < self->children.size; i++) { - const Tree *child = self->children.contents[i]; - ts_tree__print_dot_graph(child, byte_offset, language, f); - fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i); - byte_offset += ts_tree_total_bytes(child); - } -} - -void ts_tree_print_dot_graph(const Tree *self, const TSLanguage *language, FILE *f) { - fprintf(f, "digraph tree {\n"); - fprintf(f, "edge [arrowhead=none]\n"); - ts_tree__print_dot_graph(self, 0, language, f); - fprintf(f, "}\n"); -} - -static const TSExternalTokenState empty_state = {.length = 0, .short_data = {0}}; - -bool ts_tree_external_token_state_eq(const Tree *self, const Tree *other) { - const TSExternalTokenState *state1 = &empty_state; - const TSExternalTokenState *state2 = &empty_state; - if (self && self->has_external_tokens) state1 = &self->external_token_state; - if (other && other->has_external_tokens) state2 = &other->external_token_state; - return ts_external_token_state_eq(state1, state2); +void ts_tree_print_dot_graph(const TSTree *self, FILE *file) { + ts_subtree_print_dot_graph(self->root, self->language, file); } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index f416516d..7429e06c 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -5,124 +5,12 @@ extern "C" { #endif -#include -#include "tree_sitter/parser.h" -#include "tree_sitter/runtime.h" -#include "runtime/length.h" -#include "runtime/array.h" -#include - -extern TSStateId TS_TREE_STATE_NONE; - -typedef struct { - 
union { - char *long_data; - char short_data[sizeof(char *) + sizeof(uint32_t)]; - }; - uint32_t length; -} TSExternalTokenState; - -typedef struct Tree Tree; - -typedef Array(Tree *) TreeArray; - -struct Tree { - struct { - struct Tree *parent; - uint32_t index; - Length offset; - TSSymbol alias_symbol : 15; - bool alias_is_named : 1; - } context; - - Length padding; - Length size; - uint32_t ref_count; - uint32_t bytes_scanned; - uint32_t error_cost; - uint32_t node_count; - uint32_t repeat_depth; - uint32_t child_count; - int32_t dynamic_precedence; - - bool visible : 1; - bool named : 1; - bool extra : 1; - bool fragile_left : 1; - bool fragile_right : 1; - bool has_changes : 1; - bool has_external_tokens : 1; - bool is_missing : 1; - TSSymbol symbol; - TSStateId parse_state; - struct { - TSSymbol symbol; - TSLexMode lex_mode; - } first_leaf; - - union { - struct { - TreeArray children; - uint32_t visible_child_count; - uint32_t named_child_count; - uint16_t alias_sequence_id; - }; - struct { - uint32_t _2; - TSExternalTokenState external_token_state; - }; - struct { - uint32_t _1; - int32_t lookahead_char; - }; - }; +struct TSTree { + const Subtree *root; + const TSLanguage *language; }; -typedef struct { - TreeArray free_trees; - TreeArray tree_stack; -} TreePool; - -void ts_external_token_state_init(TSExternalTokenState *, const char *, unsigned); -const char *ts_external_token_state_data(const TSExternalTokenState *); - -bool ts_tree_array_copy(TreeArray, TreeArray *); -void ts_tree_array_delete(TreePool *, TreeArray *); -TreeArray ts_tree_array_remove_trailing_extras(TreeArray *); -void ts_tree_array_reverse(TreeArray *); - -void ts_tree_pool_init(TreePool *); -void ts_tree_pool_delete(TreePool *); -Tree *ts_tree_pool_allocate(TreePool *); -void ts_tree_pool_free(TreePool *, Tree *); - -Tree *ts_tree_make_leaf(TreePool *, TSSymbol, Length, Length, const TSLanguage *); -Tree *ts_tree_make_node(TreePool *, TSSymbol, TreeArray *, unsigned, const TSLanguage 
*); -Tree *ts_tree_make_copy(TreePool *, Tree *child); -Tree *ts_tree_make_error_node(TreePool *, TreeArray *, const TSLanguage *); -Tree *ts_tree_make_error(TreePool *, Length, Length, int32_t, const TSLanguage *); -Tree *ts_tree_make_missing_leaf(TreePool *, TSSymbol, const TSLanguage *); -void ts_tree_retain(Tree *tree); -void ts_tree_release(TreePool *, Tree *tree); -bool ts_tree_eq(const Tree *tree1, const Tree *tree2); -int ts_tree_compare(const Tree *tree1, const Tree *tree2); -uint32_t ts_tree_start_column(const Tree *self); -uint32_t ts_tree_end_column(const Tree *self); -void ts_tree_set_children(Tree *, TreeArray *, const TSLanguage *); -void ts_tree_assign_parents(Tree *, TreePool *, const TSLanguage *); -void ts_tree_edit(Tree *, const TSInputEdit *edit); -char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all); -void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *); -Tree *ts_tree_last_external_token(Tree *); -bool ts_tree_external_token_state_eq(const Tree *, const Tree *); - -static inline uint32_t ts_tree_total_bytes(const Tree *self) { - return self->padding.bytes + self->size.bytes; -} - -static inline Length ts_tree_total_size(const Tree *self) { - return length_add(self->padding, self->size); -} +TSTree *ts_tree_new(const Subtree *root, const TSLanguage *language); #ifdef __cplusplus } diff --git a/src/runtime/tree_cursor.c b/src/runtime/tree_cursor.c new file mode 100644 index 00000000..31eee8b4 --- /dev/null +++ b/src/runtime/tree_cursor.c @@ -0,0 +1,138 @@ +#include "tree_sitter/runtime.h" +#include "runtime/alloc.h" +#include "runtime/tree_cursor.h" +#include "runtime/language.h" +#include "runtime/tree.h" + +TSTreeCursor *ts_tree_cursor_new(const TSTree *tree) { + TSTreeCursor *self = ts_malloc(sizeof(TSTreeCursor)); + ts_tree_cursor_init(self, tree); + return self; +} + +void ts_tree_cursor_init(TSTreeCursor *self, const TSTree *tree) { + self->tree = tree; + array_init(&self->stack); + 
array_push(&self->stack, ((TreeCursorEntry) { + .subtree = tree->root, + .position = length_zero(), + .child_index = 0, + .structural_child_index = 0, + })); +} + +void ts_tree_cursor_delete(TSTreeCursor *self) { + array_delete(&self->stack); + ts_free(self); +} + +bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { + TreeCursorEntry *last_entry = array_back(&self->stack); + const Subtree *tree = last_entry->subtree; + Length position = last_entry->position; + + bool did_descend; + do { + did_descend = false; + + uint32_t structural_child_index = 0; + for (uint32_t i = 0; i < tree->children.size; i++) { + const Subtree *child = tree->children.contents[i]; + if (child->visible || child->visible_child_count > 0) { + array_push(&self->stack, ((TreeCursorEntry) { + .subtree = child, + .child_index = i, + .structural_child_index = structural_child_index, + .position = position, + })); + + if (child->visible) { + return true; + } else { + tree = child; + did_descend = true; + break; + } + } + if (!child->extra) structural_child_index++; + position = length_add(position, ts_subtree_total_size(child)); + } + } while (did_descend); + + return false; +} + +bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { + TreeCursorEntry *child_entry = array_back(&self->stack); + + for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { + TreeCursorEntry *parent_entry = &self->stack.contents[i]; + + const Subtree *parent = parent_entry->subtree; + uint32_t child_index = child_entry->child_index; + uint32_t structural_child_index = child_entry->structural_child_index; + Length position = child_entry->position; + const Subtree *child = parent->children.contents[child_index]; + + while (++child_index < parent->children.size) { + if (!child->extra) structural_child_index++; + position = length_add(position, ts_subtree_total_size(child)); + child = parent->children.contents[child_index]; + + if (child->visible || child->visible_child_count > 0) { + self->stack.contents[i + 1] 
= (TreeCursorEntry) { + .subtree = child, + .child_index = child_index, + .structural_child_index = structural_child_index, + .position = position, + }; + self->stack.size = i + 2; + + if (child->visible) { + return true; + } else { + ts_tree_cursor_goto_first_child(self); + return true; + } + } + } + + child_entry = parent_entry; + if (parent->visible) break; + } + + return false; +} + +bool ts_tree_cursor_goto_parent(TSTreeCursor *self) { + for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { + TreeCursorEntry *entry = &self->stack.contents[i]; + if (entry->subtree->visible) { + self->stack.size = i + 1; + return true; + } + } + return false; +} + +TSNode ts_tree_cursor_current_node(TSTreeCursor *self) { + TreeCursorEntry *last_entry = array_back(&self->stack); + TSSymbol alias_symbol = 0; + if (self->stack.size > 1) { + TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->alias_sequence_id + ); + if (alias_sequence) { + alias_symbol = alias_sequence[last_entry->structural_child_index]; + } + } + return (TSNode) { + .tree = self->tree, + .subtree = last_entry->subtree, + .position = last_entry->position.extent, + .byte = last_entry->position.bytes, + .alias_symbol = alias_symbol, + }; +} diff --git a/src/runtime/tree_cursor.h b/src/runtime/tree_cursor.h new file mode 100644 index 00000000..615e7e06 --- /dev/null +++ b/src/runtime/tree_cursor.h @@ -0,0 +1,20 @@ +#ifndef RUNTIME_TREE_CURSOR_H_ +#define RUNTIME_TREE_CURSOR_H_ + +#include "runtime/subtree.h" + +typedef struct { + const Subtree *subtree; + Length position; + uint32_t child_index; + uint32_t structural_child_index; +} TreeCursorEntry; + +struct TSTreeCursor { + const TSTree *tree; + Array(TreeCursorEntry) stack; +}; + +void ts_tree_cursor_init(TSTreeCursor *, const TSTree *); + +#endif // RUNTIME_TREE_CURSOR_H_ diff --git a/test/benchmarks.cc b/test/benchmarks.cc 
index 616d2de6..6612444e 100644 --- a/test/benchmarks.cc +++ b/test/benchmarks.cc @@ -43,12 +43,12 @@ int main(int argc, char *arg[]) { vector error_speeds; vector non_error_speeds; - auto document = ts_document_new(); + TSParser *parser = ts_parser_new(); if (getenv("TREE_SITTER_BENCHMARK_SVG")) { - ts_document_print_debugging_graphs(document, true); + ts_parser_print_dot_graphs(parser, stderr); } else if (getenv("TREE_SITTER_BENCHMARK_LOG")) { - ts_document_set_logger(document, stderr_logger_new(false)); + ts_parser_set_logger(parser, stderr_logger_new(false)); } auto language_filter = getenv("TREE_SITTER_BENCHMARK_LANGUAGE"); @@ -61,7 +61,7 @@ int main(int argc, char *arg[]) { for (auto &language_name : language_names) { if (language_filter && language_name != language_filter) continue; - ts_document_set_language(document, load_real_language(language_name)); + ts_parser_set_language(parser, load_real_language(language_name)); printf("%s\n", language_name.c_str()); @@ -69,20 +69,16 @@ int main(int argc, char *arg[]) { if (file_name_filter && example.file_name != file_name_filter) continue; if (example.input.size() < 256) continue; - ts_document_invalidate(document); - ts_document_set_input_string(document, ""); - ts_document_parse(document); - - ts_document_invalidate(document); - ts_document_set_input_string(document, example.input.c_str()); - clock_t start_time = clock(); - ts_document_parse(document); + TSTree *tree = ts_parser_parse_string(parser, nullptr, example.input.c_str(), example.input.size()); clock_t end_time = clock(); - unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; - assert(!ts_node_has_error(ts_document_root_node(document))); + + assert(!ts_node_has_error(ts_tree_root_node(tree))); + ts_tree_delete(tree); + + size_t duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; size_t speed = static_cast(example.input.size()) / duration; - printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); + 
printf(" %-30s\t%lu ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); if (speed != 0) non_error_speeds.push_back(speed); } @@ -93,15 +89,15 @@ int main(int argc, char *arg[]) { if (file_name_filter && example.file_name != file_name_filter) continue; if (example.input.size() < 256) continue; - ts_document_invalidate(document); - ts_document_set_input_string(document, example.input.c_str()); - clock_t start_time = clock(); - ts_document_parse(document); + TSTree *tree = ts_parser_parse_string(parser, nullptr, example.input.c_str(), example.input.size()); clock_t end_time = clock(); - unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; + + ts_tree_delete(tree); + + size_t duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; size_t speed = static_cast(example.input.size()) / duration; - printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); + printf(" %-30s\t%lu ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); if (speed != 0) error_speeds.push_back(speed); } } @@ -118,5 +114,6 @@ int main(int argc, char *arg[]) { printf(" %-30s\t%lu bytes/ms\n", "average speed", mean(error_speeds)); printf(" %-30s\t%lu bytes/ms\n", "worst speed", min(error_speeds)); + ts_parser_delete(parser); return 0; } diff --git a/test/compiler/prepare_grammar/extract_tokens_test.cc b/test/compiler/prepare_grammar/extract_tokens_test.cc index 0f9be780..da2ae60a 100644 --- a/test/compiler/prepare_grammar/extract_tokens_test.cc +++ b/test/compiler/prepare_grammar/extract_tokens_test.cc @@ -48,7 +48,7 @@ describe("extract_tokens", []() { Repeat{Symbol::non_terminal(3)} }, }, - {}, {}, {}, {} + {}, {}, {}, {}, {} }); InitialSyntaxGrammar &syntax_grammar = get<0>(result); @@ -156,7 +156,7 @@ describe("extract_tokens", []() { }) }, }, - {}, {}, {}, {} + {}, {}, {}, {}, {} }); InitialSyntaxGrammar &syntax_grammar = get<0>(result); @@ -203,7 +203,7 @@ describe("extract_tokens", []() { Rule::seq({ String{"ef"}, 
String{"cd"} }) }, }, - {}, {}, {}, {} + {}, {}, {}, {}, {} }); InitialSyntaxGrammar &syntax_grammar = get<0>(result); @@ -258,7 +258,7 @@ describe("extract_tokens", []() { String{"a"} }, }, - {}, {}, {}, {} + {}, {}, {}, {}, {} }); InitialSyntaxGrammar &syntax_grammar = get<0>(result); @@ -298,7 +298,7 @@ describe("extract_tokens", []() { { { Symbol::non_terminal(2), Symbol::non_terminal(3) } }, - {}, {} + {}, {}, {} }); InitialSyntaxGrammar &syntax_grammar = get<0>(result); @@ -319,7 +319,7 @@ describe("extract_tokens", []() { String{"y"}, Pattern{" "}, }, - {}, {}, {} + {}, {}, {}, {} }); AssertThat(get<2>(result), Equals(CompileError::none())); @@ -340,7 +340,7 @@ describe("extract_tokens", []() { { String{"y"}, }, - {}, {}, {} + {}, {}, {}, {} }); AssertThat(get<2>(result), Equals(CompileError::none())); @@ -370,7 +370,7 @@ describe("extract_tokens", []() { { Symbol::non_terminal(2), }, - {}, {}, {} + {}, {}, {}, {} }); AssertThat(get<2>(result), Equals(CompileError::none())); @@ -399,7 +399,7 @@ describe("extract_tokens", []() { { Symbol::non_terminal(1) }, - {}, {}, {} + {}, {}, {}, {} }); AssertThat(get<2>(result), Equals(CompileError( @@ -417,7 +417,7 @@ describe("extract_tokens", []() { { Rule::choice({ Symbol::non_terminal(1), Blank{} }) }, - {}, {}, {} + {}, {}, {}, {} }); AssertThat(get<2>(result), Equals(CompileError( @@ -446,7 +446,7 @@ describe("extract_tokens", []() { { Variable{"rule_A", VariableTypeNamed, Symbol::non_terminal(0)} }, - {} + {}, {} }); AssertThat(get<2>(result), Equals(CompileError( diff --git a/test/fuzz/fuzzer.cc b/test/fuzz/fuzzer.cc index 7b1aea5b..570dfa7e 100644 --- a/test/fuzz/fuzzer.cc +++ b/test/fuzz/fuzzer.cc @@ -12,16 +12,15 @@ extern "C" const TSLanguage *TS_LANG(); extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { const char *str = reinterpret_cast(data); - TSDocument *document = ts_document_new(); - ts_document_set_language(document, TS_LANG()); - 
ts_document_set_input_string_with_length(document, str, size); + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, TS_LANG()); + ts_parser_halt_on_error(parser, TS_HALT_ON_ERROR); - TSParseOptions options = {}; - options.halt_on_error = TS_HALT_ON_ERROR; - ts_document_parse_with_options(document, options); + TSTree *tree = ts_parser_parse_string(parser, NULL, str, size); + TSNode root_node = ts_document_root_node(tree); - TSNode root_node = ts_document_root_node(document); - ts_document_free(document); + ts_tree_delete(tree); + ts_parser_delete(parser); return 0; } diff --git a/test/helpers/random_helpers.cc b/test/helpers/random_helpers.cc index d79475c7..b0200e51 100644 --- a/test/helpers/random_helpers.cc +++ b/test/helpers/random_helpers.cc @@ -1,3 +1,4 @@ +#include "helpers/random_helpers.h" #include #include #include @@ -6,54 +7,69 @@ using std::string; using std::vector; -static std::default_random_engine engine; +Generator default_generator(0); unsigned get_time_as_seed() { return time(nullptr); } -void random_reseed(unsigned seed) { +void Generator::reseed(unsigned seed) { engine.seed(seed); } -unsigned random_unsigned() { - return std::uniform_int_distribution()(engine); +unsigned Generator::operator()() { + return distribution(engine); } -unsigned random_unsigned(unsigned max) { - return std::uniform_int_distribution(0, max - 1)(engine); +unsigned Generator::operator()(unsigned max) { + return distribution(engine) % max; } -static string random_string(char min, char max) { +string Generator::str(char min, char max) { string result; - size_t length = random_unsigned(12); + size_t length = operator()(12); for (size_t i = 0; i < length; i++) { - result += (min + random_unsigned(max - min)); + result += (min + operator()(max - min)); } return result; } -static string random_char(string characters) { - size_t index = random_unsigned(characters.size()); - return string() + characters[index]; -} +static string operator_characters = 
"!(){}[]<>+-="; -string random_words(size_t count) { +string Generator::words(size_t count) { string result; bool just_inserted_word = false; for (size_t i = 0; i < count; i++) { - if (random_unsigned(10) < 6) { - result += random_char("!(){}[]<>+-="); + if (operator()(10) < 6) { + result += operator_characters[operator()(operator_characters.size())]; } else { if (just_inserted_word) result += " "; - result += random_string('a', 'z'); + result += str('a', 'z'); just_inserted_word = true; } } return result; } -string select_random(const vector &list) { - return list[random_unsigned(list.size())]; +string Generator::select(const vector &list) { + return list[operator()(list.size())]; } + +#ifdef _WIN32 + +#include + +void Generator::sleep_some() { + Sleep(operator()(5)); +} + +#else + +#include + +void Generator::sleep_some() { + usleep(operator()(5 * 1000)); +} + +#endif diff --git a/test/helpers/random_helpers.h b/test/helpers/random_helpers.h index b66c4aee..7dd471fd 100644 --- a/test/helpers/random_helpers.h +++ b/test/helpers/random_helpers.h @@ -3,12 +3,26 @@ #include #include +#include unsigned get_time_as_seed(); -void random_reseed(unsigned); -unsigned random_unsigned(); -unsigned random_unsigned(unsigned max); -std::string random_words(size_t count); -std::string select_random(const std::vector &); + +class Generator { + std::default_random_engine engine; + std::uniform_int_distribution distribution; + +public: + Generator(uint32_t seed) : engine{seed} {} + + void reseed(unsigned); + unsigned operator()(); + unsigned operator()(unsigned max); + std::string words(size_t count); + std::string str(char min, char max); + std::string select(const std::vector &); + void sleep_some(); +}; + +extern Generator default_generator; #endif // HELPERS_RANDOM_HELPERS_H_ diff --git a/test/helpers/record_alloc.cc b/test/helpers/record_alloc.cc index 43e11abe..2e2ea648 100644 --- a/test/helpers/record_alloc.cc +++ b/test/helpers/record_alloc.cc @@ -1,6 +1,7 @@ #include 
#include #include +#include using std::map; using std::vector; @@ -8,13 +9,16 @@ using std::vector; static bool _enabled = false; static size_t _allocation_count = 0; static map _outstanding_allocations; +static std::mutex _outstanding_allocations_mutex; +static bool _multi_threaded_mode = false; namespace record_alloc { -void start() { +void start(bool multi_threaded_mode) { _enabled = true; _allocation_count = 0; _outstanding_allocations.clear(); + _multi_threaded_mode = multi_threaded_mode; } void stop() { @@ -30,7 +34,11 @@ vector outstanding_allocation_indices() { } size_t allocation_count() { - return _allocation_count; + size_t result; + _outstanding_allocations_mutex.lock(); + result = _allocation_count; + _outstanding_allocations_mutex.unlock(); + return result; } } // namespace record_alloc @@ -39,16 +47,20 @@ extern "C" { static void *record_allocation(void *result) { if (!_enabled) return result; + if (_multi_threaded_mode) _outstanding_allocations_mutex.lock(); _outstanding_allocations[result] = _allocation_count; _allocation_count++; + if (_multi_threaded_mode) _outstanding_allocations_mutex.unlock(); return result; } static void record_deallocation(void *pointer) { + if (_multi_threaded_mode) _outstanding_allocations_mutex.lock(); auto entry = _outstanding_allocations.find(pointer); if (entry != _outstanding_allocations.end()) { _outstanding_allocations.erase(entry); } + if (_multi_threaded_mode) _outstanding_allocations_mutex.unlock(); } void *ts_record_malloc(size_t size) { diff --git a/test/helpers/record_alloc.h b/test/helpers/record_alloc.h index 1f5968ac..f21876b4 100644 --- a/test/helpers/record_alloc.h +++ b/test/helpers/record_alloc.h @@ -5,7 +5,7 @@ namespace record_alloc { -void start(); +void start(bool multi_threaded_mode = false); void stop(); void fail_at_allocation_index(size_t failure_index); std::vector outstanding_allocation_indices(); diff --git a/test/helpers/scope_sequence.cc b/test/helpers/scope_sequence.cc index 
d6e2e3b1..8851f0c4 100644 --- a/test/helpers/scope_sequence.cc +++ b/test/helpers/scope_sequence.cc @@ -21,17 +21,16 @@ static void append_text_to_scope_sequence(ScopeSequence *sequence, static void append_to_scope_sequence(ScopeSequence *sequence, ScopeStack *current_scopes, - TSNode node, TSDocument *document, - const std::string &text) { + TSNode node, const std::string &text) { append_text_to_scope_sequence( sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size() ); - current_scopes->push_back(ts_node_type(node, document)); + current_scopes->push_back(ts_node_type(node)); for (size_t i = 0, n = ts_node_child_count(node); i < n; i++) { TSNode child = ts_node_child(node, i); - append_to_scope_sequence(sequence, current_scopes, child, document, text); + append_to_scope_sequence(sequence, current_scopes, child, text); } append_text_to_scope_sequence( @@ -41,11 +40,11 @@ static void append_to_scope_sequence(ScopeSequence *sequence, current_scopes->pop_back(); } -ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text) { +ScopeSequence build_scope_sequence(TSTree *tree, const std::string &text) { ScopeSequence sequence; ScopeStack current_scopes; - TSNode node = ts_document_root_node(document); - append_to_scope_sequence(&sequence, ¤t_scopes, node, document, text); + TSNode node = ts_tree_root_node(tree); + append_to_scope_sequence(&sequence, ¤t_scopes, node, text); return sequence; } diff --git a/test/helpers/scope_sequence.h b/test/helpers/scope_sequence.h index c83ad597..2ad15117 100644 --- a/test/helpers/scope_sequence.h +++ b/test/helpers/scope_sequence.h @@ -9,7 +9,7 @@ typedef std::string Scope; typedef std::vector ScopeStack; typedef std::vector ScopeSequence; -ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text); +ScopeSequence build_scope_sequence(TSTree *tree, const std::string &text); void verify_changed_ranges(const ScopeSequence &old, const ScopeSequence &new_sequence, const 
std::string &text, TSRange *ranges, size_t range_count); diff --git a/test/helpers/tree_helpers.cc b/test/helpers/tree_helpers.cc index 5142ca8d..0e63a19f 100644 --- a/test/helpers/tree_helpers.cc +++ b/test/helpers/tree_helpers.cc @@ -1,8 +1,6 @@ -#include "bandit/bandit.h" +#include "test_helper.h" #include "helpers/tree_helpers.h" #include "helpers/point_helpers.h" -#include "runtime/document.h" -#include "runtime/node.h" #include using std::string; @@ -16,36 +14,42 @@ const char *symbol_names[24] = { "twenty-two", "twenty-three" }; -TreeArray *tree_array(std::vector trees) { - static TreeArray result; +SubtreeArray *tree_array(std::vector trees) { + static SubtreeArray result; result.capacity = trees.size(); result.size = trees.size(); - result.contents = (Tree **)calloc(trees.size(), sizeof(Tree *)); - for (size_t i = 0; i < trees.size(); i++) + result.contents = (const Subtree **)calloc(trees.size(), sizeof(Subtree *)); + for (size_t i = 0; i < trees.size(); i++) { result.contents[i] = trees[i]; + } return &result; } -ostream &operator<<(std::ostream &stream, const Tree *tree) { +ostream &operator<<(std::ostream &stream, const Subtree *tree) { static TSLanguage DUMMY_LANGUAGE = {}; - static TSDocument DUMMY_DOCUMENT = {}; - DUMMY_DOCUMENT.parser.language = &DUMMY_LANGUAGE; DUMMY_LANGUAGE.symbol_names = symbol_names; - TSNode node; - node.data = tree; - return stream << string(ts_node_string(node, &DUMMY_DOCUMENT)); + char *string = ts_subtree_string(tree, &DUMMY_LANGUAGE, false); + stream << string; + ts_free(string); + return stream; } ostream &operator<<(ostream &stream, const TSNode &node) { - return stream << string("{") << (const Tree *)node.data << - string(", ") << to_string(ts_node_start_byte(node)) << string("}"); + if (node.subtree) { + char *string = ts_node_string(node); + stream << "{" << string << ", " << to_string(ts_node_start_byte(node)) << "}"; + ts_free(string); + return stream; + } else { + return stream << "NULL"; + } } bool 
operator==(const TSNode &left, const TSNode &right) { return ts_node_eq(left, right); } -bool operator==(const std::vector &vec, const TreeArray &array) { +bool operator==(const std::vector &vec, const SubtreeArray &array) { if (vec.size() != array.size) return false; for (size_t i = 0; i < array.size; i++) diff --git a/test/helpers/tree_helpers.h b/test/helpers/tree_helpers.h index c28dcd98..00740faf 100644 --- a/test/helpers/tree_helpers.h +++ b/test/helpers/tree_helpers.h @@ -1,17 +1,17 @@ #ifndef HELPERS_TREE_HELPERS_H_ #define HELPERS_TREE_HELPERS_H_ -#include "runtime/tree.h" +#include "runtime/subtree.h" #include #include extern const char *symbol_names[24]; -TreeArray *tree_array(std::vector trees); +SubtreeArray *tree_array(std::vector trees); -std::ostream &operator<<(std::ostream &stream, const Tree *tree); +std::ostream &operator<<(std::ostream &stream, const Subtree *tree); std::ostream &operator<<(std::ostream &stream, const TSNode &node); bool operator==(const TSNode &left, const TSNode &right); -bool operator==(const std::vector &right, const TreeArray &array); +bool operator==(const std::vector &right, const SubtreeArray &array); void assert_consistent_tree_sizes(TSNode node); diff --git a/test/integration/fuzzing-examples.cc b/test/integration/fuzzing-examples.cc index 46cac15a..aca95840 100644 --- a/test/integration/fuzzing-examples.cc +++ b/test/integration/fuzzing-examples.cc @@ -29,14 +29,14 @@ describe("examples found via fuzzing", [&]() { for (unsigned i = 0, n = examples.size(); i < n; i++) { it(("parses example number " + to_string(i)).c_str(), [&]() { - TSDocument *document = ts_document_new(); + TSParser *parser = ts_parser_new(); if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { - ts_document_print_debugging_graphs(document, true); + ts_parser_print_dot_graphs(parser, stderr); } const string &language_name = examples[i].first; - ts_document_set_language(document, load_real_language(language_name)); + ts_parser_set_language(parser, 
load_real_language(language_name)); string input; const string &base64_input = examples[i].second; @@ -47,18 +47,12 @@ describe("examples found via fuzzing", [&]() { base64_input.size() )); - ts_document_set_input_string_with_length( - document, - input.c_str(), - input.size() - ); - - ts_document_parse(document); - - TSNode node = ts_document_root_node(document); + TSTree *tree = ts_parser_parse_string(parser, nullptr, input.c_str(), input.size()); + TSNode node = ts_tree_root_node(tree); assert_consistent_tree_sizes(node); - ts_document_free(document); + ts_tree_delete(tree); + ts_parser_delete(parser); }); } diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index 37465add..02752a34 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -12,14 +12,16 @@ #include "helpers/tree_helpers.h" #include -static void assert_correct_tree_size(TSDocument *document, string content) { - TSNode root_node = ts_document_root_node(document); +static void assert_correct_tree_size(TSTree *tree, string content) { + TSNode root_node = ts_tree_root_node(tree); AssertThat(ts_node_end_byte(root_node), Equals(content.size())); assert_consistent_tree_sizes(root_node); } START_TEST +if (TREE_SITTER_SEED == -1) return; + vector test_languages({ "javascript", "json", @@ -31,107 +33,136 @@ vector test_languages({ for (auto &language_name : test_languages) { describe(("the " + language_name + " language").c_str(), [&]() { - TSDocument *document; + TSParser *parser; const bool debug_graphs_enabled = getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS"); before_each([&]() { record_alloc::start(); - document = ts_document_new(); - ts_document_set_language(document, load_real_language(language_name)); + parser = ts_parser_new(); + ts_parser_set_language(parser, load_real_language(language_name)); - // ts_document_set_logger(document, stderr_logger_new(true)); + // ts_parser_set_logger(parser, stderr_logger_new(true)); if 
(debug_graphs_enabled) { - ts_document_print_debugging_graphs(document, true); + ts_parser_print_dot_graphs(parser, stderr); } }); after_each([&]() { - ts_document_free(document); + ts_parser_delete(parser); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); for (auto &entry : read_real_language_corpus(language_name)) { SpyInput *input; - auto it_handles_edit_sequence = [&](string name, std::function edit_sequence){ - it(("parses " + entry.description + ": " + name).c_str(), [&]() { - input = new SpyInput(entry.input, 3); - if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - ts_document_set_input(document, input->input()); - edit_sequence(); + it(("parses " + entry.description + ": initial parse").c_str(), [&]() { + input = new SpyInput(entry.input, 3); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - TSNode root_node = ts_document_root_node(document); - const char *node_string = ts_node_string(root_node, document); - string result(node_string); - ts_free((void *)node_string); - AssertThat(result, Equals(entry.tree_string)); + TSTree *tree = ts_parser_parse(parser, nullptr, input->input()); + assert_correct_tree_size(tree, input->content); - assert_correct_tree_size(document, input->content); - delete input; - }); - }; + TSNode root_node = ts_tree_root_node(tree); + const char *node_string = ts_node_string(root_node); + string result(node_string); + ts_free((void *)node_string); + AssertThat(result, Equals(entry.tree_string)); - it_handles_edit_sequence("initial parse", [&]() { - ts_document_parse(document); + ts_tree_delete(tree); + delete input; }); set> deletions; set> insertions; for (size_t i = 0; i < 60; i++) { - size_t edit_position = random_unsigned(utf8_char_count(entry.input)); - size_t deletion_size = random_unsigned(utf8_char_count(entry.input) - edit_position); - string inserted_text = random_words(random_unsigned(4) + 1); + size_t edit_position = 
default_generator(utf8_char_count(entry.input)); + size_t deletion_size = default_generator(utf8_char_count(entry.input) - edit_position); + string inserted_text = default_generator.words(default_generator(4) + 1); if (insertions.insert({edit_position, inserted_text}).second) { - string description = "\"" + inserted_text + "\" at " + to_string(edit_position); - - it_handles_edit_sequence("repairing an insertion of " + description, [&]() { - ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); - ts_document_parse(document); - assert_correct_tree_size(document, input->content); + it(("parses " + entry.description + + ": repairing an insertion of \"" + inserted_text + "\"" + + " at " + to_string(edit_position)).c_str(), [&]() { + input = new SpyInput(entry.input, 3); if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - ts_document_edit(document, input->undo()); - assert_correct_tree_size(document, input->content); + input->replace(edit_position, 0, inserted_text); + TSTree *tree = ts_parser_parse(parser, nullptr, input->input()); + assert_correct_tree_size(tree, input->content); if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - TSRange *ranges; + TSInputEdit edit = input->undo(); + ts_tree_edit(tree, &edit); + assert_correct_tree_size(tree, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); + + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + assert_correct_tree_size(new_tree, input->content); + uint32_t range_count; - ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); - ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); - assert_correct_tree_size(document, input->content); + TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count); - ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content); - verify_changed_ranges(old_scope_sequence, new_scope_sequence, - 
input->content, ranges, range_count); + ScopeSequence old_scope_sequence = build_scope_sequence(tree, input->content); + ScopeSequence new_scope_sequence = build_scope_sequence(new_tree, input->content); + verify_changed_ranges( + old_scope_sequence, new_scope_sequence, + input->content, ranges, range_count + ); ts_free(ranges); + + TSNode root_node = ts_tree_root_node(new_tree); + const char *node_string = ts_node_string(root_node); + string result(node_string); + ts_free((void *)node_string); + AssertThat(result, Equals(entry.tree_string)); + + ts_tree_delete(tree); + ts_tree_delete(new_tree); + delete input; }); } if (deletions.insert({edit_position, deletion_size}).second) { - string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size); - - it_handles_edit_sequence("repairing a deletion of " + desription, [&]() { - ts_document_edit(document, input->replace(edit_position, deletion_size, "")); - ts_document_parse(document); - assert_correct_tree_size(document, input->content); + it(("parses " + entry.description + + ": repairing a deletion of " + + to_string(edit_position) + "-" + to_string(edit_position + deletion_size)).c_str(), [&]() { + input = new SpyInput(entry.input, 3); if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - ts_document_edit(document, input->undo()); - assert_correct_tree_size(document, input->content); + input->replace(edit_position, deletion_size, ""); + TSTree *tree = ts_parser_parse(parser, nullptr, input->input()); + assert_correct_tree_size(tree, input->content); if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - TSRange *ranges; + TSInputEdit edit = input->undo(); + ts_tree_edit(tree, &edit); + assert_correct_tree_size(tree, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); + + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + assert_correct_tree_size(new_tree, input->content); + uint32_t range_count; - 
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); - ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); - assert_correct_tree_size(document, input->content); + TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count); - ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content); - verify_changed_ranges(old_scope_sequence, new_scope_sequence, - input->content, ranges, range_count); + ScopeSequence old_scope_sequence = build_scope_sequence(tree, input->content); + ScopeSequence new_scope_sequence = build_scope_sequence(new_tree, input->content); + verify_changed_ranges( + old_scope_sequence, new_scope_sequence, + input->content, ranges, range_count + ); ts_free(ranges); + + TSNode root_node = ts_tree_root_node(new_tree); + const char *node_string = ts_node_string(root_node); + string result(node_string); + ts_free((void *)node_string); + AssertThat(result, Equals(entry.tree_string)); + + ts_tree_delete(tree); + ts_tree_delete(new_tree); + delete input; }); } } diff --git a/test/integration/test_grammars.cc b/test/integration/test_grammars.cc index 9dbefcd8..62174855 100644 --- a/test/integration/test_grammars.cc +++ b/test/integration/test_grammars.cc @@ -9,6 +9,8 @@ START_TEST +if (TREE_SITTER_SEED == -1) return; + string grammars_dir_path = join_path({"test", "fixtures", "test_grammars"}); vector test_languages = list_directory(grammars_dir_path); @@ -50,26 +52,26 @@ for (auto &language_name : test_languages) { ); } - TSDocument *document = ts_document_new(); - ts_document_set_language(document, language); - ts_document_set_input_string_with_length(document, entry.input.c_str(), entry.input.size()); + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, language); - // ts_document_print_debugging_graphs(document, true); if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { - ts_document_print_debugging_graphs(document, true); + 
ts_parser_print_dot_graphs(parser, stderr); } - ts_document_parse(document); + TSTree *tree = ts_parser_parse_string(parser, nullptr, entry.input.c_str(), entry.input.size()); - TSNode root_node = ts_document_root_node(document); + TSNode root_node = ts_tree_root_node(tree); AssertThat(ts_node_end_byte(root_node), Equals(entry.input.size())); assert_consistent_tree_sizes(root_node); - const char *node_string = ts_node_string(root_node, document); + + const char *node_string = ts_node_string(root_node); string result(node_string); ts_free((void *)node_string); - ts_document_free(document); - AssertThat(result, Equals(entry.tree_string)); + + ts_tree_delete(tree); + ts_parser_delete(parser); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); } diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc deleted file mode 100644 index 0be03657..00000000 --- a/test/runtime/document_test.cc +++ /dev/null @@ -1,490 +0,0 @@ -#include "test_helper.h" -#include "runtime/alloc.h" -#include "helpers/record_alloc.h" -#include "helpers/stream_methods.h" -#include "helpers/tree_helpers.h" -#include "helpers/point_helpers.h" -#include "helpers/spy_logger.h" -#include "helpers/stderr_logger.h" -#include "helpers/spy_input.h" -#include "helpers/load_language.h" - -TSPoint point(size_t row, size_t column) { - return TSPoint{static_cast(row), static_cast(column)}; -} - -START_TEST - -describe("Document", [&]() { - TSDocument *document; - TSNode root; - - before_each([&]() { - record_alloc::start(); - document = ts_document_new(); - - if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { - ts_document_print_debugging_graphs(document, true); - } - }); - - after_each([&]() { - ts_document_free(document); - record_alloc::stop(); - AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); - }); - - auto assert_node_string_equals = [&](TSNode node, const string &expected) { - char *str = ts_node_string(node, document); - string actual(str); - 
ts_free(str); - AssertThat(actual, Equals(expected)); - }; - - describe("set_input(input)", [&]() { - SpyInput *spy_input; - - before_each([&]() { - spy_input = new SpyInput("{\"key\": [null, 2]}", 3); - - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, "{\"key\": [1, 2]}"); - ts_document_parse(document); - - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (object (pair (string) (array (number) (number)))))"); - }); - - after_each([&]() { - delete spy_input; - }); - - it("handles both UTF8 and UTF16 encodings", [&]() { - const char16_t content[] = u"[true, false]"; - spy_input->content = string((const char *)content, sizeof(content)); - spy_input->encoding = TSInputEncodingUTF16; - - ts_document_set_input(document, spy_input->input()); - ts_document_invalidate(document); - ts_document_parse(document); - - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (array (true) (false)))"); - }); - - it("handles truncated UTF16 data", [&]() { - const char content[1] = { '\0' }; - spy_input->content = string(content, sizeof(content)); - spy_input->encoding = TSInputEncodingUTF16; - - ts_document_set_input(document, spy_input->input()); - ts_document_invalidate(document); - ts_document_parse(document); - }); - - it("measures columns in bytes", [&]() { - const char16_t content[] = u"[true, false]"; - spy_input->content = string((const char *)content, sizeof(content)); - spy_input->encoding = TSInputEncodingUTF16; - TSInput input = spy_input->input(); - - ts_document_set_input(document, input); - ts_document_invalidate(document); - ts_document_parse(document); - root = ts_document_root_node(document); - AssertThat(ts_node_end_point(root), Equals({0, 28})); - }); - - it("allows the input to be retrieved later", [&]() { - ts_document_set_input(document, spy_input->input()); - AssertThat(ts_document_input(document).payload, Equals(spy_input)); - 
AssertThat(ts_document_input(document).read, Equals(spy_input->input().read)); - AssertThat(ts_document_input(document).seek, Equals(spy_input->input().seek)); - }); - - it("does not assume that the document's text has changed", [&]() { - ts_document_set_input(document, spy_input->input()); - AssertThat(ts_document_root_node(document), Equals(root)); - AssertThat(ts_node_has_changes(root), IsFalse()); - AssertThat(spy_input->strings_read(), IsEmpty()); - }); - - it("reads text from the new input for future parses", [&]() { - ts_document_set_input(document, spy_input->input()); - - // Insert 'null', delete '1'. - TSInputEdit edit = {}; - edit.start_point.column = edit.start_byte = strlen("{\"key\": ["); - edit.extent_added.column = edit.bytes_added = 4; - edit.extent_removed.column = edit.bytes_removed = 1; - - ts_document_edit(document, edit); - ts_document_parse(document); - - TSNode new_root = ts_document_root_node(document); - assert_node_string_equals( - new_root, - "(value (object (pair (string) (array (null) (number)))))"); - AssertThat(spy_input->strings_read(), Equals(vector({" [null, 2" }))); - }); - - it("allows setting input string with length", [&]() { - const char content[] = { '1' }; - ts_document_set_input_string_with_length(document, content, 1); - ts_document_parse(document); - TSNode new_root = ts_document_root_node(document); - AssertThat(ts_node_end_byte(new_root), Equals(1)); - assert_node_string_equals( - new_root, - "(value (number))"); - }); - - it("reads from the new input correctly when the old input was blank", [&]() { - ts_document_set_input_string(document, ""); - ts_document_parse(document); - TSNode new_root = ts_document_root_node(document); - AssertThat(ts_node_end_byte(new_root), Equals(0)); - assert_node_string_equals( - new_root, - "(ERROR)"); - - ts_document_set_input_string(document, "1"); - ts_document_parse(document); - new_root = ts_document_root_node(document); - AssertThat(ts_node_end_byte(new_root), Equals(1)); - 
assert_node_string_equals( - new_root, - "(value (number))"); - }); - }); - - describe("set_language(language)", [&]() { - before_each([&]() { - ts_document_set_input_string(document, "{\"key\": [1, 2]}\n"); - }); - - it("uses the given language for future parses", [&]() { - ts_document_set_language(document, load_real_language("json")); - ts_document_parse(document); - - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (object (pair (string) (array (number) (number)))))"); - }); - - it("clears out any previous tree", [&]() { - ts_document_set_language(document, load_real_language("json")); - ts_document_parse(document); - - ts_document_set_language(document, load_real_language("javascript")); - AssertThat(ts_document_root_node(document).data, Equals(nullptr)); - - ts_document_parse(document); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(program (expression_statement " - "(object (pair (string) (array (number) (number))))))"); - }); - - it("does not allow setting a language with a different version number", [&]() { - TSLanguage language = *load_real_language("json"); - AssertThat(ts_language_version(&language), Equals(TREE_SITTER_LANGUAGE_VERSION)); - - language.version++; - AssertThat(ts_language_version(&language), !Equals(TREE_SITTER_LANGUAGE_VERSION)); - - ts_document_set_language(document, &language); - AssertThat(ts_document_language(document), Equals(nullptr)); - }); - }); - - describe("set_logger(TSLogger)", [&]() { - SpyLogger *logger; - - before_each([&]() { - logger = new SpyLogger(); - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, "[1, 2]"); - }); - - after_each([&]() { - delete logger; - }); - - it("calls the debugger with a message for each parse action", [&]() { - ts_document_set_logger(document, logger->logger()); - ts_document_parse(document); - - AssertThat(logger->messages, Contains("new_parse")); - 
AssertThat(logger->messages, Contains("skip character:' '")); - AssertThat(logger->messages, Contains("consume character:'['")); - AssertThat(logger->messages, Contains("consume character:'1'")); - AssertThat(logger->messages, Contains("reduce sym:array, child_count:4")); - AssertThat(logger->messages, Contains("accept")); - }); - - it("allows the debugger to be retrieved later", [&]() { - ts_document_set_logger(document, logger->logger()); - AssertThat(ts_document_logger(document).payload, Equals(logger)); - }); - - describe("disabling debugging", [&]() { - before_each([&]() { - ts_document_set_logger(document, logger->logger()); - ts_document_set_logger(document, {NULL, NULL}); - }); - - it("does not call the debugger any more", [&]() { - ts_document_parse(document); - AssertThat(logger->messages, IsEmpty()); - }); - }); - }); - - describe("parse_and_get_changed_ranges()", [&]() { - SpyInput *input; - - before_each([&]() { - ts_document_set_language(document, load_real_language("javascript")); - input = new SpyInput("{a: null};\n", 3); - ts_document_set_input(document, input->input()); - ts_document_parse(document); - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object (pair (property_identifier) (null)))))"); - }); - - after_each([&]() { - delete input; - }); - - auto get_invalidated_ranges_for_edit = [&](std::function callback) -> vector { - TSInputEdit edit = callback(); - ts_document_edit(document, edit); - - TSRange *ranges; - uint32_t range_count = 0; - ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); - - vector result; - for (size_t i = 0; i < range_count; i++) { - result.push_back(ranges[i]); - } - ts_free(ranges); - return result; - }; - - it("reports changes when one token has been updated", [&]() { - // Replace `null` with `nothing` - auto ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(input->content.find("ull"), 1, "othing"); - }); - - 
AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find("nothing")), - point(0, input->content.find("}")) - }, - }))); - - // Replace `nothing` with `null` again - ranges = get_invalidated_ranges_for_edit([&]() { - return input->undo(); - }); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find("null")), - point(0, input->content.find("}")) - }, - }))); - }); - - it("reports no changes when leading whitespace has changed (regression)", [&]() { - input->chars_per_chunk = 80; - - // Insert leading whitespace - auto ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(0, 0, "\n"); - }); - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object (pair (property_identifier) (null)))))"); - AssertThat(ranges, Equals(vector({}))); - - // Remove leading whitespace - ranges = get_invalidated_ranges_for_edit([&]() { - return input->undo(); - }); - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object (pair (property_identifier) (null)))))"); - AssertThat(ranges, Equals(vector({}))); - - // Insert leading whitespace again - ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(0, 0, "\n"); - }); - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object (pair (property_identifier) (null)))))"); - AssertThat(ranges, Equals(vector({}))); - }); - - it("reports changes when tokens have been appended", [&]() { - // Add a second key-value pair - auto ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(input->content.find("}"), 0, ", b: false"); - }); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find(",")), - point(0, input->content.find("}")) - }, - }))); - - // Add a third key-value pair in between the first two - ranges = get_invalidated_ranges_for_edit([&]() { - return 
input->replace(input->content.find(", b"), 0, ", c: 1"); - }); - - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object " - "(pair (property_identifier) (null)) " - "(pair (property_identifier) (number)) " - "(pair (property_identifier) (false)))))"); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find(", c")), - point(0, input->content.find(", b")) - }, - }))); - - // Delete the middle pair. - ranges = get_invalidated_ranges_for_edit([&]() { - return input->undo(); - }); - - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object " - "(pair (property_identifier) (null)) " - "(pair (property_identifier) (false)))))"); - - AssertThat(ranges, IsEmpty()); - - // Delete the second pair. - ranges = get_invalidated_ranges_for_edit([&]() { - return input->undo(); - }); - - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object " - "(pair (property_identifier) (null)))))"); - - AssertThat(ranges, IsEmpty()); - }); - - it("reports changes when trees have been wrapped", [&]() { - // Wrap the object in an assignment expression. 
- auto ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(input->content.find("null"), 0, "b === "); - }); - - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object " - "(pair (property_identifier) (binary_expression (identifier) (null))))))"); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find("b ===")), - point(0, input->content.find("}")) - }, - }))); - }); - }); - - describe("parse_with_options(options)", [&]() { - it("halts as soon as an error is found if the halt_on_error flag is set", [&]() { - string input_string = "[1, null, error, 3]"; - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, input_string.c_str()); - - TSParseOptions options = {}; - options.changed_ranges = nullptr; - - options.halt_on_error = false; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (array (number) (null) (ERROR (UNEXPECTED 'e')) (number)))"); - - ts_document_invalidate(document); - - options.halt_on_error = true; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(ERROR (number) (null))"); - - AssertThat(ts_node_end_byte(root), Equals(input_string.size())); - }); - - it("does not insert missing tokens if the halt_on_error flag is set", [&]() { - string input_string = "[1, null, 3"; - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, input_string.c_str()); - - TSParseOptions options = {}; - options.changed_ranges = nullptr; - - options.halt_on_error = false; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (array (number) (null) (number) (MISSING)))"); - - ts_document_invalidate(document); - - 
options.halt_on_error = true; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(ERROR (number) (null) (number))"); - - AssertThat(ts_node_end_byte(root), Equals(input_string.size())); - }); - - it("can parse valid code with the halt_on_error flag set", [&]() { - string input_string = "[1, null, 3]"; - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, input_string.c_str()); - - TSParseOptions options = {}; - options.changed_ranges = nullptr; - options.halt_on_error = true; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (array (number) (null) (number)))"); - }); - }); -}); - -END_TEST diff --git a/test/runtime/language_test.cc b/test/runtime/language_test.cc index 7d9d51cc..747327c0 100644 --- a/test/runtime/language_test.cc +++ b/test/runtime/language_test.cc @@ -28,18 +28,17 @@ describe("Language", []() { } })JSON"); - TSDocument *document = ts_document_new(); + TSParser *parser = ts_parser_new(); const TSLanguage *language = load_test_language("aliased_rules", compile_result); - ts_document_set_language(document, language); - ts_document_set_input_string(document, "b"); - ts_document_parse(document); + ts_parser_set_language(parser, language); + TSTree *tree = ts_parser_parse_string(parser, nullptr, "b", 1); - TSNode root_node = ts_document_root_node(document); - char *string = ts_node_string(root_node, document); + TSNode root_node = ts_tree_root_node(tree); + char *string = ts_node_string(root_node); AssertThat(string, Equals("(a (c))")); TSNode aliased_node = ts_node_child(root_node, 0); - AssertThat(ts_node_type(aliased_node, document), Equals("c")); + AssertThat(ts_node_type(aliased_node), Equals("c")); TSSymbol aliased_symbol = ts_node_symbol(aliased_node); AssertThat(ts_language_symbol_count(language), 
IsGreaterThan(aliased_symbol)); @@ -47,7 +46,8 @@ describe("Language", []() { AssertThat(ts_language_symbol_type(language, aliased_symbol), Equals(TSSymbolTypeRegular)); ts_free(string); - ts_document_free(document); + ts_parser_delete(parser); + ts_tree_delete(tree); }); }); }); diff --git a/test/runtime/node_test.cc b/test/runtime/node_test.cc index ff12169f..4c306897 100644 --- a/test/runtime/node_test.cc +++ b/test/runtime/node_test.cc @@ -62,28 +62,29 @@ string grammar_with_aliases_and_extras = R"JSON({ })JSON"; describe("Node", [&]() { - TSDocument *document; + TSParser *parser; + TSTree *tree; TSNode root_node; before_each([&]() { record_alloc::start(); - document = ts_document_new(); - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, json_string.c_str()); - ts_document_parse(document); - root_node = ts_node_child(ts_document_root_node(document), 0); + parser = ts_parser_new(); + ts_parser_set_language(parser, load_real_language("json")); + tree = ts_parser_parse_string(parser, nullptr, json_string.c_str(), json_string.size()); + root_node = ts_node_child(ts_tree_root_node(tree), 0); }); after_each([&]() { - ts_document_free(document); + ts_parser_delete(parser); + ts_tree_delete(tree); record_alloc::stop(); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); it("parses the example as expected (precondition)", [&]() { - char *node_string = ts_node_string(root_node, document); + char *node_string = ts_node_string(root_node); AssertThat(node_string, Equals( "(array " "(number) " @@ -94,7 +95,7 @@ describe("Node", [&]() { describe("named_child_count(), named_child(i)", [&]() { it("returns the named child node at the given index", [&]() { - AssertThat(ts_node_type(root_node, document), Equals("array")); + AssertThat(ts_node_type(root_node), Equals("array")); AssertThat(ts_node_named_child_count(root_node), Equals(3)); AssertThat(ts_node_start_byte(root_node), Equals(array_index)); @@ 
-106,9 +107,9 @@ describe("Node", [&]() { TSNode false_node = ts_node_named_child(root_node, 1); TSNode object_node = ts_node_named_child(root_node, 2); - AssertThat(ts_node_type(number_node, document), Equals("number")); - AssertThat(ts_node_type(false_node, document), Equals("false")); - AssertThat(ts_node_type(object_node, document), Equals("object")); + AssertThat(ts_node_type(number_node), Equals("number")); + AssertThat(ts_node_type(false_node), Equals("false")); + AssertThat(ts_node_type(object_node), Equals("object")); AssertThat(ts_node_start_byte(number_node), Equals(number_index)); AssertThat(ts_node_end_byte(number_node), Equals(number_end_index)); @@ -128,7 +129,7 @@ describe("Node", [&]() { TSNode pair_node = ts_node_named_child(object_node, 0); - AssertThat(ts_node_type(pair_node, document), Equals("pair")); + AssertThat(ts_node_type(pair_node), Equals("pair")); AssertThat(ts_node_start_byte(pair_node), Equals(string_index)); AssertThat(ts_node_end_byte(pair_node), Equals(null_end_index)); AssertThat(ts_node_start_point(pair_node), Equals({ 6, 4 })); @@ -138,8 +139,8 @@ describe("Node", [&]() { TSNode string_node = ts_node_named_child(pair_node, 0); TSNode null_node = ts_node_named_child(pair_node, 1); - AssertThat(ts_node_type(string_node, document), Equals("string")); - AssertThat(ts_node_type(null_node, document), Equals("null")); + AssertThat(ts_node_type(string_node), Equals("string")); + AssertThat(ts_node_type(null_node), Equals("null")); AssertThat(ts_node_start_byte(string_node), Equals(string_index)); AssertThat(ts_node_end_byte(string_node), Equals(string_end_index)); @@ -157,29 +158,33 @@ describe("Node", [&]() { AssertThat(ts_node_parent(number_node), Equals(root_node)); AssertThat(ts_node_parent(false_node), Equals(root_node)); AssertThat(ts_node_parent(object_node), Equals(root_node)); - AssertThat(ts_node_parent(ts_document_root_node(document)).data, Equals(nullptr)); + AssertThat(ts_node_parent(ts_tree_root_node(tree)).subtree, 
Equals(nullptr)); }); it("works correctly when the node contains aliased children and extras", [&]() { TSCompileResult compile_result = ts_compile_grammar(grammar_with_aliases_and_extras.c_str()); const TSLanguage *language = load_test_language("aliases_and_extras", compile_result); - ts_document_set_language(document, language); - ts_document_set_input_string(document, "b ... b ... b"); - ts_document_parse(document); - root_node = ts_document_root_node(document); + ts_parser_set_language(parser, language); - char *node_string = ts_node_string(root_node, document); + ts_tree_delete(tree); + tree = ts_parser_parse_string(parser, nullptr, "b ... b ... b", 13); + root_node = ts_tree_root_node(tree); + + char *node_string = ts_node_string(root_node); AssertThat(node_string, Equals("(a (b) (comment) (B) (comment) (b))")); ts_free(node_string); AssertThat(ts_node_named_child_count(root_node), Equals(5u)); - AssertThat(ts_node_type(ts_node_named_child(root_node, 0), document), Equals("b")); - AssertThat(ts_node_type(ts_node_named_child(root_node, 1), document), Equals("comment")); - AssertThat(ts_node_type(ts_node_named_child(root_node, 2), document), Equals("B")); - AssertThat(ts_node_type(ts_node_named_child(root_node, 3), document), Equals("comment")); - AssertThat(ts_node_type(ts_node_named_child(root_node, 4), document), Equals("b")); + AssertThat(ts_node_type(ts_node_named_child(root_node, 0)), Equals("b")); + AssertThat(ts_node_type(ts_node_named_child(root_node, 1)), Equals("comment")); + AssertThat(ts_node_type(ts_node_named_child(root_node, 2)), Equals("B")); + AssertThat(ts_node_type(ts_node_named_child(root_node, 3)), Equals("comment")); + AssertThat(ts_node_type(ts_node_named_child(root_node, 4)), Equals("b")); - AssertThat(ts_node_symbol(ts_node_named_child(root_node, 0)), !Equals(ts_node_symbol(ts_node_named_child(root_node, 2)))); + AssertThat( + ts_node_symbol(ts_node_named_child(root_node, 0)), + !Equals(ts_node_symbol(ts_node_named_child(root_node, 2))) 
+ ); }); }); @@ -188,29 +193,29 @@ describe("Node", [&]() { TSNode child; child = ts_node_first_child_for_byte(root_node, array_index); - AssertThat(ts_node_type(child, document), Equals("[")); + AssertThat(ts_node_type(child), Equals("[")); child = ts_node_first_child_for_byte(root_node, number_index); - AssertThat(ts_node_type(child, document), Equals("number")); + AssertThat(ts_node_type(child), Equals("number")); child = ts_node_first_child_for_byte(root_node, number_end_index); - AssertThat(ts_node_type(child, document), Equals(",")); + AssertThat(ts_node_type(child), Equals(",")); child = ts_node_first_child_for_byte(root_node, number_end_index + 1); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_child_for_byte(root_node, false_index - 1); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_child_for_byte(root_node, false_index); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_child_for_byte(root_node, false_index + 1); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_child_for_byte(root_node, false_end_index); - AssertThat(ts_node_type(child, document), Equals(",")); + AssertThat(ts_node_type(child), Equals(",")); child = ts_node_first_child_for_byte(root_node, false_end_index); - AssertThat(ts_node_type(child, document), Equals(",")); + AssertThat(ts_node_type(child), Equals(",")); child = ts_node_first_child_for_byte(root_node, object_index); - AssertThat(ts_node_type(child, document), Equals("object")); + AssertThat(ts_node_type(child), Equals("object")); child = ts_node_first_child_for_byte(root_node, object_index + 1); - AssertThat(ts_node_type(child, document), Equals("object")); + 
AssertThat(ts_node_type(child), Equals("object")); child = ts_node_first_child_for_byte(root_node, object_end_index); - AssertThat(ts_node_type(child, document), Equals("]")); + AssertThat(ts_node_type(child), Equals("]")); }); }); @@ -219,39 +224,27 @@ describe("Node", [&]() { TSNode child; child = ts_node_first_named_child_for_byte(root_node, array_index); - AssertThat(ts_node_type(child, document), Equals("number")); + AssertThat(ts_node_type(child), Equals("number")); child = ts_node_first_named_child_for_byte(root_node, number_index); - AssertThat(ts_node_type(child, document), Equals("number")); + AssertThat(ts_node_type(child), Equals("number")); child = ts_node_first_named_child_for_byte(root_node, number_end_index); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_named_child_for_byte(root_node, number_end_index + 1); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_named_child_for_byte(root_node, false_index - 1); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_named_child_for_byte(root_node, false_index); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_named_child_for_byte(root_node, false_index + 1); - AssertThat(ts_node_type(child, document), Equals("false")); + AssertThat(ts_node_type(child), Equals("false")); child = ts_node_first_named_child_for_byte(root_node, false_end_index); - AssertThat(ts_node_type(child, document), Equals("object")); + AssertThat(ts_node_type(child), Equals("object")); child = ts_node_first_named_child_for_byte(root_node, object_index); - AssertThat(ts_node_type(child, document), Equals("object")); + AssertThat(ts_node_type(child), Equals("object")); child = 
ts_node_first_named_child_for_byte(root_node, object_index + 1); - AssertThat(ts_node_type(child, document), Equals("object")); + AssertThat(ts_node_type(child), Equals("object")); child = ts_node_first_named_child_for_byte(root_node, object_end_index); - AssertThat(child.data, Equals(nullptr)); - }); - }); - - describe("child_index()", [&]() { - it("returns the index of the node within its parent", [&]() { - AssertThat(ts_node_child_index(ts_node_child(root_node, 0)), Equals(0u)); - AssertThat(ts_node_child_index(ts_node_child(root_node, 1)), Equals(1u)); - AssertThat(ts_node_child_index(ts_node_child(root_node, 2)), Equals(2u)); - AssertThat(ts_node_child_index(ts_node_child(root_node, 3)), Equals(3u)); - AssertThat(ts_node_child_index(ts_node_child(root_node, 4)), Equals(4u)); - AssertThat(ts_node_child_index(ts_node_child(root_node, 5)), Equals(5u)); - AssertThat(ts_node_child_index(ts_node_child(root_node, 6)), Equals(6u)); + AssertThat(child.subtree, Equals(nullptr)); }); }); @@ -266,14 +259,14 @@ describe("Node", [&]() { TSNode child6 = ts_node_child(root_node, 5); TSNode child7 = ts_node_child(root_node, 6); - AssertThat(ts_node_type(root_node, document), Equals("array")); - AssertThat(ts_node_type(child1, document), Equals("[")); - AssertThat(ts_node_type(child2, document), Equals("number")); - AssertThat(ts_node_type(child3, document), Equals(",")); - AssertThat(ts_node_type(child4, document), Equals("false")); - AssertThat(ts_node_type(child5, document), Equals(",")); - AssertThat(ts_node_type(child6, document), Equals("object")); - AssertThat(ts_node_type(child7, document), Equals("]")); + AssertThat(ts_node_type(root_node), Equals("array")); + AssertThat(ts_node_type(child1), Equals("[")); + AssertThat(ts_node_type(child2), Equals("number")); + AssertThat(ts_node_type(child3), Equals(",")); + AssertThat(ts_node_type(child4), Equals("false")); + AssertThat(ts_node_type(child5), Equals(",")); + AssertThat(ts_node_type(child6), Equals("object")); + 
AssertThat(ts_node_type(child7), Equals("]")); AssertThat(ts_node_is_named(root_node), IsTrue()); AssertThat(ts_node_is_named(child1), IsFalse()); @@ -314,13 +307,13 @@ describe("Node", [&]() { TSNode grandchild3 = ts_node_child(pair, 1); TSNode grandchild4 = ts_node_child(pair, 2); - AssertThat(ts_node_type(left_brace, document), Equals("{")); - AssertThat(ts_node_type(pair, document), Equals("pair")); - AssertThat(ts_node_type(right_brace, document), Equals("}")); + AssertThat(ts_node_type(left_brace), Equals("{")); + AssertThat(ts_node_type(pair), Equals("pair")); + AssertThat(ts_node_type(right_brace), Equals("}")); - AssertThat(ts_node_type(grandchild2, document), Equals("string")); - AssertThat(ts_node_type(grandchild3, document), Equals(":")); - AssertThat(ts_node_type(grandchild4, document), Equals("null")); + AssertThat(ts_node_type(grandchild2), Equals("string")); + AssertThat(ts_node_type(grandchild3), Equals(":")); + AssertThat(ts_node_type(grandchild4), Equals("null")); AssertThat(ts_node_parent(grandchild2), Equals(pair)); AssertThat(ts_node_parent(grandchild3), Equals(pair)); @@ -335,7 +328,7 @@ describe("Node", [&]() { AssertThat(ts_node_parent(child5), Equals(root_node)); AssertThat(ts_node_parent(child6), Equals(root_node)); AssertThat(ts_node_parent(child7), Equals(root_node)); - AssertThat(ts_node_parent(ts_document_root_node(document)).data, Equals(nullptr)); + AssertThat(ts_node_parent(ts_tree_root_node(tree)).subtree, Equals(nullptr)); }); }); @@ -355,15 +348,16 @@ describe("Node", [&]() { TSNode brace_node2 = ts_node_child(object_node, 2); TSNode bracket_node2 = ts_node_child(root_node, 6); + AssertThat(ts_node_parent(bracket_node1), Equals(root_node)); AssertThat(ts_node_next_sibling(bracket_node1), Equals(number_node)); AssertThat(ts_node_next_sibling(number_node), Equals(array_comma_node1)); AssertThat(ts_node_next_sibling(array_comma_node1), Equals(false_node)); AssertThat(ts_node_next_sibling(false_node), Equals(array_comma_node2)); 
AssertThat(ts_node_next_sibling(array_comma_node2), Equals(object_node)); AssertThat(ts_node_next_sibling(object_node), Equals(bracket_node2)); - AssertThat(ts_node_next_sibling(bracket_node2).data, Equals(nullptr)); + AssertThat(ts_node_next_sibling(bracket_node2).subtree, Equals(nullptr)); - AssertThat(ts_node_prev_sibling(bracket_node1).data, Equals(nullptr)); + AssertThat(ts_node_prev_sibling(bracket_node1).subtree, Equals(nullptr)); AssertThat(ts_node_prev_sibling(number_node), Equals(bracket_node1)); AssertThat(ts_node_prev_sibling(array_comma_node1), Equals(number_node)); AssertThat(ts_node_prev_sibling(false_node), Equals(array_comma_node1)); @@ -373,24 +367,24 @@ describe("Node", [&]() { AssertThat(ts_node_next_sibling(brace_node1), Equals(pair_node)); AssertThat(ts_node_next_sibling(pair_node), Equals(brace_node2)); - AssertThat(ts_node_next_sibling(brace_node2).data, Equals(nullptr)); + AssertThat(ts_node_next_sibling(brace_node2).subtree, Equals(nullptr)); - AssertThat(ts_node_prev_sibling(brace_node1).data, Equals(nullptr)); + AssertThat(ts_node_prev_sibling(brace_node1).subtree, Equals(nullptr)); AssertThat(ts_node_prev_sibling(pair_node), Equals(brace_node1)); AssertThat(ts_node_prev_sibling(brace_node2), Equals(pair_node)); AssertThat(ts_node_next_sibling(string_node), Equals(colon_node)); AssertThat(ts_node_next_sibling(colon_node), Equals(null_node)); - AssertThat(ts_node_next_sibling(null_node).data, Equals(nullptr)); + AssertThat(ts_node_next_sibling(null_node).subtree, Equals(nullptr)); - AssertThat(ts_node_prev_sibling(string_node).data, Equals(nullptr)); + AssertThat(ts_node_prev_sibling(string_node).subtree, Equals(nullptr)); AssertThat(ts_node_prev_sibling(colon_node), Equals(string_node)); AssertThat(ts_node_prev_sibling(null_node), Equals(colon_node)); }); it("returns null when the node has no parent", [&]() { - AssertThat(ts_node_next_named_sibling(root_node).data, Equals(nullptr)); - 
AssertThat(ts_node_prev_named_sibling(root_node).data, Equals(nullptr)); + AssertThat(ts_node_next_named_sibling(root_node).subtree, Equals(nullptr)); + AssertThat(ts_node_prev_named_sibling(root_node).subtree, Equals(nullptr)); }); }); @@ -412,8 +406,8 @@ describe("Node", [&]() { }); it("returns null when the node has no parent", [&]() { - AssertThat(ts_node_next_named_sibling(root_node).data, Equals(nullptr)); - AssertThat(ts_node_prev_named_sibling(root_node).data, Equals(nullptr)); + AssertThat(ts_node_next_named_sibling(root_node).subtree, Equals(nullptr)); + AssertThat(ts_node_prev_named_sibling(root_node).subtree, Equals(nullptr)); }); }); @@ -421,14 +415,14 @@ describe("Node", [&]() { describe("when there is a leaf node that spans the given range exactly", [&]() { it("returns that leaf node", [&]() { TSNode leaf = ts_node_named_descendant_for_byte_range(root_node, string_index, string_end_index - 1); - AssertThat(ts_node_type(leaf, document), Equals("string")); + AssertThat(ts_node_type(leaf), Equals("string")); AssertThat(ts_node_start_byte(leaf), Equals(string_index)); AssertThat(ts_node_end_byte(leaf), Equals(string_end_index)); AssertThat(ts_node_start_point(leaf), Equals({ 6, 4 })); AssertThat(ts_node_end_point(leaf), Equals({ 6, 7 })); leaf = ts_node_named_descendant_for_byte_range(root_node, number_index, number_end_index - 1); - AssertThat(ts_node_type(leaf, document), Equals("number")); + AssertThat(ts_node_type(leaf), Equals("number")); AssertThat(ts_node_start_byte(leaf), Equals(number_index)); AssertThat(ts_node_end_byte(leaf), Equals(number_end_index)); AssertThat(ts_node_start_point(leaf), Equals({ 3, 2 })); @@ -439,14 +433,14 @@ describe("Node", [&]() { describe("when there is a leaf node that extends beyond the given range", [&]() { it("returns that leaf node", [&]() { TSNode leaf = ts_node_named_descendant_for_byte_range(root_node, string_index, string_index + 1); - AssertThat(ts_node_type(leaf, document), Equals("string")); + 
AssertThat(ts_node_type(leaf), Equals("string")); AssertThat(ts_node_start_byte(leaf), Equals(string_index)); AssertThat(ts_node_end_byte(leaf), Equals(string_end_index)); AssertThat(ts_node_start_point(leaf), Equals({ 6, 4 })); AssertThat(ts_node_end_point(leaf), Equals({ 6, 7 })); leaf = ts_node_named_descendant_for_byte_range(root_node, string_index + 1, string_index + 2); - AssertThat(ts_node_type(leaf, document), Equals("string")); + AssertThat(ts_node_type(leaf), Equals("string")); AssertThat(ts_node_start_byte(leaf), Equals(string_index)); AssertThat(ts_node_end_byte(leaf), Equals(string_end_index)); AssertThat(ts_node_start_point(leaf), Equals({ 6, 4 })); @@ -457,7 +451,7 @@ describe("Node", [&]() { describe("when there is no leaf node that spans the given range", [&]() { it("returns the smallest node that does span the range", [&]() { TSNode pair_node = ts_node_named_descendant_for_byte_range(root_node, string_index, string_index + 3); - AssertThat(ts_node_type(pair_node, document), Equals("pair")); + AssertThat(ts_node_type(pair_node), Equals("pair")); AssertThat(ts_node_start_byte(pair_node), Equals(string_index)); AssertThat(ts_node_end_byte(pair_node), Equals(null_end_index)); AssertThat(ts_node_start_point(pair_node), Equals({ 6, 4 })); @@ -466,7 +460,7 @@ describe("Node", [&]() { it("does not return invisible nodes (repeats)", [&]() { TSNode node = ts_node_named_descendant_for_byte_range(root_node, number_end_index, number_end_index + 1); - AssertThat(ts_node_type(node, document), Equals("array")); + AssertThat(ts_node_type(node), Equals("array")); AssertThat(ts_node_start_byte(node), Equals(array_index)); AssertThat(ts_node_end_byte(node), Equals(array_end_index)); AssertThat(ts_node_start_point(node), Equals({ 2, 0 })); @@ -478,14 +472,14 @@ describe("Node", [&]() { describe("descendant_for_byte_range(start, end)", [&]() { it("returns the smallest node that spans the given byte offsets", [&]() { TSNode node1 = 
ts_node_descendant_for_byte_range(root_node, colon_index, colon_index); - AssertThat(ts_node_type(node1, document), Equals(":")); + AssertThat(ts_node_type(node1), Equals(":")); AssertThat(ts_node_start_byte(node1), Equals(colon_index)); AssertThat(ts_node_end_byte(node1), Equals(colon_index + 1)); AssertThat(ts_node_start_point(node1), Equals({ 6, 7 })); AssertThat(ts_node_end_point(node1), Equals({ 6, 8 })); TSNode node2 = ts_node_descendant_for_byte_range(root_node, string_index + 2, string_index + 4); - AssertThat(ts_node_type(node2, document), Equals("pair")); + AssertThat(ts_node_type(node2), Equals("pair")); AssertThat(ts_node_start_byte(node2), Equals(string_index)); AssertThat(ts_node_end_byte(node2), Equals(null_end_index)); AssertThat(ts_node_start_point(node2), Equals({ 6, 4 })); @@ -494,16 +488,17 @@ describe("Node", [&]() { it("works in the presence of multi-byte characters", [&]() { string input_string = "[\"αβγδ\", \"αβγδ\"]"; - ts_document_set_input_string(document, input_string.c_str()); - ts_document_parse(document); - TSNode root_node = ts_document_root_node(document); + + ts_tree_delete(tree); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + TSNode root_node = ts_tree_root_node(tree); uint32_t comma_position = input_string.find(","); TSNode node1 = ts_node_descendant_for_byte_range(root_node, comma_position, comma_position); - AssertThat(ts_node_type(node1, document), Equals(",")); + AssertThat(ts_node_type(node1), Equals(",")); TSNode node2 = ts_node_descendant_for_byte_range(root_node, 6, 10); - AssertThat(ts_node_type(node2, document), Equals("string")); + AssertThat(ts_node_type(node2), Equals("string")); AssertThat(ts_node_start_byte(node2), Equals(1)); AssertThat(ts_node_end_byte(node2), Equals(11)); }); @@ -512,14 +507,14 @@ describe("Node", [&]() { describe("descendant_for_point_range(start, end)", [&]() { it("returns the smallest concrete node that spans the given range", [&]() { TSNode 
node1 = ts_node_descendant_for_point_range(root_node, {6, 7}, {6, 7}); - AssertThat(ts_node_type(node1, document), Equals(":")); + AssertThat(ts_node_type(node1), Equals(":")); AssertThat(ts_node_start_byte(node1), Equals(colon_index)); AssertThat(ts_node_end_byte(node1), Equals(colon_index + 1)); AssertThat(ts_node_start_point(node1), Equals({ 6, 7 })); AssertThat(ts_node_end_point(node1), Equals({ 6, 8 })); TSNode node2 = ts_node_descendant_for_point_range(root_node, {6, 6}, {6, 8}); - AssertThat(ts_node_type(node2, document), Equals("pair")); + AssertThat(ts_node_type(node2), Equals("pair")); AssertThat(ts_node_start_byte(node2), Equals(string_index)); AssertThat(ts_node_end_byte(node2), Equals(null_end_index)); AssertThat(ts_node_start_point(node2), Equals({ 6, 4 })); @@ -528,4 +523,132 @@ describe("Node", [&]() { }); }); +describe("TreeCursor", [&]() { + TSParser *parser; + TSTree *tree; + TSTreeCursor *cursor; + + before_each([&]() { + record_alloc::start(); + + parser = ts_parser_new(); + ts_parser_set_language(parser, load_real_language("json")); + tree = ts_parser_parse_string(parser, nullptr, json_string.c_str(), json_string.size()); + cursor = ts_tree_cursor_new(tree); + }); + + after_each([&]() { + ts_tree_delete(tree); + ts_tree_cursor_delete(cursor); + ts_parser_delete(parser); + + record_alloc::stop(); + AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); + }); + + it("can walk the tree", [&]() { + TSNode node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("value")); + AssertThat(ts_node_start_byte(node), Equals(array_index)); + + AssertThat(ts_tree_cursor_goto_first_child(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("array")); + AssertThat(ts_node_start_byte(node), Equals(array_index)); + + AssertThat(ts_tree_cursor_goto_first_child(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), 
Equals("[")); + AssertThat(ts_node_start_byte(node), Equals(array_index)); + + // Cannot descend into a node with no children + AssertThat(ts_tree_cursor_goto_first_child(cursor), IsFalse()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("[")); + AssertThat(ts_node_start_byte(node), Equals(array_index)); + + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("number")); + AssertThat(ts_node_start_byte(node), Equals(number_index)); + + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals(",")); + AssertThat(ts_node_start_byte(node), Equals(number_end_index)); + + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("false")); + AssertThat(ts_node_start_byte(node), Equals(false_index)); + + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals(",")); + AssertThat(ts_node_start_byte(node), Equals(false_end_index)); + + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("object")); + AssertThat(ts_node_start_byte(node), Equals(object_index)); + + AssertThat(ts_tree_cursor_goto_first_child(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("{")); + AssertThat(ts_node_start_byte(node), Equals(object_index)); + + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("pair")); + AssertThat(ts_node_start_byte(node), Equals(string_index)); + + AssertThat(ts_tree_cursor_goto_first_child(cursor), IsTrue()); + 
node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("string")); + AssertThat(ts_node_start_byte(node), Equals(string_index)); + + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals(":")); + AssertThat(ts_node_start_byte(node), Equals(string_end_index)); + + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("null")); + AssertThat(ts_node_start_byte(node), Equals(null_index)); + + // Cannot move beyond a node with no next sibling + AssertThat(ts_tree_cursor_goto_next_sibling(cursor), IsFalse()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("null")); + AssertThat(ts_node_start_byte(node), Equals(null_index)); + + AssertThat(ts_tree_cursor_goto_parent(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("pair")); + AssertThat(ts_node_start_byte(node), Equals(string_index)); + + AssertThat(ts_tree_cursor_goto_parent(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("object")); + AssertThat(ts_node_start_byte(node), Equals(object_index)); + + AssertThat(ts_tree_cursor_goto_parent(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("array")); + AssertThat(ts_node_start_byte(node), Equals(array_index)); + + AssertThat(ts_tree_cursor_goto_parent(cursor), IsTrue()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("value")); + AssertThat(ts_node_start_byte(node), Equals(array_index)); + + // The root node doesn't have a parent. 
+ AssertThat(ts_tree_cursor_goto_parent(cursor), IsFalse()); + node = ts_tree_cursor_current_node(cursor); + AssertThat(ts_node_type(node), Equals("value")); + AssertThat(ts_node_start_byte(node), Equals(array_index)); + }); +}); + END_TEST diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 4481ece7..7d0b2d1d 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -1,17 +1,20 @@ #include "test_helper.h" #include "runtime/alloc.h" +#include "runtime/language.h" #include "helpers/record_alloc.h" #include "helpers/spy_input.h" #include "helpers/load_language.h" #include "helpers/record_alloc.h" #include "helpers/point_helpers.h" +#include "helpers/spy_logger.h" #include "helpers/stderr_logger.h" #include "helpers/dedent.h" START_TEST describe("Parser", [&]() { - TSDocument *document; + TSParser *parser; + TSTree *tree; SpyInput *input; TSNode root; size_t chunk_size; @@ -21,14 +24,16 @@ describe("Parser", [&]() { chunk_size = 3; input = nullptr; - document = ts_document_new(); + tree = nullptr; + parser = ts_parser_new(); if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { - ts_document_print_debugging_graphs(document, true); + ts_parser_print_dot_graphs(parser, stderr); } }); after_each([&]() { - if (document) ts_document_free(document); + if (parser) ts_parser_delete(parser); + if (tree) ts_tree_delete(tree); if (input) delete input; record_alloc::stop(); @@ -37,10 +42,8 @@ describe("Parser", [&]() { auto set_text = [&](string text) { input = new SpyInput(text, chunk_size); - ts_document_set_input(document, input->input()); - ts_document_parse(document); - - root = ts_document_root_node(document); + tree = ts_parser_parse(parser, nullptr, input->input()); + root = ts_tree_root_node(tree); AssertThat(ts_node_end_byte(root), Equals(text.size())); input->clear(); }; @@ -48,10 +51,13 @@ describe("Parser", [&]() { auto replace_text = [&](size_t position, size_t length, string new_text) { size_t prev_size = 
ts_node_end_byte(root); - ts_document_edit(document, input->replace(position, length, new_text)); - ts_document_parse(document); + TSInputEdit edit = input->replace(position, length, new_text); + ts_tree_edit(tree, &edit); + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + ts_tree_delete(tree); + tree = new_tree; - root = ts_document_root_node(document); + root = ts_tree_root_node(tree); size_t new_size = ts_node_end_byte(root); AssertThat(new_size, Equals(prev_size - length + new_text.size())); }; @@ -65,13 +71,16 @@ describe("Parser", [&]() { }; auto undo = [&]() { - ts_document_edit(document, input->undo()); - ts_document_parse(document); + TSInputEdit edit = input->undo(); + ts_tree_edit(tree, &edit); + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + ts_tree_delete(tree); + tree = new_tree; }; auto assert_root_node = [&](const string &expected) { - TSNode node = ts_document_root_node(document); - char *node_string = ts_node_string(node, document); + TSNode node = ts_tree_root_node(tree); + char *node_string = ts_node_string(node); string actual(node_string); ts_free(node_string); AssertThat(actual, Equals(expected)); @@ -86,14 +95,12 @@ describe("Parser", [&]() { describe("handling errors", [&]() { describe("when there is an invalid substring right before a valid token", [&]() { it("computes the error node's size and position correctly", [&]() { - ts_document_set_language(document, load_real_language("json")); + ts_parser_set_language(parser, load_real_language("json")); set_text(" [123, @@@@@, true]"); - - assert_root_node( - "(value (array (number) (ERROR (UNEXPECTED '@')) (true)))"); + assert_root_node("(value (array (number) (ERROR (UNEXPECTED '@')) (true)))"); TSNode error = ts_node_named_child(ts_node_child(root, 0), 1); - AssertThat(ts_node_type(error, document), Equals("ERROR")); + AssertThat(ts_node_type(error), Equals("ERROR")); AssertThat(get_node_text(error), Equals("@@@@@,")); 
AssertThat(ts_node_child_count(error), Equals(2)); @@ -104,79 +111,75 @@ describe("Parser", [&]() { AssertThat(get_node_text(comma), Equals(",")); TSNode node_after_error = ts_node_next_named_sibling(error); - AssertThat(ts_node_type(node_after_error, document), Equals("true")); + AssertThat(ts_node_type(node_after_error), Equals("true")); AssertThat(get_node_text(node_after_error), Equals("true")); }); }); describe("when there is an unexpected string in the middle of a token", [&]() { it("computes the error node's size and position correctly", [&]() { - ts_document_set_language(document, load_real_language("json")); + ts_parser_set_language(parser, load_real_language("json")); set_text(" [123, faaaaalse, true]"); assert_root_node( "(value (array (number) (ERROR (UNEXPECTED 'a')) (true)))"); TSNode error = ts_node_named_child(ts_node_child(root, 0), 1); - AssertThat(ts_node_type(error, document), Equals("ERROR")); + AssertThat(ts_node_type(error), Equals("ERROR")); AssertThat(get_node_text(error), Equals("faaaaalse,")); AssertThat(ts_node_child_count(error), Equals(2)); TSNode garbage = ts_node_child(error, 0); - AssertThat(ts_node_type(garbage, document), Equals("ERROR")); + AssertThat(ts_node_type(garbage), Equals("ERROR")); AssertThat(get_node_text(garbage), Equals("faaaaalse")); TSNode comma = ts_node_child(error, 1); - AssertThat(ts_node_type(comma, document), Equals(",")); + AssertThat(ts_node_type(comma), Equals(",")); AssertThat(get_node_text(comma), Equals(",")); TSNode last = ts_node_next_named_sibling(error); - AssertThat(ts_node_type(last, document), Equals("true")); + AssertThat(ts_node_type(last), Equals("true")); AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, faaaaalse, "))); }); }); describe("when there is one unexpected token between two valid tokens", [&]() { it("computes the error node's size and position correctly", [&]() { - ts_document_set_language(document, load_real_language("json")); + ts_parser_set_language(parser, 
load_real_language("json")); set_text(" [123, true false, true]"); - assert_root_node( - "(value (array (number) (true) (ERROR (false)) (true)))"); + assert_root_node("(value (array (number) (true) (ERROR (false)) (true)))"); TSNode error = ts_node_named_child(ts_node_child(root, 0), 2); - AssertThat(ts_node_type(error, document), Equals("ERROR")); + AssertThat(ts_node_type(error), Equals("ERROR")); AssertThat(get_node_text(error), Equals("false")); AssertThat(ts_node_child_count(error), Equals(1)); TSNode last = ts_node_next_named_sibling(error); - AssertThat(ts_node_type(last, document), Equals("true")); + AssertThat(ts_node_type(last), Equals("true")); AssertThat(get_node_text(last), Equals("true")); }); }); describe("when there is an unexpected string at the end of a token", [&]() { it("computes the error's size and position correctly", [&]() { - ts_document_set_language(document, load_real_language("json")); + ts_parser_set_language(parser, load_real_language("json")); set_text(" [123, \"hi\n, true]"); - - assert_root_node( - "(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))"); + assert_root_node("(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))"); }); }); describe("when there is an unterminated error", [&]() { it("maintains a consistent tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("a; ' this string never ends"); - assert_root_node( - "(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))"); + assert_root_node("(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))"); }); }); describe("when there are extra tokens at the end of the viable prefix", [&]() { it("does not include them in the error node", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text( "var x;\n" "\n" @@ -186,7 +189,7 @@ 
describe("Parser", [&]() { ); TSNode error = ts_node_named_child(root, 1); - AssertThat(ts_node_type(error, document), Equals("ERROR")); + AssertThat(ts_node_type(error), Equals("ERROR")); AssertThat(ts_node_start_point(error), Equals({2, 0})); AssertThat(ts_node_end_point(error), Equals({2, 2})); }); @@ -196,59 +199,56 @@ describe("Parser", [&]() { char *string = (char *)malloc(1); string[0] = '\xdf'; - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string_with_length(document, string, 1); - ts_document_parse(document); + ts_parser_set_language(parser, load_real_language("json")); + tree = ts_parser_parse_string(parser, nullptr, string, 1); free(string); - assert_root_node("(ERROR (UNEXPECTED INVALID))"); }); - }); - describe("handling extra tokens", [&]() { - describe("when the token appears as part of a grammar rule", [&]() { - it("incorporates it into the tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); - set_text("fn()\n"); + describe("when halt_on_error is set to true", [&]() { + it("halts as soon as an error is found if the halt_on_error flag is set", [&]() { + string input_string = "[1, null, error, 3]"; + ts_parser_set_language(parser, load_real_language("json")); - assert_root_node( - "(program (expression_statement (call_expression (identifier) (arguments))))"); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(value (array (number) (null) (ERROR (UNEXPECTED 'e')) (number)))"); + + ts_parser_halt_on_error(parser, true); + + ts_tree_delete(tree); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(ERROR (number) (null))"); + AssertThat(ts_node_end_byte(root), Equals(input_string.size())); }); - }); - describe("when the token appears somewhere else", [&]() { - it("incorporates it into the 
tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); - set_text( - "fn()\n" - " .otherFn();"); + it("does not insert missing tokens if the halt_on_error flag is set", [&]() { + string input_string = "[1, null, 3"; + ts_parser_set_language(parser, load_real_language("json")); - assert_root_node( - "(program (expression_statement (call_expression " - "(member_expression " - "(call_expression (identifier) (arguments)) " - "(property_identifier)) " - "(arguments))))"); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(value (array (number) (null) (number) (MISSING)))"); + + ts_parser_halt_on_error(parser, true); + + ts_tree_delete(tree); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(ERROR (number) (null) (number))"); + AssertThat(ts_node_end_byte(root), Equals(input_string.size())); }); - }); - describe("when several extra tokens appear in a row", [&]() { - it("incorporates them into the tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); - set_text( - "fn()\n\n" - "// This is a comment" - "\n\n" - ".otherFn();"); + it("can parse valid code with the halt_on_error flag set", [&]() { + string input_string = "[1, null, 3]"; + ts_parser_set_language(parser, load_real_language("json")); - assert_root_node( - "(program (expression_statement (call_expression " - "(member_expression " - "(call_expression (identifier) (arguments)) " - "(comment) " - "(property_identifier)) " - "(arguments))))"); + ts_parser_halt_on_error(parser, true); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(value (array (number) (null) (number)))"); }); }); }); @@ -256,7 +256,7 @@ describe("Parser", [&]() { describe("editing", [&]() { 
describe("creating new tokens near the end of the input", [&]() { it("updates the parse tree and re-reads only the changed portion of the text", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("x * (100 + abc);"); assert_root_node( @@ -289,7 +289,7 @@ describe("Parser", [&]() { it("updates the parse tree and re-reads only the changed portion of the input", [&]() { chunk_size = 2; - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("123 + 456 * (10 + x);"); assert_root_node( @@ -315,7 +315,7 @@ describe("Parser", [&]() { describe("introducing an error", [&]() { it("gives the error the right size", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("var x = y;"); assert_root_node( @@ -338,7 +338,7 @@ describe("Parser", [&]() { describe("into the middle of an existing token", [&]() { it("updates the parse tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("abc * 123;"); assert_root_node( @@ -350,14 +350,14 @@ describe("Parser", [&]() { "(program (expression_statement (binary_expression (identifier) (number))))"); TSNode node = ts_node_named_descendant_for_byte_range(root, 1, 1); - AssertThat(ts_node_type(node, document), Equals("identifier")); + AssertThat(ts_node_type(node), Equals("identifier")); AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc"))); }); }); describe("at the end of an existing token", [&]() { it("updates the parse tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("abc * 123;"); assert_root_node( @@ -369,14 +369,14 
@@ describe("Parser", [&]() { "(program (expression_statement (binary_expression (identifier) (number))))"); TSNode node = ts_node_named_descendant_for_byte_range(root, 1, 1); - AssertThat(ts_node_type(node, document), Equals("identifier")); + AssertThat(ts_node_type(node), Equals("identifier")); AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ"))); }); }); describe("inserting text into a node containing a extra token", [&]() { it("updates the parse tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("123 *\n" "// a-comment\n" "abc;"); @@ -403,7 +403,7 @@ describe("Parser", [&]() { describe("when a critical token is removed", [&]() { it("updates the parse tree, creating an error", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("123 * 456; 789 * 123;"); assert_root_node( @@ -423,7 +423,7 @@ describe("Parser", [&]() { describe("with external tokens", [&]() { it("maintains the external scanner's state during incremental parsing", [&]() { - ts_document_set_language(document, load_real_language("python")); + ts_parser_set_language(parser, load_real_language("python")); string text = dedent(R"PYTHON( if a: print b @@ -451,7 +451,7 @@ describe("Parser", [&]() { }); it("does not try to reuse nodes that are within the edited region", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("{ x: (b.c) };"); assert_root_node( @@ -464,23 +464,12 @@ describe("Parser", [&]() { "(program (expression_statement (object (pair " "(property_identifier) (member_expression (identifier) (property_identifier))))))"); }); - - it("updates the document's parse count", [&]() { - ts_document_set_language(document, load_real_language("javascript")); - 
AssertThat(ts_document_parse_count(document), Equals(0)); - - set_text("{ x: (b.c) };"); - AssertThat(ts_document_parse_count(document), Equals(1)); - - insert_text(strlen("{ x"), "yz"); - AssertThat(ts_document_parse_count(document), Equals(2)); - }); }); describe("lexing", [&]() { describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() { - it("terminates them at the end of the document", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + it("terminates them at the end of the string", [&]() { + ts_parser_set_language(parser, load_real_language("javascript")); set_text("x; // this is a comment"); assert_root_node( @@ -495,7 +484,7 @@ describe("Parser", [&]() { it("recognizes UTF8 characters as single characters", [&]() { // 'ΩΩΩ — ΔΔ'; - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';"); assert_root_node( @@ -507,14 +496,120 @@ describe("Parser", [&]() { it("handles non-UTF8 characters", [&]() { const char *string = "cons\xeb\x00e=ls\x83l6hi');\x0a"; - ts_document_set_language(document, load_real_language("javascript")); - ts_document_set_input_string(document, string); - ts_document_parse(document); - - TSNode root = ts_document_root_node(document); + ts_parser_set_language(parser, load_real_language("javascript")); + tree = ts_parser_parse_string(parser, nullptr, string, strlen(string)); + TSNode root = ts_tree_root_node(tree); AssertThat(ts_node_end_byte(root), Equals(strlen(string))); }); }); + + describe("handling TSInputs", [&]() { + SpyInput *spy_input; + + before_each([&]() { + spy_input = new SpyInput("{\"key\": [null, 2]}", 3); + ts_parser_set_language(parser, load_real_language("json")); + }); + + after_each([&]() { + delete spy_input; + }); + + it("handles UTF16 encodings", [&]() { + const char16_t content[] = u"[true, false]"; + spy_input->content = 
string((const char *)content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + + tree = ts_parser_parse(parser, nullptr, spy_input->input()); + root = ts_tree_root_node(tree); + assert_root_node( + "(value (array (true) (false)))"); + }); + + it("handles truncated UTF16 data", [&]() { + const char content[1] = { '\0' }; + spy_input->content = string(content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + + tree = ts_parser_parse(parser, nullptr, spy_input->input()); + }); + + it("measures columns in bytes", [&]() { + const char16_t content[] = u"[true, false]"; + spy_input->content = string((const char *)content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + + tree = ts_parser_parse(parser, nullptr, spy_input->input()); + root = ts_tree_root_node(tree); + AssertThat(ts_node_end_point(root), Equals({0, 28})); + }); + }); + + describe("set_language(language)", [&]() { + string input_string = "{\"key\": [1, 2]}\n"; + + it("uses the given language for future parses", [&]() { + ts_parser_set_language(parser, load_real_language("json")); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + + root = ts_tree_root_node(tree); + assert_root_node( + "(value (object (pair (string) (array (number) (number)))))"); + }); + + it("does not allow setting a language with a different version number", [&]() { + TSLanguage language = *load_real_language("json"); + AssertThat(ts_language_version(&language), Equals(TREE_SITTER_LANGUAGE_VERSION)); + + language.version++; + AssertThat(ts_language_version(&language), !Equals(TREE_SITTER_LANGUAGE_VERSION)); + + AssertThat(ts_parser_set_language(parser, &language), IsFalse()); + AssertThat(ts_parser_language(parser), Equals(nullptr)); + }); + }); + + describe("set_logger(TSLogger)", [&]() { + SpyLogger *logger; + + before_each([&]() { + logger = new SpyLogger(); + ts_parser_set_language(parser, load_real_language("json")); + }); + + 
after_each([&]() { + delete logger; + }); + + it("calls the debugger with a message for each parse action", [&]() { + ts_parser_set_logger(parser, logger->logger()); + tree = ts_parser_parse_string(parser, nullptr, "[ 1, 2, 3 ]", 11); + + AssertThat(logger->messages, Contains("new_parse")); + AssertThat(logger->messages, Contains("skip character:' '")); + AssertThat(logger->messages, Contains("consume character:'['")); + AssertThat(logger->messages, Contains("consume character:'1'")); + AssertThat(logger->messages, Contains("reduce sym:array, child_count:4")); + AssertThat(logger->messages, Contains("accept")); + }); + + it("allows the debugger to be retrieved later", [&]() { + ts_parser_set_logger(parser, logger->logger()); + AssertThat(ts_parser_logger(parser).payload, Equals(logger)); + }); + + describe("disabling debugging", [&]() { + before_each([&]() { + ts_parser_set_logger(parser, logger->logger()); + ts_parser_set_logger(parser, {NULL, NULL}); + }); + + it("does not call the debugger any more", [&]() { + tree = ts_parser_parse_string(parser, nullptr, "{}", 2); + AssertThat(logger->messages, IsEmpty()); + }); + }); + }); }); END_TEST diff --git a/test/runtime/stack_test.cc b/test/runtime/stack_test.cc index d6cbcea0..68104935 100644 --- a/test/runtime/stack_test.cc +++ b/test/runtime/stack_test.cc @@ -4,7 +4,7 @@ #include "helpers/record_alloc.h" #include "helpers/stream_methods.h" #include "runtime/stack.h" -#include "runtime/tree.h" +#include "runtime/subtree.h" #include "runtime/length.h" #include "runtime/alloc.h" @@ -23,27 +23,31 @@ Length operator*(const Length &length, uint32_t factor) { return {length.bytes * factor, {0, length.extent.column * factor}}; } -void free_slice_array(TreePool *pool, StackSliceArray *slices) { +void free_slice_array(SubtreePool *pool, StackSliceArray *slices) { for (size_t i = 0; i < slices->size; i++) { StackSlice slice = slices->contents[i]; bool matches_prior_trees = false; for (size_t j = 0; j < i; j++) { StackSlice 
prior_slice = slices->contents[j]; - if (slice.trees.contents == prior_slice.trees.contents) { + if (slice.subtrees.contents == prior_slice.subtrees.contents) { matches_prior_trees = true; break; } } if (!matches_prior_trees) { - for (size_t j = 0; j < slice.trees.size; j++) - ts_tree_release(pool, slice.trees.contents[j]); - array_delete(&slice.trees); + for (size_t j = 0; j < slice.subtrees.size; j++) + ts_subtree_release(pool, slice.subtrees.contents[j]); + array_delete(&slice.subtrees); } } } +Subtree *mutate(const Subtree *subtree) { + return (Subtree *)subtree; +} + struct StackEntry { TSStateId state; size_t depth; @@ -54,9 +58,9 @@ vector get_stack_entries(Stack *stack, StackVersion version) { ts_stack_iterate( stack, version, - [](void *payload, TSStateId state, uint32_t tree_count) { + [](void *payload, TSStateId state, uint32_t subtree_count) { auto entries = static_cast *>(payload); - StackEntry entry = {state, tree_count}; + StackEntry entry = {state, subtree_count}; if (find(entries->begin(), entries->end(), entry) == entries->end()) { entries->push_back(entry); } @@ -68,39 +72,39 @@ START_TEST describe("Stack", [&]() { Stack *stack; - const size_t tree_count = 11; - Tree *trees[tree_count]; + const size_t subtree_count = 11; + const Subtree *subtrees[subtree_count]; Length tree_len = {3, {0, 3}}; - TreePool pool; + SubtreePool pool; before_each([&]() { record_alloc::start(); - ts_tree_pool_init(&pool); + pool = ts_subtree_pool_new(10); stack = ts_stack_new(&pool); TSLanguage dummy_language; TSSymbolMetadata symbol_metadata[50] = {}; dummy_language.symbol_metadata = symbol_metadata; - for (size_t i = 0; i < tree_count; i++) { - trees[i] = ts_tree_make_leaf(&pool, i, length_zero(), tree_len, &dummy_language); + for (size_t i = 0; i < subtree_count; i++) { + subtrees[i] = ts_subtree_new_leaf(&pool, i + 1, length_zero(), tree_len, &dummy_language); } }); after_each([&]() { ts_stack_delete(stack); - for (size_t i = 0; i < tree_count; i++) { - 
ts_tree_release(&pool, trees[i]); + for (size_t i = 0; i < subtree_count; i++) { + ts_subtree_release(&pool, subtrees[i]); } - ts_tree_pool_delete(&pool); + ts_subtree_pool_delete(&pool); record_alloc::stop(); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); - auto push = [&](StackVersion version, Tree *tree, TSStateId state) { - ts_tree_retain(tree); + auto push = [&](StackVersion version, const Subtree *tree, TSStateId state) { + ts_subtree_retain(tree); ts_stack_push(stack, version, tree, false, state); }; @@ -111,17 +115,17 @@ describe("Stack", [&]() { AssertThat(ts_stack_position(stack, 0), Equals(length_zero())); // . <──0── A* - push(0, trees[0], stateA); + push(0, subtrees[0], stateA); AssertThat(ts_stack_state(stack, 0), Equals(stateA)); AssertThat(ts_stack_position(stack, 0), Equals(tree_len)); // . <──0── A <──1── B* - push(0, trees[1], stateB); + push(0, subtrees[1], stateB); AssertThat(ts_stack_state(stack, 0), Equals(stateB)); AssertThat(ts_stack_position(stack, 0), Equals(tree_len * 2)); // . <──0── A <──1── B <──2── C* - push(0, trees[2], stateC); + push(0, subtrees[2], stateC); AssertThat(ts_stack_state(stack, 0), Equals(stateC)); AssertThat(ts_stack_position(stack, 0), Equals(tree_len * 3)); @@ -139,7 +143,7 @@ describe("Stack", [&]() { // . <──0── A <─* // ↑ // └───* - push(0, trees[0], stateA); + push(0, subtrees[0], stateA); ts_stack_copy_version(stack, 0); }); @@ -147,10 +151,10 @@ describe("Stack", [&]() { // . <──0── A <──1── B <──3── D* // ↑ // └───2─── C <──4── D* - push(0, trees[1], stateB); - push(1, trees[2], stateC); - push(0, trees[3], stateD); - push(1, trees[4], stateD); + push(0, subtrees[1], stateB); + push(1, subtrees[2], stateC); + push(0, subtrees[3], stateD); + push(1, subtrees[4], stateD); // . <──0── A <──1── B <──3── D* // ↑ | @@ -170,8 +174,8 @@ describe("Stack", [&]() { // . 
<──0── A <──1── B* // ↑ // └───2─── C* - push(0, trees[1], stateB); - push(1, trees[2], stateC); + push(0, subtrees[1], stateB); + push(1, subtrees[2], stateC); AssertThat(ts_stack_merge(stack, 0, 1), IsFalse()); AssertThat(ts_stack_version_count(stack), Equals(2)); @@ -181,11 +185,11 @@ describe("Stack", [&]() { // . <──0── A <──1── B <────3──── D* // ↑ // └───2─── C <──4── D* - trees[3]->size = tree_len * 3; - push(0, trees[1], stateB); - push(1, trees[2], stateC); - push(0, trees[3], stateD); - push(1, trees[4], stateD); + mutate(subtrees[3])->size = tree_len * 3; + push(0, subtrees[1], stateB); + push(1, subtrees[2], stateC); + push(0, subtrees[3], stateD); + push(1, subtrees[4], stateD); AssertThat(ts_stack_merge(stack, 0, 1), IsFalse()); AssertThat(ts_stack_version_count(stack), Equals(2)); @@ -196,12 +200,12 @@ describe("Stack", [&]() { // . <──0── A <──1── B <──3── D <──5── E* // ↑ // └───2─── C <──4── D <──5── E* - push(0, trees[1], stateB); - push(1, trees[2], stateC); - push(0, trees[3], stateD); - push(1, trees[4], stateD); - push(0, trees[5], stateE); - push(1, trees[5], stateE); + push(0, subtrees[1], stateB); + push(1, subtrees[2], stateC); + push(0, subtrees[3], stateD); + push(1, subtrees[4], stateD); + push(0, subtrees[5], stateE); + push(1, subtrees[5], stateE); // . <──0── A <──1── B <──3── D <──5── E* // ↑ | @@ -224,12 +228,12 @@ describe("Stack", [&]() { // . <──0── A <────1──── B* // ↑ // └2─ A <──1── B* - trees[2]->extra = true; - trees[2]->size = tree_len * 0; + mutate(subtrees[2])->extra = true; + mutate(subtrees[2])->size = tree_len * 0; - push(0, trees[1], stateB); - push(1, trees[2], stateA); - push(1, trees[1], stateB); + push(0, subtrees[1], stateB); + push(1, subtrees[2], stateA); + push(1, subtrees[1], stateB); // . <──0── A <──1── B* AssertThat(ts_stack_merge(stack, 0, 1), IsTrue()); @@ -246,9 +250,9 @@ describe("Stack", [&]() { describe("pop_count(version, count)", [&]() { before_each([&]() { // . 
<──0── A <──1── B <──2── C* - push(0, trees[0], stateA); - push(0, trees[1], stateB); - push(0, trees[2], stateC); + push(0, subtrees[0], stateA); + push(0, subtrees[1], stateB); + push(0, subtrees[2], stateC); }); it("creates a new version with the given number of entries removed", [&]() { @@ -261,14 +265,14 @@ describe("Stack", [&]() { StackSlice slice = pop.contents[0]; AssertThat(slice.version, Equals(1)); - AssertThat(slice.trees, Equals(vector({ trees[1], trees[2] }))); + AssertThat(slice.subtrees, Equals(vector({ subtrees[1], subtrees[2] }))); AssertThat(ts_stack_state(stack, 1), Equals(stateA)); free_slice_array(&pool,&pop); }); - it("does not count 'extra' trees toward the given count", [&]() { - trees[1]->extra = true; + it("does not count 'extra' subtrees toward the given count", [&]() { + mutate(subtrees[1])->extra = true; // . <──0── A <──1── B <──2── C* // ↑ @@ -277,7 +281,7 @@ describe("Stack", [&]() { AssertThat(pop.size, Equals(1)); StackSlice slice = pop.contents[0]; - AssertThat(slice.trees, Equals(vector({ trees[0], trees[1], trees[2] }))); + AssertThat(slice.subtrees, Equals(vector({ subtrees[0], subtrees[1], subtrees[2] }))); AssertThat(ts_stack_state(stack, 1), Equals(1)); free_slice_array(&pool,&pop); @@ -288,14 +292,14 @@ describe("Stack", [&]() { // . 
<──0── A <──1── B <──2── C <──3── D <──10── I* // ↑ | // └───4─── E <──5── F <──6───┘ - push(0, trees[3], stateD); + push(0, subtrees[3], stateD); StackSliceArray pop = ts_stack_pop_count(stack, 0, 3); free_slice_array(&pool,&pop); - push(1, trees[4], stateE); - push(1, trees[5], stateF); - push(1, trees[6], stateD); + push(1, subtrees[4], stateE); + push(1, subtrees[5], stateF); + push(1, subtrees[6], stateD); ts_stack_merge(stack, 0, 1); - push(0, trees[10], stateI); + push(0, subtrees[10], stateI); AssertThat(ts_stack_version_count(stack), Equals(1)); AssertThat(get_stack_entries(stack, 0), Equals(vector({ @@ -322,11 +326,11 @@ describe("Stack", [&]() { StackSlice slice1 = pop.contents[0]; AssertThat(slice1.version, Equals(1)); - AssertThat(slice1.trees, Equals(vector({ trees[2], trees[3], trees[10] }))); + AssertThat(slice1.subtrees, Equals(vector({ subtrees[2], subtrees[3], subtrees[10] }))); StackSlice slice2 = pop.contents[1]; AssertThat(slice2.version, Equals(2)); - AssertThat(slice2.trees, Equals(vector({ trees[5], trees[6], trees[10] }))); + AssertThat(slice2.subtrees, Equals(vector({ subtrees[5], subtrees[6], subtrees[10] }))); AssertThat(ts_stack_version_count(stack), Equals(3)); AssertThat(get_stack_entries(stack, 0), Equals(vector({ @@ -366,7 +370,7 @@ describe("Stack", [&]() { StackSlice slice1 = pop.contents[0]; AssertThat(slice1.version, Equals(1)); - AssertThat(slice1.trees, Equals(vector({ trees[10] }))); + AssertThat(slice1.subtrees, Equals(vector({ subtrees[10] }))); AssertThat(ts_stack_version_count(stack), Equals(2)); AssertThat(ts_stack_state(stack, 0), Equals(stateI)); @@ -388,11 +392,11 @@ describe("Stack", [&]() { StackSlice slice1 = pop.contents[0]; AssertThat(slice1.version, Equals(1)); - AssertThat(slice1.trees, Equals(vector({ trees[1], trees[2], trees[3], trees[10] }))); + AssertThat(slice1.subtrees, Equals(vector({ subtrees[1], subtrees[2], subtrees[3], subtrees[10] }))); StackSlice slice2 = pop.contents[1]; 
AssertThat(slice2.version, Equals(1)); - AssertThat(slice2.trees, Equals(vector({ trees[4], trees[5], trees[6], trees[10] }))); + AssertThat(slice2.subtrees, Equals(vector({ subtrees[4], subtrees[5], subtrees[6], subtrees[10] }))); AssertThat(ts_stack_version_count(stack), Equals(2)); AssertThat(ts_stack_state(stack, 0), Equals(stateI)); @@ -403,7 +407,7 @@ describe("Stack", [&]() { }); describe("when there are three paths that lead to three different versions", [&]() { - it("returns three entries with different arrays of trees", [&]() { + it("returns three entries with different arrays of subtrees", [&]() { // . <──0── A <──1── B <──2── C <──3── D <──10── I* // ↑ | // ├───4─── E <──5── F <──6───┘ @@ -411,10 +415,10 @@ describe("Stack", [&]() { // └───7─── G <──8── H <──9───┘ StackSliceArray pop = ts_stack_pop_count(stack, 0, 4); free_slice_array(&pool,&pop); - push(1, trees[7], stateG); - push(1, trees[8], stateH); - push(1, trees[9], stateD); - push(1, trees[10], stateI); + push(1, subtrees[7], stateG); + push(1, subtrees[8], stateH); + push(1, subtrees[9], stateD); + push(1, subtrees[10], stateI); ts_stack_merge(stack, 0, 1); AssertThat(ts_stack_version_count(stack), Equals(1)); @@ -443,15 +447,15 @@ describe("Stack", [&]() { StackSlice slice1 = pop.contents[0]; AssertThat(slice1.version, Equals(1)); - AssertThat(slice1.trees, Equals(vector({ trees[3], trees[10] }))); + AssertThat(slice1.subtrees, Equals(vector({ subtrees[3], subtrees[10] }))); StackSlice slice2 = pop.contents[1]; AssertThat(slice2.version, Equals(2)); - AssertThat(slice2.trees, Equals(vector({ trees[6], trees[10] }))); + AssertThat(slice2.subtrees, Equals(vector({ subtrees[6], subtrees[10] }))); StackSlice slice3 = pop.contents[2]; AssertThat(slice3.version, Equals(3)); - AssertThat(slice3.trees, Equals(vector({ trees[9], trees[10] }))); + AssertThat(slice3.subtrees, Equals(vector({ subtrees[9], subtrees[10] }))); AssertThat(ts_stack_version_count(stack), Equals(4)); 
AssertThat(ts_stack_state(stack, 0), Equals(stateI)); @@ -467,12 +471,12 @@ describe("Stack", [&]() { describe("pop_pending(version)", [&]() { before_each([&]() { - push(0, trees[0], stateA); + push(0, subtrees[0], stateA); }); it("removes the top node from the stack if it was pushed in pending mode", [&]() { - ts_stack_push(stack, 0, trees[1], true, stateB); - ts_tree_retain(trees[1]); + ts_stack_push(stack, 0, subtrees[1], true, stateB); + ts_subtree_retain(subtrees[1]); StackSliceArray pop = ts_stack_pop_pending(stack, 0); AssertThat(pop.size, Equals(1)); @@ -485,20 +489,20 @@ describe("Stack", [&]() { free_slice_array(&pool,&pop); }); - it("skips entries whose trees are extra", [&]() { - ts_stack_push(stack, 0, trees[1], true, stateB); - ts_tree_retain(trees[1]); + it("skips entries whose subtrees are extra", [&]() { + ts_stack_push(stack, 0, subtrees[1], true, stateB); + ts_subtree_retain(subtrees[1]); - trees[2]->extra = true; - trees[3]->extra = true; + mutate(subtrees[2])->extra = true; + mutate(subtrees[3])->extra = true; - push(0, trees[2], stateB); - push(0, trees[3], stateB); + push(0, subtrees[2], stateB); + push(0, subtrees[3], stateB); StackSliceArray pop = ts_stack_pop_pending(stack, 0); AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].trees, Equals(vector({ trees[1], trees[2], trees[3] }))); + AssertThat(pop.contents[0].subtrees, Equals(vector({ subtrees[1], subtrees[2], subtrees[3] }))); AssertThat(get_stack_entries(stack, 0), Equals(vector({ {stateA, 0}, @@ -509,7 +513,7 @@ describe("Stack", [&]() { }); it("does nothing if the top node was not pushed in pending mode", [&]() { - push(0, trees[1], stateB); + push(0, subtrees[1], stateB); StackSliceArray pop = ts_stack_pop_pending(stack, 0); AssertThat(pop.size, Equals(0)); @@ -526,59 +530,59 @@ describe("Stack", [&]() { describe("setting external token state", [&]() { before_each([&]() { - trees[1]->has_external_tokens = true; - trees[2]->has_external_tokens = true; - 
ts_external_token_state_init(&trees[1]->external_token_state, NULL, 0); - ts_external_token_state_init(&trees[2]->external_token_state, NULL, 0); + mutate(subtrees[1])->has_external_tokens = true; + mutate(subtrees[2])->has_external_tokens = true; + ts_external_scanner_state_init(&mutate(subtrees[1])->external_scanner_state, NULL, 0); + ts_external_scanner_state_init(&mutate(subtrees[2])->external_scanner_state, NULL, 0); }); it("allows the state to be retrieved", [&]() { - AssertThat(ts_stack_last_external_token(stack, 0), Equals(nullptr)); + AssertThat(ts_stack_last_external_token(stack, 0), Equals(nullptr)); - ts_stack_set_last_external_token(stack, 0, trees[1]); - AssertThat(ts_stack_last_external_token(stack, 0), Equals(trees[1])); + ts_stack_set_last_external_token(stack, 0, subtrees[1]); + AssertThat(ts_stack_last_external_token(stack, 0), Equals(subtrees[1])); ts_stack_copy_version(stack, 0); - AssertThat(ts_stack_last_external_token(stack, 1), Equals(trees[1])); + AssertThat(ts_stack_last_external_token(stack, 1), Equals(subtrees[1])); - ts_stack_set_last_external_token(stack, 0, trees[2]); - AssertThat(ts_stack_last_external_token(stack, 0), Equals(trees[2])); + ts_stack_set_last_external_token(stack, 0, subtrees[2]); + AssertThat(ts_stack_last_external_token(stack, 0), Equals(subtrees[2])); }); it("does not merge stack versions with different external token states", [&]() { - ts_external_token_state_init(&trees[1]->external_token_state, "abcd", 2); - ts_external_token_state_init(&trees[2]->external_token_state, "ABCD", 2); + ts_external_scanner_state_init(&mutate(subtrees[1])->external_scanner_state, "abcd", 2); + ts_external_scanner_state_init(&mutate(subtrees[2])->external_scanner_state, "ABCD", 2); ts_stack_copy_version(stack, 0); - push(0, trees[0], 5); - push(1, trees[0], 5); + push(0, subtrees[0], 5); + push(1, subtrees[0], 5); - ts_stack_set_last_external_token(stack, 0, trees[1]); - ts_stack_set_last_external_token(stack, 1, trees[2]); + 
ts_stack_set_last_external_token(stack, 0, subtrees[1]); + ts_stack_set_last_external_token(stack, 1, subtrees[2]); AssertThat(ts_stack_merge(stack, 0, 1), IsFalse()); }); it("merges stack versions with identical external token states", [&]() { - ts_external_token_state_init(&trees[1]->external_token_state, "abcd", 2); - ts_external_token_state_init(&trees[2]->external_token_state, "abcd", 2); + ts_external_scanner_state_init(&mutate(subtrees[1])->external_scanner_state, "abcd", 2); + ts_external_scanner_state_init(&mutate(subtrees[2])->external_scanner_state, "abcd", 2); ts_stack_copy_version(stack, 0); - push(0, trees[0], 5); - push(1, trees[0], 5); + push(0, subtrees[0], 5); + push(1, subtrees[0], 5); - ts_stack_set_last_external_token(stack, 0, trees[1]); - ts_stack_set_last_external_token(stack, 1, trees[2]); + ts_stack_set_last_external_token(stack, 0, subtrees[1]); + ts_stack_set_last_external_token(stack, 1, subtrees[2]); AssertThat(ts_stack_merge(stack, 0, 1), IsTrue()); }); it("does not distinguish between an *empty* external token state and *no* external token state", [&]() { ts_stack_copy_version(stack, 0); - push(0, trees[0], 5); - push(1, trees[0], 5); + push(0, subtrees[0], 5); + push(1, subtrees[0], 5); - ts_stack_set_last_external_token(stack, 0, trees[1]); + ts_stack_set_last_external_token(stack, 0, subtrees[1]); AssertThat(ts_stack_merge(stack, 0, 1), IsTrue()); }); @@ -595,7 +599,7 @@ std::ostream &operator<<(std::ostream &stream, const StackEntry &entry) { return stream << "{" << entry.state << ", " << entry.depth << "}"; } -std::ostream &operator<<(std::ostream &stream, const TreeArray &array) { +std::ostream &operator<<(std::ostream &stream, const SubtreeArray &array) { stream << "["; bool first = true; for (size_t i = 0; i < array.size; i++) { diff --git a/test/runtime/subtree_test.cc b/test/runtime/subtree_test.cc new file mode 100644 index 00000000..3c1c9ad2 --- /dev/null +++ b/test/runtime/subtree_test.cc @@ -0,0 +1,505 @@ +#include 
"test_helper.h" +#include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" +#include "runtime/subtree.h" +#include "runtime/length.h" + +void assert_consistent(const Subtree *tree) { + if (tree->child_count == 0) return; + AssertThat(tree->children.contents[0]->padding, Equals(tree->padding)); + + Length total_children_size = length_zero(); + for (size_t i = 0; i < tree->children.size; i++) { + const Subtree *child = tree->children.contents[i]; + assert_consistent(child); + total_children_size = length_add(total_children_size, ts_subtree_total_size(child)); + } + + AssertThat(total_children_size, Equals(ts_subtree_total_size(tree))); +}; + +START_TEST + +describe("Subtree", []() { + enum { + symbol1 = 1, + symbol2, + symbol3, + symbol4, + symbol5, + symbol6, + symbol7, + symbol8, + symbol9, + }; + + TSSymbolMetadata metadata_list[30] = {}; + + TSLanguage language; + language.symbol_metadata = metadata_list; + + SubtreePool pool; + + before_each([&]() { + pool = ts_subtree_pool_new(10); + }); + + after_each([&]() { + ts_subtree_pool_delete(&pool); + }); + + describe("make_leaf", [&]() { + it("does not mark the tree as fragile", [&]() { + const Subtree *tree = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); + AssertThat(tree->fragile_left, IsFalse()); + AssertThat(tree->fragile_right, IsFalse()); + + ts_subtree_release(&pool, tree); + }); + }); + + describe("make_error", [&]() { + it("marks the tree as fragile", [&]() { + const Subtree *error_tree = ts_subtree_new_error( + &pool, + length_zero(), + length_zero(), + 'z', + &language + ); + + AssertThat(error_tree->fragile_left, IsTrue()); + AssertThat(error_tree->fragile_right, IsTrue()); + + ts_subtree_release(&pool, error_tree); + }); + }); + + describe("make_node", [&]() { + const Subtree *tree1, *tree2, *parent1; + + before_each([&]() { + tree1 = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); + tree2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, 
{3, {0, 3}}, &language); + + ts_subtree_retain(tree1); + ts_subtree_retain(tree2); + parent1 = ts_subtree_new_node(&pool, symbol3, tree_array({ + tree1, + tree2, + }), 0, &language); + }); + + after_each([&]() { + ts_subtree_release(&pool, tree1); + ts_subtree_release(&pool, tree2); + ts_subtree_release(&pool, parent1); + }); + + it("computes its size and padding based on its child nodes", [&]() { + AssertThat(parent1->size.bytes, Equals( + tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes + )); + AssertThat(parent1->padding.bytes, Equals(tree1->padding.bytes)); + }); + + describe("when the first node is fragile on the left side", [&]() { + const Subtree *parent; + + before_each([&]() { + Subtree *mutable_tree1 = (Subtree *)tree1; + mutable_tree1->fragile_left = true; + mutable_tree1->extra = true; + + ts_subtree_retain(tree1); + ts_subtree_retain(tree2); + parent = ts_subtree_new_node(&pool, symbol3, tree_array({ + tree1, + tree2, + }), 0, &language); + }); + + after_each([&]() { + ts_subtree_release(&pool, parent); + }); + + it("records that it is fragile on the left side", [&]() { + AssertThat(parent->fragile_left, IsTrue()); + }); + }); + + describe("when the last node is fragile on the right side", [&]() { + const Subtree *parent; + + before_each([&]() { + Subtree *mutable_tree2 = (Subtree *)tree2; + mutable_tree2->fragile_right = true; + mutable_tree2->extra = true; + + ts_subtree_retain(tree1); + ts_subtree_retain(tree2); + parent = ts_subtree_new_node(&pool, symbol3, tree_array({ + tree1, + tree2, + }), 0, &language); + }); + + after_each([&]() { + ts_subtree_release(&pool, parent); + }); + + it("records that it is fragile on the right side", [&]() { + AssertThat(parent->fragile_right, IsTrue()); + }); + }); + + describe("when the outer nodes aren't fragile on their outer side", [&]() { + const Subtree *parent; + + before_each([&]() { + Subtree *mutable_tree1 = (Subtree *)tree1; + Subtree *mutable_tree2 = (Subtree *)tree2; + 
mutable_tree1->fragile_right = true; + mutable_tree2->fragile_left = true; + + ts_subtree_retain(tree1); + ts_subtree_retain(tree2); + parent = ts_subtree_new_node(&pool, symbol3, tree_array({ + tree1, + tree2, + }), 0, &language); + }); + + after_each([&]() { + ts_subtree_release(&pool, parent); + }); + + it("records that it is not fragile", [&]() { + AssertThat(parent->fragile_left, IsFalse()); + AssertThat(parent->fragile_right, IsFalse()); + }); + }); + }); + + describe("edit", [&]() { + const Subtree *tree; + + before_each([&]() { + tree = ts_subtree_new_node(&pool, symbol1, tree_array({ + ts_subtree_new_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, &language), + ts_subtree_new_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, &language), + ts_subtree_new_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, &language), + }), 0, &language); + + AssertThat(tree->padding, Equals({2, {0, 2}})); + AssertThat(tree->size, Equals({13, {0, 13}})); + }); + + after_each([&]() { + ts_subtree_release(&pool, tree); + }); + + it("does not mutate the argument", [&]() { + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 0; + edit.bytes_added = 1; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 0}; + edit.extent_added = {0, 1}; + + ts_subtree_retain(tree); + const Subtree *new_tree = ts_subtree_edit(tree, &edit, &pool); + assert_consistent(tree); + assert_consistent(new_tree); + + AssertThat(tree->has_changes, IsFalse()); + AssertThat(tree->padding, Equals({2, {0, 2}})); + AssertThat(tree->size, Equals({13, {0, 13}})); + + AssertThat(tree->children.contents[0]->has_changes, IsFalse()); + AssertThat(tree->children.contents[0]->padding, Equals({2, {0, 2}})); + AssertThat(tree->children.contents[0]->size, Equals({3, {0, 3}})); + + AssertThat(tree->children.contents[1]->has_changes, IsFalse()); + AssertThat(tree->children.contents[1]->padding, Equals({2, {0, 2}})); + AssertThat(tree->children.contents[1]->size, Equals({3, {0, 3}})); + + ts_subtree_release(&pool, 
new_tree); + }); + + describe("edits within a tree's padding", [&]() { + it("resizes the padding of the tree and its leftmost descendants", [&]() { + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 0; + edit.bytes_added = 1; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 0}; + edit.extent_added = {0, 1}; + tree = ts_subtree_edit(tree, &edit, &pool); + assert_consistent(tree); + + AssertThat(tree->has_changes, IsTrue()); + AssertThat(tree->padding, Equals({3, {0, 3}})); + AssertThat(tree->size, Equals({13, {0, 13}})); + + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({3, {0, 3}})); + AssertThat(tree->children.contents[0]->size, Equals({3, {0, 3}})); + + AssertThat(tree->children.contents[1]->has_changes, IsFalse()); + AssertThat(tree->children.contents[1]->padding, Equals({2, {0, 2}})); + AssertThat(tree->children.contents[1]->size, Equals({3, {0, 3}})); + }); + }); + + describe("edits that start in a tree's padding but extend into its content", [&]() { + it("shrinks the content to compensate for the expanded padding", [&]() { + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 3; + edit.bytes_added = 4; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 3}; + edit.extent_added = {0, 4}; + tree = ts_subtree_edit(tree, &edit, &pool); + assert_consistent(tree); + + AssertThat(tree->has_changes, IsTrue()); + AssertThat(tree->padding, Equals({5, {0, 5}})); + AssertThat(tree->size, Equals({11, {0, 11}})); + + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({5, {0, 5}})); + AssertThat(tree->children.contents[0]->size, Equals({1, {0, 1}})); + }); + }); + + describe("insertions at the edge of a tree's padding", [&]() { + it("expands the tree's padding", [&]() { + TSInputEdit edit; + edit.start_byte = 2; + edit.bytes_removed = 0; + edit.bytes_added = 2; + edit.start_point = {0, 2}; + 
edit.extent_removed = {0, 0}; + edit.extent_added = {0, 2}; + tree = ts_subtree_edit(tree, &edit, &pool); + assert_consistent(tree); + + AssertThat(tree->has_changes, IsTrue()); + AssertThat(tree->padding, Equals({4, {0, 4}})); + AssertThat(tree->size, Equals({13, {0, 13}})); + + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({4, {0, 4}})); + AssertThat(tree->children.contents[0]->size, Equals({3, {0, 3}})); + + AssertThat(tree->children.contents[1]->has_changes, IsFalse()); + }); + }); + + describe("replacements starting at the edge of a tree's padding", [&]() { + it("resizes the content and not the padding", [&]() { + TSInputEdit edit; + edit.start_byte = 2; + edit.bytes_removed = 2; + edit.bytes_added = 5; + edit.start_point = {0, 2}; + edit.extent_removed = {0, 2}; + edit.extent_added = {0, 5}; + tree = ts_subtree_edit(tree, &edit, &pool); + assert_consistent(tree); + + AssertThat(tree->has_changes, IsTrue()); + AssertThat(tree->padding, Equals({2, {0, 2}})); + AssertThat(tree->size, Equals({16, {0, 16}})); + + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + AssertThat(tree->children.contents[0]->padding, Equals({2, {0, 2}})); + AssertThat(tree->children.contents[0]->size, Equals({6, {0, 6}})); + + AssertThat(tree->children.contents[1]->has_changes, IsFalse()); + }); + }); + + describe("deletions that span more than one child node", [&]() { + it("shrinks subsequent child nodes", [&]() { + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 10; + edit.bytes_added = 3; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 10}; + edit.extent_added = {0, 3}; + tree = ts_subtree_edit(tree, &edit, &pool); + assert_consistent(tree); + + AssertThat(tree->has_changes, IsTrue()); + AssertThat(tree->padding, Equals({4, {0, 4}})); + AssertThat(tree->size, Equals({4, {0, 4}})); + + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + 
AssertThat(tree->children.contents[0]->padding, Equals({4, {0, 4}})); + AssertThat(tree->children.contents[0]->size, Equals({0, {0, 0}})); + + AssertThat(tree->children.contents[1]->has_changes, IsTrue()); + AssertThat(tree->children.contents[1]->padding, Equals({0, {0, 0}})); + AssertThat(tree->children.contents[1]->size, Equals({0, {0, 0}})); + + AssertThat(tree->children.contents[2]->has_changes, IsTrue()); + AssertThat(tree->children.contents[2]->padding, Equals({1, {0, 1}})); + AssertThat(tree->children.contents[2]->size, Equals({3, {0, 3}})); + }); + }); + + describe("edits within a tree's range of scanned bytes", [&]() { + it("marks preceding trees as changed", [&]() { + Subtree *mutable_child = (Subtree *)tree->children.contents[0]; + mutable_child->bytes_scanned = 7; + + TSInputEdit edit; + edit.start_byte = 6; + edit.bytes_removed = 1; + edit.bytes_added = 1; + edit.start_point = {0, 6}; + edit.extent_removed = {0, 1}; + edit.extent_added = {0, 1}; + tree = ts_subtree_edit(tree, &edit, &pool); + assert_consistent(tree); + + AssertThat(tree->children.contents[0]->has_changes, IsTrue()); + }); + }); + }); + + describe("eq", [&]() { + const Subtree *leaf; + + before_each([&]() { + leaf = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); + }); + + after_each([&]() { + ts_subtree_release(&pool, leaf); + }); + + it("returns true for identical trees", [&]() { + const Subtree *leaf_copy = ts_subtree_new_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, &language); + AssertThat(ts_subtree_eq(leaf, leaf_copy), IsTrue()); + + const Subtree *parent = ts_subtree_new_node(&pool, symbol2, tree_array({ + leaf, + leaf_copy, + }), 0, &language); + ts_subtree_retain(leaf); + ts_subtree_retain(leaf_copy); + + const Subtree *parent_copy = ts_subtree_new_node(&pool, symbol2, tree_array({ + leaf, + leaf_copy, + }), 0, &language); + ts_subtree_retain(leaf); + ts_subtree_retain(leaf_copy); + + AssertThat(ts_subtree_eq(parent, parent_copy), IsTrue()); + + 
ts_subtree_release(&pool, leaf_copy); + ts_subtree_release(&pool, parent); + ts_subtree_release(&pool, parent_copy); + }); + + it("returns false for trees with different symbols", [&]() { + const Subtree *different_leaf = ts_subtree_new_leaf( + &pool, + leaf->symbol + 1, + leaf->padding, + leaf->size, + &language + ); + + AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse()); + ts_subtree_release(&pool, different_leaf); + }); + + it("returns false for trees with different options", [&]() { + const Subtree *different_leaf = ts_subtree_new_leaf( + &pool, leaf->symbol, leaf->padding, leaf->size, &language + ); + ((Subtree *)different_leaf)->visible = !leaf->visible; + AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse()); + ts_subtree_release(&pool, different_leaf); + }); + + it("returns false for trees with different paddings or sizes", [&]() { + const Subtree *different_leaf = ts_subtree_new_leaf(&pool, leaf->symbol, {}, leaf->size, &language); + AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse()); + ts_subtree_release(&pool, different_leaf); + + different_leaf = ts_subtree_new_leaf(&pool, symbol1, leaf->padding, {}, &language); + AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse()); + ts_subtree_release(&pool, different_leaf); + }); + + it("returns false for trees with different children", [&]() { + const Subtree *leaf2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language); + + const Subtree *parent = ts_subtree_new_node(&pool, symbol2, tree_array({ + leaf, + leaf2, + }), 0, &language); + ts_subtree_retain(leaf); + ts_subtree_retain(leaf2); + + const Subtree *different_parent = ts_subtree_new_node(&pool, symbol2, tree_array({ + leaf2, + leaf, + }), 0, &language); + ts_subtree_retain(leaf2); + ts_subtree_retain(leaf); + + AssertThat(ts_subtree_eq(different_parent, parent), IsFalse()); + AssertThat(ts_subtree_eq(parent, different_parent), IsFalse()); + + ts_subtree_release(&pool, leaf2); + ts_subtree_release(&pool, parent); 
+ ts_subtree_release(&pool, different_parent); + }); + }); + + describe("last_external_token", [&]() { + Length padding = {1, {0, 1}}; + Length size = {2, {0, 2}}; + + auto make_external = [](const Subtree *tree) { + ((Subtree *)tree)->has_external_tokens = true; + return tree; + }; + + it("returns the last serialized external token state in the given tree", [&]() { + const Subtree *tree1, *tree2, *tree3, *tree4, *tree5, *tree6, *tree7, *tree8, *tree9; + + tree1 = ts_subtree_new_node(&pool, symbol1, tree_array({ + (tree2 = ts_subtree_new_node(&pool, symbol2, tree_array({ + (tree3 = make_external(ts_subtree_new_leaf(&pool, symbol3, padding, size, &language))), + (tree4 = ts_subtree_new_leaf(&pool, symbol4, padding, size, &language)), + (tree5 = ts_subtree_new_leaf(&pool, symbol5, padding, size, &language)), + }), 0, &language)), + (tree6 = ts_subtree_new_node(&pool, symbol6, tree_array({ + (tree7 = ts_subtree_new_node(&pool, symbol7, tree_array({ + (tree8 = ts_subtree_new_leaf(&pool, symbol8, padding, size, &language)), + }), 0, &language)), + (tree9 = ts_subtree_new_leaf(&pool, symbol9, padding, size, &language)), + }), 0, &language)), + }), 0, &language); + + auto token = ts_subtree_last_external_token(tree1); + AssertThat(token, Equals(tree3)); + + ts_subtree_release(&pool, tree1); + }); + }); +}); + +END_TEST diff --git a/test/runtime/tree_test.cc b/test/runtime/tree_test.cc index 8669b6c1..b599f568 100644 --- a/test/runtime/tree_test.cc +++ b/test/runtime/tree_test.cc @@ -1,473 +1,257 @@ #include "test_helper.h" +#include +#include "runtime/alloc.h" +#include "helpers/record_alloc.h" +#include "helpers/stream_methods.h" #include "helpers/tree_helpers.h" #include "helpers/point_helpers.h" -#include "runtime/tree.h" -#include "runtime/length.h" +#include "helpers/spy_logger.h" +#include "helpers/stderr_logger.h" +#include "helpers/spy_input.h" +#include "helpers/load_language.h" +#include "helpers/random_helpers.h" +#include "helpers/read_test_entries.h" 
+#include "helpers/encoding_helpers.h" +#include "helpers/tree_helpers.h" -void assert_consistent(const Tree *tree) { - if (tree->child_count == 0) - return; - AssertThat(tree->children.contents[0]->padding, Equals(tree->padding)); - - Length total_children_size = length_zero(); - for (size_t i = 0; i < tree->children.size; i++) { - Tree *child = tree->children.contents[i]; - AssertThat(child->context.offset, Equals(total_children_size)); - assert_consistent(child); - total_children_size = length_add(total_children_size, ts_tree_total_size(child)); - } - - AssertThat(total_children_size, Equals(ts_tree_total_size(tree))); -}; +TSPoint point(uint32_t row, uint32_t column) { + TSPoint result = {row, column}; + return result; +} START_TEST -describe("Tree", []() { - enum { - symbol1 = 1, - symbol2, - symbol3, - symbol4, - symbol5, - symbol6, - symbol7, - symbol8, - symbol9, - }; - - TSSymbolMetadata metadata_list[30] = {}; - - TSLanguage language; - language.symbol_metadata = metadata_list; - - TreePool pool; +describe("Tree", [&]() { + TSParser *parser; + SpyInput *input; + TSTree *tree; before_each([&]() { - ts_tree_pool_init(&pool); + record_alloc::start(true); + parser = ts_parser_new(); + tree = nullptr; + input = nullptr; }); after_each([&]() { - ts_tree_pool_delete(&pool); + if (tree) ts_tree_delete(tree); + if (input) delete input; + ts_parser_delete(parser); + AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); - describe("make_leaf", [&]() { - it("does not mark the tree as fragile", [&]() { - Tree *tree = ts_tree_make_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); - AssertThat(tree->fragile_left, IsFalse()); - AssertThat(tree->fragile_right, IsFalse()); + auto assert_root_node = [&](const string &expected) { + TSNode node = ts_tree_root_node(tree); + char *node_string = ts_node_string(node); + string actual(node_string); + ts_free(node_string); + AssertThat(actual, Equals(expected)); + }; - ts_tree_release(&pool, tree); + 
describe("copy()", [&]() { + it("returns a tree that can be safely used while the current tree is edited", [&]() { + const TSLanguage *language = load_real_language("javascript"); + ts_parser_set_language(parser, language); + string source_code = examples_for_language("javascript")[0].input; + + input = new SpyInput(source_code, 32); + TSTree *original_tree = ts_parser_parse(parser, nullptr, input->input()); + + vector> new_trees; + for (unsigned i = 0; i < 8; i++) { + TSTree *tree_copy = ts_tree_copy(original_tree); + new_trees.push_back(std::async([i, tree_copy, &source_code, language]() { + Generator random(TREE_SITTER_SEED + i); + + TSTree *tree = tree_copy; + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, language); + SpyInput *input = new SpyInput(source_code, 1024); + + for (unsigned j = 0; j < 10; j++) { + random.sleep_some(); + + size_t edit_position = random(utf8_char_count(input->content)); + size_t deletion_size = random(utf8_char_count(input->content) - edit_position); + string inserted_text = random.words(random(4) + 1); + + TSInputEdit edit = input->replace(edit_position, deletion_size, inserted_text); + ts_tree_edit(tree, &edit); + + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + ts_tree_delete(tree); + tree = new_tree; + } + + ts_parser_delete(parser); + delete input; + + return tree; + })); + } + + ts_tree_delete(original_tree); + + for (auto &future : new_trees) { + future.wait(); + TSTree *new_tree = future.get(); + assert_consistent_tree_sizes(ts_tree_root_node(new_tree)); + ts_tree_delete(new_tree); + } }); }); - describe("make_error", [&]() { - it("marks the tree as fragile", [&]() { - Tree *error_tree = ts_tree_make_error( - &pool, - length_zero(), - length_zero(), - 'z', - &language + describe("get_changed_ranges()", [&]() { + before_each([&]() { + ts_parser_set_language(parser, load_real_language("javascript")); + input = new SpyInput("{a: null};\n", 3); + tree = ts_parser_parse(parser, nullptr, 
input->input()); + + assert_root_node( + "(program (expression_statement (object (pair (property_identifier) (null)))))" ); - - AssertThat(error_tree->fragile_left, IsTrue()); - AssertThat(error_tree->fragile_right, IsTrue()); - - ts_tree_release(&pool, error_tree); - }); - }); - - describe("make_node", [&]() { - Tree *tree1, *tree2, *parent1; - - before_each([&]() { - tree1 = ts_tree_make_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); - tree2 = ts_tree_make_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language); - - ts_tree_retain(tree1); - ts_tree_retain(tree2); - parent1 = ts_tree_make_node(&pool, symbol3, tree_array({ - tree1, - tree2, - }), 0, &language); }); - after_each([&]() { - ts_tree_release(&pool, tree1); - ts_tree_release(&pool, tree2); - ts_tree_release(&pool, parent1); - }); - - it("computes its size and padding based on its child nodes", [&]() { - AssertThat(parent1->size.bytes, Equals( - tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes - )); - AssertThat(parent1->padding.bytes, Equals(tree1->padding.bytes)); - }); - - describe("when the first node is fragile on the left side", [&]() { - Tree *parent; - - before_each([&]() { - tree1->fragile_left = true; - tree1->extra = true; - - ts_tree_retain(tree1); - ts_tree_retain(tree2); - parent = ts_tree_make_node(&pool, symbol3, tree_array({ - tree1, - tree2, - }), 0, &language); - }); - - after_each([&]() { - ts_tree_release(&pool, parent); - }); - - it("records that it is fragile on the left side", [&]() { - AssertThat(parent->fragile_left, IsTrue()); - }); - }); - - describe("when the last node is fragile on the right side", [&]() { - Tree *parent; - - before_each([&]() { - tree2->fragile_right = true; - tree2->extra = true; - - ts_tree_retain(tree1); - ts_tree_retain(tree2); - parent = ts_tree_make_node(&pool, symbol3, tree_array({ - tree1, - tree2, - }), 0, &language); - }); - - after_each([&]() { - ts_tree_release(&pool, parent); - }); - - it("records that it is fragile on 
the right side", [&]() { - AssertThat(parent->fragile_right, IsTrue()); - }); - }); - - describe("when the outer nodes aren't fragile on their outer side", [&]() { - Tree *parent; - - before_each([&]() { - tree1->fragile_right = true; - tree2->fragile_left = true; - - ts_tree_retain(tree1); - ts_tree_retain(tree2); - parent = ts_tree_make_node(&pool, symbol3, tree_array({ - tree1, - tree2, - }), 0, &language); - }); - - after_each([&]() { - ts_tree_release(&pool, parent); - }); - - it("records that it is not fragile", [&]() { - AssertThat(parent->fragile_left, IsFalse()); - AssertThat(parent->fragile_right, IsFalse()); - }); - }); - }); - - describe("edit", [&]() { - Tree *tree = nullptr; - - before_each([&]() { - tree = ts_tree_make_node(&pool, symbol1, tree_array({ - ts_tree_make_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, &language), - ts_tree_make_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, &language), - ts_tree_make_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, &language), - }), 0, &language); - - AssertThat(tree->padding, Equals({2, {0, 2}})); - AssertThat(tree->size, Equals({13, {0, 13}})); - }); - - after_each([&]() { - ts_tree_release(&pool, tree); - }); - - describe("edits within a tree's padding", [&]() { - it("resizes the padding of the tree and its leftmost descendants", [&]() { - TSInputEdit edit; - edit.start_byte = 1; - edit.bytes_removed = 0; - edit.bytes_added = 1; - edit.start_point = {0, 1}; - edit.extent_removed = {0, 0}; - edit.extent_added = {0, 1}; - ts_tree_edit(tree, &edit); - assert_consistent(tree); - - AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({3, {0, 3}})); - AssertThat(tree->size, Equals({13, {0, 13}})); - - AssertThat(tree->children.contents[0]->has_changes, IsTrue()); - AssertThat(tree->children.contents[0]->padding, Equals({3, {0, 3}})); - AssertThat(tree->children.contents[0]->size, Equals({3, {0, 3}})); - - AssertThat(tree->children.contents[1]->has_changes, IsFalse()); - 
AssertThat(tree->children.contents[1]->padding, Equals({2, {0, 2}})); - AssertThat(tree->children.contents[1]->size, Equals({3, {0, 3}})); - }); - }); - - describe("edits that start in a tree's padding but extend into its content", [&]() { - it("shrinks the content to compensate for the expanded padding", [&]() { - TSInputEdit edit; - edit.start_byte = 1; - edit.bytes_removed = 3; - edit.bytes_added = 4; - edit.start_point = {0, 1}; - edit.extent_removed = {0, 3}; - edit.extent_added = {0, 4}; - ts_tree_edit(tree, &edit); - assert_consistent(tree); - - AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({5, {0, 5}})); - AssertThat(tree->size, Equals({11, {0, 11}})); - - AssertThat(tree->children.contents[0]->has_changes, IsTrue()); - AssertThat(tree->children.contents[0]->padding, Equals({5, {0, 5}})); - AssertThat(tree->children.contents[0]->size, Equals({1, {0, 1}})); - }); - }); - - describe("insertions at the edge of a tree's padding", [&]() { - it("expands the tree's padding", [&]() { - TSInputEdit edit; - edit.start_byte = 2; - edit.bytes_removed = 0; - edit.bytes_added = 2; - edit.start_point = {0, 2}; - edit.extent_removed = {0, 0}; - edit.extent_added = {0, 2}; - ts_tree_edit(tree, &edit); - assert_consistent(tree); - - assert_consistent(tree); - - AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({4, {0, 4}})); - AssertThat(tree->size, Equals({13, {0, 13}})); - - AssertThat(tree->children.contents[0]->has_changes, IsTrue()); - AssertThat(tree->children.contents[0]->padding, Equals({4, {0, 4}})); - AssertThat(tree->children.contents[0]->size, Equals({3, {0, 3}})); - - AssertThat(tree->children.contents[1]->has_changes, IsFalse()); - }); - }); - - describe("replacements starting at the edge of a tree's padding", [&]() { - it("resizes the content and not the padding", [&]() { - TSInputEdit edit; - edit.start_byte = 2; - edit.bytes_removed = 2; - edit.bytes_added = 5; - edit.start_point = {0, 2}; - 
edit.extent_removed = {0, 2}; - edit.extent_added = {0, 5}; - ts_tree_edit(tree, &edit); - assert_consistent(tree); - - AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({2, {0, 2}})); - AssertThat(tree->size, Equals({16, {0, 16}})); - - AssertThat(tree->children.contents[0]->has_changes, IsTrue()); - AssertThat(tree->children.contents[0]->padding, Equals({2, {0, 2}})); - AssertThat(tree->children.contents[0]->size, Equals({6, {0, 6}})); - - AssertThat(tree->children.contents[1]->has_changes, IsFalse()); - }); - }); - - describe("deletions that span more than one child node", [&]() { - it("shrinks subsequent child nodes", [&]() { - TSInputEdit edit; - edit.start_byte = 1; - edit.bytes_removed = 10; - edit.bytes_added = 3; - edit.start_point = {0, 1}; - edit.extent_removed = {0, 10}; - edit.extent_added = {0, 3}; - ts_tree_edit(tree, &edit); - assert_consistent(tree); - - assert_consistent(tree); - - AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({4, {0, 4}})); - AssertThat(tree->size, Equals({4, {0, 4}})); - - AssertThat(tree->children.contents[0]->has_changes, IsTrue()); - AssertThat(tree->children.contents[0]->padding, Equals({4, {0, 4}})); - AssertThat(tree->children.contents[0]->size, Equals({0, {0, 0}})); - - AssertThat(tree->children.contents[1]->has_changes, IsTrue()); - AssertThat(tree->children.contents[1]->padding, Equals({0, {0, 0}})); - AssertThat(tree->children.contents[1]->size, Equals({0, {0, 0}})); - - AssertThat(tree->children.contents[2]->has_changes, IsTrue()); - AssertThat(tree->children.contents[2]->padding, Equals({1, {0, 1}})); - AssertThat(tree->children.contents[2]->size, Equals({3, {0, 3}})); - }); - }); - - describe("edits within a tree's range of scanned bytes", [&]() { - it("marks preceding trees as changed", [&]() { - tree->children.contents[0]->bytes_scanned = 7; - - TSInputEdit edit; - edit.start_byte = 6; - edit.bytes_removed = 1; - edit.bytes_added = 1; - edit.start_point = {0, 
6}; - edit.extent_removed = {0, 1}; - edit.extent_added = {0, 1}; - ts_tree_edit(tree, &edit); - assert_consistent(tree); - - AssertThat(tree->children.contents[0]->has_changes, IsTrue()); - }); - }); - }); - - describe("eq", [&]() { - Tree *leaf; - - before_each([&]() { - leaf = ts_tree_make_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); - }); - - after_each([&]() { - ts_tree_release(&pool, leaf); - }); - - it("returns true for identical trees", [&]() { - Tree *leaf_copy = ts_tree_make_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, &language); - AssertThat(ts_tree_eq(leaf, leaf_copy), IsTrue()); - - Tree *parent = ts_tree_make_node(&pool, symbol2, tree_array({ - leaf, - leaf_copy, - }), 0, &language); - ts_tree_retain(leaf); - ts_tree_retain(leaf_copy); - - Tree *parent_copy = ts_tree_make_node(&pool, symbol2, tree_array({ - leaf, - leaf_copy, - }), 0, &language); - ts_tree_retain(leaf); - ts_tree_retain(leaf_copy); - - AssertThat(ts_tree_eq(parent, parent_copy), IsTrue()); - - ts_tree_release(&pool, leaf_copy); - ts_tree_release(&pool, parent); - ts_tree_release(&pool, parent_copy); - }); - - it("returns false for trees with different symbols", [&]() { - Tree *different_leaf = ts_tree_make_leaf( - &pool, - leaf->symbol + 1, - leaf->padding, - leaf->size, - &language - ); - - AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse()); - ts_tree_release(&pool, different_leaf); - }); - - it("returns false for trees with different options", [&]() { - Tree *different_leaf = ts_tree_make_leaf(&pool, leaf->symbol, leaf->padding, leaf->size, &language); - different_leaf->visible = !leaf->visible; - AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse()); - ts_tree_release(&pool, different_leaf); - }); - - it("returns false for trees with different paddings or sizes", [&]() { - Tree *different_leaf = ts_tree_make_leaf(&pool, leaf->symbol, {}, leaf->size, &language); - AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse()); - ts_tree_release(&pool, 
different_leaf); - - different_leaf = ts_tree_make_leaf(&pool, symbol1, leaf->padding, {}, &language); - AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse()); - ts_tree_release(&pool, different_leaf); - }); - - it("returns false for trees with different children", [&]() { - Tree *leaf2 = ts_tree_make_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language); - - Tree *parent = ts_tree_make_node(&pool, symbol2, tree_array({ - leaf, - leaf2, - }), 0, &language); - ts_tree_retain(leaf); - ts_tree_retain(leaf2); - - Tree *different_parent = ts_tree_make_node(&pool, symbol2, tree_array({ - leaf2, - leaf, - }), 0, &language); - ts_tree_retain(leaf2); - ts_tree_retain(leaf); - - AssertThat(ts_tree_eq(different_parent, parent), IsFalse()); - AssertThat(ts_tree_eq(parent, different_parent), IsFalse()); - - ts_tree_release(&pool, leaf2); - ts_tree_release(&pool, parent); - ts_tree_release(&pool, different_parent); - }); - }); - - describe("last_external_token", [&]() { - Length padding = {1, {0, 1}}; - Length size = {2, {0, 2}}; - - auto make_external = [](Tree *tree) { - tree->has_external_tokens = true; - return tree; + auto get_changed_ranges_for_edit = [&](function fn) -> vector { + TSInputEdit edit = fn(); + ts_tree_edit(tree, &edit); + + uint32_t range_count = 0; + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count); + ts_tree_delete(tree); + tree = new_tree; + + vector result; + for (size_t i = 0; i < range_count; i++) { + result.push_back(ranges[i]); + } + + ts_free(ranges); + return result; }; - it("returns the last serialized external token state in the given tree", [&]() { - Tree *tree1, *tree2, *tree3, *tree4, *tree5, *tree6, *tree7, *tree8, *tree9; + it("reports changes when one token has been updated", [&]() { + // Replace `null` with `nothing` + auto ranges = get_changed_ranges_for_edit([&]() { + return input->replace(input->content.find("ull"), 1, "othing"); + }); + 
AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find("nothing")), + point(0, input->content.find("}")) + }, + }))); - tree1 = ts_tree_make_node(&pool, symbol1, tree_array({ - (tree2 = ts_tree_make_node(&pool, symbol2, tree_array({ - (tree3 = make_external(ts_tree_make_leaf(&pool, symbol3, padding, size, &language))), - (tree4 = ts_tree_make_leaf(&pool, symbol4, padding, size, &language)), - (tree5 = ts_tree_make_leaf(&pool, symbol5, padding, size, &language)), - }), 0, &language)), - (tree6 = ts_tree_make_node(&pool, symbol6, tree_array({ - (tree7 = ts_tree_make_node(&pool, symbol7, tree_array({ - (tree8 = ts_tree_make_leaf(&pool, symbol8, padding, size, &language)), - }), 0, &language)), - (tree9 = ts_tree_make_leaf(&pool, symbol9, padding, size, &language)), - }), 0, &language)), - }), 0, &language); + // Replace `nothing` with `null` again + ranges = get_changed_ranges_for_edit([&]() { + return input->undo(); + }); + AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find("null")), + point(0, input->content.find("}")) + }, + }))); + }); - auto token = ts_tree_last_external_token(tree1); - AssertThat(token, Equals(tree3)); + it("reports no changes when leading whitespace has changed (regression)", [&]() { + input->chars_per_chunk = 80; - ts_tree_release(&pool, tree1); + // Insert leading whitespace + auto ranges = get_changed_ranges_for_edit([&]() { + return input->replace(0, 0, "\n"); + }); + assert_root_node( + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + AssertThat(ranges, IsEmpty()); + + // Remove leading whitespace + ranges = get_changed_ranges_for_edit([&]() { + return input->undo(); + }); + assert_root_node( + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + AssertThat(ranges, IsEmpty()); + + // Insert leading whitespace again + ranges = get_changed_ranges_for_edit([&]() { + return input->replace(0, 0, "\n"); + }); + 
assert_root_node( + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + AssertThat(ranges, IsEmpty()); + }); + + it("reports changes when tokens have been appended", [&]() { + // Add a second key-value pair + auto ranges = get_changed_ranges_for_edit([&]() { + return input->replace(input->content.find("}"), 0, ", b: false"); + }); + AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find(",")), + point(0, input->content.find("}")) + }, + }))); + + // Add a third key-value pair in between the first two + ranges = get_changed_ranges_for_edit([&]() { + return input->replace(input->content.find(", b"), 0, ", c: 1"); + }); + assert_root_node( + "(program (expression_statement (object " + "(pair (property_identifier) (null)) " + "(pair (property_identifier) (number)) " + "(pair (property_identifier) (false)))))" + ); + AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find(", c")), + point(0, input->content.find(", b")) + }, + }))); + + // Delete the middle pair. + ranges = get_changed_ranges_for_edit([&]() { + return input->undo(); + }); + assert_root_node( + "(program (expression_statement (object " + "(pair (property_identifier) (null)) " + "(pair (property_identifier) (false)))))" + ); + AssertThat(ranges, IsEmpty()); + + // Delete the second pair. + ranges = get_changed_ranges_for_edit([&]() { + return input->undo(); + }); + assert_root_node( + "(program (expression_statement (object " + "(pair (property_identifier) (null)))))" + ); + AssertThat(ranges, IsEmpty()); + }); + + it("reports changes when trees have been wrapped", [&]() { + // Wrap the object in an assignment expression. 
+ auto ranges = get_changed_ranges_for_edit([&]() { + return input->replace(input->content.find("null"), 0, "b === "); + }); + assert_root_node( + "(program (expression_statement (object " + "(pair (property_identifier) (binary_expression (identifier) (null))))))" + ); + AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find("b ===")), + point(0, input->content.find("}")) + }, + }))); }); }); }); diff --git a/test/test_helper.h b/test/test_helper.h index 6947452b..0cb2d41c 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -5,6 +5,8 @@ #include "tree_sitter/compiler.h" #include "tree_sitter/runtime.h" +extern int TREE_SITTER_SEED; + namespace tree_sitter {} using namespace std; diff --git a/test/tests.cc b/test/tests.cc index cb9d6595..0d8c23e7 100644 --- a/test/tests.cc +++ b/test/tests.cc @@ -1,17 +1,18 @@ #include "test_helper.h" #include "helpers/random_helpers.h" +int TREE_SITTER_SEED = 0; + int main(int argc, char *argv[]) { - int seed; const char *seed_env = getenv("TREE_SITTER_SEED"); if (seed_env) { - seed = atoi(seed_env); + TREE_SITTER_SEED = atoi(seed_env); } else { - seed = get_time_as_seed(); + TREE_SITTER_SEED = get_time_as_seed(); } - printf("Random seed: %d\n", seed); - random_reseed(seed); + printf("Random seed: %d\n", TREE_SITTER_SEED); + default_generator.reseed(TREE_SITTER_SEED); return bandit::run(argc, argv); } diff --git a/tests.gyp b/tests.gyp index eefdf2bf..80e8d618 100644 --- a/tests.gyp +++ b/tests.gyp @@ -66,11 +66,11 @@ 'test/integration/fuzzing-examples.cc', 'test/integration/real_grammars.cc', 'test/integration/test_grammars.cc', - 'test/runtime/document_test.cc', 'test/runtime/language_test.cc', 'test/runtime/node_test.cc', 'test/runtime/parser_test.cc', 'test/runtime/stack_test.cc', + 'test/runtime/subtree_test.cc', 'test/runtime/tree_test.cc', 'test/tests.cc', ], @@ -101,7 +101,7 @@ 'cflags_cc': ['-std=c++14'], 'conditions': [ ['OS=="linux"', { - 'libraries': ['-ldl'], + 'libraries': ['-ldl', 
'-lpthread'], }] ], 'xcode_settings': {