From e75ecd1bb125deea42ac1567b6ed442bcdf8bbfb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 May 2018 22:22:37 -0700 Subject: [PATCH] Rework API completely --- include/tree_sitter/runtime.h | 49 ++- project.gyp | 2 +- src/runtime/document.c | 188 ---------- src/runtime/document.h | 20 -- src/runtime/get_changed_ranges.c | 34 +- src/runtime/node.c | 22 +- src/runtime/parser.c | 118 +++++-- src/runtime/parser.h | 46 --- src/runtime/reusable_node.h | 2 +- src/runtime/string_input.c | 25 +- src/runtime/string_input.h | 10 +- src/runtime/subtree.c | 17 +- src/runtime/subtree.h | 8 +- src/runtime/tree.c | 59 ++++ src/runtime/tree.h | 19 ++ src/runtime/tree_cursor.c | 32 +- src/runtime/tree_cursor.h | 6 +- test/benchmarks.cc | 39 +-- test/helpers/scope_sequence.cc | 11 +- test/helpers/scope_sequence.h | 2 +- test/integration/fuzzing-examples.cc | 20 +- test/integration/real_grammars.cc | 139 +++++--- test/integration/test_grammars.cc | 18 +- test/runtime/document_test.cc | 490 --------------------------- test/runtime/language_test.cc | 12 +- test/runtime/node_test.cc | 56 +-- test/runtime/parser_test.cc | 266 +++++++++++---- test/runtime/stack_test.cc | 2 +- test/runtime/subtree_test.cc | 2 +- test/runtime/tree_test.cc | 200 +++++++++++ tests.gyp | 2 +- 31 files changed, 841 insertions(+), 1075 deletions(-) delete mode 100644 src/runtime/document.c delete mode 100644 src/runtime/document.h delete mode 100644 src/runtime/parser.h create mode 100644 src/runtime/tree.c create mode 100644 src/runtime/tree.h delete mode 100644 test/runtime/document_test.cc create mode 100644 test/runtime/tree_test.cc diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index d3e282fb..fa6fd919 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -5,6 +5,7 @@ extern "C" { #endif +#include #include #include #include @@ -13,7 +14,8 @@ extern "C" { typedef unsigned short TSSymbol; typedef struct TSLanguage TSLanguage; -typedef struct 
TSDocument TSDocument; +typedef struct TSParser TSParser; +typedef struct TSTree TSTree; typedef struct TSTreeCursor TSTreeCursor; typedef enum { @@ -65,17 +67,29 @@ typedef struct { typedef struct { const void *subtree; - const TSDocument *document; + const TSTree *tree; TSPoint position; uint32_t byte; TSSymbol alias_symbol; } TSNode; -typedef struct { - TSRange **changed_ranges; - uint32_t *changed_range_count; - bool halt_on_error; -} TSParseOptions; +TSParser *ts_parser_new(); +void ts_parser_delete(TSParser *); +const TSLanguage *ts_parser_language(const TSParser *); +bool ts_parser_set_language(TSParser *, const TSLanguage *); +TSLogger ts_parser_logger(const TSParser *); +void ts_parser_set_logger(TSParser *, TSLogger); +void ts_parser_print_debugging_graphs(TSParser *, bool); +void ts_parser_halt_on_error(TSParser *, bool); +TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput); +TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t); + +TSTree *ts_tree_copy(const TSTree *); +void ts_tree_delete(const TSTree *); +TSNode ts_tree_root_node(const TSTree *); +void ts_tree_edit(TSTree *, const TSInputEdit *); +TSRange *ts_tree_get_changed_ranges(const TSTree *, const TSTree *, uint32_t *); +void ts_tree_print_dot_graph(const TSTree *, FILE *); uint32_t ts_node_start_byte(TSNode); TSPoint ts_node_start_point(TSNode); @@ -105,26 +119,7 @@ TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t); TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint); TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint); -TSDocument *ts_document_new(); -void ts_document_free(TSDocument *); -const TSLanguage *ts_document_language(TSDocument *); -void ts_document_set_language(TSDocument *, const TSLanguage *); -TSInput ts_document_input(TSDocument *); -void ts_document_set_input(TSDocument *, TSInput); -void ts_document_set_input_string(TSDocument *, const char *); -void 
ts_document_set_input_string_with_length(TSDocument *, const char *, uint32_t); -TSLogger ts_document_logger(const TSDocument *); -void ts_document_set_logger(TSDocument *, TSLogger); -void ts_document_print_debugging_graphs(TSDocument *, bool); -void ts_document_edit(TSDocument *, TSInputEdit); -void ts_document_parse(TSDocument *); -void ts_document_parse_and_get_changed_ranges(TSDocument *, TSRange **, uint32_t *); -void ts_document_parse_with_options(TSDocument *, TSParseOptions); -void ts_document_invalidate(TSDocument *); -TSNode ts_document_root_node(const TSDocument *); -TSTreeCursor *ts_document_tree_cursor(const TSDocument *); -uint32_t ts_document_parse_count(const TSDocument *); - +TSTreeCursor *ts_tree_cursor_new(const TSTree *); void ts_tree_cursor_delete(TSTreeCursor *); bool ts_tree_cursor_goto_first_child(TSTreeCursor *); bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); diff --git a/project.gyp b/project.gyp index cfc1776b..11174724 100644 --- a/project.gyp +++ b/project.gyp @@ -87,7 +87,6 @@ 'externals/utf8proc', ], 'sources': [ - 'src/runtime/document.c', 'src/runtime/get_changed_ranges.c', 'src/runtime/language.c', 'src/runtime/lexer.c', @@ -96,6 +95,7 @@ 'src/runtime/parser.c', 'src/runtime/string_input.c', 'src/runtime/subtree.c', + 'src/runtime/tree.c', 'src/runtime/tree_cursor.c', 'src/runtime/utf16.c', 'externals/utf8proc/utf8proc.c', diff --git a/src/runtime/document.c b/src/runtime/document.c deleted file mode 100644 index 7d175b6a..00000000 --- a/src/runtime/document.c +++ /dev/null @@ -1,188 +0,0 @@ -#include "runtime/alloc.h" -#include "runtime/subtree.h" -#include "runtime/parser.h" -#include "runtime/string_input.h" -#include "runtime/document.h" -#include "runtime/tree_cursor.h" -#include "runtime/get_changed_ranges.h" - -#define LOG(...) 
\ - snprintf(self->parser.lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ - self->parser.lexer.logger.log(self->parser.lexer.logger.payload, TSLogTypeLex, self->parser.lexer.debug_buffer); \ - -TSDocument *ts_document_new() { - TSDocument *self = ts_calloc(1, sizeof(TSDocument)); - ts_parser_init(&self->parser); - array_init(&self->cursor1.stack); - array_init(&self->cursor2.stack); - return self; -} - -void ts_document_free(TSDocument *self) { - if (self->tree) ts_subtree_release(&self->parser.tree_pool, self->tree); - if (self->cursor1.stack.contents) array_delete(&self->cursor1.stack); - if (self->cursor2.stack.contents) array_delete(&self->cursor2.stack); - ts_parser_destroy(&self->parser); - ts_document_set_input(self, (TSInput){ - NULL, - NULL, - NULL, - TSInputEncodingUTF8, - }); - ts_free(self); -} - -const TSLanguage *ts_document_language(TSDocument *self) { - return self->parser.language; -} - -void ts_document_set_language(TSDocument *self, const TSLanguage *language) { - if (language->version != TREE_SITTER_LANGUAGE_VERSION) return; - ts_document_invalidate(self); - ts_parser_set_language(&self->parser, language); - if (self->tree) { - ts_subtree_release(&self->parser.tree_pool, self->tree); - self->tree = NULL; - } -} - -TSLogger ts_document_logger(const TSDocument *self) { - return self->parser.lexer.logger; -} - -void ts_document_set_logger(TSDocument *self, TSLogger logger) { - self->parser.lexer.logger = logger; -} - -void ts_document_print_debugging_graphs(TSDocument *self, bool should_print) { - self->parser.print_debugging_graphs = should_print; -} - -TSInput ts_document_input(TSDocument *self) { - return self->input; -} - -void ts_document_set_input(TSDocument *self, TSInput input) { - if (self->owns_input) - ts_free(self->input.payload); - self->input = input; - self->owns_input = false; -} - -void ts_document_set_input_string(TSDocument *self, const char *text) { - ts_document_invalidate(self); - TSInput input = 
ts_string_input_make(text); - ts_document_set_input(self, input); - if (input.payload) { - self->owns_input = true; - } -} - -void ts_document_set_input_string_with_length(TSDocument *self, const char *text, uint32_t length) { - ts_document_invalidate(self); - TSInput input = ts_string_input_make_with_length(text, length); - ts_document_set_input(self, input); - if (input.payload) { - self->owns_input = true; - } -} - -void ts_document_edit(TSDocument *self, TSInputEdit edit) { - if (!self->tree) - return; - - uint32_t max_bytes = ts_subtree_total_bytes(self->tree); - if (edit.start_byte > max_bytes) - return; - if (edit.bytes_removed > max_bytes - edit.start_byte) - edit.bytes_removed = max_bytes - edit.start_byte; - - self->tree = ts_subtree_edit(self->tree, &edit, &self->parser.tree_pool); - - if (self->parser.print_debugging_graphs) { - ts_subtree_print_dot_graph(self->tree, self->parser.language, stderr); - } -} - -void ts_document_parse(TSDocument *self) { - ts_document_parse_with_options(self, (TSParseOptions){ - .halt_on_error = false, - .changed_ranges = NULL, - .changed_range_count = NULL, - }); -} - -void ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, - uint32_t *range_count) { - ts_document_parse_with_options(self, (TSParseOptions){ - .halt_on_error = false, - .changed_ranges = ranges, - .changed_range_count = range_count, - }); -} - -void ts_document_parse_with_options(TSDocument *self, TSParseOptions options) { - if (options.changed_ranges && options.changed_range_count) { - *options.changed_ranges = NULL; - *options.changed_range_count = 0; - } - - if (!self->input.read || !self->parser.language) - return; - - Subtree *reusable_tree = self->valid ? 
self->tree : NULL; - if (reusable_tree && !reusable_tree->has_changes) - return; - - Subtree *tree = ts_parser_parse(&self->parser, self->input, reusable_tree, options.halt_on_error); - - if (self->tree) { - Subtree *old_tree = self->tree; - self->tree = tree; - - if (options.changed_ranges && options.changed_range_count) { - *options.changed_range_count = ts_subtree_get_changed_ranges( - old_tree, tree, &self->cursor1, &self->cursor2, - self->parser.language, options.changed_ranges - ); - - if (self->parser.lexer.logger.log) { - for (unsigned i = 0; i < *options.changed_range_count; i++) { - TSRange range = (*options.changed_ranges)[i]; - LOG( - "changed_range start:[%u %u], end:[%u %u]", - range.start.row, range.start.column, - range.end.row, range.end.column - ); - } - } - } - - ts_subtree_release(&self->parser.tree_pool, old_tree); - } - - self->tree = tree; - self->parse_count++; - self->valid = true; -} - -void ts_document_invalidate(TSDocument *self) { - self->valid = false; -} - -TSNode ts_document_root_node(const TSDocument *self) { - return (TSNode) { - .subtree = self->tree, - .document = self, - .position = {0, 0}, - .byte = 0, - }; -} - -uint32_t ts_document_parse_count(const TSDocument *self) { - return self->parse_count; -} - -TSTreeCursor *ts_document_tree_cursor(const TSDocument *self) { - return ts_tree_cursor_new(self); -} diff --git a/src/runtime/document.h b/src/runtime/document.h deleted file mode 100644 index a7476ebf..00000000 --- a/src/runtime/document.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef RUNTIME_DOCUMENT_H_ -#define RUNTIME_DOCUMENT_H_ - -#include "runtime/parser.h" -#include "runtime/subtree.h" -#include "runtime/tree_cursor.h" -#include - -struct TSDocument { - TSParser parser; - TSInput input; - Subtree *tree; - TSTreeCursor cursor1; - TSTreeCursor cursor2; - size_t parse_count; - bool valid; - bool owns_input; -}; - -#endif diff --git a/src/runtime/get_changed_ranges.c b/src/runtime/get_changed_ranges.c index 76e01f29..032fdaab 
100644 --- a/src/runtime/get_changed_ranges.c +++ b/src/runtime/get_changed_ranges.c @@ -34,7 +34,7 @@ typedef struct { static Iterator iterator_new(TSTreeCursor *cursor, Subtree *tree, const TSLanguage *language) { array_clear(&cursor->stack); array_push(&cursor->stack, ((TreeCursorEntry){ - .tree = tree, + .subtree = tree, .position = length_zero(), .child_index = 0, .structural_child_index = 0, @@ -56,25 +56,25 @@ Length iterator_start_position(Iterator *self) { if (self->in_padding) { return entry.position; } else { - return length_add(entry.position, entry.tree->padding); + return length_add(entry.position, entry.subtree->padding); } } Length iterator_end_position(Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); - Length result = length_add(entry.position, entry.tree->padding); + Length result = length_add(entry.position, entry.subtree->padding); if (self->in_padding) { return result; } else { - return length_add(result, entry.tree->size); + return length_add(result, entry.subtree->size); } } static bool iterator_tree_is_visible(const Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); - if (entry.tree->visible) return true; + if (entry.subtree->visible) return true; if (self->cursor.stack.size > 1) { - Subtree *parent = self->cursor.stack.contents[self->cursor.stack.size - 2].tree; + Subtree *parent = self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence(self->language, parent->alias_sequence_id); return alias_sequence && alias_sequence[entry.structural_child_index] != 0; } @@ -94,7 +94,7 @@ static void iterator_get_visible_state(const Iterator *self, Subtree **tree, TreeCursorEntry entry = self->cursor.stack.contents[i]; if (i > 0) { - Subtree *parent = self->cursor.stack.contents[i - 1].tree; + Subtree *parent = self->cursor.stack.contents[i - 1].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->language, 
parent->alias_sequence_id @@ -104,8 +104,8 @@ static void iterator_get_visible_state(const Iterator *self, Subtree **tree, } } - if (entry.tree->visible || *alias_symbol) { - *tree = entry.tree; + if (entry.subtree->visible || *alias_symbol) { + *tree = entry.subtree; *start_byte = entry.position.bytes; break; } @@ -128,14 +128,14 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { TreeCursorEntry entry = *array_back(&self->cursor.stack); Length position = entry.position; uint32_t structural_child_index = 0; - for (uint32_t i = 0; i < entry.tree->children.size; i++) { - Subtree *child = entry.tree->children.contents[i]; + for (uint32_t i = 0; i < entry.subtree->children.size; i++) { + Subtree *child = entry.subtree->children.contents[i]; Length child_left = length_add(position, child->padding); Length child_right = length_add(child_left, child->size); if (child_right.bytes > goal_position) { array_push(&self->cursor.stack, ((TreeCursorEntry){ - .tree = child, + .subtree = child, .position = position, .child_index = i, .structural_child_index = structural_child_index, @@ -178,16 +178,16 @@ static void iterator_advance(Iterator *self) { TreeCursorEntry entry = array_pop(&self->cursor.stack); if (iterator_done(self)) return; - Subtree *parent = array_back(&self->cursor.stack)->tree; + Subtree *parent = array_back(&self->cursor.stack)->subtree; uint32_t child_index = entry.child_index + 1; if (parent->children.size > child_index) { - Length position = length_add(entry.position, ts_subtree_total_size(entry.tree)); + Length position = length_add(entry.position, ts_subtree_total_size(entry.subtree)); uint32_t structural_child_index = entry.structural_child_index; - if (!entry.tree->extra) structural_child_index++; + if (!entry.subtree->extra) structural_child_index++; Subtree *next_child = parent->children.contents[child_index]; array_push(&self->cursor.stack, ((TreeCursorEntry){ - .tree = next_child, + .subtree = next_child, .position = position, 
.child_index = child_index, .structural_child_index = structural_child_index, @@ -250,7 +250,7 @@ static inline void iterator_print_state(Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); TSPoint start = iterator_start_position(self).extent; TSPoint end = iterator_end_position(self).extent; - const char *name = ts_language_symbol_name(self->language, entry.tree->symbol); + const char *name = ts_language_symbol_name(self->language, entry.subtree->symbol); printf( "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", name, self->in_padding ? "(p)" : " ", diff --git a/src/runtime/node.c b/src/runtime/node.c index 4df3400a..e260f13b 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -1,13 +1,13 @@ #include #include "runtime/subtree.h" -#include "runtime/document.h" +#include "runtime/tree.h" #include "runtime/language.h" // NodeChildIterator typedef struct { const Subtree *parent; - const TSDocument *document; + const TSTree *tree; Length position; uint32_t child_index; uint32_t structural_child_index; @@ -19,7 +19,7 @@ typedef struct { static inline TSNode ts_node__null() { return (TSNode) { .subtree = NULL, - .document = NULL, + .tree = NULL, .position = {0, 0}, .byte = 0, }; @@ -32,12 +32,12 @@ static inline const Subtree *ts_node__tree(TSNode self) { static inline NodeChildIterator ts_node_child_iterator_begin(const TSNode *node) { const Subtree *tree = ts_node__tree(*node); const TSSymbol *alias_sequence = ts_language_alias_sequence( - node->document->parser.language, + node->tree->language, tree->alias_sequence_id ); return (NodeChildIterator) { .parent = tree, - .document = node->document, + .tree = node->tree, .position = {node->byte, node->position}, .child_index = 0, .structural_child_index = 0, @@ -57,7 +57,7 @@ static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode * } *result = (TSNode) { .subtree = child, - .document = self->document, + .tree = self->tree, .position = self->position.extent, .byte = 
self->position.bytes, .alias_symbol = alias_symbol, @@ -77,7 +77,7 @@ static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { ( self.alias_symbol && ts_language_symbol_metadata( - self.document->parser.language, + self.tree->language, self.alias_symbol ).named ) @@ -343,11 +343,11 @@ TSSymbol ts_node_symbol(TSNode self) { } const char *ts_node_type(TSNode self) { - return ts_language_symbol_name(self.document->parser.language, ts_node_symbol(self)); + return ts_language_symbol_name(self.tree->language, ts_node_symbol(self)); } char *ts_node_string(TSNode self) { - return ts_subtree_string(ts_node__tree(self), self.document->parser.language, false); + return ts_subtree_string(ts_node__tree(self), self.tree->language, false); } bool ts_node_eq(TSNode self, TSNode other) { @@ -360,7 +360,7 @@ bool ts_node_eq(TSNode self, TSNode other) { bool ts_node_is_named(TSNode self) { const Subtree *tree = ts_node__tree(self); return self.alias_symbol - ? ts_language_symbol_metadata(self.document->parser.language, self.alias_symbol).named + ? ts_language_symbol_metadata(self.tree->language, self.alias_symbol).named : tree->named; } @@ -378,7 +378,7 @@ bool ts_node_has_error(TSNode self) { } TSNode ts_node_parent(TSNode self) { - TSNode node = ts_document_root_node(self.document); + TSNode node = ts_tree_root_node(self.tree); uint32_t end_byte = ts_node_end_byte(self); if (node.subtree == self.subtree) return ts_node__null(); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index f3f6fd64..1c7da6b1 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1,4 +1,3 @@ -#include "runtime/parser.h" #include #include #include @@ -10,8 +9,12 @@ #include "runtime/array.h" #include "runtime/language.h" #include "runtime/alloc.h" +#include "runtime/stack.h" +#include "runtime/reusable_node.h" #include "runtime/reduce_action.h" #include "runtime/error_costs.h" +#include "runtime/string_input.h" +#include "runtime/tree.h" #define LOG(...) 
\ if (self->lexer.logger.log || self->print_debugging_graphs) { \ @@ -37,6 +40,29 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; +typedef struct { + Subtree *token; + Subtree *last_external_token; + uint32_t byte_index; +} TokenCache; + +struct TSParser { + Lexer lexer; + Stack *stack; + SubtreePool tree_pool; + const TSLanguage *language; + ReduceActionSet reduce_actions; + Subtree *finished_tree; + Subtree scratch_tree; + TokenCache token_cache; + ReusableNode reusable_node; + void *external_scanner_payload; + bool in_ambiguity; + bool print_debugging_graphs; + bool halt_on_error; + unsigned accept_count; +}; + typedef struct { unsigned cost; unsigned node_count; @@ -52,6 +78,8 @@ typedef enum { ErrorComparisonTakeRight, } ErrorComparison; +// Parser - Private + static void ts_parser__log(TSParser *self) { if (self->lexer.logger.log) { self->lexer.logger.log( @@ -670,7 +698,7 @@ static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, T return pop; } -static void ts_parser__start(TSParser *self, TSInput input, Subtree *previous_tree) { +static void ts_parser__start(TSParser *self, TSInput input, const Subtree *previous_tree) { if (previous_tree) { LOG("parse_after_edit"); } else { @@ -1258,42 +1286,76 @@ static unsigned ts_parser__condense_stack(TSParser *self) { return min_error_cost; } -bool ts_parser_init(TSParser *self) { +// Parser - Public + +TSParser *ts_parser_new() { + TSParser *self = ts_calloc(1, sizeof(TSParser)); ts_lexer_init(&self->lexer); array_init(&self->reduce_actions); array_reserve(&self->reduce_actions, 4); - ts_subtree_pool_init(&self->tree_pool); + self->tree_pool = ts_subtree_pool_new(32); self->stack = ts_stack_new(&self->tree_pool); self->finished_tree = NULL; self->reusable_node = reusable_node_new(); + self->print_debugging_graphs = false; + self->halt_on_error = false; 
ts_parser__set_cached_token(self, 0, NULL, NULL); - return true; + return self; } -void ts_parser_set_language(TSParser *self, const TSLanguage *language) { - if (self->external_scanner_payload && self->language->external_scanner.destroy) - self->language->external_scanner.destroy(self->external_scanner_payload); - - if (language && language->external_scanner.create) - self->external_scanner_payload = language->external_scanner.create(); - else - self->external_scanner_payload = NULL; - - self->language = language; -} - -void ts_parser_destroy(TSParser *self) { - if (self->stack) +void ts_parser_delete(TSParser *self) { + if (self->stack) { ts_stack_delete(self->stack); - if (self->reduce_actions.contents) + } + if (self->reduce_actions.contents) { array_delete(&self->reduce_actions); + } ts_subtree_pool_delete(&self->tree_pool); reusable_node_delete(&self->reusable_node); ts_parser_set_language(self, NULL); + ts_free(self); } -Subtree *ts_parser_parse(TSParser *self, TSInput input, Subtree *old_tree, bool halt_on_error) { - ts_parser__start(self, input, old_tree); +const TSLanguage *ts_parser_language(const TSParser *self) { + return self->language; +} + +bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { + if (language && language->version != TREE_SITTER_LANGUAGE_VERSION) return false; + + if (self->external_scanner_payload && self->language->external_scanner.destroy) { + self->language->external_scanner.destroy(self->external_scanner_payload); + } + + if (language && language->external_scanner.create) { + self->external_scanner_payload = language->external_scanner.create(); + } else { + self->external_scanner_payload = NULL; + } + + self->language = language; + return true; +} + +TSLogger ts_parser_logger(const TSParser *self) { + return self->lexer.logger; +} + +void ts_parser_set_logger(TSParser *self, TSLogger logger) { + self->lexer.logger = logger; +} + +void ts_parser_print_debugging_graphs(TSParser *self, bool should_print) { + 
self->print_debugging_graphs = should_print; +} + +void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) { + self->halt_on_error = should_halt_on_error; +} + +TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { + if (!self->language) return NULL; + ts_parser__start(self, input, old_tree ? old_tree->root : NULL); StackVersion version = STACK_VERSION_NONE; uint32_t position = 0, last_position = 0; @@ -1327,7 +1389,7 @@ Subtree *ts_parser_parse(TSParser *self, TSInput input, Subtree *old_tree, bool unsigned min_error_cost = ts_parser__condense_stack(self); if (self->finished_tree && self->finished_tree->error_cost < min_error_cost) { break; - } else if (halt_on_error && min_error_cost > 0) { + } else if (self->halt_on_error && min_error_cost > 0) { ts_parser__halt_parse(self); break; } @@ -1342,5 +1404,13 @@ Subtree *ts_parser_parse(TSParser *self, TSInput input, Subtree *old_tree, bool LOG("done"); LOG_TREE(); - return self->finished_tree; + + return ts_tree_new(self->finished_tree, self->language); +} + +TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree, + const char *string, uint32_t length) { + TSStringInput input; + ts_string_input_init(&input, string, length); + return ts_parser_parse(self, old_tree, input.input); } diff --git a/src/runtime/parser.h b/src/runtime/parser.h deleted file mode 100644 index 46cc7489..00000000 --- a/src/runtime/parser.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef RUNTIME_PARSER_H_ -#define RUNTIME_PARSER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "runtime/stack.h" -#include "runtime/array.h" -#include "runtime/lexer.h" -#include "runtime/reusable_node.h" -#include "runtime/reduce_action.h" -#include "runtime/subtree.h" - -typedef struct { - Subtree *token; - Subtree *last_external_token; - uint32_t byte_index; -} TokenCache; - -typedef struct { - Lexer lexer; - Stack *stack; - SubtreePool tree_pool; - const TSLanguage *language; - ReduceActionSet 
reduce_actions; - Subtree *finished_tree; - Subtree scratch_tree; - TokenCache token_cache; - ReusableNode reusable_node; - void *external_scanner_payload; - bool in_ambiguity; - bool print_debugging_graphs; - unsigned accept_count; -} TSParser; - -bool ts_parser_init(TSParser *); -void ts_parser_destroy(TSParser *); -Subtree *ts_parser_parse(TSParser *, TSInput, Subtree *, bool halt_on_error); -void ts_parser_set_language(TSParser *, const TSLanguage *); - -#ifdef __cplusplus -} -#endif - -#endif // RUNTIME_PARSER_H_ diff --git a/src/runtime/reusable_node.h b/src/runtime/reusable_node.h index 5f1884d5..c1d4f06b 100644 --- a/src/runtime/reusable_node.h +++ b/src/runtime/reusable_node.h @@ -24,7 +24,7 @@ static inline void reusable_node_reset(ReusableNode *self, Subtree *tree) { })); } -static inline Subtree *reusable_node_tree(ReusableNode *self) { +static inline const Subtree *reusable_node_tree(ReusableNode *self) { return self->stack.size > 0 ? self->stack.contents[self->stack.size - 1].tree : NULL; diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index 53f69ee5..c4e13e0c 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -1,13 +1,7 @@ +#include "tree_sitter/runtime.h" #include "runtime/string_input.h" -#include "runtime/alloc.h" #include -typedef struct { - const char *string; - uint32_t position; - uint32_t length; -} TSStringInput; - static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) { TSStringInput *input = (TSStringInput *)payload; if (input->position >= input->length) { @@ -26,17 +20,12 @@ static int ts_string_input__seek(void *payload, uint32_t byte, TSPoint _) { return (byte < input->length); } -TSInput ts_string_input_make(const char *string) { - return ts_string_input_make_with_length(string, strlen(string)); -} - -TSInput ts_string_input_make_with_length(const char *string, uint32_t length) { - TSStringInput *input = ts_malloc(sizeof(TSStringInput)); - input->string = string; - 
input->position = 0; - input->length = length; - return (TSInput){ - .payload = input, +void ts_string_input_init(TSStringInput *self, const char *string, uint32_t length) { + self->string = string; + self->position = 0; + self->length = length; + self->input = (TSInput) { + .payload = self, .read = ts_string_input__read, .seek = ts_string_input__seek, .encoding = TSInputEncodingUTF8, diff --git a/src/runtime/string_input.h b/src/runtime/string_input.h index c96cd416..19171e4f 100644 --- a/src/runtime/string_input.h +++ b/src/runtime/string_input.h @@ -7,8 +7,14 @@ extern "C" { #include "tree_sitter/runtime.h" -TSInput ts_string_input_make(const char *); -TSInput ts_string_input_make_with_length(const char *, uint32_t); +typedef struct { + const char *string; + uint32_t position; + uint32_t length; + TSInput input; +} TSStringInput; + +void ts_string_input_init(TSStringInput *, const char *, uint32_t); #ifdef __cplusplus } diff --git a/src/runtime/subtree.c b/src/runtime/subtree.c index 1808ee6d..aff2a6fc 100644 --- a/src/runtime/subtree.c +++ b/src/runtime/subtree.c @@ -19,6 +19,10 @@ typedef struct { TSStateId TS_TREE_STATE_NONE = USHRT_MAX; +static const uint32_t MAX_TREE_POOL_SIZE = 1024; + +static const TSExternalTokenState empty_state = {.length = 0, .short_data = {0}}; + // ExternalTokenState void ts_external_token_state_init(TSExternalTokenState *self, const char *content, unsigned length) { @@ -102,11 +106,10 @@ void ts_subtree_array_reverse(SubtreeArray *self) { // SubtreePool -static const uint32_t MAX_TREE_POOL_SIZE = 1024; - -void ts_subtree_pool_init(SubtreePool *self) { - array_init(&self->free_trees); - array_init(&self->tree_stack); +SubtreePool ts_subtree_pool_new(uint32_t capacity) { + SubtreePool self = {array_new(), array_new()}; + array_reserve(&self.free_trees, capacity); + return self; } void ts_subtree_pool_delete(SubtreePool *self) { @@ -128,7 +131,7 @@ Subtree *ts_subtree_pool_allocate(SubtreePool *self) { } void 
ts_subtree_pool_free(SubtreePool *self, Subtree *tree) { - if (self->free_trees.size < MAX_TREE_POOL_SIZE) { + if (self->free_trees.capacity > 0 && self->free_trees.size < MAX_TREE_POOL_SIZE) { array_push(&self->free_trees, tree); } else { ts_free(tree); @@ -691,8 +694,6 @@ void ts_subtree_print_dot_graph(const Subtree *self, const TSLanguage *language, fprintf(f, "}\n"); } -static const TSExternalTokenState empty_state = {.length = 0, .short_data = {0}}; - bool ts_subtree_external_token_state_eq(const Subtree *self, const Subtree *other) { const TSExternalTokenState *state1 = &empty_state; const TSExternalTokenState *state2 = &empty_state; diff --git a/src/runtime/subtree.h b/src/runtime/subtree.h index 5168ac38..fd1fcaff 100644 --- a/src/runtime/subtree.h +++ b/src/runtime/subtree.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_TREE_H_ -#define RUNTIME_TREE_H_ +#ifndef RUNTIME_SUBTREE_H_ +#define RUNTIME_SUBTREE_H_ #ifdef __cplusplus extern "C" { @@ -83,7 +83,7 @@ void ts_subtree_array_delete(SubtreePool *, SubtreeArray *); SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *); void ts_subtree_array_reverse(SubtreeArray *); -void ts_subtree_pool_init(SubtreePool *); +SubtreePool ts_subtree_pool_new(uint32_t capacity); void ts_subtree_pool_delete(SubtreePool *); Subtree *ts_subtree_pool_allocate(SubtreePool *); void ts_subtree_pool_free(SubtreePool *, Subtree *); @@ -122,4 +122,4 @@ static inline Length ts_subtree_total_size(const Subtree *self) { } #endif -#endif // RUNTIME_TREE_H_ +#endif // RUNTIME_SUBTREE_H_ diff --git a/src/runtime/tree.c b/src/runtime/tree.c new file mode 100644 index 00000000..2c365644 --- /dev/null +++ b/src/runtime/tree.c @@ -0,0 +1,59 @@ +#include "tree_sitter/runtime.h" +#include "runtime/array.h" +#include "runtime/get_changed_ranges.h" +#include "runtime/subtree.h" +#include "runtime/tree_cursor.h" +#include "runtime/tree.h" + +TSTree *ts_tree_new(const Subtree *root, const TSLanguage *language) { + TSTree *result = 
ts_malloc(sizeof(TSTree)); + result->root = root; + result->language = language; + return result; +} + +TSTree *ts_tree_copy(const TSTree *self) { + ts_subtree_retain(self->root); + return ts_tree_new(self->root, self->language); +} + +void ts_tree_delete(const TSTree *self) { + SubtreePool pool = ts_subtree_pool_new(0); + ts_subtree_release(&pool, self->root); + ts_subtree_pool_delete(&pool); + ts_free((TSTree *)self); /* const is dropped only to hand the allocation back */ +} + +TSNode ts_tree_root_node(const TSTree *self) { + return (TSNode) { + .subtree = self->root, + .tree = self, + .position = {0, 0}, + .byte = 0, + .alias_symbol = 0, + }; +} + +void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { + SubtreePool pool = ts_subtree_pool_new(0); + self->root = ts_subtree_edit(self->root, edit, &pool); + assert(pool.tree_stack.capacity == 0 && pool.free_trees.capacity == 0); +} + +TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) { + TSRange *result; + TSTreeCursor cursor1, cursor2; + ts_tree_cursor_init(&cursor1, self); + ts_tree_cursor_init(&cursor2, other); /* each cursor is initialized from the tree whose root it is passed alongside below */ + *count = ts_subtree_get_changed_ranges( + self->root, other->root, &cursor1, &cursor2, + self->language, &result + ); + array_delete(&cursor1.stack); + array_delete(&cursor2.stack); + return result; +} + +void ts_tree_print_dot_graph(const TSTree *self, FILE *file) { + ts_subtree_print_dot_graph(self->root, self->language, file); +} diff --git a/src/runtime/tree.h b/src/runtime/tree.h new file mode 100644 index 00000000..7429e06c --- /dev/null +++ b/src/runtime/tree.h @@ -0,0 +1,19 @@ +#ifndef RUNTIME_TREE_H_ +#define RUNTIME_TREE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct TSTree { + const Subtree *root; + const TSLanguage *language; +}; + +TSTree *ts_tree_new(const Subtree *root, const TSLanguage *language); + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_TREE_H_ diff --git a/src/runtime/tree_cursor.c b/src/runtime/tree_cursor.c index 730dacf9..c7aa9691 100644 --- a/src/runtime/tree_cursor.c +++
b/src/runtime/tree_cursor.c @@ -1,20 +1,24 @@ #include "tree_sitter/runtime.h" #include "runtime/alloc.h" #include "runtime/tree_cursor.h" -#include "runtime/document.h" #include "runtime/language.h" +#include "runtime/tree.h" -TSTreeCursor *ts_tree_cursor_new(const TSDocument *document) { +TSTreeCursor *ts_tree_cursor_new(const TSTree *tree) { TSTreeCursor *self = ts_malloc(sizeof(TSTreeCursor)); - self->document = document; + ts_tree_cursor_init(self, tree); + return self; +} + +void ts_tree_cursor_init(TSTreeCursor *self, const TSTree *tree) { + self->tree = tree; array_init(&self->stack); array_push(&self->stack, ((TreeCursorEntry) { - .tree = document->tree, + .subtree = tree->root, .position = length_zero(), .child_index = 0, .structural_child_index = 0, })); - return self; } void ts_tree_cursor_delete(TSTreeCursor *self) { @@ -24,7 +28,7 @@ void ts_tree_cursor_delete(TSTreeCursor *self) { bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { TreeCursorEntry *last_entry = array_back(&self->stack); - Subtree *tree = last_entry->tree; + Subtree *tree = last_entry->subtree; Length position = last_entry->position; bool did_descend; @@ -36,7 +40,7 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { Subtree *child = tree->children.contents[i]; if (child->visible || child->visible_child_count > 0) { array_push(&self->stack, ((TreeCursorEntry) { - .tree = child, + .subtree = child, .child_index = i, .structural_child_index = structural_child_index, .position = position, @@ -64,7 +68,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { TreeCursorEntry *parent_entry = &self->stack.contents[i]; - Subtree *parent = parent_entry->tree; + Subtree *parent = parent_entry->subtree; uint32_t child_index = child_entry->child_index; uint32_t structural_child_index = child_entry->structural_child_index; Length position = child_entry->position; @@ -77,7 +81,7 @@ bool 
ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { if (child->visible || child->visible_child_count > 0) { self->stack.contents[i + 1] = (TreeCursorEntry) { - .tree = child, + .subtree = child, .child_index = child_index, .structural_child_index = structural_child_index, .position = position, @@ -103,7 +107,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { bool ts_tree_cursor_goto_parent(TSTreeCursor *self) { for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { TreeCursorEntry *entry = &self->stack.contents[i]; - if (entry->tree->visible) { + if (entry->subtree->visible) { self->stack.size = i + 1; return true; } @@ -117,16 +121,16 @@ TSNode ts_tree_cursor_current_node(TSTreeCursor *self) { if (self->stack.size > 1) { TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->document->parser.language, - parent_entry->tree->alias_sequence_id + self->tree->language, + parent_entry->subtree->alias_sequence_id ); if (alias_sequence) { alias_symbol = alias_sequence[last_entry->structural_child_index]; } } return (TSNode) { - .document = self->document, - .subtree = last_entry->tree, + .tree = self->tree, + .subtree = last_entry->subtree, .position = last_entry->position.extent, .byte = last_entry->position.bytes, .alias_symbol = alias_symbol, diff --git a/src/runtime/tree_cursor.h b/src/runtime/tree_cursor.h index 5e571fba..c016b276 100644 --- a/src/runtime/tree_cursor.h +++ b/src/runtime/tree_cursor.h @@ -4,17 +4,17 @@ #include "runtime/subtree.h" typedef struct { - Subtree *tree; + Subtree *subtree; Length position; uint32_t child_index; uint32_t structural_child_index; } TreeCursorEntry; struct TSTreeCursor { - const TSDocument *document; + const TSTree *tree; Array(TreeCursorEntry) stack; }; -TSTreeCursor *ts_tree_cursor_new(const TSDocument *); +void ts_tree_cursor_init(TSTreeCursor *, const TSTree *); #endif // RUNTIME_TREE_CURSOR_H_ diff --git 
a/test/benchmarks.cc b/test/benchmarks.cc index 616d2de6..d4f475b5 100644 --- a/test/benchmarks.cc +++ b/test/benchmarks.cc @@ -43,12 +43,12 @@ int main(int argc, char *arg[]) { vector error_speeds; vector non_error_speeds; - auto document = ts_document_new(); + TSParser *parser = ts_parser_new(); if (getenv("TREE_SITTER_BENCHMARK_SVG")) { - ts_document_print_debugging_graphs(document, true); + ts_parser_print_debugging_graphs(parser, true); } else if (getenv("TREE_SITTER_BENCHMARK_LOG")) { - ts_document_set_logger(document, stderr_logger_new(false)); + ts_parser_set_logger(parser, stderr_logger_new(false)); } auto language_filter = getenv("TREE_SITTER_BENCHMARK_LANGUAGE"); @@ -61,7 +61,7 @@ int main(int argc, char *arg[]) { for (auto &language_name : language_names) { if (language_filter && language_name != language_filter) continue; - ts_document_set_language(document, load_real_language(language_name)); + ts_parser_set_language(parser, load_real_language(language_name)); printf("%s\n", language_name.c_str()); @@ -69,20 +69,16 @@ int main(int argc, char *arg[]) { if (file_name_filter && example.file_name != file_name_filter) continue; if (example.input.size() < 256) continue; - ts_document_invalidate(document); - ts_document_set_input_string(document, ""); - ts_document_parse(document); - - ts_document_invalidate(document); - ts_document_set_input_string(document, example.input.c_str()); - clock_t start_time = clock(); - ts_document_parse(document); + TSTree *tree = ts_parser_parse_string(parser, nullptr, example.input.c_str(), example.input.size()); clock_t end_time = clock(); - unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; - assert(!ts_node_has_error(ts_document_root_node(document))); + + assert(!ts_node_has_error(ts_tree_root_node(tree))); + ts_tree_delete(tree); + + size_t duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; size_t speed = static_cast(example.input.size()) / duration; - printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", 
example.file_name.c_str(), duration, speed); + printf(" %-30s\t%lu ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); if (speed != 0) non_error_speeds.push_back(speed); } @@ -93,15 +89,15 @@ int main(int argc, char *arg[]) { if (file_name_filter && example.file_name != file_name_filter) continue; if (example.input.size() < 256) continue; - ts_document_invalidate(document); - ts_document_set_input_string(document, example.input.c_str()); - clock_t start_time = clock(); - ts_document_parse(document); + TSTree *tree = ts_parser_parse_string(parser, nullptr, example.input.c_str(), example.input.size()); clock_t end_time = clock(); - unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; + + ts_tree_delete(tree); + + size_t duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; size_t speed = static_cast(example.input.size()) / duration; - printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); + printf(" %-30s\t%lu ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); if (speed != 0) error_speeds.push_back(speed); } } @@ -118,5 +114,6 @@ int main(int argc, char *arg[]) { printf(" %-30s\t%lu bytes/ms\n", "average speed", mean(error_speeds)); printf(" %-30s\t%lu bytes/ms\n", "worst speed", min(error_speeds)); + ts_parser_delete(parser); return 0; } diff --git a/test/helpers/scope_sequence.cc b/test/helpers/scope_sequence.cc index c3db70ac..8851f0c4 100644 --- a/test/helpers/scope_sequence.cc +++ b/test/helpers/scope_sequence.cc @@ -21,8 +21,7 @@ static void append_text_to_scope_sequence(ScopeSequence *sequence, static void append_to_scope_sequence(ScopeSequence *sequence, ScopeStack *current_scopes, - TSNode node, TSDocument *document, - const std::string &text) { + TSNode node, const std::string &text) { append_text_to_scope_sequence( sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size() ); @@ -31,7 +30,7 @@ static void append_to_scope_sequence(ScopeSequence *sequence, 
for (size_t i = 0, n = ts_node_child_count(node); i < n; i++) { TSNode child = ts_node_child(node, i); - append_to_scope_sequence(sequence, current_scopes, child, document, text); + append_to_scope_sequence(sequence, current_scopes, child, text); } append_text_to_scope_sequence( @@ -41,11 +40,11 @@ static void append_to_scope_sequence(ScopeSequence *sequence, current_scopes->pop_back(); } -ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text) { +ScopeSequence build_scope_sequence(TSTree *tree, const std::string &text) { ScopeSequence sequence; ScopeStack current_scopes; - TSNode node = ts_document_root_node(document); - append_to_scope_sequence(&sequence, &current_scopes, node, document, text); + TSNode node = ts_tree_root_node(tree); + append_to_scope_sequence(&sequence, &current_scopes, node, text); return sequence; } diff --git a/test/helpers/scope_sequence.h b/test/helpers/scope_sequence.h index c83ad597..2ad15117 100644 --- a/test/helpers/scope_sequence.h +++ b/test/helpers/scope_sequence.h @@ -9,7 +9,7 @@ typedef std::string Scope; typedef std::vector ScopeStack; typedef std::vector ScopeSequence; -ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text); +ScopeSequence build_scope_sequence(TSTree *tree, const std::string &text); void verify_changed_ranges(const ScopeSequence &old, const ScopeSequence &new_sequence, const std::string &text, TSRange *ranges, size_t range_count); diff --git a/test/integration/fuzzing-examples.cc b/test/integration/fuzzing-examples.cc index 46cac15a..92682d4c 100644 --- a/test/integration/fuzzing-examples.cc +++ b/test/integration/fuzzing-examples.cc @@ -29,14 +29,14 @@ describe("examples found via fuzzing", [&]() { for (unsigned i = 0, n = examples.size(); i < n; i++) { it(("parses example number " + to_string(i)).c_str(), [&]() { - TSDocument *document = ts_document_new(); + TSParser *parser = ts_parser_new(); if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { -
ts_document_print_debugging_graphs(document, true); + ts_parser_print_debugging_graphs(parser, true); } const string &language_name = examples[i].first; - ts_document_set_language(document, load_real_language(language_name)); + ts_parser_set_language(parser, load_real_language(language_name)); string input; const string &base64_input = examples[i].second; @@ -47,18 +47,12 @@ describe("examples found via fuzzing", [&]() { base64_input.size() )); - ts_document_set_input_string_with_length( - document, - input.c_str(), - input.size() - ); - - ts_document_parse(document); - - TSNode node = ts_document_root_node(document); + TSTree *tree = ts_parser_parse_string(parser, nullptr, input.c_str(), input.size()); + TSNode node = ts_tree_root_node(tree); assert_consistent_tree_sizes(node); - ts_document_free(document); + ts_tree_delete(tree); + ts_parser_delete(parser); }); } diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index 608ded82..0d594ce5 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -12,8 +12,8 @@ #include "helpers/tree_helpers.h" #include -static void assert_correct_tree_size(TSDocument *document, string content) { - TSNode root_node = ts_document_root_node(document); +static void assert_correct_tree_size(TSTree *tree, string content) { + TSNode root_node = ts_tree_root_node(tree); AssertThat(ts_node_end_byte(root_node), Equals(content.size())); assert_consistent_tree_sizes(root_node); } @@ -33,48 +33,43 @@ vector test_languages({ for (auto &language_name : test_languages) { describe(("the " + language_name + " language").c_str(), [&]() { - TSDocument *document; + TSParser *parser; const bool debug_graphs_enabled = getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS"); before_each([&]() { record_alloc::start(); - document = ts_document_new(); - ts_document_set_language(document, load_real_language(language_name)); + parser = ts_parser_new(); + ts_parser_set_language(parser, 
load_real_language(language_name)); - // ts_document_set_logger(document, stderr_logger_new(true)); + // ts_parser_set_logger(parser, stderr_logger_new(true)); if (debug_graphs_enabled) { - ts_document_print_debugging_graphs(document, true); + ts_parser_print_debugging_graphs(parser, true); } }); after_each([&]() { - ts_document_free(document); + ts_parser_delete(parser); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); for (auto &entry : read_real_language_corpus(language_name)) { SpyInput *input; - auto it_handles_edit_sequence = [&](string name, std::function edit_sequence){ - it(("parses " + entry.description + ": " + name).c_str(), [&]() { - input = new SpyInput(entry.input, 3); - if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - ts_document_set_input(document, input->input()); - edit_sequence(); + it(("parses " + entry.description + ": initial parse").c_str(), [&]() { + input = new SpyInput(entry.input, 3); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - TSNode root_node = ts_document_root_node(document); - const char *node_string = ts_node_string(root_node); - string result(node_string); - ts_free((void *)node_string); - AssertThat(result, Equals(entry.tree_string)); + TSTree *tree = ts_parser_parse(parser, nullptr, input->input()); + assert_correct_tree_size(tree, input->content); - assert_correct_tree_size(document, input->content); - delete input; - }); - }; + TSNode root_node = ts_tree_root_node(tree); + const char *node_string = ts_node_string(root_node); + string result(node_string); + ts_free((void *)node_string); + AssertThat(result, Equals(entry.tree_string)); - it_handles_edit_sequence("initial parse", [&]() { - ts_document_parse(document); + ts_tree_delete(tree); + delete input; }); set> deletions; @@ -86,54 +81,88 @@ for (auto &language_name : test_languages) { string inserted_text = random_words(random_unsigned(4) + 1); if (insertions.insert({edit_position, 
inserted_text}).second) { - string description = "\"" + inserted_text + "\" at " + to_string(edit_position); - - it_handles_edit_sequence("repairing an insertion of " + description, [&]() { - ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); - ts_document_parse(document); - assert_correct_tree_size(document, input->content); + it(("parses " + entry.description + + ": repairing an insertion of \"" + inserted_text + "\"" + + " at " + to_string(edit_position)).c_str(), [&]() { + input = new SpyInput(entry.input, 3); if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - ts_document_edit(document, input->undo()); - assert_correct_tree_size(document, input->content); + input->replace(edit_position, 0, inserted_text); + TSTree *tree = ts_parser_parse(parser, nullptr, input->input()); + assert_correct_tree_size(tree, input->content); if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - TSRange *ranges; + TSInputEdit edit = input->undo(); + ts_tree_edit(tree, &edit); + assert_correct_tree_size(tree, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); + + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + assert_correct_tree_size(new_tree, input->content); + uint32_t range_count; - ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); - ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); - assert_correct_tree_size(document, input->content); + TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count); - ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content); - verify_changed_ranges(old_scope_sequence, new_scope_sequence, - input->content, ranges, range_count); + ScopeSequence old_scope_sequence = build_scope_sequence(tree, input->content); + ScopeSequence new_scope_sequence = build_scope_sequence(new_tree, input->content); + verify_changed_ranges( + old_scope_sequence, 
new_scope_sequence, + input->content, ranges, range_count + ); ts_free(ranges); + + TSNode root_node = ts_tree_root_node(new_tree); + const char *node_string = ts_node_string(root_node); + string result(node_string); + ts_free((void *)node_string); + AssertThat(result, Equals(entry.tree_string)); + + ts_tree_delete(tree); + ts_tree_delete(new_tree); + delete input; }); } if (deletions.insert({edit_position, deletion_size}).second) { - string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size); - - it_handles_edit_sequence("repairing a deletion of " + desription, [&]() { - ts_document_edit(document, input->replace(edit_position, deletion_size, "")); - ts_document_parse(document); - assert_correct_tree_size(document, input->content); + it(("parses " + entry.description + + ": repairing a deletion of " + + to_string(edit_position) + "-" + to_string(edit_position + deletion_size)).c_str(), [&]() { + input = new SpyInput(entry.input, 3); if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - ts_document_edit(document, input->undo()); - assert_correct_tree_size(document, input->content); + input->replace(edit_position, deletion_size, ""); + TSTree *tree = ts_parser_parse(parser, nullptr, input->input()); + assert_correct_tree_size(tree, input->content); if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); - TSRange *ranges; + TSInputEdit edit = input->undo(); + ts_tree_edit(tree, &edit); + assert_correct_tree_size(tree, input->content); + if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str()); + + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + assert_correct_tree_size(new_tree, input->content); + uint32_t range_count; - ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); - ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); - assert_correct_tree_size(document, input->content); + TSRange *ranges = 
ts_tree_get_changed_ranges(tree, new_tree, &range_count); - ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content); - verify_changed_ranges(old_scope_sequence, new_scope_sequence, - input->content, ranges, range_count); + ScopeSequence old_scope_sequence = build_scope_sequence(tree, input->content); + ScopeSequence new_scope_sequence = build_scope_sequence(new_tree, input->content); + verify_changed_ranges( + old_scope_sequence, new_scope_sequence, + input->content, ranges, range_count + ); ts_free(ranges); + + TSNode root_node = ts_tree_root_node(new_tree); + const char *node_string = ts_node_string(root_node); + string result(node_string); + ts_free((void *)node_string); + AssertThat(result, Equals(entry.tree_string)); + + ts_tree_delete(tree); + ts_tree_delete(new_tree); + delete input; }); } } diff --git a/test/integration/test_grammars.cc b/test/integration/test_grammars.cc index f5324579..bba40ace 100644 --- a/test/integration/test_grammars.cc +++ b/test/integration/test_grammars.cc @@ -52,26 +52,26 @@ for (auto &language_name : test_languages) { ); } - TSDocument *document = ts_document_new(); - ts_document_set_language(document, language); - ts_document_set_input_string_with_length(document, entry.input.c_str(), entry.input.size()); + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, language); - // ts_document_print_debugging_graphs(document, true); if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { - ts_document_print_debugging_graphs(document, true); + ts_parser_print_debugging_graphs(parser, true); } - ts_document_parse(document); + TSTree *tree = ts_parser_parse_string(parser, nullptr, entry.input.c_str(), entry.input.size()); - TSNode root_node = ts_document_root_node(document); + TSNode root_node = ts_tree_root_node(tree); AssertThat(ts_node_end_byte(root_node), Equals(entry.input.size())); assert_consistent_tree_sizes(root_node); + const char *node_string = ts_node_string(root_node); string 
result(node_string); ts_free((void *)node_string); - ts_document_free(document); - AssertThat(result, Equals(entry.tree_string)); + + ts_tree_delete(tree); + ts_parser_delete(parser); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); } diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc deleted file mode 100644 index 5f140251..00000000 --- a/test/runtime/document_test.cc +++ /dev/null @@ -1,490 +0,0 @@ -#include "test_helper.h" -#include "runtime/alloc.h" -#include "helpers/record_alloc.h" -#include "helpers/stream_methods.h" -#include "helpers/tree_helpers.h" -#include "helpers/point_helpers.h" -#include "helpers/spy_logger.h" -#include "helpers/stderr_logger.h" -#include "helpers/spy_input.h" -#include "helpers/load_language.h" - -TSPoint point(size_t row, size_t column) { - return TSPoint{static_cast(row), static_cast(column)}; -} - -START_TEST - -describe("Document", [&]() { - TSDocument *document; - TSNode root; - - before_each([&]() { - record_alloc::start(); - document = ts_document_new(); - - if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { - ts_document_print_debugging_graphs(document, true); - } - }); - - after_each([&]() { - ts_document_free(document); - record_alloc::stop(); - AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); - }); - - auto assert_node_string_equals = [&](TSNode node, const string &expected) { - char *str = ts_node_string(node); - string actual(str); - ts_free(str); - AssertThat(actual, Equals(expected)); - }; - - describe("set_input(input)", [&]() { - SpyInput *spy_input; - - before_each([&]() { - spy_input = new SpyInput("{\"key\": [null, 2]}", 3); - - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, "{\"key\": [1, 2]}"); - ts_document_parse(document); - - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (object (pair (string) (array (number) (number)))))"); - }); - - 
after_each([&]() { - delete spy_input; - }); - - it("handles both UTF8 and UTF16 encodings", [&]() { - const char16_t content[] = u"[true, false]"; - spy_input->content = string((const char *)content, sizeof(content)); - spy_input->encoding = TSInputEncodingUTF16; - - ts_document_set_input(document, spy_input->input()); - ts_document_invalidate(document); - ts_document_parse(document); - - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (array (true) (false)))"); - }); - - it("handles truncated UTF16 data", [&]() { - const char content[1] = { '\0' }; - spy_input->content = string(content, sizeof(content)); - spy_input->encoding = TSInputEncodingUTF16; - - ts_document_set_input(document, spy_input->input()); - ts_document_invalidate(document); - ts_document_parse(document); - }); - - it("measures columns in bytes", [&]() { - const char16_t content[] = u"[true, false]"; - spy_input->content = string((const char *)content, sizeof(content)); - spy_input->encoding = TSInputEncodingUTF16; - TSInput input = spy_input->input(); - - ts_document_set_input(document, input); - ts_document_invalidate(document); - ts_document_parse(document); - root = ts_document_root_node(document); - AssertThat(ts_node_end_point(root), Equals({0, 28})); - }); - - it("allows the input to be retrieved later", [&]() { - ts_document_set_input(document, spy_input->input()); - AssertThat(ts_document_input(document).payload, Equals(spy_input)); - AssertThat(ts_document_input(document).read, Equals(spy_input->input().read)); - AssertThat(ts_document_input(document).seek, Equals(spy_input->input().seek)); - }); - - it("does not assume that the document's text has changed", [&]() { - ts_document_set_input(document, spy_input->input()); - AssertThat(ts_document_root_node(document), Equals(root)); - AssertThat(ts_node_has_changes(root), IsFalse()); - AssertThat(spy_input->strings_read(), IsEmpty()); - }); - - it("reads text from the new input for future parses", 
[&]() { - ts_document_set_input(document, spy_input->input()); - - // Insert 'null', delete '1'. - TSInputEdit edit = {}; - edit.start_point.column = edit.start_byte = strlen("{\"key\": ["); - edit.extent_added.column = edit.bytes_added = 4; - edit.extent_removed.column = edit.bytes_removed = 1; - - ts_document_edit(document, edit); - ts_document_parse(document); - - TSNode new_root = ts_document_root_node(document); - assert_node_string_equals( - new_root, - "(value (object (pair (string) (array (null) (number)))))"); - AssertThat(spy_input->strings_read(), Equals(vector({" [null, 2" }))); - }); - - it("allows setting input string with length", [&]() { - const char content[] = { '1' }; - ts_document_set_input_string_with_length(document, content, 1); - ts_document_parse(document); - TSNode new_root = ts_document_root_node(document); - AssertThat(ts_node_end_byte(new_root), Equals(1)); - assert_node_string_equals( - new_root, - "(value (number))"); - }); - - it("reads from the new input correctly when the old input was blank", [&]() { - ts_document_set_input_string(document, ""); - ts_document_parse(document); - TSNode new_root = ts_document_root_node(document); - AssertThat(ts_node_end_byte(new_root), Equals(0)); - assert_node_string_equals( - new_root, - "(ERROR)"); - - ts_document_set_input_string(document, "1"); - ts_document_parse(document); - new_root = ts_document_root_node(document); - AssertThat(ts_node_end_byte(new_root), Equals(1)); - assert_node_string_equals( - new_root, - "(value (number))"); - }); - }); - - describe("set_language(language)", [&]() { - before_each([&]() { - ts_document_set_input_string(document, "{\"key\": [1, 2]}\n"); - }); - - it("uses the given language for future parses", [&]() { - ts_document_set_language(document, load_real_language("json")); - ts_document_parse(document); - - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (object (pair (string) (array (number) (number)))))"); - }); - - 
it("clears out any previous tree", [&]() { - ts_document_set_language(document, load_real_language("json")); - ts_document_parse(document); - - ts_document_set_language(document, load_real_language("javascript")); - AssertThat(ts_document_root_node(document).subtree, Equals(nullptr)); - - ts_document_parse(document); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(program (expression_statement " - "(object (pair (string) (array (number) (number))))))"); - }); - - it("does not allow setting a language with a different version number", [&]() { - TSLanguage language = *load_real_language("json"); - AssertThat(ts_language_version(&language), Equals(TREE_SITTER_LANGUAGE_VERSION)); - - language.version++; - AssertThat(ts_language_version(&language), !Equals(TREE_SITTER_LANGUAGE_VERSION)); - - ts_document_set_language(document, &language); - AssertThat(ts_document_language(document), Equals(nullptr)); - }); - }); - - describe("set_logger(TSLogger)", [&]() { - SpyLogger *logger; - - before_each([&]() { - logger = new SpyLogger(); - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, "[1, 2]"); - }); - - after_each([&]() { - delete logger; - }); - - it("calls the debugger with a message for each parse action", [&]() { - ts_document_set_logger(document, logger->logger()); - ts_document_parse(document); - - AssertThat(logger->messages, Contains("new_parse")); - AssertThat(logger->messages, Contains("skip character:' '")); - AssertThat(logger->messages, Contains("consume character:'['")); - AssertThat(logger->messages, Contains("consume character:'1'")); - AssertThat(logger->messages, Contains("reduce sym:array, child_count:4")); - AssertThat(logger->messages, Contains("accept")); - }); - - it("allows the debugger to be retrieved later", [&]() { - ts_document_set_logger(document, logger->logger()); - AssertThat(ts_document_logger(document).payload, Equals(logger)); - }); - - 
describe("disabling debugging", [&]() { - before_each([&]() { - ts_document_set_logger(document, logger->logger()); - ts_document_set_logger(document, {NULL, NULL}); - }); - - it("does not call the debugger any more", [&]() { - ts_document_parse(document); - AssertThat(logger->messages, IsEmpty()); - }); - }); - }); - - describe("parse_and_get_changed_ranges()", [&]() { - SpyInput *input; - - before_each([&]() { - ts_document_set_language(document, load_real_language("javascript")); - input = new SpyInput("{a: null};\n", 3); - ts_document_set_input(document, input->input()); - ts_document_parse(document); - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object (pair (property_identifier) (null)))))"); - }); - - after_each([&]() { - delete input; - }); - - auto get_invalidated_ranges_for_edit = [&](std::function callback) -> vector { - TSInputEdit edit = callback(); - ts_document_edit(document, edit); - - TSRange *ranges; - uint32_t range_count = 0; - ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); - - vector result; - for (size_t i = 0; i < range_count; i++) { - result.push_back(ranges[i]); - } - ts_free(ranges); - return result; - }; - - it("reports changes when one token has been updated", [&]() { - // Replace `null` with `nothing` - auto ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(input->content.find("ull"), 1, "othing"); - }); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find("nothing")), - point(0, input->content.find("}")) - }, - }))); - - // Replace `nothing` with `null` again - ranges = get_invalidated_ranges_for_edit([&]() { - return input->undo(); - }); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find("null")), - point(0, input->content.find("}")) - }, - }))); - }); - - it("reports no changes when leading whitespace has changed (regression)", [&]() { - input->chars_per_chunk = 80; - - // 
Insert leading whitespace - auto ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(0, 0, "\n"); - }); - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object (pair (property_identifier) (null)))))"); - AssertThat(ranges, Equals(vector({}))); - - // Remove leading whitespace - ranges = get_invalidated_ranges_for_edit([&]() { - return input->undo(); - }); - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object (pair (property_identifier) (null)))))"); - AssertThat(ranges, Equals(vector({}))); - - // Insert leading whitespace again - ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(0, 0, "\n"); - }); - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object (pair (property_identifier) (null)))))"); - AssertThat(ranges, Equals(vector({}))); - }); - - it("reports changes when tokens have been appended", [&]() { - // Add a second key-value pair - auto ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(input->content.find("}"), 0, ", b: false"); - }); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find(",")), - point(0, input->content.find("}")) - }, - }))); - - // Add a third key-value pair in between the first two - ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(input->content.find(", b"), 0, ", c: 1"); - }); - - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object " - "(pair (property_identifier) (null)) " - "(pair (property_identifier) (number)) " - "(pair (property_identifier) (false)))))"); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find(", c")), - point(0, input->content.find(", b")) - }, - }))); - - // Delete the middle pair. 
- ranges = get_invalidated_ranges_for_edit([&]() { - return input->undo(); - }); - - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object " - "(pair (property_identifier) (null)) " - "(pair (property_identifier) (false)))))"); - - AssertThat(ranges, IsEmpty()); - - // Delete the second pair. - ranges = get_invalidated_ranges_for_edit([&]() { - return input->undo(); - }); - - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object " - "(pair (property_identifier) (null)))))"); - - AssertThat(ranges, IsEmpty()); - }); - - it("reports changes when trees have been wrapped", [&]() { - // Wrap the object in an assignment expression. - auto ranges = get_invalidated_ranges_for_edit([&]() { - return input->replace(input->content.find("null"), 0, "b === "); - }); - - assert_node_string_equals( - ts_document_root_node(document), - "(program (expression_statement (object " - "(pair (property_identifier) (binary_expression (identifier) (null))))))"); - - AssertThat(ranges, Equals(vector({ - TSRange{ - point(0, input->content.find("b ===")), - point(0, input->content.find("}")) - }, - }))); - }); - }); - - describe("parse_with_options(options)", [&]() { - it("halts as soon as an error is found if the halt_on_error flag is set", [&]() { - string input_string = "[1, null, error, 3]"; - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, input_string.c_str()); - - TSParseOptions options = {}; - options.changed_ranges = nullptr; - - options.halt_on_error = false; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (array (number) (null) (ERROR (UNEXPECTED 'e')) (number)))"); - - ts_document_invalidate(document); - - options.halt_on_error = true; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - 
assert_node_string_equals( - root, - "(ERROR (number) (null))"); - - AssertThat(ts_node_end_byte(root), Equals(input_string.size())); - }); - - it("does not insert missing tokens if the halt_on_error flag is set", [&]() { - string input_string = "[1, null, 3"; - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, input_string.c_str()); - - TSParseOptions options = {}; - options.changed_ranges = nullptr; - - options.halt_on_error = false; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (array (number) (null) (number) (MISSING)))"); - - ts_document_invalidate(document); - - options.halt_on_error = true; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(ERROR (number) (null) (number))"); - - AssertThat(ts_node_end_byte(root), Equals(input_string.size())); - }); - - it("can parse valid code with the halt_on_error flag set", [&]() { - string input_string = "[1, null, 3]"; - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, input_string.c_str()); - - TSParseOptions options = {}; - options.changed_ranges = nullptr; - options.halt_on_error = true; - ts_document_parse_with_options(document, options); - root = ts_document_root_node(document); - assert_node_string_equals( - root, - "(value (array (number) (null) (number)))"); - }); - }); -}); - -END_TEST diff --git a/test/runtime/language_test.cc b/test/runtime/language_test.cc index 4726c4ba..747327c0 100644 --- a/test/runtime/language_test.cc +++ b/test/runtime/language_test.cc @@ -28,13 +28,12 @@ describe("Language", []() { } })JSON"); - TSDocument *document = ts_document_new(); + TSParser *parser = ts_parser_new(); const TSLanguage *language = load_test_language("aliased_rules", compile_result); - ts_document_set_language(document, 
language); - ts_document_set_input_string(document, "b"); - ts_document_parse(document); + ts_parser_set_language(parser, language); + TSTree *tree = ts_parser_parse_string(parser, nullptr, "b", 1); - TSNode root_node = ts_document_root_node(document); + TSNode root_node = ts_tree_root_node(tree); char *string = ts_node_string(root_node); AssertThat(string, Equals("(a (c))")); @@ -47,7 +46,8 @@ describe("Language", []() { AssertThat(ts_language_symbol_type(language, aliased_symbol), Equals(TSSymbolTypeRegular)); ts_free(string); - ts_document_free(document); + ts_parser_delete(parser); + ts_tree_delete(tree); }); }); }); diff --git a/test/runtime/node_test.cc b/test/runtime/node_test.cc index 808084be..4c306897 100644 --- a/test/runtime/node_test.cc +++ b/test/runtime/node_test.cc @@ -62,21 +62,22 @@ string grammar_with_aliases_and_extras = R"JSON({ })JSON"; describe("Node", [&]() { - TSDocument *document; + TSParser *parser; + TSTree *tree; TSNode root_node; before_each([&]() { record_alloc::start(); - document = ts_document_new(); - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, json_string.c_str()); - ts_document_parse(document); - root_node = ts_node_child(ts_document_root_node(document), 0); + parser = ts_parser_new(); + ts_parser_set_language(parser, load_real_language("json")); + tree = ts_parser_parse_string(parser, nullptr, json_string.c_str(), json_string.size()); + root_node = ts_node_child(ts_tree_root_node(tree), 0); }); after_each([&]() { - ts_document_free(document); + ts_parser_delete(parser); + ts_tree_delete(tree); record_alloc::stop(); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); @@ -157,16 +158,17 @@ describe("Node", [&]() { AssertThat(ts_node_parent(number_node), Equals(root_node)); AssertThat(ts_node_parent(false_node), Equals(root_node)); AssertThat(ts_node_parent(object_node), Equals(root_node)); - 
AssertThat(ts_node_parent(ts_document_root_node(document)).subtree, Equals(nullptr)); + AssertThat(ts_node_parent(ts_tree_root_node(tree)).subtree, Equals(nullptr)); }); it("works correctly when the node contains aliased children and extras", [&]() { TSCompileResult compile_result = ts_compile_grammar(grammar_with_aliases_and_extras.c_str()); const TSLanguage *language = load_test_language("aliases_and_extras", compile_result); - ts_document_set_language(document, language); - ts_document_set_input_string(document, "b ... b ... b"); - ts_document_parse(document); - root_node = ts_document_root_node(document); + ts_parser_set_language(parser, language); + + ts_tree_delete(tree); + tree = ts_parser_parse_string(parser, nullptr, "b ... b ... b", 13); + root_node = ts_tree_root_node(tree); char *node_string = ts_node_string(root_node); AssertThat(node_string, Equals("(a (b) (comment) (B) (comment) (b))")); @@ -179,7 +181,10 @@ describe("Node", [&]() { AssertThat(ts_node_type(ts_node_named_child(root_node, 3)), Equals("comment")); AssertThat(ts_node_type(ts_node_named_child(root_node, 4)), Equals("b")); - AssertThat(ts_node_symbol(ts_node_named_child(root_node, 0)), !Equals(ts_node_symbol(ts_node_named_child(root_node, 2)))); + AssertThat( + ts_node_symbol(ts_node_named_child(root_node, 0)), + !Equals(ts_node_symbol(ts_node_named_child(root_node, 2))) + ); }); }); @@ -323,7 +328,7 @@ describe("Node", [&]() { AssertThat(ts_node_parent(child5), Equals(root_node)); AssertThat(ts_node_parent(child6), Equals(root_node)); AssertThat(ts_node_parent(child7), Equals(root_node)); - AssertThat(ts_node_parent(ts_document_root_node(document)).subtree, Equals(nullptr)); + AssertThat(ts_node_parent(ts_tree_root_node(tree)).subtree, Equals(nullptr)); }); }); @@ -483,9 +488,10 @@ describe("Node", [&]() { it("works in the presence of multi-byte characters", [&]() { string input_string = "[\"αβγδ\", \"αβγδ\"]"; - ts_document_set_input_string(document, input_string.c_str()); - 
ts_document_parse(document); - TSNode root_node = ts_document_root_node(document); + + ts_tree_delete(tree); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + TSNode root_node = ts_tree_root_node(tree); uint32_t comma_position = input_string.find(","); TSNode node1 = ts_node_descendant_for_byte_range(root_node, comma_position, comma_position); @@ -518,23 +524,23 @@ describe("Node", [&]() { }); describe("TreeCursor", [&]() { - TSDocument *document; + TSParser *parser; + TSTree *tree; TSTreeCursor *cursor; before_each([&]() { record_alloc::start(); - document = ts_document_new(); - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string(document, json_string.c_str()); - ts_document_parse(document); - - cursor = ts_document_tree_cursor(document); + parser = ts_parser_new(); + ts_parser_set_language(parser, load_real_language("json")); + tree = ts_parser_parse_string(parser, nullptr, json_string.c_str(), json_string.size()); + cursor = ts_tree_cursor_new(tree); }); after_each([&]() { + ts_tree_delete(tree); ts_tree_cursor_delete(cursor); - ts_document_free(document); + ts_parser_delete(parser); record_alloc::stop(); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 2cf70b69..e9cfbe72 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -1,17 +1,20 @@ #include "test_helper.h" #include "runtime/alloc.h" +#include "runtime/language.h" #include "helpers/record_alloc.h" #include "helpers/spy_input.h" #include "helpers/load_language.h" #include "helpers/record_alloc.h" #include "helpers/point_helpers.h" +#include "helpers/spy_logger.h" #include "helpers/stderr_logger.h" #include "helpers/dedent.h" START_TEST describe("Parser", [&]() { - TSDocument *document; + TSParser *parser; + TSTree *tree; SpyInput *input; TSNode root; size_t chunk_size; @@ -21,14 +24,16 @@ 
describe("Parser", [&]() { chunk_size = 3; input = nullptr; - document = ts_document_new(); + tree = nullptr; + parser = ts_parser_new(); if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) { - ts_document_print_debugging_graphs(document, true); + ts_parser_print_debugging_graphs(parser, true); } }); after_each([&]() { - if (document) ts_document_free(document); + if (parser) ts_parser_delete(parser); + if (tree) ts_tree_delete(tree); if (input) delete input; record_alloc::stop(); @@ -37,10 +42,8 @@ describe("Parser", [&]() { auto set_text = [&](string text) { input = new SpyInput(text, chunk_size); - ts_document_set_input(document, input->input()); - ts_document_parse(document); - - root = ts_document_root_node(document); + tree = ts_parser_parse(parser, nullptr, input->input()); + root = ts_tree_root_node(tree); AssertThat(ts_node_end_byte(root), Equals(text.size())); input->clear(); }; @@ -48,10 +51,13 @@ describe("Parser", [&]() { auto replace_text = [&](size_t position, size_t length, string new_text) { size_t prev_size = ts_node_end_byte(root); - ts_document_edit(document, input->replace(position, length, new_text)); - ts_document_parse(document); + TSInputEdit edit = input->replace(position, length, new_text); + ts_tree_edit(tree, &edit); + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + ts_tree_delete(tree); + tree = new_tree; - root = ts_document_root_node(document); + root = ts_tree_root_node(tree); size_t new_size = ts_node_end_byte(root); AssertThat(new_size, Equals(prev_size - length + new_text.size())); }; @@ -65,12 +71,15 @@ describe("Parser", [&]() { }; auto undo = [&]() { - ts_document_edit(document, input->undo()); - ts_document_parse(document); + TSInputEdit edit = input->undo(); + ts_tree_edit(tree, &edit); + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + ts_tree_delete(tree); + tree = new_tree; }; auto assert_root_node = [&](const string &expected) { - TSNode node = ts_document_root_node(document); + TSNode 
node = ts_tree_root_node(tree); char *node_string = ts_node_string(node); string actual(node_string); ts_free(node_string); @@ -86,11 +95,9 @@ describe("Parser", [&]() { describe("handling errors", [&]() { describe("when there is an invalid substring right before a valid token", [&]() { it("computes the error node's size and position correctly", [&]() { - ts_document_set_language(document, load_real_language("json")); + ts_parser_set_language(parser, load_real_language("json")); set_text(" [123, @@@@@, true]"); - - assert_root_node( - "(value (array (number) (ERROR (UNEXPECTED '@')) (true)))"); + assert_root_node("(value (array (number) (ERROR (UNEXPECTED '@')) (true)))"); TSNode error = ts_node_named_child(ts_node_child(root, 0), 1); AssertThat(ts_node_type(error), Equals("ERROR")); @@ -111,7 +118,7 @@ describe("Parser", [&]() { describe("when there is an unexpected string in the middle of a token", [&]() { it("computes the error node's size and position correctly", [&]() { - ts_document_set_language(document, load_real_language("json")); + ts_parser_set_language(parser, load_real_language("json")); set_text(" [123, faaaaalse, true]"); assert_root_node( @@ -138,11 +145,10 @@ describe("Parser", [&]() { describe("when there is one unexpected token between two valid tokens", [&]() { it("computes the error node's size and position correctly", [&]() { - ts_document_set_language(document, load_real_language("json")); + ts_parser_set_language(parser, load_real_language("json")); set_text(" [123, true false, true]"); - assert_root_node( - "(value (array (number) (true) (ERROR (false)) (true)))"); + assert_root_node("(value (array (number) (true) (ERROR (false)) (true)))"); TSNode error = ts_node_named_child(ts_node_child(root, 0), 2); AssertThat(ts_node_type(error), Equals("ERROR")); @@ -157,26 +163,23 @@ describe("Parser", [&]() { describe("when there is an unexpected string at the end of a token", [&]() { it("computes the error's size and position correctly", [&]() { - 
ts_document_set_language(document, load_real_language("json")); + ts_parser_set_language(parser, load_real_language("json")); set_text(" [123, \"hi\n, true]"); - - assert_root_node( - "(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))"); + assert_root_node("(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))"); }); }); describe("when there is an unterminated error", [&]() { it("maintains a consistent tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("a; ' this string never ends"); - assert_root_node( - "(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))"); + assert_root_node("(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))"); }); }); describe("when there are extra tokens at the end of the viable prefix", [&]() { it("does not include them in the error node", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text( "var x;\n" "\n" @@ -196,20 +199,64 @@ describe("Parser", [&]() { char *string = (char *)malloc(1); string[0] = '\xdf'; - ts_document_set_language(document, load_real_language("json")); - ts_document_set_input_string_with_length(document, string, 1); - ts_document_parse(document); + ts_parser_set_language(parser, load_real_language("json")); + tree = ts_parser_parse_string(parser, nullptr, string, 1); free(string); - assert_root_node("(ERROR (UNEXPECTED INVALID))"); }); + + describe("when halt_on_error is set to true", [&]() { + it("halts as soon as an error is found if the halt_on_error flag is set", [&]() { + string input_string = "[1, null, error, 3]"; + ts_parser_set_language(parser, load_real_language("json")); + + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(value (array (number) 
(null) (ERROR (UNEXPECTED 'e')) (number)))"); + + ts_parser_halt_on_error(parser, true); + + ts_tree_delete(tree); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(ERROR (number) (null))"); + AssertThat(ts_node_end_byte(root), Equals(input_string.size())); + }); + + it("does not insert missing tokens if the halt_on_error flag is set", [&]() { + string input_string = "[1, null, 3"; + ts_parser_set_language(parser, load_real_language("json")); + + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(value (array (number) (null) (number) (MISSING)))"); + + ts_parser_halt_on_error(parser, true); + + ts_tree_delete(tree); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(ERROR (number) (null) (number))"); + AssertThat(ts_node_end_byte(root), Equals(input_string.size())); + }); + + it("can parse valid code with the halt_on_error flag set", [&]() { + string input_string = "[1, null, 3]"; + ts_parser_set_language(parser, load_real_language("json")); + + ts_parser_halt_on_error(parser, true); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + root = ts_tree_root_node(tree); + assert_root_node("(value (array (number) (null) (number)))"); + }); + }); }); describe("editing", [&]() { describe("creating new tokens near the end of the input", [&]() { it("updates the parse tree and re-reads only the changed portion of the text", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("x * (100 + abc);"); assert_root_node( @@ -242,7 +289,7 @@ describe("Parser", [&]() { it("updates the parse tree and re-reads only the changed portion of the input", [&]() { 
chunk_size = 2; - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("123 + 456 * (10 + x);"); assert_root_node( @@ -268,7 +315,7 @@ describe("Parser", [&]() { describe("introducing an error", [&]() { it("gives the error the right size", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("var x = y;"); assert_root_node( @@ -291,7 +338,7 @@ describe("Parser", [&]() { describe("into the middle of an existing token", [&]() { it("updates the parse tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("abc * 123;"); assert_root_node( @@ -310,7 +357,7 @@ describe("Parser", [&]() { describe("at the end of an existing token", [&]() { it("updates the parse tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("abc * 123;"); assert_root_node( @@ -329,7 +376,7 @@ describe("Parser", [&]() { describe("inserting text into a node containing a extra token", [&]() { it("updates the parse tree", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("123 *\n" "// a-comment\n" "abc;"); @@ -356,7 +403,7 @@ describe("Parser", [&]() { describe("when a critical token is removed", [&]() { it("updates the parse tree, creating an error", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("123 * 456; 789 * 123;"); assert_root_node( @@ -376,7 +423,7 @@ describe("Parser", [&]() { describe("with external tokens", [&]() { it("maintains the external scanner's state during 
incremental parsing", [&]() { - ts_document_set_language(document, load_real_language("python")); + ts_parser_set_language(parser, load_real_language("python")); string text = dedent(R"PYTHON( if a: print b @@ -404,7 +451,7 @@ describe("Parser", [&]() { }); it("does not try to reuse nodes that are within the edited region", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("{ x: (b.c) };"); assert_root_node( @@ -417,23 +464,12 @@ describe("Parser", [&]() { "(program (expression_statement (object (pair " "(property_identifier) (member_expression (identifier) (property_identifier))))))"); }); - - it("updates the document's parse count", [&]() { - ts_document_set_language(document, load_real_language("javascript")); - AssertThat(ts_document_parse_count(document), Equals(0)); - - set_text("{ x: (b.c) };"); - AssertThat(ts_document_parse_count(document), Equals(1)); - - insert_text(strlen("{ x"), "yz"); - AssertThat(ts_document_parse_count(document), Equals(2)); - }); }); describe("lexing", [&]() { describe("handling tokens containing wildcard patterns (e.g. 
comments)", [&]() { - it("terminates them at the end of the document", [&]() { - ts_document_set_language(document, load_real_language("javascript")); + it("terminates them at the end of the string", [&]() { + ts_parser_set_language(parser, load_real_language("javascript")); set_text("x; // this is a comment"); assert_root_node( @@ -448,7 +484,7 @@ describe("Parser", [&]() { it("recognizes UTF8 characters as single characters", [&]() { // 'ΩΩΩ — ΔΔ'; - ts_document_set_language(document, load_real_language("javascript")); + ts_parser_set_language(parser, load_real_language("javascript")); set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';"); assert_root_node( @@ -460,14 +496,120 @@ describe("Parser", [&]() { it("handles non-UTF8 characters", [&]() { const char *string = "cons\xeb\x00e=ls\x83l6hi');\x0a"; - ts_document_set_language(document, load_real_language("javascript")); - ts_document_set_input_string(document, string); - ts_document_parse(document); - - TSNode root = ts_document_root_node(document); + ts_parser_set_language(parser, load_real_language("javascript")); + tree = ts_parser_parse_string(parser, nullptr, string, strlen(string)); + TSNode root = ts_tree_root_node(tree); AssertThat(ts_node_end_byte(root), Equals(strlen(string))); }); }); + + describe("handling TSInputs", [&]() { + SpyInput *spy_input; + + before_each([&]() { + spy_input = new SpyInput("{\"key\": [null, 2]}", 3); + ts_parser_set_language(parser, load_real_language("json")); + }); + + after_each([&]() { + delete spy_input; + }); + + it("handles UTF16 encodings", [&]() { + const char16_t content[] = u"[true, false]"; + spy_input->content = string((const char *)content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + + tree = ts_parser_parse(parser, nullptr, spy_input->input()); + root = ts_tree_root_node(tree); + assert_root_node( + "(value (array (true) (false)))"); + }); + + it("handles truncated UTF16 data", [&]() { + const char content[1] = { '\0' }; + 
spy_input->content = string(content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + + tree = ts_parser_parse(parser, nullptr, spy_input->input()); + }); + + it("measures columns in bytes", [&]() { + const char16_t content[] = u"[true, false]"; + spy_input->content = string((const char *)content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + + tree = ts_parser_parse(parser, nullptr, spy_input->input()); + root = ts_tree_root_node(tree); + AssertThat(ts_node_end_point(root), Equals({0, 28})); + }); + }); + + describe("set_language(language)", [&]() { + string input_string = "{\"key\": [1, 2]}\n"; + + it("uses the given language for future parses", [&]() { + ts_parser_set_language(parser, load_real_language("json")); + tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size()); + + root = ts_tree_root_node(tree); + assert_root_node( + "(value (object (pair (string) (array (number) (number)))))"); + }); + + it("does not allow setting a language with a different version number", [&]() { + TSLanguage language = *load_real_language("json"); + AssertThat(ts_language_version(&language), Equals(TREE_SITTER_LANGUAGE_VERSION)); + + language.version++; + AssertThat(ts_language_version(&language), !Equals(TREE_SITTER_LANGUAGE_VERSION)); + + AssertThat(ts_parser_set_language(parser, &language), IsFalse()); + AssertThat(ts_parser_language(parser), Equals(nullptr)); + }); + }); + + describe("set_logger(TSLogger)", [&]() { + SpyLogger *logger; + + before_each([&]() { + logger = new SpyLogger(); + ts_parser_set_language(parser, load_real_language("json")); + }); + + after_each([&]() { + delete logger; + }); + + it("calls the debugger with a message for each parse action", [&]() { + ts_parser_set_logger(parser, logger->logger()); + tree = ts_parser_parse_string(parser, nullptr, "[ 1, 2, 3 ]", 11); + + AssertThat(logger->messages, Contains("new_parse")); + AssertThat(logger->messages, Contains("skip character:' '")); 
+ AssertThat(logger->messages, Contains("consume character:'['")); + AssertThat(logger->messages, Contains("consume character:'1'")); + AssertThat(logger->messages, Contains("reduce sym:array, child_count:4")); + AssertThat(logger->messages, Contains("accept")); + }); + + it("allows the debugger to be retrieved later", [&]() { + ts_parser_set_logger(parser, logger->logger()); + AssertThat(ts_parser_logger(parser).payload, Equals(logger)); + }); + + describe("disabling debugging", [&]() { + before_each([&]() { + ts_parser_set_logger(parser, logger->logger()); + ts_parser_set_logger(parser, {NULL, NULL}); + }); + + it("does not call the debugger any more", [&]() { + tree = ts_parser_parse_string(parser, nullptr, "{}", 2); + AssertThat(logger->messages, IsEmpty()); + }); + }); + }); }); END_TEST diff --git a/test/runtime/stack_test.cc b/test/runtime/stack_test.cc index 99a979d1..ae5b1589 100644 --- a/test/runtime/stack_test.cc +++ b/test/runtime/stack_test.cc @@ -76,7 +76,7 @@ describe("Stack", [&]() { before_each([&]() { record_alloc::start(); - ts_subtree_pool_init(&pool); + pool = ts_subtree_pool_new(10); stack = ts_stack_new(&pool); TSLanguage dummy_language; diff --git a/test/runtime/subtree_test.cc b/test/runtime/subtree_test.cc index d56be01d..093e68db 100644 --- a/test/runtime/subtree_test.cc +++ b/test/runtime/subtree_test.cc @@ -41,7 +41,7 @@ describe("Subtree", []() { SubtreePool pool; before_each([&]() { - ts_subtree_pool_init(&pool); + pool = ts_subtree_pool_new(10); }); after_each([&]() { diff --git a/test/runtime/tree_test.cc b/test/runtime/tree_test.cc new file mode 100644 index 00000000..a87c754b --- /dev/null +++ b/test/runtime/tree_test.cc @@ -0,0 +1,200 @@ +#include "test_helper.h" +#include "runtime/alloc.h" +#include "helpers/record_alloc.h" +#include "helpers/stream_methods.h" +#include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" +#include "helpers/spy_logger.h" +#include "helpers/stderr_logger.h" +#include "helpers/spy_input.h" 
+#include "helpers/load_language.h" + +TSPoint point(uint32_t row, uint32_t column) { + TSPoint result = {row, column}; + return result; +} + +START_TEST + +describe("Tree", [&]() { + TSParser *parser; + SpyInput *input; + TSTree *tree; + + before_each([&]() { + parser = ts_parser_new(); + }); + + after_each([&]() { + ts_parser_delete(parser); + }); + + auto assert_root_node = [&](const string &expected) { + TSNode node = ts_tree_root_node(tree); + char *node_string = ts_node_string(node); + string actual(node_string); + ts_free(node_string); + AssertThat(actual, Equals(expected)); + }; + + describe("get_changed_ranges()", [&]() { + before_each([&]() { + ts_parser_set_language(parser, load_real_language("javascript")); + input = new SpyInput("{a: null};\n", 3); + tree = ts_parser_parse(parser, nullptr, input->input()); + + assert_root_node( + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + }); + + after_each([&]() { + ts_tree_delete(tree); + delete input; + }); + + auto get_changed_ranges_for_edit = [&](function fn) -> vector { + TSInputEdit edit = fn(); + ts_tree_edit(tree, &edit); + + uint32_t range_count = 0; + TSTree *new_tree = ts_parser_parse(parser, tree, input->input()); + TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count); + ts_tree_delete(tree); + tree = new_tree; + + vector result; + for (size_t i = 0; i < range_count; i++) { + result.push_back(ranges[i]); + } + + ts_free(ranges); + return result; + }; + + it("reports changes when one token has been updated", [&]() { + // Replace `null` with `nothing` + auto ranges = get_changed_ranges_for_edit([&]() { + return input->replace(input->content.find("ull"), 1, "othing"); + }); + AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find("nothing")), + point(0, input->content.find("}")) + }, + }))); + + // Replace `nothing` with `null` again + ranges = get_changed_ranges_for_edit([&]() { + return input->undo(); + }); + 
AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find("null")), + point(0, input->content.find("}")) + }, + }))); + }); + + it("reports no changes when leading whitespace has changed (regression)", [&]() { + input->chars_per_chunk = 80; + + // Insert leading whitespace + auto ranges = get_changed_ranges_for_edit([&]() { + return input->replace(0, 0, "\n"); + }); + assert_root_node( + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + AssertThat(ranges, IsEmpty()); + + // Remove leading whitespace + ranges = get_changed_ranges_for_edit([&]() { + return input->undo(); + }); + assert_root_node( + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + AssertThat(ranges, IsEmpty()); + + // Insert leading whitespace again + ranges = get_changed_ranges_for_edit([&]() { + return input->replace(0, 0, "\n"); + }); + assert_root_node( + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + AssertThat(ranges, IsEmpty()); + }); + + it("reports changes when tokens have been appended", [&]() { + // Add a second key-value pair + auto ranges = get_changed_ranges_for_edit([&]() { + return input->replace(input->content.find("}"), 0, ", b: false"); + }); + AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find(",")), + point(0, input->content.find("}")) + }, + }))); + + // Add a third key-value pair in between the first two + ranges = get_changed_ranges_for_edit([&]() { + return input->replace(input->content.find(", b"), 0, ", c: 1"); + }); + assert_root_node( + "(program (expression_statement (object " + "(pair (property_identifier) (null)) " + "(pair (property_identifier) (number)) " + "(pair (property_identifier) (false)))))" + ); + AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find(", c")), + point(0, input->content.find(", b")) + }, + }))); + + // Delete the middle pair. 
+ ranges = get_changed_ranges_for_edit([&]() { + return input->undo(); + }); + assert_root_node( + "(program (expression_statement (object " + "(pair (property_identifier) (null)) " + "(pair (property_identifier) (false)))))" + ); + AssertThat(ranges, IsEmpty()); + + // Delete the second pair. + ranges = get_changed_ranges_for_edit([&]() { + return input->undo(); + }); + assert_root_node( + "(program (expression_statement (object " + "(pair (property_identifier) (null)))))" + ); + AssertThat(ranges, IsEmpty()); + }); + + it("reports changes when trees have been wrapped", [&]() { + // Wrap the object in an assignment expression. + auto ranges = get_changed_ranges_for_edit([&]() { + return input->replace(input->content.find("null"), 0, "b === "); + }); + assert_root_node( + "(program (expression_statement (object " + "(pair (property_identifier) (binary_expression (identifier) (null))))))" + ); + AssertThat(ranges, Equals(vector({ + TSRange{ + point(0, input->content.find("b ===")), + point(0, input->content.find("}")) + }, + }))); + }); + }); +}); + +END_TEST diff --git a/tests.gyp b/tests.gyp index b964a507..ab0de485 100644 --- a/tests.gyp +++ b/tests.gyp @@ -66,12 +66,12 @@ 'test/integration/fuzzing-examples.cc', 'test/integration/real_grammars.cc', 'test/integration/test_grammars.cc', - 'test/runtime/document_test.cc', 'test/runtime/language_test.cc', 'test/runtime/node_test.cc', 'test/runtime/parser_test.cc', 'test/runtime/stack_test.cc', 'test/runtime/subtree_test.cc', + 'test/runtime/tree_test.cc', 'test/tests.cc', ], 'cflags': [