From 00528e50ce1ca92a3c05df90aeee6d96f9171864 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 13 Sep 2016 13:08:52 -0700 Subject: [PATCH] Change edit API to be byte-based --- include/tree_sitter/runtime.h | 15 +++--- spec/helpers/spy_input.cc | 62 +++++++++++++++++------ spec/helpers/spy_input.h | 6 +-- spec/runtime/document_spec.cc | 21 +++++++- spec/runtime/tree_spec.cc | 83 ++++++++++++++++++++++-------- src/runtime/document.c | 49 +++++++++--------- src/runtime/length.h | 29 ++++++----- src/runtime/parser.c | 20 ++++---- src/runtime/parser.h | 4 +- src/runtime/tree.c | 95 ++++++++++++++++++++++------------- src/runtime/tree.h | 10 +++- 11 files changed, 261 insertions(+), 133 deletions(-) diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index be00a69e..71c3a8f4 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -34,17 +34,20 @@ typedef struct { void (*log)(void *payload, TSLogType, const char *); } TSLogger; -typedef struct { - size_t position; - size_t chars_inserted; - size_t chars_removed; -} TSInputEdit; - typedef struct { size_t row; size_t column; } TSPoint; +typedef struct { + size_t start_byte; + size_t bytes_removed; + size_t bytes_added; + TSPoint start_point; + TSPoint extent_removed; + TSPoint extent_added; +} TSInputEdit; + typedef struct { TSPoint start; TSPoint end; diff --git a/spec/helpers/spy_input.cc b/spec/helpers/spy_input.cc index d5d4bf96..c56ab67e 100644 --- a/spec/helpers/spy_input.cc +++ b/spec/helpers/spy_input.cc @@ -4,6 +4,7 @@ #include #include +using std::pair; using std::string; static const size_t UTF8_MAX_CHAR_SIZE = 4; @@ -68,34 +69,63 @@ TSInput SpyInput::input() { return result; } -TSInputEdit SpyInput::replace(size_t start_char, size_t chars_removed, string text) { - string text_removed = swap_substr(start_char, chars_removed, text); - size_t chars_inserted = string_char_count(encoding, text); - undo_stack.push_back(SpyInputEdit{start_char, chars_inserted, text_removed}); - return {start_char, chars_inserted, chars_removed}; +static TSPoint get_extent(string text) { + TSPoint result = {0, 0}; + for (auto i = text.begin(); i != text.end(); i++) { + if (*i == '\n') { + result.row++; + result.column = 0; + } else { + result.column++; + } + } + return result; +} + +TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) { + auto swap = swap_substr(start_byte, bytes_removed, text); + size_t bytes_added = text.size(); + undo_stack.push_back(SpyInputEdit{start_byte, bytes_added, swap.first}); + TSInputEdit result = {}; + result.start_byte = start_byte; + result.bytes_added = bytes_added; + result.bytes_removed = bytes_removed; + result.start_point = swap.second; + result.extent_removed = get_extent(swap.first); + result.extent_added = get_extent(text); + return result; } TSInputEdit SpyInput::undo() { SpyInputEdit entry = undo_stack.back(); undo_stack.pop_back(); - swap_substr(entry.position, entry.chars_removed, entry.text_inserted); - size_t chars_inserted = string_char_count(encoding, entry.text_inserted); - return TSInputEdit{entry.position, chars_inserted, entry.chars_removed}; + auto swap = swap_substr(entry.start_byte, entry.bytes_removed, entry.text_inserted); + TSInputEdit result; + result.start_byte = entry.start_byte; + result.bytes_removed = entry.bytes_removed; + result.bytes_added = entry.text_inserted.size(); + result.start_point = swap.second; + result.extent_removed = get_extent(swap.first); + result.extent_added = get_extent(entry.text_inserted); + return result; } -string SpyInput::swap_substr(size_t start_char, size_t chars_removed, string text) { - long start_byte = string_byte_for_character(encoding, content, 0, start_char); - assert(start_byte >= 0); - - long bytes_removed = string_byte_for_character(encoding, content, start_byte, chars_removed); - if (bytes_removed < 0) - bytes_removed = content.size() - start_byte; +pair SpyInput::swap_substr(size_t start_byte, size_t bytes_removed, string text) { + TSPoint start_position = {0, 0}; + for (auto i = content.begin(), n = content.begin() + start_byte; i < n; i++) { + if (*i == '\n') { + start_position.row++; + start_position.column = 0; + } else { + start_position.column++; + } + } string text_removed = content.substr(start_byte, bytes_removed); content.erase(start_byte, bytes_removed); content.insert(start_byte, text); - return text_removed; + return {text_removed, start_position}; } void SpyInput::clear() { diff --git a/spec/helpers/spy_input.h b/spec/helpers/spy_input.h index 24caee6a..a91fc165 100644 --- a/spec/helpers/spy_input.h +++ b/spec/helpers/spy_input.h @@ -6,8 +6,8 @@ #include "tree_sitter/runtime.h" struct SpyInputEdit { - size_t position; - size_t chars_removed; + size_t start_byte; + size_t bytes_removed; std::string text_inserted; }; @@ -20,7 +20,7 @@ class SpyInput { static const char * read(void *, size_t *); static int seek(void *, size_t, size_t); - std::string swap_substr(size_t, size_t, std::string); + std::pair swap_substr(size_t, size_t, std::string); public: SpyInput(std::string content, size_t chars_per_chunk); diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index aa5ad6bb..c36c5aa3 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -67,6 +67,18 @@ describe("Document", [&]() { "(array (true) (false))"); }); + it("allows columns to be measured in either bytes or characters", [&]() { + const char16_t content[] = u"[true, false]"; + spy_input->content = string((const char *)content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + // spy_input->measure_columns_in_bytes + + ts_document_set_input(doc, spy_input->input()); + ts_document_invalidate(doc); + ts_document_parse(doc); + TSNode root_node = ts_document_root_node(doc); + }); + it("allows the input to be retrieved later", [&]() { ts_document_set_input(doc, spy_input->input()); AssertThat(ts_document_input(doc).payload, Equals(spy_input)); @@ -85,7 +97,12 @@ describe("Document", [&]() { ts_document_set_input(doc, spy_input->input()); // Insert 'null', delete '1'. - ts_document_edit(doc, {strlen("{\"key\": ["), 4, 1}); + TSInputEdit edit = {}; + edit.start_point.column = edit.start_byte = strlen("{\"key\": ["); + edit.extent_added.column = edit.bytes_added = 4; + edit.extent_removed.column = edit.bytes_removed = 1; + + ts_document_edit(doc, edit); ts_document_parse(doc); TSNode new_root = ts_document_root_node(doc); @@ -194,7 +211,7 @@ describe("Document", [&]() { }); }); - describe("parse_and_get_changed_ranges()", [&]() { + describe("parse_and_get_changed_ranges()", [&]() { SpyInput *input; before_each([&]() { diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index f076c44b..3c209b37 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -183,16 +183,23 @@ describe("Tree", []() { describe("edits within a tree's padding", [&]() { it("resizes the padding of the tree and its leftmost descendants", [&]() { - ts_tree_edit(tree, {1, 1, 0}); - + TSInputEdit edit = { + .start_byte = 1, + .bytes_removed = 0, + .bytes_added = 1, + .start_point = {0, 1}, + .extent_removed = {0, 0}, + .extent_added = {0, 1}, + }; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 3, {0, 0}})); + AssertThat(tree->padding, Equals({3, 0, {0, 3}})); AssertThat(tree->size, Equals({13, 13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 3, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({3, 0, {0, 3}})); AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); @@ -203,32 +210,48 @@ describe("Tree", []() { describe("edits that start in a tree's padding but extend into its content", [&]() { it("shrinks the content to compensate for the expanded padding", [&]() { - ts_tree_edit(tree, {1, 4, 3}); - + TSInputEdit edit = { + .start_byte = 1, + .bytes_removed = 3, + .bytes_added = 4, + .start_point = {0, 1}, + .extent_removed = {0, 3}, + .extent_added = {0, 4}, + }; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 5, {0, 0}})); - AssertThat(tree->size, Equals({0, 11, {0, 0}})); + AssertThat(tree->padding, Equals({5, 0, {0, 5}})); + AssertThat(tree->size, Equals({11, 0, {0, 11}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 5, {0, 0}})); - AssertThat(tree->children[0]->size, Equals({0, 1, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({5, 0, {0, 5}})); + AssertThat(tree->children[0]->size, Equals({1, 0, {0, 1}})); }); }); describe("insertions at the edge of a tree's padding", [&]() { it("expands the tree's padding", [&]() { - ts_tree_edit(tree, {2, 2, 0}); + TSInputEdit edit = { + .start_byte = 2, + .bytes_removed = 0, + .bytes_added = 2, + .start_point = {0, 2}, + .extent_removed = {0, 0}, + .extent_added = {0, 2}, + }; + ts_tree_edit(tree, &edit); + assert_consistent(tree); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->padding, Equals({4, 0, {0, 4}})); AssertThat(tree->size, Equals({13, 13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({4, 0, {0, 4}})); AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); @@ -237,17 +260,24 @@ describe("Tree", []() { describe("replacements starting at the edge of a tree's padding", [&]() { it("resizes the content and not the padding", [&]() { - ts_tree_edit(tree, {2, 5, 2}); - + TSInputEdit edit = { + .start_byte = 2, + .bytes_removed = 2, + .bytes_added = 5, + .start_point = {0, 2}, + .extent_removed = {0, 2}, + .extent_added = {0, 5}, + }; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); AssertThat(tree->padding, Equals({2, 2, {0, 2}})); - AssertThat(tree->size, Equals({0, 16, {0, 0}})); + AssertThat(tree->size, Equals({16, 0, {0, 16}})); AssertThat(tree->children[0]->has_changes, IsTrue()); AssertThat(tree->children[0]->padding, Equals({2, 2, {0, 2}})); - AssertThat(tree->children[0]->size, Equals({0, 6, {0, 0}})); + AssertThat(tree->children[0]->size, Equals({6, 0, {0, 6}})); AssertThat(tree->children[1]->has_changes, IsFalse()); }); @@ -255,16 +285,25 @@ describe("Tree", []() { describe("deletions that span more than one child node", [&]() { it("shrinks subsequent child nodes", [&]() { - ts_tree_edit(tree, {1, 3, 10}); + TSInputEdit edit = { + .start_byte = 1, + .bytes_removed = 10, + .bytes_added = 3, + .start_point = {0, 1}, + .extent_removed = {0, 10}, + .extent_added = {0, 3}, + }; + ts_tree_edit(tree, &edit); + assert_consistent(tree); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 4, {0, 0}})); - AssertThat(tree->size, Equals({0, 4, {0, 0}})); + AssertThat(tree->padding, Equals({4, 0, {0, 4}})); + AssertThat(tree->size, Equals({4, 0, {0, 4}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({4, 0, {0, 4}})); AssertThat(tree->children[0]->size, Equals({0, 0, {0, 0}})); AssertThat(tree->children[1]->has_changes, IsTrue()); @@ -272,7 +311,7 @@ describe("Tree", []() { AssertThat(tree->children[1]->size, Equals({0, 0, {0, 0}})); AssertThat(tree->children[2]->has_changes, IsTrue()); - AssertThat(tree->children[2]->padding, Equals({0, 1, {0, 0}})); + AssertThat(tree->children[2]->padding, Equals({1, 0, {0, 1}})); AssertThat(tree->children[2]->size, Equals({3, 3, {0, 3}})); }); }); diff --git a/src/runtime/document.c b/src/runtime/document.c index 3d90a482..edb31ca4 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -80,13 +80,13 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { if (!self->tree) return; - size_t max_chars = ts_tree_total_chars(self->tree); - if (edit.position > max_chars) - edit.position = max_chars; - if (edit.chars_removed > max_chars - edit.position) - edit.chars_removed = max_chars - edit.position; + size_t max_bytes = ts_tree_total_bytes(self->tree); + if (edit.start_byte > max_bytes) + edit.start_byte = max_bytes; + if (edit.bytes_removed > max_bytes - edit.start_byte) + edit.bytes_removed = max_bytes - edit.start_byte; - ts_tree_edit(self->tree, edit); + ts_tree_edit(self->tree, &edit); } typedef Array(TSRange) RangeArray; @@ -107,18 +107,19 @@ static bool push_diff(RangeArray *results, TSNode *node, bool *extend_last_chang return array_push(results, ((TSRange){start, end})); } -static bool ts_tree_diff(TSDocument *doc, TSTree *old, TSNode *new_node, - size_t depth, RangeArray *results, bool *extend_last_change) { +static bool ts_tree_get_changes(TSDocument *doc, TSTree *old, TSNode *new_node, + size_t depth, RangeArray *results, + bool *extend_last_change) { TSTree *new = (TSTree *)(new_node->data); PRINT("At %lu, ('%s', %lu) vs ('%s', %lu) {", - ts_node_start_char(*new_node), - NAME(old), old->size.chars, - NAME(new), new->size.chars); + ts_node_start_byte(*new_node), + NAME(old), old->size.bytes, + NAME(new), new->size.bytes); if (old->visible) { if (old == new || (old->symbol == new->symbol && - old->size.chars == new->size.chars && !old->has_changes)) { + old->size.bytes == new->size.bytes && !old->has_changes)) { *extend_last_change = false; PRINT("}", NULL); return true; @@ -140,21 +141,21 @@ static bool ts_tree_diff(TSDocument *doc, TSTree *old, TSNode *new_node, depth++; size_t old_child_start; - size_t old_child_end = ts_node_start_char(*new_node) - old->padding.chars; + size_t old_child_end = ts_node_start_byte(*new_node) - old->padding.bytes; for (size_t j = 0; j < old->child_count; j++) { TSTree *old_child = old->children[j]; - if (old_child->padding.chars == 0 && old_child->size.chars == 0) + if (old_child->padding.bytes == 0 && old_child->size.bytes == 0) continue; - old_child_start = old_child_end + old_child->padding.chars; - old_child_end = old_child_start + old_child->size.chars; + old_child_start = old_child_end + old_child->padding.bytes; + old_child_end = old_child_start + old_child->size.bytes; while (true) { - size_t new_child_start = ts_node_start_char(*new_node); + size_t new_child_start = ts_node_start_byte(*new_node); if (new_child_start < old_child_start) { PRINT("skip new:('%s', %lu), old:('%s', %lu), old_parent:%s", - NAME(new_node->data), ts_node_start_char(*new_node), NAME(old_child), + NAME(new_node->data), ts_node_start_byte(*new_node), NAME(old_child), old_child_start, NAME(old)); if (!push_diff(results, new_node, extend_last_change)) @@ -163,23 +164,23 @@ static bool ts_tree_diff(TSDocument *doc, TSTree *old, TSNode *new_node, TSNode next = ts_node_next_sibling(*new_node); if (next.data) { PRINT("advance before diff ('%s', %lu) -> ('%s', %lu)", - NAME(new_node->data), ts_node_start_char(*new_node), NAME(next.data), - ts_node_start_char(next)); + NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data), + ts_node_start_byte(next)); *new_node = next; } else { break; } } else if (new_child_start == old_child_start) { - if (!ts_tree_diff(doc, old_child, new_node, depth, results, extend_last_change)) + if (!ts_tree_get_changes(doc, old_child, new_node, depth, results, extend_last_change)) return false; if (old_child->visible) { TSNode next = ts_node_next_sibling(*new_node); if (next.data) { PRINT("advance after diff ('%s', %lu) -> ('%s', %lu)", - NAME(new_node->data), ts_node_start_char(*new_node), NAME(next.data), - ts_node_start_char(next)); + NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data), + ts_node_start_byte(next)); *new_node = next; } } @@ -225,7 +226,7 @@ int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, if (ranges && range_count) { bool extend_last_change = false; RangeArray result = {0, 0, 0}; - if (!ts_tree_diff(self, old_tree, &new_root, 0, &result, &extend_last_change)) + if (!ts_tree_get_changes(self, old_tree, &new_root, 0, &result, &extend_last_change)) return -1; *ranges = result.contents; *range_count = result.size; diff --git a/src/runtime/length.h b/src/runtime/length.h index dde1f81f..e4840d32 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -18,13 +18,19 @@ static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) { return (TSPoint){0, a.column - b.column}; } +static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { + if (a.row < b.row || (a.row == b.row && a.column < b.column)) + return a; + else + return b; +} + static inline bool ts_length_is_unknown(TSLength self) { - return self.chars > 0 && self.bytes == 0; + return self.bytes > 0 && self.chars == 0; } static inline void ts_length_set_unknown(TSLength *self) { - self->bytes = 0; - self->extent = (TSPoint){0, 0}; + self->chars = 0; } static inline TSLength ts_length_min(TSLength len1, TSLength len2) { @@ -34,13 +40,13 @@ static inline TSLength ts_length_min(TSLength len1, TSLength len2) { static inline TSLength ts_length_add(TSLength len1, TSLength len2) { TSLength result; result.chars = len1.chars + len2.chars; + result.bytes = len1.bytes + len2.bytes; + result.extent = ts_point_add(len1.extent, len2.extent); if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { - result.bytes = 0; - result.extent = (TSPoint){0, result.chars}; + result.chars = 0; } else { - result.bytes = len1.bytes + len2.bytes; - result.extent = ts_point_add(len1.extent, len2.extent); + result.chars = len1.chars + len2.chars; } return result; @@ -48,14 +54,13 @@ static inline TSLength ts_length_add(TSLength len1, TSLength len2) { static inline TSLength ts_length_sub(TSLength len1, TSLength len2) { TSLength result; - result.chars = len1.chars - len2.chars; + result.bytes = len1.bytes - len2.bytes; + result.extent = ts_point_sub(len1.extent, len2.extent); if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { - result.bytes = 0; - result.extent = (TSPoint){0, result.chars}; + result.chars = 0; } else { - result.bytes = len1.bytes - len2.bytes; - result.extent = ts_point_sub(len1.extent, len2.extent); + result.chars = len1.chars - len2.chars; } return result; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 04c2246c..7e3c9a0d 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -144,7 +144,7 @@ error: } static void parser__pop_reusable_node(ReusableNode *reusable_node) { - reusable_node->char_index += ts_tree_total_chars(reusable_node->tree); + reusable_node->byte_index += ts_tree_total_bytes(reusable_node->tree); while (reusable_node->tree) { TSTree *parent = reusable_node->tree->context.parent; size_t next_index = reusable_node->tree->context.index + 1; @@ -270,7 +270,7 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { break; } - if (self->lexer.current_position.chars == position.chars) { + if (self->lexer.current_position.bytes == position.bytes) { if (!skipped_error) { error_start_position = self->lexer.current_position; first_error_character = self->lexer.lookahead; @@ -317,15 +317,15 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, TSLength position = ts_stack_top_position(self->stack, version); while (reusable_node->tree) { - if (reusable_node->char_index > position.chars) { + if (reusable_node->byte_index > position.bytes) { LOG("before_reusable sym:%s, pos:%lu", - SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); + SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index); break; } - if (reusable_node->char_index < position.chars) { + if (reusable_node->byte_index < position.bytes) { LOG("past_reusable sym:%s, pos:%lu", - SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); + SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index); parser__pop_reusable_node(reusable_node); continue; } @@ -333,7 +333,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, if (reusable_node->tree->has_changes) { LOG("cant_reuse_changed tree:%s, size:%lu", SYM_NAME(reusable_node->tree->symbol), - reusable_node->tree->size.chars); + reusable_node->tree->size.bytes); if (!parser__breakdown_reusable_node(reusable_node)) { parser__pop_reusable_node(reusable_node); CHECK(parser__breakdown_top_of_stack(self, version)); @@ -344,7 +344,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, if (reusable_node->tree->symbol == ts_builtin_sym_error) { LOG("cant_reuse_error tree:%s, size:%lu", SYM_NAME(reusable_node->tree->symbol), - reusable_node->tree->size.chars); + reusable_node->tree->size.bytes); if (!parser__breakdown_reusable_node(reusable_node)) { parser__pop_reusable_node(reusable_node); CHECK(parser__breakdown_top_of_stack(self, version)); @@ -357,7 +357,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, return result; } - if (self->cached_token && position.chars == self->cached_token_char_index) { + if (self->cached_token && position.bytes == self->cached_token_byte_index) { ts_tree_retain(self->cached_token); return self->cached_token; } @@ -1073,7 +1073,7 @@ static bool parser__advance(Parser *self, StackVersion version, validated_lookahead = true; LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), - lookahead->size.chars); + lookahead->size.bytes); } bool reduction_stopped_at_error = false; diff --git a/src/runtime/parser.h b/src/runtime/parser.h index ac621ca1..146ee6a4 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -11,7 +11,7 @@ extern "C" { typedef struct { TSTree *tree; - size_t char_index; + size_t byte_index; } ReusableNode; typedef struct { @@ -24,7 +24,7 @@ typedef struct { bool print_debugging_graphs; TSTree scratch_tree; TSTree *cached_token; - size_t cached_token_char_index; + size_t cached_token_byte_index; ReusableNode reusable_node; } Parser; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index dc75d827..85c545d7 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -307,34 +307,48 @@ static inline long min(long a, long b) { return a <= b ? a : b; } -void ts_tree_edit(TSTree *self, TSInputEdit edit) { - size_t start = edit.position; - size_t new_end = edit.position + edit.chars_inserted; - size_t old_end = edit.position + edit.chars_removed; - assert(old_end <= ts_tree_total_chars(self)); + +void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { + size_t old_end_byte = edit->start_byte + edit->bytes_removed; + size_t new_end_byte = edit->start_byte + edit->bytes_added; + TSPoint old_end_point = ts_point_add(edit->start_point, edit->extent_removed); + TSPoint new_end_point = ts_point_add(edit->start_point, edit->extent_added); + + assert(old_end_byte <= ts_tree_total_bytes(self)); self->has_changes = true; - if (start < self->padding.chars) { + if (edit->start_byte < self->padding.bytes) { ts_length_set_unknown(&self->padding); - long remaining_padding = self->padding.chars - old_end; - if (remaining_padding >= 0) { - self->padding.chars = new_end + remaining_padding; + if (self->padding.bytes >= old_end_byte) { + size_t trailing_padding_bytes = self->padding.bytes - old_end_byte; + TSPoint trailing_padding_extent = ts_point_sub(self->padding.extent, old_end_point); + self->padding.bytes = new_end_byte + trailing_padding_bytes; + self->padding.extent = ts_point_add(new_end_point, trailing_padding_extent); } else { - self->padding.chars = new_end; - self->size.chars += remaining_padding; ts_length_set_unknown(&self->size); + size_t removed_content_bytes = old_end_byte - self->padding.bytes; + TSPoint removed_content_extent = ts_point_sub(old_end_point, self->padding.extent); + self->size.bytes = self->size.bytes - removed_content_bytes; + self->size.extent = ts_point_sub(self->size.extent, removed_content_extent); + self->padding.bytes = new_end_byte; + self->padding.extent = new_end_point; } - } else if (start == self->padding.chars && edit.chars_removed == 0) { - self->padding.chars += edit.chars_inserted; + } else if (edit->start_byte == self->padding.bytes && edit->bytes_removed == 0) { ts_length_set_unknown(&self->padding); + self->padding.bytes = self->padding.bytes + edit->bytes_added; + self->padding.extent = ts_point_add(self->padding.extent, edit->extent_added); } else { - self->size.chars += (edit.chars_inserted - edit.chars_removed); ts_length_set_unknown(&self->size); + size_t trailing_content_bytes = ts_tree_total_bytes(self) - old_end_byte; + TSPoint trailing_content_extent = ts_point_sub(ts_tree_total_extent(self), old_end_point); + self->size.bytes = new_end_byte + trailing_content_bytes - self->padding.bytes; + self->size.extent = ts_point_sub(ts_point_add(new_end_point, trailing_content_extent), self->padding.extent); } bool found_first_child = false; - long remainder_to_delete = edit.chars_removed - edit.chars_inserted; + long remaining_bytes_to_delete = 0; + TSPoint remaining_extent_to_delete = {0, 0}; TSLength child_left, child_right = ts_length_zero(); for (size_t i = 0; i < self->child_count; i++) { TSTree *child = self->children[i]; @@ -342,30 +356,41 @@ void ts_tree_edit(TSTree *self, TSInputEdit edit) { if (!found_first_child) { child_right = ts_length_add(child_left, ts_tree_total_size(child)); - if (child_right.chars >= start) { + if (child_right.bytes >= edit->start_byte) { found_first_child = true; - size_t chars_removed = min(edit.chars_removed, child_right.chars - start); - remainder_to_delete -= (chars_removed - edit.chars_inserted); - ts_tree_edit(child, (TSInputEdit){ - .position = start - child_left.chars, - .chars_inserted = edit.chars_inserted, - .chars_removed = chars_removed, - }); - child_right = ts_length_add(child_left, ts_tree_total_size(child)); + TSInputEdit child_edit = { + .start_byte = edit->start_byte - child_left.bytes, + .bytes_added = edit->bytes_added, + .bytes_removed = edit->bytes_removed, + .start_point = ts_point_sub(edit->start_point, child_left.extent), + .extent_added = edit->extent_added, + .extent_removed = edit->extent_removed, + }; + + if (old_end_byte > child_right.bytes) { + child_edit.bytes_removed = child_right.bytes - edit->start_byte; + child_edit.extent_removed = ts_point_sub(child_right.extent, edit->start_point); + remaining_bytes_to_delete = old_end_byte - child_right.bytes; + remaining_extent_to_delete = ts_point_sub(old_end_point, child_right.extent); + } + + ts_tree_edit(child, &child_edit); } - } else { - if (remainder_to_delete > 0) { - size_t chars_removed = min(remainder_to_delete, ts_tree_total_chars(child)); - remainder_to_delete -= chars_removed; - ts_tree_edit( - child, - (TSInputEdit){ - .position = 0, .chars_inserted = 0, .chars_removed = chars_removed, - }); - } - child_right = ts_length_add(child_right, ts_tree_total_size(child)); + } else if (remaining_bytes_to_delete > 0) { + TSInputEdit child_edit = { + .start_byte = 0, + .bytes_added = 0, + .bytes_removed = min(remaining_bytes_to_delete, ts_tree_total_bytes(child)), + .start_point = {0, 0}, + .extent_added = {0, 0}, + .extent_removed = ts_point_min(remaining_extent_to_delete, ts_tree_total_size(child).extent), + }; + remaining_bytes_to_delete -= child_edit.bytes_removed; + remaining_extent_to_delete = ts_point_sub(remaining_extent_to_delete, child_edit.extent_removed); + ts_tree_edit(child, &child_edit); } + child_right = ts_length_add(child_left, ts_tree_total_size(child)); child->context.offset = child_left; } } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 7121b2b5..af8d50d9 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -68,7 +68,7 @@ size_t ts_tree_start_column(const TSTree *self); size_t ts_tree_end_column(const TSTree *self); void ts_tree_set_children(TSTree *, size_t, TSTree **); void ts_tree_assign_parents(TSTree *); -void ts_tree_edit(TSTree *, TSInputEdit); +void ts_tree_edit(TSTree *, const TSInputEdit *edit); char *ts_tree_string(const TSTree *, const TSLanguage *, bool include_all); void ts_tree_print_dot_graph(const TSTree *, const TSLanguage *, FILE *); @@ -76,10 +76,18 @@ static inline size_t ts_tree_total_chars(const TSTree *self) { return self->padding.chars + self->size.chars; } +static inline size_t ts_tree_total_bytes(const TSTree *self) { + return self->padding.bytes + self->size.bytes; +} + static inline TSLength ts_tree_total_size(const TSTree *self) { return ts_length_add(self->padding, self->size); } +static inline TSPoint ts_tree_total_extent(const TSTree *self) { + return ts_point_add(self->padding.extent, self->size.extent); +} + static inline bool ts_tree_is_fragile(const TSTree *tree) { return tree->fragile_left || tree->fragile_right || ts_tree_total_chars(tree) == 0;