diff --git a/spec/helpers/scope_sequence.cc b/spec/helpers/scope_sequence.cc index 32a77bc6..87e059dc 100644 --- a/spec/helpers/scope_sequence.cc +++ b/spec/helpers/scope_sequence.cc @@ -23,23 +23,18 @@ static void append_to_scope_sequence(ScopeSequence *sequence, ScopeStack *current_scopes, TSNode node, TSDocument *document, const std::string &text) { + append_text_to_scope_sequence(sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size()); + string scope = ts_node_type(node, document); current_scopes->push_back(scope); size_t child_count = ts_node_child_count(node); if (child_count > 0) { - size_t previous_child_end = ts_node_start_char(node); for (size_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); - size_t child_start = ts_node_start_char(child); - size_t spacing = child_start - previous_child_end; - append_text_to_scope_sequence(sequence, current_scopes, text, spacing); append_to_scope_sequence(sequence, current_scopes, child, document, text); - previous_child_end = ts_node_end_char(child); } - size_t spacing = ts_node_end_char(node) - previous_child_end; - append_text_to_scope_sequence(sequence, current_scopes, text, spacing); } else { - size_t length = ts_node_end_char(node) - ts_node_start_char(node); + size_t length = ts_node_end_byte(node) - ts_node_start_byte(node); append_text_to_scope_sequence(sequence, current_scopes, text, length); } current_scopes->pop_back(); @@ -50,7 +45,6 @@ ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text ScopeStack current_scopes; TSNode node = ts_document_root_node(document); append_to_scope_sequence(&sequence, &current_scopes, node, document, text); - AssertThat(sequence.size(), Equals(text.size())); return sequence; } @@ -66,7 +60,7 @@ bool operator<=(const TSPoint &left, const TSPoint &right) { void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequence &new_sequence, const string &text, TSRange *ranges, size_t range_count) { TSPoint 
current_position = {0, 0}; - for (size_t i = 0; i < text.size(); i++) { + for (size_t i = 0; i < old_sequence.size(); i++) { if (text[i] == '\n') { current_position.row++; current_position.column = 0; @@ -89,6 +83,7 @@ void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequenc std::stringstream message_stream; message_stream << "Found changed scope outside of any invalidated range;\n"; message_stream << "Position: " << current_position << "\n"; + message_stream << "Byte index: " << i << "\n"; size_t line_start_index = i - current_position.column; size_t line_end_index = text.find_first_of('\n', i); message_stream << "Line: " << text.substr(line_start_index, line_end_index - line_start_index) << "\n"; @@ -99,7 +94,7 @@ void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequenc message_stream << "New scopes: " << new_scopes << "\n"; message_stream << "Invalidated ranges:\n"; for (size_t j = 0; j < range_count; j++) { - message_stream << " " << ranges[i] << "\n"; + message_stream << " " << ranges[j] << "\n"; } Assert::Failure(message_stream.str()); } diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index b4612f9d..d9701d2e 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -8,6 +8,7 @@ #include "helpers/encoding_helpers.h" #include "helpers/record_alloc.h" #include "helpers/random_helpers.h" +#include "helpers/scope_sequence.h" #include static void assert_correct_tree_shape(const TSDocument *document, string tree_string) { @@ -139,7 +140,16 @@ describe("The Corpus", []() { ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); - ts_document_parse(document); + + TSRange *ranges; + size_t range_count; + ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); + ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); + + ScopeSequence new_scope_sequence = 
build_scope_sequence(document, input->content); + verify_changed_ranges(old_scope_sequence, new_scope_sequence, + input->content, ranges, range_count); + ts_free(ranges); }); } diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index bd07ff3c..417c8d31 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -76,7 +76,6 @@ describe("Document", [&]() { ts_document_set_input(doc, spy_input->input()); ts_document_invalidate(doc); ts_document_parse(doc); - TSNode root_node = ts_document_root_node(doc); }); it("allows the input to be retrieved later", [&]() { @@ -211,7 +210,7 @@ describe("Document", [&]() { }); }); - describe("parse_and_get_changed_ranges()", [&]() { + describe("parse_and_get_changed_ranges()", [&]() { SpyInput *input; before_each([&]() { @@ -234,6 +233,7 @@ describe("Document", [&]() { TSRange *ranges; size_t range_count = 0; + ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count); vector<TSRange> result; @@ -333,13 +333,18 @@ describe("Document", [&]() { it("reports changes when trees have been wrapped", [&]() { // Wrap the object in an assignment expression. auto ranges = get_ranges([&]() { - return input->replace(0, 0, "x.y = "); + return input->replace(input->content.find("null"), 0, "b === "); }); + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (rel_op (identifier) (null))))))"); + AssertThat(ranges, Equals(vector<TSRange>({ TSRange{ - TSPoint{0, 0}, - TSPoint{0, input->content.find(";")}, + TSPoint{0, input->content.find("b ===")}, + TSPoint{0, input->content.find("}")}, }, }))); }); diff --git a/src/runtime/document.c b/src/runtime/document.c index 325534b6..9afa33c2 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -91,118 +91,210 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { typedef Array(TSRange) RangeArray; -#define NAME(t) ((t) ? 
(ts_language_symbol_name(doc->parser.language, ((TSTree *)(t))->symbol)) : "") -// #define PRINT(msg, ...) for (size_t k = 0; k < depth; k++) { printf(" "); } printf(msg "\n", __VA_ARGS__); -#define PRINT(msg, ...) +#define NAME(t) \ + ((t) \ + ? (ts_language_symbol_name(doc->parser.language, ((TSTree *)(t))->symbol)) \ + : "") -static bool push_diff(RangeArray *results, TSNode *node, bool *extend_last_change) { - TSPoint start = ts_node_start_point(*node); - TSPoint end = ts_node_end_point(*node); - if (*extend_last_change) { +static bool push_change(RangeArray *results, TSPoint start, TSPoint end) { + if (results->size > 0) { TSRange *last_range = array_back(results); - last_range->end = end; - return true; - } - *extend_last_change = true; - return array_push(results, ((TSRange){start, end})); -} - -static bool ts_tree_get_changes(TSDocument *doc, TSTree *old, TSNode *new_node, - size_t depth, RangeArray *results, - bool *extend_last_change) { - TSTree *new = (TSTree *)(new_node->data); - - PRINT("At %lu, ('%s', %lu) vs ('%s', %lu) {", - ts_node_start_byte(*new_node), - NAME(old), old->size.bytes, - NAME(new), new->size.bytes); - - if (old->visible) { - if (old == new || (old->symbol == new->symbol && - old->size.bytes == new->size.bytes && !old->has_changes)) { - *extend_last_change = false; - PRINT("}", NULL); - return true; - } - - if (old->symbol != new->symbol) { - PRINT("}", NULL); - return push_diff(results, new_node, extend_last_change); - } - - TSNode child = ts_node_child(*new_node, 0); - if (child.data) { - *new_node = child; - } else { - PRINT("}", NULL); + if (ts_point_lte(start, last_range->end)) { + last_range->end = end; return true; } } - depth++; - size_t old_child_start; - size_t old_child_end = ts_node_start_byte(*new_node) - old->padding.bytes; - - for (size_t j = 0; j < old->child_count; j++) { - TSTree *old_child = old->children[j]; - if (old_child->padding.bytes == 0 && old_child->size.bytes == 0) - continue; - - old_child_start = 
old_child_end + old_child->padding.bytes; - old_child_end = old_child_start + old_child->size.bytes; - - while (true) { - size_t new_child_start = ts_node_start_byte(*new_node); - if (new_child_start < old_child_start) { - PRINT("skip new:('%s', %lu), old:('%s', %lu), old_parent:%s", - NAME(new_node->data), ts_node_start_byte(*new_node), NAME(old_child), - old_child_start, NAME(old)); - - if (!push_diff(results, new_node, extend_last_change)) - return false; - - TSNode next = ts_node_next_sibling(*new_node); - if (next.data) { - PRINT("advance before diff ('%s', %lu) -> ('%s', %lu)", - NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data), - ts_node_start_byte(next)); - - *new_node = next; - } else { - break; - } - } else if (new_child_start == old_child_start) { - if (!ts_tree_get_changes(doc, old_child, new_node, depth, results, extend_last_change)) - return false; - - if (old_child->visible) { - TSNode next = ts_node_next_sibling(*new_node); - if (next.data) { - PRINT("advance after diff ('%s', %lu) -> ('%s', %lu)", - NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data), - ts_node_start_byte(next)); - *new_node = next; - } - } - break; - } else { - break; - } - } + if (ts_point_lt(start, end)) { + TSRange range = { start, end }; + return array_push(results, range); } - depth--; - if (old->visible) { - *new_node = ts_node_parent(*new_node); - } - - PRINT("}", NULL); return true; } -int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, size_t *range_count) { - if (ranges) *ranges = NULL; - if (range_count) *range_count = 0; +static bool tree_path_descend(TreePath *path, TSPoint position) { + bool did_descend; + do { + did_descend = false; + TreePathEntry entry = *array_back(path); + TSLength child_position = entry.position; + for (size_t i = 0; i < entry.tree->child_count; i++) { + TSTree *child = entry.tree->children[i]; + TSLength child_right_position = + ts_length_add(child_position, 
ts_tree_total_size(child)); + if (ts_point_lt(position, child_right_position.extent)) { + TreePathEntry child_entry = { child, child_position, i }; + if (child->visible) { + array_push(path, child_entry); + return true; + } else if (child->visible_child_count > 0) { + array_push(path, child_entry); + did_descend = true; + break; + } + } + child_position = child_right_position; + } + } while (did_descend); + return false; +} + +static size_t tree_path_advance(TreePath *path) { + size_t ascend_count = 0; + while (path->size > 0) { + TreePathEntry entry = array_pop(path); + if (path->size == 0) + break; + TreePathEntry parent_entry = *array_back(path); + if (parent_entry.tree->visible) { + ascend_count++; + } + TSLength position = + ts_length_add(entry.position, ts_tree_total_size(entry.tree)); + for (size_t i = entry.child_index + 1, n = parent_entry.tree->child_count; + i < n; i++) { + TSTree *next_child = parent_entry.tree->children[i]; + if (next_child->visible || next_child->visible_child_count > 0) { + if (parent_entry.tree->visible) { + ascend_count--; + } + array_push(path, + ((TreePathEntry){ + .tree = next_child, .child_index = i, .position = position, + })); + if (!next_child->visible) + tree_path_descend(path, (TSPoint){ 0, 0 }); + return ascend_count; + } + position = ts_length_add(position, ts_tree_total_size(next_child)); + } + } + return ascend_count; +} + +static void tree_path_ascend(TreePath *path, size_t count) { + for (size_t i = 0; i < count; i++) { + do { + array_pop(path); + } while (path->size > 0 && !array_back(path)->tree->visible); + } +} + +static void tree_path_init(TreePath *path, TSTree *tree) { + array_clear(path); + array_push(path, + ((TreePathEntry){ + .tree = tree, .position = { 0, 0, { 0, 0 } }, .child_index = 0, + })); + if (!tree->visible) + tree_path_descend(path, (TSPoint){ 0, 0 }); +} + +static bool ts_tree_get_changes(TSDocument *doc, TreePath *old_path, + TreePath *new_path, size_t depth, + RangeArray *results) { + TSPoint 
position = { 0, 0 }; + + while (old_path->size && new_path->size) { + bool is_different = false; + TSPoint next_position = position; + + TreePathEntry old_entry = *array_back(old_path); + TreePathEntry new_entry = *array_back(new_path); + TSTree *old_tree = old_entry.tree; + TSTree *new_tree = new_entry.tree; + TSSymbol old_symbol = old_tree->symbol; + TSSymbol new_symbol = new_tree->symbol; + size_t old_start_byte = old_entry.position.bytes; + size_t new_start_byte = new_entry.position.bytes; + size_t old_end_byte = old_start_byte + ts_tree_total_bytes(old_tree); + size_t new_end_byte = new_start_byte + ts_tree_total_bytes(new_tree); + TSPoint old_start_point = + ts_point_add(old_entry.position.extent, old_tree->padding.extent); + TSPoint new_start_point = + ts_point_add(new_entry.position.extent, new_tree->padding.extent); + TSPoint old_end_point = ts_point_add(old_start_point, old_tree->size.extent); + TSPoint new_end_point = ts_point_add(new_start_point, new_tree->size.extent); + + // printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\t", + // position.row, position.column, NAME(old_tree), old_start_point.row, + // old_start_point.column, old_end_point.row, old_end_point.column, + // NAME(new_tree), new_start_point.row, new_start_point.column, + // new_end_point.row, new_end_point.column); + + if (ts_point_lt(position, old_start_point)) { + if (ts_point_lt(position, new_start_point)) { + next_position = ts_point_min(old_start_point, new_start_point); + } else { + is_different = true; + next_position = old_start_point; + } + } else if (ts_point_lt(position, new_start_point)) { + is_different = true; + next_position = new_start_point; + } else { + if (old_tree == new_tree || + (!old_tree->has_changes && old_symbol == new_symbol && + old_start_byte == new_start_byte && old_end_byte == new_end_byte && + old_tree->parse_state != TS_TREE_STATE_NONE && + new_tree->parse_state != TS_TREE_STATE_NONE)) { + 
next_position = old_end_point; + } else if (old_symbol == new_symbol) { + bool old_descended = tree_path_descend(old_path, position); + bool new_descended = tree_path_descend(new_path, position); + if (old_descended) { + if (!new_descended) { + tree_path_ascend(old_path, 1); + is_different = true; + next_position = new_end_point; + } + } else if (new_descended) { + tree_path_ascend(new_path, 1); + is_different = true; + next_position = old_end_point; + } else { + next_position = ts_point_min(old_end_point, new_end_point); + } + } else { + is_different = true; + next_position = ts_point_min(old_end_point, new_end_point); + } + } + + bool advance_old = ts_point_lte(old_end_point, next_position); + bool advance_new = ts_point_lte(new_end_point, next_position); + + if (advance_new && advance_old) { + size_t old_ascend_count = tree_path_advance(old_path); + size_t new_ascend_count = tree_path_advance(new_path); + if (old_ascend_count > new_ascend_count) { + tree_path_ascend(new_path, old_ascend_count - new_ascend_count); + } else if (new_ascend_count > old_ascend_count) { + tree_path_ascend(old_path, new_ascend_count - old_ascend_count); + } + } else if (advance_new) { + size_t ascend_count = tree_path_advance(new_path); + tree_path_ascend(old_path, ascend_count); + } else if (advance_old) { + size_t ascend_count = tree_path_advance(old_path); + tree_path_ascend(new_path, ascend_count); + } + + if (is_different) + push_change(results, position, next_position); + position = next_position; + } + + return true; +} + +int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, + size_t *range_count) { + if (ranges) + *ranges = NULL; + if (range_count) + *range_count = 0; if (!self->input.read || !self->parser.language) return -1; @@ -218,15 +310,13 @@ int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, if (self->tree) { TSTree *old_tree = self->tree; self->tree = tree; - TSNode new_root = ts_document_root_node(self); - - // 
ts_tree_print_dot_graph(old_tree, self->parser.language, stderr); - // ts_tree_print_dot_graph(tree, self->parser.language, stderr); if (ranges && range_count) { - bool extend_last_change = false; - RangeArray result = {0, 0, 0}; - if (!ts_tree_get_changes(self, old_tree, &new_root, 0, &result, &extend_last_change)) + RangeArray result = { 0, 0, 0 }; + tree_path_init(&self->parser.tree_path1, old_tree); + tree_path_init(&self->parser.tree_path2, tree); + if (!ts_tree_get_changes(self, &self->parser.tree_path1, + &self->parser.tree_path2, 0, &result)) return -1; *ranges = result.contents; *range_count = result.size; diff --git a/src/runtime/length.h b/src/runtime/length.h index e4840d32..5a6ae20a 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -18,6 +18,18 @@ static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) { return (TSPoint){0, a.column - b.column}; } +static inline bool ts_point_lte(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column <= b.column); +} + +static inline bool ts_point_lt(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column < b.column); +} + +static inline bool ts_point_eq(TSPoint a, TSPoint b) { + return a.row == b.row && a.column == b.column; +} + static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { if (a.row < b.row || (a.row == b.row && a.column < b.column)) return a; @@ -25,6 +37,13 @@ static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { return b; } +static inline TSPoint ts_point_max(TSPoint a, TSPoint b) { + if (a.row > b.row || (a.row == b.row && a.column > b.column)) + return a; + else + return b; +} + static inline bool ts_length_is_unknown(TSLength self) { return self.bytes > 0 && self.chars == 0; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 75b2b4df..cfe70439 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1226,7 +1226,8 @@ bool parser_init(Parser *self) { self->finished_tree = NULL; self->stack = NULL; 
array_init(&self->reduce_actions); - array_init(&self->tree_stack); + array_init(&self->tree_path1); + array_init(&self->tree_path2); self->stack = ts_stack_new(); if (!self->stack) @@ -1252,8 +1253,10 @@ void parser_destroy(Parser *self) { ts_stack_delete(self->stack); if (self->reduce_actions.contents) array_delete(&self->reduce_actions); - if (self->tree_stack.contents) - array_delete(&self->tree_stack); + if (self->tree_path1.contents) + array_delete(&self->tree_path1); + if (self->tree_path2.contents) + array_delete(&self->tree_path2); } TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { @@ -1299,7 +1302,7 @@ TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { LOG_TREE(); ts_stack_clear(self->stack); parser__clear_cached_token(self); - CHECK(ts_tree_assign_parents(self->finished_tree, &self->tree_stack)); + CHECK(ts_tree_assign_parents(self->finished_tree, &self->tree_path1)); return self->finished_tree; error: diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 4bc22697..a5bd60f0 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -26,7 +26,8 @@ typedef struct { TSTree *cached_token; size_t cached_token_byte_index; ReusableNode reusable_node; - TreeArray tree_stack; + TreePath tree_path1; + TreePath tree_path2; } Parser; bool parser_init(Parser *); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 930ff0a3..1aae1393 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -29,6 +29,7 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, .visible = metadata.visible, .named = metadata.named, .first_leaf.symbol = sym, + .has_changes = false, }; return result; @@ -91,12 +92,12 @@ TSTree *ts_tree_make_copy(TSTree *self) { return result; } -bool ts_tree_assign_parents(TSTree *self, TreeArray *stack) { - array_clear(stack); - if (!array_push(stack, self)) +bool ts_tree_assign_parents(TSTree *self, TreePath *path) { + array_clear(path); + if (!array_push(path, 
((TreePathEntry){self, ts_length_zero(), 0}))) return false; - while (stack->size > 0) { - TSTree *tree = array_pop(stack); + while (path->size > 0) { + TSTree *tree = array_pop(path).tree; TSLength offset = ts_length_zero(); for (size_t i = 0; i < tree->child_count; i++) { TSTree *child = tree->children[i]; @@ -104,7 +105,7 @@ bool ts_tree_assign_parents(TSTree *self, TreeArray *stack) { child->context.parent = tree; child->context.index = i; child->context.offset = offset; - if (!array_push(stack, child)) + if (!array_push(path, ((TreePathEntry){child, ts_length_zero(), 0}))) return false; } offset = ts_length_add(offset, ts_tree_total_size(child)); @@ -458,7 +459,7 @@ char *ts_tree_string(const TSTree *self, const TSLanguage *language, return result; } -void ts_tree__print_dot_graph(const TSTree *self, size_t offset, +void ts_tree__print_dot_graph(const TSTree *self, size_t byte_offset, const TSLanguage *language, FILE *f) { fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, self->symbol)); @@ -469,13 +470,13 @@ void ts_tree__print_dot_graph(const TSTree *self, size_t offset, fprintf(f, ", fontcolor=gray"); fprintf(f, ", tooltip=\"range:%lu - %lu\nstate:%d\nerror-cost:%u\"]\n", - offset, offset + ts_tree_total_chars(self), self->parse_state, + byte_offset, byte_offset + ts_tree_total_bytes(self), self->parse_state, self->error_cost); for (size_t i = 0; i < self->child_count; i++) { const TSTree *child = self->children[i]; - ts_tree__print_dot_graph(child, offset, language, f); + ts_tree__print_dot_graph(child, byte_offset, language, f); fprintf(f, "tree_%p -> tree_%p [tooltip=%lu]\n", self, child, i); - offset += ts_tree_total_chars(child); + byte_offset += ts_tree_total_bytes(child); } } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 146897d8..11ef85aa 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -49,7 +49,16 @@ typedef struct TSTree { bool has_changes : 1; } TSTree; +typedef struct { + TSTree *tree; + 
TSLength position; + size_t child_index; +} TreePathEntry; + typedef Array(TSTree *) TreeArray; + +typedef Array(TreePathEntry) TreePath; + bool ts_tree_array_copy(TreeArray, TreeArray *); void ts_tree_array_delete(TreeArray *); size_t ts_tree_array_essential_count(const TreeArray *); @@ -67,7 +76,7 @@ int ts_tree_compare(const TSTree *tree1, const TSTree *tree2); size_t ts_tree_start_column(const TSTree *self); size_t ts_tree_end_column(const TSTree *self); void ts_tree_set_children(TSTree *, size_t, TSTree **); -bool ts_tree_assign_parents(TSTree *, TreeArray *); +bool ts_tree_assign_parents(TSTree *, TreePath *); void ts_tree_edit(TSTree *, const TSInputEdit *edit); char *ts_tree_string(const TSTree *, const TSLanguage *, bool include_all); void ts_tree_print_dot_graph(const TSTree *, const TSLanguage *, FILE *);