From fce8d57152946cd7a9dfcf645f935b998212fc4c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 7 Sep 2016 17:49:16 -0700 Subject: [PATCH 01/16] Start work on document_parse_and_diff API --- include/tree_sitter/runtime.h | 6 ++ spec/helpers/point_helpers.cc | 8 ++ spec/helpers/point_helpers.h | 4 + spec/runtime/document_spec.cc | 135 +++++++++++++++++++++++++++++++++ src/runtime/document.c | 138 +++++++++++++++++++++++++++++++++- 5 files changed, 288 insertions(+), 3 deletions(-) diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 8b9a863e..be321720 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -45,6 +45,11 @@ typedef struct { size_t column; } TSPoint; +typedef struct { + TSPoint start; + TSPoint end; +} TSRange; + typedef struct { const void *data; size_t offset[3]; @@ -98,6 +103,7 @@ void ts_document_set_logger(TSDocument *, TSLogger); void ts_document_print_debugging_graphs(TSDocument *, bool); void ts_document_edit(TSDocument *, TSInputEdit); int ts_document_parse(TSDocument *); +int ts_document_parse_and_diff(TSDocument *, TSRange **, size_t *); void ts_document_invalidate(TSDocument *); TSNode ts_document_root_node(const TSDocument *); size_t ts_document_parse_count(const TSDocument *); diff --git a/spec/helpers/point_helpers.cc b/spec/helpers/point_helpers.cc index 97a444a9..f07d1da7 100644 --- a/spec/helpers/point_helpers.cc +++ b/spec/helpers/point_helpers.cc @@ -8,10 +8,18 @@ bool operator==(const TSPoint &left, const TSPoint &right) { return left.row == right.row && left.column == right.column; } +bool operator==(const TSRange &left, const TSRange &right) { + return left.start == right.start && left.end == right.end; +} + std::ostream &operator<<(std::ostream &stream, const TSPoint &point) { return stream << "{" << point.row << ", " << point.column << "}"; } +std::ostream &operator<<(std::ostream &stream, const TSRange &range) { + return stream << "{" << range.start << ", " << 
range.end << "}"; +} + bool operator<(const TSPoint &left, const TSPoint &right) { if (left.row < right.row) return true; if (left.row > right.row) return false; diff --git a/spec/helpers/point_helpers.h b/spec/helpers/point_helpers.h index 321f05ad..3caf14c2 100644 --- a/spec/helpers/point_helpers.h +++ b/spec/helpers/point_helpers.h @@ -7,6 +7,10 @@ bool operator<(const TSPoint &left, const TSPoint &right); bool operator>(const TSPoint &left, const TSPoint &right); +bool operator==(const TSRange &left, const TSRange &right); + std::ostream &operator<<(std::ostream &stream, const TSPoint &point); +std::ostream &operator<<(std::ostream &stream, const TSRange &range); + #endif // HELPERS_POINT_HELPERS_H_ diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index ec479b4e..9e393134 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -3,6 +3,7 @@ #include "helpers/record_alloc.h" #include "helpers/stream_methods.h" #include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" #include "helpers/spy_logger.h" #include "helpers/spy_input.h" #include "helpers/load_language.h" @@ -192,6 +193,140 @@ describe("Document", [&]() { }); }); }); + + describe("parse_and_diff()", [&]() { + SpyInput *input; + + before_each([&]() { + ts_document_set_language(doc, get_test_language("javascript")); + input = new SpyInput("{a: null};", 3); + ts_document_set_input(doc, input->input()); + ts_document_parse(doc); + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object (pair (identifier) (null)))))"); + }); + + after_each([&]() { + delete input; + }); + + auto get_ranges = [&](std::function callback) -> vector { + TSInputEdit edit = callback(); + ts_document_edit(doc, edit); + + TSRange *ranges; + size_t range_count = 0; + ts_document_parse_and_diff(doc, &ranges, &range_count); + + vector result; + for (size_t i = 0; i < range_count; i++) + result.push_back(ranges[i]); + ts_free(ranges); 
+ + return result; + }; + + it("reports changes when one token has been updated", [&]() { + // Replace `null` with `nothing` + auto ranges = get_ranges([&]() { + return input->replace(input->content.find("ull"), 1, "othing"); + }); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find("nothing")}, + TSPoint{0, input->content.find("}")} + }, + }))); + + // Replace `nothing` with `null` again + ranges = get_ranges([&]() { + return input->undo(); + }); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find("null")}, + TSPoint{0, input->content.find("}")} + }, + }))); + }); + + it("reports changes when tokens have been appended", [&]() { + // Add a second key-value pair + auto ranges = get_ranges([&]() { + return input->replace(input->content.find("}"), 0, ", b: false"); + }); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find(",")}, + TSPoint{0, input->content.find("}")}, + }, + }))); + + // Add a third key-value pair in between the first two + ranges = get_ranges([&]() { + return input->replace(input->content.find(", b"), 0, ", c: 1"); + }); + + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (null)) " + "(pair (identifier) (number)) " + "(pair (identifier) (false)))))"); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find(", c")}, + TSPoint{0, input->content.find(", b")}, + }, + }))); + + // Delete the middle pair. + ranges = get_ranges([&]() { + return input->undo(); + }); + + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (null)) " + "(pair (identifier) (false)))))"); + + AssertThat(ranges, Equals(vector({ + }))); + + // Delete the second pair. 
+ ranges = get_ranges([&]() { + return input->undo(); + }); + + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (null)))))"); + + AssertThat(ranges, Equals(vector({ + }))); + }); + + it("reports changes when trees have been wrapped", [&]() { + // Wrap the object in an assignment expression. + auto ranges = get_ranges([&]() { + return input->replace(0, 0, "x.y = "); + }); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, 0}, + TSPoint{0, input->content.find(";")}, + }, + }))); + }); + }); }); END_TEST diff --git a/src/runtime/document.c b/src/runtime/document.c index 1211eb5e..22e4bb5b 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -89,7 +89,117 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { ts_tree_edit(self->tree, edit); } -int ts_document_parse(TSDocument *self) { +typedef Array(TSRange) RangeArray; + +#define NAME(t) ((t) ? (ts_language_symbol_name(doc->parser.language, ((TSTree *)(t))->symbol)) : "") +// #define PRINT(msg, ...) for (size_t k = 0; k < depth; k++) { printf(" "); } printf(msg "\n", __VA_ARGS__); +#define PRINT(msg, ...) 
+ +static bool push_diff(RangeArray *results, TSNode *node, bool *extend_last_change) { + TSPoint start = ts_node_start_point(*node); + TSPoint end = ts_node_end_point(*node); + if (*extend_last_change) { + TSRange *last_range = array_back(results); + last_range->end = end; + return true; + } + *extend_last_change = true; + return array_push(results, ((TSRange){start, end})); +} + +static bool ts_tree_diff(TSDocument *doc, TSTree *old, TSNode *new_node, + size_t depth, RangeArray *results, bool *extend_last_change) { + TSTree *new = (TSTree *)(new_node->data); + + PRINT("At %lu, ('%s', %lu) vs ('%s', %lu) {", + ts_node_start_char(*new_node), + NAME(old), old->size.chars, + NAME(new), new->size.chars); + + if (old->visible) { + if (old == new || (old->symbol == new->symbol && + old->size.chars == new->size.chars && !old->has_changes)) { + *extend_last_change = false; + PRINT("}", NULL); + return true; + } + + if (old->symbol != new->symbol) { + PRINT("}", NULL); + return push_diff(results, new_node, extend_last_change); + } + + TSNode child = ts_node_child(*new_node, 0); + if (child.data) { + *new_node = child; + } else { + PRINT("}", NULL); + return true; + } + } + + depth++; + size_t old_child_start; + size_t old_child_end = ts_node_start_char(*new_node) - old->padding.chars; + + for (size_t j = 0; j < old->child_count; j++) { + TSTree *old_child = old->children[j]; + if (old_child->padding.chars == 0 && old_child->size.chars == 0) + continue; + + old_child_start = old_child_end + old_child->padding.chars; + old_child_end = old_child_start + old_child->size.chars; + + while (true) { + size_t new_child_start = ts_node_start_char(*new_node); + if (new_child_start < old_child_start) { + PRINT("skip new:('%s', %lu), old:('%s', %lu), old_parent:%s", + NAME(new_node->data), ts_node_start_char(*new_node), NAME(old_child), + old_child_start, NAME(old)); + + if (!push_diff(results, new_node, extend_last_change)) + return false; + + TSNode next = 
ts_node_next_sibling(*new_node); + if (next.data) { + PRINT("advance before diff ('%s', %lu) -> ('%s', %lu)", + NAME(new_node->data), ts_node_start_char(*new_node), NAME(next.data), + ts_node_start_char(next)); + *new_node = next; + } + } else if (new_child_start == old_child_start) { + if (!ts_tree_diff(doc, old_child, new_node, depth, results, extend_last_change)) + return false; + + if (old_child->visible) { + TSNode next = ts_node_next_sibling(*new_node); + if (next.data) { + PRINT("advance after diff ('%s', %lu) -> ('%s', %lu)", + NAME(new_node->data), ts_node_start_char(*new_node), NAME(next.data), + ts_node_start_char(next)); + *new_node = next; + } + } + break; + } else { + break; + } + } + } + + depth--; + if (old->visible) { + *new_node = ts_node_parent(*new_node); + } + + PRINT("}", NULL); + return true; +} + +int ts_document_parse_and_diff(TSDocument *self, TSRange **ranges, size_t *range_count) { + if (ranges) *ranges = NULL; + if (range_count) *range_count = 0; + if (!self->input.read || !self->parser.language) return -1; @@ -101,14 +211,36 @@ int ts_document_parse(TSDocument *self) { if (!tree) return -1; - if (self->tree) - ts_tree_release(self->tree); + if (self->tree) { + TSTree *old_tree = self->tree; + self->tree = tree; + TSNode new_root = ts_document_root_node(self); + + // ts_tree_print_dot_graph(old_tree, self->parser.language, stderr); + // ts_tree_print_dot_graph(tree, self->parser.language, stderr); + + if (ranges && range_count) { + bool extend_last_change = false; + RangeArray result = {0, 0, 0}; + if (!ts_tree_diff(self, old_tree, &new_root, 0, &result, &extend_last_change)) + return -1; + *ranges = result.contents; + *range_count = result.size; + } + + ts_tree_release(old_tree); + } + self->tree = tree; self->parse_count++; self->valid = true; return 0; } +int ts_document_parse(TSDocument *self) { + return ts_document_parse_and_diff(self, NULL, NULL); +} + void ts_document_invalidate(TSDocument *self) { self->valid = false; } From 
131bbee1601d6b0aaa00548ce06e2226e8cc57cf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 8 Sep 2016 17:51:34 -0700 Subject: [PATCH 02/16] Rename parse_and_diff -> parse_and_get_changed_ranges Signed-off-by: Nathan Sobo --- include/tree_sitter/runtime.h | 2 +- spec/runtime/document_spec.cc | 4 ++-- src/runtime/document.c | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index be321720..be00a69e 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -103,7 +103,7 @@ void ts_document_set_logger(TSDocument *, TSLogger); void ts_document_print_debugging_graphs(TSDocument *, bool); void ts_document_edit(TSDocument *, TSInputEdit); int ts_document_parse(TSDocument *); -int ts_document_parse_and_diff(TSDocument *, TSRange **, size_t *); +int ts_document_parse_and_get_changed_ranges(TSDocument *, TSRange **, size_t *); void ts_document_invalidate(TSDocument *); TSNode ts_document_root_node(const TSDocument *); size_t ts_document_parse_count(const TSDocument *); diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 9e393134..aa5ad6bb 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -194,7 +194,7 @@ describe("Document", [&]() { }); }); - describe("parse_and_diff()", [&]() { + describe("parse_and_get_changed_ranges()", [&]() { SpyInput *input; before_each([&]() { @@ -217,7 +217,7 @@ describe("Document", [&]() { TSRange *ranges; size_t range_count = 0; - ts_document_parse_and_diff(doc, &ranges, &range_count); + ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count); vector result; for (size_t i = 0; i < range_count; i++) diff --git a/src/runtime/document.c b/src/runtime/document.c index 22e4bb5b..3d90a482 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -165,7 +165,10 @@ static bool ts_tree_diff(TSDocument *doc, TSTree *old, TSNode *new_node, PRINT("advance before 
diff ('%s', %lu) -> ('%s', %lu)", NAME(new_node->data), ts_node_start_char(*new_node), NAME(next.data), ts_node_start_char(next)); + *new_node = next; + } else { + break; } } else if (new_child_start == old_child_start) { if (!ts_tree_diff(doc, old_child, new_node, depth, results, extend_last_change)) @@ -196,7 +199,7 @@ static bool ts_tree_diff(TSDocument *doc, TSTree *old, TSNode *new_node, return true; } -int ts_document_parse_and_diff(TSDocument *self, TSRange **ranges, size_t *range_count) { +int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, size_t *range_count) { if (ranges) *ranges = NULL; if (range_count) *range_count = 0; @@ -238,7 +241,7 @@ int ts_document_parse_and_diff(TSDocument *self, TSRange **ranges, size_t *range } int ts_document_parse(TSDocument *self) { - return ts_document_parse_and_diff(self, NULL, NULL); + return ts_document_parse_and_get_changed_ranges(self, NULL, NULL); } void ts_document_invalidate(TSDocument *self) { From 591fcc980c2ee7fc653cd958ad943a48bfd63260 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 8 Sep 2016 17:54:51 -0700 Subject: [PATCH 03/16] Update trees' offsets when editing them --- spec/integration/corpus_specs.cc | 27 +++++++++++++-------------- spec/runtime/tree_spec.cc | 24 ++++++++++++++++-------- src/runtime/tree.c | 19 ++++++++++--------- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index 3653809d..ec01574f 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -29,8 +29,8 @@ static void expect_a_consistent_tree(TSNode node, TSDocument *document) { AssertThat(start_char, !IsGreaterThan(end_char)); AssertThat(start_point, !IsGreaterThan(end_point)); - size_t last_child_end_char = 0; - TSPoint last_child_end_point = {0, 0}; + size_t last_child_end_char = start_char; + TSPoint last_child_end_point = start_point; for (size_t i = 0; i < child_count; i++) { TSNode 
child = ts_node_child(node, i); @@ -39,17 +39,10 @@ static void expect_a_consistent_tree(TSNode node, TSDocument *document) { TSPoint child_start_point = ts_node_start_point(child); TSPoint child_end_point = ts_node_end_point(child); - if (i > 0) { - AssertThat(child_start_char, !IsLessThan(last_child_end_char)); - AssertThat(child_start_point, !IsLessThan(last_child_end_point)); - last_child_end_char = child_end_char; - last_child_end_point = child_end_point; - } - - AssertThat(child_start_char, !IsLessThan(start_char)); - AssertThat(child_end_char, !IsGreaterThan(end_char)); - AssertThat(child_start_point, !IsLessThan(start_point)); - AssertThat(child_end_point, !IsGreaterThan(end_point)); + AssertThat(child_start_char, !IsLessThan(last_child_end_char)); + AssertThat(child_start_point, !IsLessThan(last_child_end_point)); + last_child_end_char = child_end_char; + last_child_end_point = child_end_point; expect_a_consistent_tree(child, document); @@ -57,8 +50,14 @@ static void expect_a_consistent_tree(TSNode node, TSDocument *document) { some_child_has_changes = true; } - if (child_count > 0) + if (child_count > 0) { + AssertThat(end_char, !IsLessThan(last_child_end_char)); + + if (!has_changes) + AssertThat(end_point, !IsLessThan(last_child_end_point)); + AssertThat(has_changes, Equals(some_child_has_changes)); + } } START_TEST diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 6420a206..08e3cb7d 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -3,6 +3,22 @@ #include "runtime/tree.h" #include "runtime/length.h" +void assert_consistent(const TSTree *tree) { + if (tree->child_count == 0) + return; + AssertThat(tree->children[0]->padding, Equals(tree->padding)); + + TSLength total_children_size = ts_length_zero(); + for (size_t i = 0; i < tree->child_count; i++) { + TSTree *child = tree->children[i]; + AssertThat(child->context.offset, Equals(total_children_size)); + assert_consistent(child); + total_children_size = 
ts_length_add(total_children_size, ts_tree_total_size(child)); + } + + AssertThat(total_children_size, Equals(ts_tree_total_size(tree))); +}; + START_TEST enum { @@ -163,14 +179,6 @@ describe("Tree", []() { ts_tree_release(tree); }); - auto assert_consistent = [&](const TSTree *tree) { - AssertThat(tree->children[0]->padding, Equals(tree->padding)); - - TSLength total_children_size = ts_length_zero(); - for (size_t i = 0; i < tree->child_count; i++) - total_children_size = ts_length_add(total_children_size, ts_tree_total_size(tree->children[i])); - AssertThat(total_children_size, Equals(ts_tree_total_size(tree))); - }; describe("edits within a tree's padding", [&]() { it("resizes the padding of the tree and its leftmost descendants", [&]() { diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 777649e3..73747ab2 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -335,37 +335,38 @@ void ts_tree_edit(TSTree *self, TSInputEdit edit) { bool found_first_child = false; long remainder_to_delete = edit.chars_removed - edit.chars_inserted; - size_t child_left = 0, child_right = 0; + TSLength child_left, child_right = ts_length_zero(); for (size_t i = 0; i < self->child_count; i++) { TSTree *child = self->children[i]; - size_t child_size = ts_tree_total_chars(child); child_left = child_right; - child_right += child_size; if (!found_first_child) { - if (child_right >= start) { + child_right = ts_length_add(child_left, ts_tree_total_size(child)); + if (child_right.chars >= start) { found_first_child = true; - size_t chars_removed = min(edit.chars_removed, child_right - start); + size_t chars_removed = min(edit.chars_removed, child_right.chars - start); remainder_to_delete -= (chars_removed - edit.chars_inserted); ts_tree_edit(child, (TSInputEdit){ - .position = start - child_left, + .position = start - child_left.chars, .chars_inserted = edit.chars_inserted, .chars_removed = chars_removed, }); + child_right = ts_length_add(child_left, ts_tree_total_size(child)); 
} } else { if (remainder_to_delete > 0) { - size_t chars_removed = min(remainder_to_delete, child_size); + size_t chars_removed = min(remainder_to_delete, ts_tree_total_chars(child)); remainder_to_delete -= chars_removed; ts_tree_edit( child, (TSInputEdit){ .position = 0, .chars_inserted = 0, .chars_removed = chars_removed, }); - } else { - break; } + child_right = ts_length_add(child_right, ts_tree_total_size(child)); } + + child->context.offset = child_left; } } From eb5dda75c40d791687ff67d109c4565762647ca3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 9 Sep 2016 09:20:04 -0700 Subject: [PATCH 04/16] Start work on randomized changed-region testing Signed-off-by: Nathan Sobo --- spec/helpers/scope_sequence.cc | 110 +++++++++++++++++++++++++++++++++ spec/helpers/scope_sequence.h | 16 +++++ 2 files changed, 126 insertions(+) create mode 100644 spec/helpers/scope_sequence.cc create mode 100644 spec/helpers/scope_sequence.h diff --git a/spec/helpers/scope_sequence.cc b/spec/helpers/scope_sequence.cc new file mode 100644 index 00000000..32a77bc6 --- /dev/null +++ b/spec/helpers/scope_sequence.cc @@ -0,0 +1,110 @@ +#include "./scope_sequence.h" + +#include "bandit/bandit.h" +#include +#include "helpers/stream_methods.h" +#include "helpers/point_helpers.h" + +using std::string; +using std::cout; + +static void append_text_to_scope_sequence(ScopeSequence *sequence, + ScopeStack *current_scopes, + const std::string &text, + size_t length) { + for (size_t i = 0; i < length; i++) { + string character(1, text[sequence->size()]); + sequence->push_back(*current_scopes); + sequence->back().push_back("'" + character + "'"); + } +} + +static void append_to_scope_sequence(ScopeSequence *sequence, + ScopeStack *current_scopes, + TSNode node, TSDocument *document, + const std::string &text) { + string scope = ts_node_type(node, document); + current_scopes->push_back(scope); + size_t child_count = ts_node_child_count(node); + if (child_count > 0) { + size_t 
previous_child_end = ts_node_start_char(node); + for (size_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + size_t child_start = ts_node_start_char(child); + size_t spacing = child_start - previous_child_end; + append_text_to_scope_sequence(sequence, current_scopes, text, spacing); + append_to_scope_sequence(sequence, current_scopes, child, document, text); + previous_child_end = ts_node_end_char(child); + } + size_t spacing = ts_node_end_char(node) - previous_child_end; + append_text_to_scope_sequence(sequence, current_scopes, text, spacing); + } else { + size_t length = ts_node_end_char(node) - ts_node_start_char(node); + append_text_to_scope_sequence(sequence, current_scopes, text, length); + } + current_scopes->pop_back(); +} + +ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text) { + ScopeSequence sequence; + ScopeStack current_scopes; + TSNode node = ts_document_root_node(document); + append_to_scope_sequence(&sequence, &current_scopes, node, document, text); + AssertThat(sequence.size(), Equals(text.size())); + return sequence; +} + +bool operator<=(const TSPoint &left, const TSPoint &right) { + if (left.row < right.row) + return true; + else if (left.row == right.row) + return left.column <= right.column; + else + return false; +} + +void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequence &new_sequence, + const string &text, TSRange *ranges, size_t range_count) { + TSPoint current_position = {0, 0}; + for (size_t i = 0; i < text.size(); i++) { + if (text[i] == '\n') { + current_position.row++; + current_position.column = 0; + continue; + } + + const ScopeStack &old_scopes = old_sequence[i]; + const ScopeStack &new_scopes = new_sequence[i]; + if (old_scopes != new_scopes) { + bool found_containing_range = false; + for (size_t j = 0; j < range_count; j++) { + TSRange range = ranges[j]; + if (range.start <= current_position && current_position <= range.end) { + found_containing_range = 
true; + break; + } + } + + if (!found_containing_range) { + std::stringstream message_stream; + message_stream << "Found changed scope outside of any invalidated range;\n"; + message_stream << "Position: " << current_position << "\n"; + size_t line_start_index = i - current_position.column; + size_t line_end_index = text.find_first_of('\n', i); + message_stream << "Line: " << text.substr(line_start_index, line_end_index - line_start_index) << "\n"; + for (size_t j = 0; j < current_position.column + string("Line: ").size(); j++) + message_stream << " "; + message_stream << "^\n"; + message_stream << "Old scopes: " << old_scopes << "\n"; + message_stream << "New scopes: " << new_scopes << "\n"; + message_stream << "Invalidated ranges:\n"; + for (size_t j = 0; j < range_count; j++) { + message_stream << " " << ranges[i] << "\n"; + } + Assert::Failure(message_stream.str()); + } + } + + current_position.column++; + } +} diff --git a/spec/helpers/scope_sequence.h b/spec/helpers/scope_sequence.h new file mode 100644 index 00000000..c83ad597 --- /dev/null +++ b/spec/helpers/scope_sequence.h @@ -0,0 +1,16 @@ +#ifndef HELPERS_SCOPE_SEQUENCE_H_ +#define HELPERS_SCOPE_SEQUENCE_H_ + +#include +#include +#include "tree_sitter/runtime.h" + +typedef std::string Scope; +typedef std::vector ScopeStack; +typedef std::vector ScopeSequence; + +ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text); + +void verify_changed_ranges(const ScopeSequence &old, const ScopeSequence &new_sequence, const std::string &text, TSRange *ranges, size_t range_count); + +#endif // HELPERS_SCOPE_SEQUENCE_H_ From cc62fe03756c9d8749109a0aed53771fd62d5dca Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 9 Sep 2016 21:11:02 -0700 Subject: [PATCH 05/16] Represent Lengths in terms of Points --- include/tree_sitter/parser.h | 3 +- spec/helpers/point_helpers.cc | 23 +++++++---- spec/helpers/point_helpers.h | 7 ++++ spec/helpers/tree_helpers.cc | 9 ----- 
spec/helpers/tree_helpers.h | 2 - spec/runtime/stack_spec.cc | 5 ++- spec/runtime/tree_spec.cc | 75 ++++++++++++++++++----------------- src/runtime/length.h | 44 ++++++++++---------- src/runtime/lexer.c | 6 +-- src/runtime/node.c | 10 ++--- src/runtime/parser.c | 4 +- src/runtime/stack.c | 4 +- src/runtime/tree.c | 14 +++---- 13 files changed, 106 insertions(+), 100 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index c1ad5c53..3bed984e 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -17,8 +17,7 @@ typedef unsigned short TSStateId; typedef struct { size_t bytes; size_t chars; - size_t rows; - size_t columns; + TSPoint extent; } TSLength; typedef struct { diff --git a/spec/helpers/point_helpers.cc b/spec/helpers/point_helpers.cc index f07d1da7..395087ee 100644 --- a/spec/helpers/point_helpers.cc +++ b/spec/helpers/point_helpers.cc @@ -1,6 +1,6 @@ +#include "./point_helpers.h" #include #include -#include "runtime/length.h" using namespace std; @@ -12,12 +12,8 @@ bool operator==(const TSRange &left, const TSRange &right) { return left.start == right.start && left.end == right.end; } -std::ostream &operator<<(std::ostream &stream, const TSPoint &point) { - return stream << "{" << point.row << ", " << point.column << "}"; -} - -std::ostream &operator<<(std::ostream &stream, const TSRange &range) { - return stream << "{" << range.start << ", " << range.end << "}"; +bool operator==(const TSLength &left, const TSLength &right) { + return ts_length_eq(left, right); } bool operator<(const TSPoint &left, const TSPoint &right) { @@ -30,3 +26,16 @@ bool operator<(const TSPoint &left, const TSPoint &right) { bool operator>(const TSPoint &left, const TSPoint &right) { return right < left; } + +std::ostream &operator<<(std::ostream &stream, const TSPoint &point) { + return stream << "{" << point.row << ", " << point.column << "}"; +} + +std::ostream &operator<<(std::ostream &stream, const TSRange &range) { + 
return stream << "{" << range.start << ", " << range.end << "}"; +} + +ostream &operator<<(ostream &stream, const TSLength &length) { + return stream << "{chars:" << length.chars << ", bytes:" << + length.bytes << ", extent:" << length.extent << "}"; +} diff --git a/spec/helpers/point_helpers.h b/spec/helpers/point_helpers.h index 3caf14c2..de5edd32 100644 --- a/spec/helpers/point_helpers.h +++ b/spec/helpers/point_helpers.h @@ -1,6 +1,9 @@ #ifndef HELPERS_POINT_HELPERS_H_ #define HELPERS_POINT_HELPERS_H_ +#include "runtime/length.h" +#include + bool operator==(const TSPoint &left, const TSPoint &right); bool operator<(const TSPoint &left, const TSPoint &right); @@ -9,8 +12,12 @@ bool operator>(const TSPoint &left, const TSPoint &right); bool operator==(const TSRange &left, const TSRange &right); +bool operator==(const TSLength &left, const TSLength &right); + std::ostream &operator<<(std::ostream &stream, const TSPoint &point); std::ostream &operator<<(std::ostream &stream, const TSRange &range); +std::ostream &operator<<(std::ostream &stream, const TSLength &length); + #endif // HELPERS_POINT_HELPERS_H_ diff --git a/spec/helpers/tree_helpers.cc b/spec/helpers/tree_helpers.cc index 682ec2c7..04e39194 100644 --- a/spec/helpers/tree_helpers.cc +++ b/spec/helpers/tree_helpers.cc @@ -40,10 +40,6 @@ bool operator==(const TSNode &left, const TSNode &right) { return ts_node_eq(left, right); } -bool operator==(const TSLength &left, const TSLength &right) { - return ts_length_eq(left, right); -} - bool operator==(const std::vector &vec, const TreeArray &array) { if (vec.size() != array.size) return false; @@ -52,8 +48,3 @@ bool operator==(const std::vector &vec, const TreeArray &array) { return false; return true; } - -ostream &operator<<(ostream &stream, const TSLength &length) { - return stream << "{chars:" << length.chars << ", bytes:" << - length.bytes << ", rows:" << length.rows << ", columns:" << length.columns << "}"; -} diff --git a/spec/helpers/tree_helpers.h 
b/spec/helpers/tree_helpers.h index 18d62b80..e8420dc0 100644 --- a/spec/helpers/tree_helpers.h +++ b/spec/helpers/tree_helpers.h @@ -10,9 +10,7 @@ TSTree ** tree_array(std::vector trees); std::ostream &operator<<(std::ostream &stream, const TSTree *tree); std::ostream &operator<<(std::ostream &stream, const TSNode &node); -std::ostream &operator<<(std::ostream &stream, const TSLength &length); bool operator==(const TSNode &left, const TSNode &right); -bool operator==(const TSLength &left, const TSLength &right); bool operator==(const std::vector &right, const TreeArray &array); #endif // HELPERS_TREE_HELPERS_H_ diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 1beb5b22..16261ada 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -1,5 +1,6 @@ #include "spec_helper.h" #include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" #include "helpers/record_alloc.h" #include "helpers/stream_methods.h" #include "runtime/stack.h" @@ -19,7 +20,7 @@ enum { }; TSLength operator*(const TSLength &length, size_t factor) { - return {length.bytes * factor, length.chars * factor, 0, length.columns * factor}; + return {length.bytes * factor, length.chars * factor, {0, length.extent.column * factor}}; } void free_slice_array(StackSliceArray *slices) { @@ -69,7 +70,7 @@ describe("Stack", [&]() { Stack *stack; const size_t tree_count = 11; TSTree *trees[tree_count]; - TSLength tree_len = {2, 3, 0, 3}; + TSLength tree_len = {2, 3, {0, 3}}; before_each([&]() { record_alloc::start(); diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 08e3cb7d..f076c44b 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -1,5 +1,6 @@ #include "spec_helper.h" #include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" #include "runtime/tree.h" #include "runtime/length.h" @@ -36,8 +37,8 @@ describe("Tree", []() { TSSymbolMetadata invisible = {false, false, false, true}; before_each([&]() { - tree1 
= ts_tree_make_leaf(cat, {2, 1, 0, 1}, {5, 4, 0, 4}, visible); - tree2 = ts_tree_make_leaf(cat, {1, 1, 0, 1}, {3, 3, 0, 3}, visible); + tree1 = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible); + tree2 = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible); ts_tree_retain(tree1); ts_tree_retain(tree2); @@ -166,13 +167,13 @@ describe("Tree", []() { before_each([&]() { tree = ts_tree_make_node(cat, 3, tree_array({ - ts_tree_make_leaf(dog, {2, 2, 0, 2}, {3, 3, 0, 3}, visible), - ts_tree_make_leaf(eel, {2, 2, 0, 2}, {3, 3, 0, 3}, visible), - ts_tree_make_leaf(fox, {2, 2, 0, 2}, {3, 3, 0, 3}, visible), + ts_tree_make_leaf(dog, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible), + ts_tree_make_leaf(eel, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible), + ts_tree_make_leaf(fox, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible), }), visible); - AssertThat(tree->padding, Equals({2, 2, 0, 2})); - AssertThat(tree->size, Equals({13, 13, 0, 13})); + AssertThat(tree->padding, Equals({2, 2, {0, 2}})); + AssertThat(tree->size, Equals({13, 13, {0, 13}})); }); after_each([&]() { @@ -187,16 +188,16 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 3, 0, 0})); - AssertThat(tree->size, Equals({13, 13, 0, 13})); + AssertThat(tree->padding, Equals({0, 3, {0, 0}})); + AssertThat(tree->size, Equals({13, 13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 3, 0, 0})); - AssertThat(tree->children[0]->size, Equals({3, 3, 0, 3})); + AssertThat(tree->children[0]->padding, Equals({0, 3, {0, 0}})); + AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); - AssertThat(tree->children[1]->padding, Equals({2, 2, 0, 2})); - AssertThat(tree->children[1]->size, Equals({3, 3, 0, 3})); + AssertThat(tree->children[1]->padding, Equals({2, 2, {0, 2}})); + AssertThat(tree->children[1]->size, Equals({3, 
3, {0, 3}})); }); }); @@ -207,12 +208,12 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 5, 0, 0})); - AssertThat(tree->size, Equals({0, 11, 0, 0})); + AssertThat(tree->padding, Equals({0, 5, {0, 0}})); + AssertThat(tree->size, Equals({0, 11, {0, 0}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 5, 0, 0})); - AssertThat(tree->children[0]->size, Equals({0, 1, 0, 0})); + AssertThat(tree->children[0]->padding, Equals({0, 5, {0, 0}})); + AssertThat(tree->children[0]->size, Equals({0, 1, {0, 0}})); }); }); @@ -223,12 +224,12 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 4, 0, 0})); - AssertThat(tree->size, Equals({13, 13, 0, 13})); + AssertThat(tree->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->size, Equals({13, 13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 4, 0, 0})); - AssertThat(tree->children[0]->size, Equals({3, 3, 0, 3})); + AssertThat(tree->children[0]->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); }); @@ -241,12 +242,12 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({2, 2, 0, 2})); - AssertThat(tree->size, Equals({0, 16, 0, 0})); + AssertThat(tree->padding, Equals({2, 2, {0, 2}})); + AssertThat(tree->size, Equals({0, 16, {0, 0}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({2, 2, 0, 2})); - AssertThat(tree->children[0]->size, Equals({0, 6, 0, 0})); + AssertThat(tree->children[0]->padding, Equals({2, 2, {0, 2}})); + AssertThat(tree->children[0]->size, Equals({0, 6, {0, 0}})); 
AssertThat(tree->children[1]->has_changes, IsFalse()); }); @@ -259,30 +260,30 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 4, 0, 0})); - AssertThat(tree->size, Equals({0, 4, 0, 0})); + AssertThat(tree->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->size, Equals({0, 4, {0, 0}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 4, 0, 0})); - AssertThat(tree->children[0]->size, Equals({0, 0, 0, 0})); + AssertThat(tree->children[0]->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->children[0]->size, Equals({0, 0, {0, 0}})); AssertThat(tree->children[1]->has_changes, IsTrue()); - AssertThat(tree->children[1]->padding, Equals({0, 0, 0, 0})); - AssertThat(tree->children[1]->size, Equals({0, 0, 0, 0})); + AssertThat(tree->children[1]->padding, Equals({0, 0, {0, 0}})); + AssertThat(tree->children[1]->size, Equals({0, 0, {0, 0}})); AssertThat(tree->children[2]->has_changes, IsTrue()); - AssertThat(tree->children[2]->padding, Equals({0, 1, 0, 0})); - AssertThat(tree->children[2]->size, Equals({3, 3, 0, 3})); + AssertThat(tree->children[2]->padding, Equals({0, 1, {0, 0}})); + AssertThat(tree->children[2]->size, Equals({3, 3, {0, 3}})); }); }); }); describe("equality", [&]() { it("returns true for identical trees", [&]() { - TSTree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, 1, 1}, {5, 4, 1, 4}, visible); + TSTree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {1, 1}}, {5, 4, {1, 4}}, visible); AssertThat(ts_tree_eq(tree1, tree1_copy), IsTrue()); - TSTree *tree2_copy = ts_tree_make_leaf(cat, {1, 1, 0, 1}, {3, 3, 0, 3}, visible); + TSTree *tree2_copy = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible); AssertThat(ts_tree_eq(tree2, tree2_copy), IsTrue()); TSTree *parent2 = ts_tree_make_node(dog, 2, tree_array({ @@ -313,11 +314,11 @@ describe("Tree", []() { }); it("returns false for trees with different sizes", [&]() 
{ - TSTree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, 0, 1}, tree1->size, invisible); + TSTree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, tree1->size, invisible); AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse()); ts_tree_release(tree1_copy); - tree1_copy = ts_tree_make_leaf(cat, tree1->padding, {5, 4, 1, 10}, invisible); + tree1_copy = ts_tree_make_leaf(cat, tree1->padding, {5, 4, {1, 10}}, invisible); AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse()); ts_tree_release(tree1_copy); }); diff --git a/src/runtime/length.h b/src/runtime/length.h index 3c16a7d8..dde1f81f 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -4,14 +4,27 @@ #include "tree_sitter/parser.h" #include +static inline TSPoint ts_point_add(TSPoint a, TSPoint b) { + if (b.row > 0) + return (TSPoint){a.row + b.row, b.column}; + else + return (TSPoint){a.row, a.column + b.column}; +} + +static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) { + if (a.row > b.row) + return (TSPoint){a.row - b.row, a.column}; + else + return (TSPoint){0, a.column - b.column}; +} + static inline bool ts_length_is_unknown(TSLength self) { return self.chars > 0 && self.bytes == 0; } static inline void ts_length_set_unknown(TSLength *self) { self->bytes = 0; - self->rows = 0; - self->columns = 0; + self->extent = (TSPoint){0, 0}; } static inline TSLength ts_length_min(TSLength len1, TSLength len2) { @@ -24,17 +37,10 @@ static inline TSLength ts_length_add(TSLength len1, TSLength len2) { if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { result.bytes = 0; - result.rows = 0; - result.columns = result.chars; + result.extent = (TSPoint){0, result.chars}; } else { result.bytes = len1.bytes + len2.bytes; - if (len2.rows == 0) { - result.rows = len1.rows; - result.columns = len1.columns + len2.columns; - } else { - result.rows = len1.rows + len2.rows; - result.columns = len2.columns; - } + result.extent = ts_point_add(len1.extent, len2.extent); } return result; @@ -46,29 +52,23 @@ 
static inline TSLength ts_length_sub(TSLength len1, TSLength len2) { if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { result.bytes = 0; - result.rows = 0; - result.columns = result.chars; + result.extent = (TSPoint){0, result.chars}; } else { result.bytes = len1.bytes - len2.bytes; - if (len1.rows == len2.rows) { - result.rows = 0; - result.columns = len1.columns - len2.columns; - } else { - result.rows = len1.rows - len2.rows; - result.columns = len1.columns; - } + result.extent = ts_point_sub(len1.extent, len2.extent); } return result; } static inline TSLength ts_length_zero() { - return (TSLength){ 0, 0, 0, 0 }; + return (TSLength){ 0, 0, {0, 0} }; } static inline bool ts_length_eq(TSLength self, TSLength other) { return self.bytes == other.bytes && self.chars == other.chars && - self.rows == other.rows && self.columns == other.columns; + self.extent.row == other.extent.row && + self.extent.column == other.extent.column; } #endif diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index d87ebd9c..f46a7d1a 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -55,10 +55,10 @@ static void ts_lexer__advance(TSLexer *self, TSStateId state, bool skip) { self->current_position.bytes += self->lookahead_size; self->current_position.chars++; if (self->lookahead == '\n') { - self->current_position.rows++; - self->current_position.columns = 0; + self->current_position.extent.row++; + self->current_position.extent.column = 0; } else { - self->current_position.columns++; + self->current_position.extent.column++; } } diff --git a/src/runtime/node.c b/src/runtime/node.c index 787fc649..5aec705d 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -48,7 +48,7 @@ static inline TSNode ts_node__direct_parent(TSNode self, size_t *index) { return ts_node_make(tree->context.parent, ts_node__offset_char(self) - tree->context.offset.chars, ts_node__offset_byte(self) - tree->context.offset.bytes, - ts_node__offset_row(self) - tree->context.offset.rows); 
+ ts_node__offset_row(self) - tree->context.offset.extent.row); } static inline TSNode ts_node__direct_child(TSNode self, size_t i) { @@ -56,7 +56,7 @@ static inline TSNode ts_node__direct_child(TSNode self, size_t i) { return ts_node_make( child_tree, ts_node__offset_char(self) + child_tree->context.offset.chars, ts_node__offset_byte(self) + child_tree->context.offset.bytes, - ts_node__offset_row(self) + child_tree->context.offset.rows); + ts_node__offset_row(self) + child_tree->context.offset.extent.row); } static inline TSNode ts_node__child(TSNode self, size_t child_index, @@ -246,14 +246,14 @@ size_t ts_node_end_byte(TSNode self) { TSPoint ts_node_start_point(TSNode self) { const TSTree *tree = ts_node__tree(self); - return (TSPoint){ ts_node__offset_row(self) + tree->padding.rows, + return (TSPoint){ ts_node__offset_row(self) + tree->padding.extent.row, ts_tree_start_column(tree) }; } TSPoint ts_node_end_point(TSNode self) { const TSTree *tree = ts_node__tree(self); - return (TSPoint){ ts_node__offset_row(self) + tree->padding.rows + - tree->size.rows, + return (TSPoint){ ts_node__offset_row(self) + tree->padding.extent.row + + tree->size.extent.row, ts_tree_end_column(tree) }; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 4216c645..04c2246c 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1272,8 +1272,8 @@ TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { LOG("process version:%d, version_count:%lu, state:%d, row:%lu, col:%lu", version, ts_stack_version_count(self->stack), ts_stack_top_state(self->stack, version), - ts_stack_top_position(self->stack, version).rows + 1, - ts_stack_top_position(self->stack, version).columns + 1); + ts_stack_top_position(self->stack, version).extent.row + 1, + ts_stack_top_position(self->stack, version).extent.column + 1); CHECK(parser__advance(self, version, &reusable_node)); LOG_STACK(); diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 8e74cd04..be440d93 100644 
--- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -126,7 +126,7 @@ static StackNode *stack_node_new(StackNode *next, TSTree *tree, bool is_pending, ERROR_COST_PER_SKIPPED_CHAR * (tree->padding.chars + tree->size.chars) + ERROR_COST_PER_SKIPPED_LINE * - (tree->padding.rows + tree->size.rows); + (tree->padding.extent.row + tree->size.extent.row); } } } else { @@ -606,7 +606,7 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf(f, " tooltip=\"position: %lu,%lu\nerror_count: %u\nerror_cost: %u\"];\n", - node->position.rows, node->position.columns, node->error_count, + node->position.extent.row, node->position.extent.column, node->error_count, node->error_cost); for (int j = 0; j < node->link_count; j++) { diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 73747ab2..dc75d827 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -152,7 +152,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { if (self->symbol == ts_builtin_sym_error) { self->error_cost += ERROR_COST_PER_SKIPPED_CHAR * self->size.chars + - ERROR_COST_PER_SKIPPED_LINE * self->size.rows; + ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row; for (size_t i = 0; i < child_count; i++) if (!self->children[i]->extra) self->error_cost += ERROR_COST_PER_SKIPPED_TREE; @@ -233,20 +233,20 @@ recur: } size_t ts_tree_start_column(const TSTree *self) { - size_t column = self->padding.columns; - if (self->padding.rows > 0) + size_t column = self->padding.extent.column; + if (self->padding.extent.row > 0) return column; for (const TSTree *tree = self; tree != NULL; tree = tree->context.parent) { - column += tree->context.offset.columns; - if (tree->context.offset.rows > 0) + column += tree->context.offset.extent.column; + if (tree->context.offset.extent.row > 0) break; } return column; } size_t ts_tree_end_column(const TSTree *self) { - size_t result = self->size.columns; - if (self->size.rows == 0) + size_t result = 
self->size.extent.column; + if (self->size.extent.row == 0) result += ts_tree_start_column(self); return result; } From 00528e50ce1ca92a3c05df90aeee6d96f9171864 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 13 Sep 2016 13:08:52 -0700 Subject: [PATCH 06/16] Change edit API to be byte-based --- include/tree_sitter/runtime.h | 15 +++--- spec/helpers/spy_input.cc | 62 +++++++++++++++++------ spec/helpers/spy_input.h | 6 +-- spec/runtime/document_spec.cc | 21 +++++++- spec/runtime/tree_spec.cc | 83 ++++++++++++++++++++++-------- src/runtime/document.c | 49 +++++++++--------- src/runtime/length.h | 29 ++++++----- src/runtime/parser.c | 20 ++++---- src/runtime/parser.h | 4 +- src/runtime/tree.c | 95 ++++++++++++++++++++++------------- src/runtime/tree.h | 10 +++- 11 files changed, 261 insertions(+), 133 deletions(-) diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index be00a69e..71c3a8f4 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -34,17 +34,20 @@ typedef struct { void (*log)(void *payload, TSLogType, const char *); } TSLogger; -typedef struct { - size_t position; - size_t chars_inserted; - size_t chars_removed; -} TSInputEdit; - typedef struct { size_t row; size_t column; } TSPoint; +typedef struct { + size_t start_byte; + size_t bytes_removed; + size_t bytes_added; + TSPoint start_point; + TSPoint extent_removed; + TSPoint extent_added; +} TSInputEdit; + typedef struct { TSPoint start; TSPoint end; diff --git a/spec/helpers/spy_input.cc b/spec/helpers/spy_input.cc index d5d4bf96..c56ab67e 100644 --- a/spec/helpers/spy_input.cc +++ b/spec/helpers/spy_input.cc @@ -4,6 +4,7 @@ #include #include +using std::pair; using std::string; static const size_t UTF8_MAX_CHAR_SIZE = 4; @@ -68,34 +69,63 @@ TSInput SpyInput::input() { return result; } -TSInputEdit SpyInput::replace(size_t start_char, size_t chars_removed, string text) { - string text_removed = swap_substr(start_char, chars_removed, text); - 
size_t chars_inserted = string_char_count(encoding, text); - undo_stack.push_back(SpyInputEdit{start_char, chars_inserted, text_removed}); - return {start_char, chars_inserted, chars_removed}; +static TSPoint get_extent(string text) { + TSPoint result = {0, 0}; + for (auto i = text.begin(); i != text.end(); i++) { + if (*i == '\n') { + result.row++; + result.column = 0; + } else { + result.column++; + } + } + return result; +} + +TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) { + auto swap = swap_substr(start_byte, bytes_removed, text); + size_t bytes_added = text.size(); + undo_stack.push_back(SpyInputEdit{start_byte, bytes_added, swap.first}); + TSInputEdit result = {}; + result.start_byte = start_byte; + result.bytes_added = bytes_added; + result.bytes_removed = bytes_removed; + result.start_point = swap.second; + result.extent_removed = get_extent(swap.first); + result.extent_added = get_extent(text); + return result; } TSInputEdit SpyInput::undo() { SpyInputEdit entry = undo_stack.back(); undo_stack.pop_back(); - swap_substr(entry.position, entry.chars_removed, entry.text_inserted); - size_t chars_inserted = string_char_count(encoding, entry.text_inserted); - return TSInputEdit{entry.position, chars_inserted, entry.chars_removed}; + auto swap = swap_substr(entry.start_byte, entry.bytes_removed, entry.text_inserted); + TSInputEdit result; + result.start_byte = entry.start_byte; + result.bytes_removed = entry.bytes_removed; + result.bytes_added = entry.text_inserted.size(); + result.start_point = swap.second; + result.extent_removed = get_extent(swap.first); + result.extent_added = get_extent(entry.text_inserted); + return result; } -string SpyInput::swap_substr(size_t start_char, size_t chars_removed, string text) { - long start_byte = string_byte_for_character(encoding, content, 0, start_char); - assert(start_byte >= 0); - - long bytes_removed = string_byte_for_character(encoding, content, start_byte, chars_removed); - if 
(bytes_removed < 0) - bytes_removed = content.size() - start_byte; +pair SpyInput::swap_substr(size_t start_byte, size_t bytes_removed, string text) { + TSPoint start_position = {0, 0}; + for (auto i = content.begin(), n = content.begin() + start_byte; i < n; i++) { + if (*i == '\n') { + start_position.row++; + start_position.column = 0; + } else { + start_position.column++; + } + } string text_removed = content.substr(start_byte, bytes_removed); content.erase(start_byte, bytes_removed); content.insert(start_byte, text); - return text_removed; + return {text_removed, start_position}; } void SpyInput::clear() { diff --git a/spec/helpers/spy_input.h b/spec/helpers/spy_input.h index 24caee6a..a91fc165 100644 --- a/spec/helpers/spy_input.h +++ b/spec/helpers/spy_input.h @@ -6,8 +6,8 @@ #include "tree_sitter/runtime.h" struct SpyInputEdit { - size_t position; - size_t chars_removed; + size_t start_byte; + size_t bytes_removed; std::string text_inserted; }; @@ -20,7 +20,7 @@ class SpyInput { static const char * read(void *, size_t *); static int seek(void *, size_t, size_t); - std::string swap_substr(size_t, size_t, std::string); + std::pair swap_substr(size_t, size_t, std::string); public: SpyInput(std::string content, size_t chars_per_chunk); diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index aa5ad6bb..c36c5aa3 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -67,6 +67,18 @@ describe("Document", [&]() { "(array (true) (false))"); }); + it("allows columns to be measured in either bytes or characters", [&]() { + const char16_t content[] = u"[true, false]"; + spy_input->content = string((const char *)content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + // spy_input->measure_columns_in_bytes + + ts_document_set_input(doc, spy_input->input()); + ts_document_invalidate(doc); + ts_document_parse(doc); + TSNode root_node = ts_document_root_node(doc); + }); + it("allows the input to be 
retrieved later", [&]() { ts_document_set_input(doc, spy_input->input()); AssertThat(ts_document_input(doc).payload, Equals(spy_input)); @@ -85,7 +97,12 @@ describe("Document", [&]() { ts_document_set_input(doc, spy_input->input()); // Insert 'null', delete '1'. - ts_document_edit(doc, {strlen("{\"key\": ["), 4, 1}); + TSInputEdit edit = {}; + edit.start_point.column = edit.start_byte = strlen("{\"key\": ["); + edit.extent_added.column = edit.bytes_added = 4; + edit.extent_removed.column = edit.bytes_removed = 1; + + ts_document_edit(doc, edit); ts_document_parse(doc); TSNode new_root = ts_document_root_node(doc); @@ -194,7 +211,7 @@ describe("Document", [&]() { }); }); - describe("parse_and_get_changed_ranges()", [&]() { + describe("parse_and_get_changed_ranges()", [&]() { SpyInput *input; before_each([&]() { diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index f076c44b..3c209b37 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -183,16 +183,23 @@ describe("Tree", []() { describe("edits within a tree's padding", [&]() { it("resizes the padding of the tree and its leftmost descendants", [&]() { - ts_tree_edit(tree, {1, 1, 0}); - + TSInputEdit edit = { + .start_byte = 1, + .bytes_removed = 0, + .bytes_added = 1, + .start_point = {0, 1}, + .extent_removed = {0, 0}, + .extent_added = {0, 1}, + }; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 3, {0, 0}})); + AssertThat(tree->padding, Equals({3, 0, {0, 3}})); AssertThat(tree->size, Equals({13, 13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 3, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({3, 0, {0, 3}})); AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); @@ -203,32 +210,48 @@ describe("Tree", []() { describe("edits that start in a tree's 
padding but extend into its content", [&]() { it("shrinks the content to compensate for the expanded padding", [&]() { - ts_tree_edit(tree, {1, 4, 3}); - + TSInputEdit edit = { + .start_byte = 1, + .bytes_removed = 3, + .bytes_added = 4, + .start_point = {0, 1}, + .extent_removed = {0, 3}, + .extent_added = {0, 4}, + }; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 5, {0, 0}})); - AssertThat(tree->size, Equals({0, 11, {0, 0}})); + AssertThat(tree->padding, Equals({5, 0, {0, 5}})); + AssertThat(tree->size, Equals({11, 0, {0, 11}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 5, {0, 0}})); - AssertThat(tree->children[0]->size, Equals({0, 1, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({5, 0, {0, 5}})); + AssertThat(tree->children[0]->size, Equals({1, 0, {0, 1}})); }); }); describe("insertions at the edge of a tree's padding", [&]() { it("expands the tree's padding", [&]() { - ts_tree_edit(tree, {2, 2, 0}); + TSInputEdit edit = { + .start_byte = 2, + .bytes_removed = 0, + .bytes_added = 2, + .start_point = {0, 2}, + .extent_removed = {0, 0}, + .extent_added = {0, 2}, + }; + ts_tree_edit(tree, &edit); + assert_consistent(tree); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->padding, Equals({4, 0, {0, 4}})); AssertThat(tree->size, Equals({13, 13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({4, 0, {0, 4}})); AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); @@ -237,17 +260,24 @@ describe("Tree", []() { describe("replacements starting at the edge of a tree's padding", [&]() { it("resizes the content and not the padding", 
[&]() { - ts_tree_edit(tree, {2, 5, 2}); - + TSInputEdit edit = { + .start_byte = 2, + .bytes_removed = 2, + .bytes_added = 5, + .start_point = {0, 2}, + .extent_removed = {0, 2}, + .extent_added = {0, 5}, + }; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); AssertThat(tree->padding, Equals({2, 2, {0, 2}})); - AssertThat(tree->size, Equals({0, 16, {0, 0}})); + AssertThat(tree->size, Equals({16, 0, {0, 16}})); AssertThat(tree->children[0]->has_changes, IsTrue()); AssertThat(tree->children[0]->padding, Equals({2, 2, {0, 2}})); - AssertThat(tree->children[0]->size, Equals({0, 6, {0, 0}})); + AssertThat(tree->children[0]->size, Equals({6, 0, {0, 6}})); AssertThat(tree->children[1]->has_changes, IsFalse()); }); @@ -255,16 +285,25 @@ describe("Tree", []() { describe("deletions that span more than one child node", [&]() { it("shrinks subsequent child nodes", [&]() { - ts_tree_edit(tree, {1, 3, 10}); + TSInputEdit edit = { + .start_byte = 1, + .bytes_removed = 10, + .bytes_added = 3, + .start_point = {0, 1}, + .extent_removed = {0, 10}, + .extent_added = {0, 3}, + }; + ts_tree_edit(tree, &edit); + assert_consistent(tree); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 4, {0, 0}})); - AssertThat(tree->size, Equals({0, 4, {0, 0}})); + AssertThat(tree->padding, Equals({4, 0, {0, 4}})); + AssertThat(tree->size, Equals({4, 0, {0, 4}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 4, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({4, 0, {0, 4}})); AssertThat(tree->children[0]->size, Equals({0, 0, {0, 0}})); AssertThat(tree->children[1]->has_changes, IsTrue()); @@ -272,7 +311,7 @@ describe("Tree", []() { AssertThat(tree->children[1]->size, Equals({0, 0, {0, 0}})); AssertThat(tree->children[2]->has_changes, IsTrue()); - AssertThat(tree->children[2]->padding, Equals({0, 1, {0, 0}})); + 
AssertThat(tree->children[2]->padding, Equals({1, 0, {0, 1}})); AssertThat(tree->children[2]->size, Equals({3, 3, {0, 3}})); }); }); diff --git a/src/runtime/document.c b/src/runtime/document.c index 3d90a482..edb31ca4 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -80,13 +80,13 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { if (!self->tree) return; - size_t max_chars = ts_tree_total_chars(self->tree); - if (edit.position > max_chars) - edit.position = max_chars; - if (edit.chars_removed > max_chars - edit.position) - edit.chars_removed = max_chars - edit.position; + size_t max_bytes = ts_tree_total_bytes(self->tree); + if (edit.start_byte > max_bytes) + edit.start_byte = max_bytes; + if (edit.bytes_removed > max_bytes - edit.start_byte) + edit.bytes_removed = max_bytes - edit.start_byte; - ts_tree_edit(self->tree, edit); + ts_tree_edit(self->tree, &edit); } typedef Array(TSRange) RangeArray; @@ -107,18 +107,19 @@ static bool push_diff(RangeArray *results, TSNode *node, bool *extend_last_chang return array_push(results, ((TSRange){start, end})); } -static bool ts_tree_diff(TSDocument *doc, TSTree *old, TSNode *new_node, - size_t depth, RangeArray *results, bool *extend_last_change) { +static bool ts_tree_get_changes(TSDocument *doc, TSTree *old, TSNode *new_node, + size_t depth, RangeArray *results, + bool *extend_last_change) { TSTree *new = (TSTree *)(new_node->data); PRINT("At %lu, ('%s', %lu) vs ('%s', %lu) {", - ts_node_start_char(*new_node), - NAME(old), old->size.chars, - NAME(new), new->size.chars); + ts_node_start_byte(*new_node), + NAME(old), old->size.bytes, + NAME(new), new->size.bytes); if (old->visible) { if (old == new || (old->symbol == new->symbol && - old->size.chars == new->size.chars && !old->has_changes)) { + old->size.bytes == new->size.bytes && !old->has_changes)) { *extend_last_change = false; PRINT("}", NULL); return true; @@ -140,21 +141,21 @@ static bool ts_tree_diff(TSDocument *doc, TSTree *old, 
TSNode *new_node, depth++; size_t old_child_start; - size_t old_child_end = ts_node_start_char(*new_node) - old->padding.chars; + size_t old_child_end = ts_node_start_byte(*new_node) - old->padding.bytes; for (size_t j = 0; j < old->child_count; j++) { TSTree *old_child = old->children[j]; - if (old_child->padding.chars == 0 && old_child->size.chars == 0) + if (old_child->padding.bytes == 0 && old_child->size.bytes == 0) continue; - old_child_start = old_child_end + old_child->padding.chars; - old_child_end = old_child_start + old_child->size.chars; + old_child_start = old_child_end + old_child->padding.bytes; + old_child_end = old_child_start + old_child->size.bytes; while (true) { - size_t new_child_start = ts_node_start_char(*new_node); + size_t new_child_start = ts_node_start_byte(*new_node); if (new_child_start < old_child_start) { PRINT("skip new:('%s', %lu), old:('%s', %lu), old_parent:%s", - NAME(new_node->data), ts_node_start_char(*new_node), NAME(old_child), + NAME(new_node->data), ts_node_start_byte(*new_node), NAME(old_child), old_child_start, NAME(old)); if (!push_diff(results, new_node, extend_last_change)) @@ -163,23 +164,23 @@ static bool ts_tree_diff(TSDocument *doc, TSTree *old, TSNode *new_node, TSNode next = ts_node_next_sibling(*new_node); if (next.data) { PRINT("advance before diff ('%s', %lu) -> ('%s', %lu)", - NAME(new_node->data), ts_node_start_char(*new_node), NAME(next.data), - ts_node_start_char(next)); + NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data), + ts_node_start_byte(next)); *new_node = next; } else { break; } } else if (new_child_start == old_child_start) { - if (!ts_tree_diff(doc, old_child, new_node, depth, results, extend_last_change)) + if (!ts_tree_get_changes(doc, old_child, new_node, depth, results, extend_last_change)) return false; if (old_child->visible) { TSNode next = ts_node_next_sibling(*new_node); if (next.data) { PRINT("advance after diff ('%s', %lu) -> ('%s', %lu)", - NAME(new_node->data), 
ts_node_start_char(*new_node), NAME(next.data), - ts_node_start_char(next)); + NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data), + ts_node_start_byte(next)); *new_node = next; } } @@ -225,7 +226,7 @@ int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, if (ranges && range_count) { bool extend_last_change = false; RangeArray result = {0, 0, 0}; - if (!ts_tree_diff(self, old_tree, &new_root, 0, &result, &extend_last_change)) + if (!ts_tree_get_changes(self, old_tree, &new_root, 0, &result, &extend_last_change)) return -1; *ranges = result.contents; *range_count = result.size; diff --git a/src/runtime/length.h b/src/runtime/length.h index dde1f81f..e4840d32 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -18,13 +18,19 @@ static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) { return (TSPoint){0, a.column - b.column}; } +static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { + if (a.row < b.row || (a.row == b.row && a.column < b.column)) + return a; + else + return b; +} + static inline bool ts_length_is_unknown(TSLength self) { - return self.chars > 0 && self.bytes == 0; + return self.bytes > 0 && self.chars == 0; } static inline void ts_length_set_unknown(TSLength *self) { - self->bytes = 0; - self->extent = (TSPoint){0, 0}; + self->chars = 0; } static inline TSLength ts_length_min(TSLength len1, TSLength len2) { @@ -34,13 +40,13 @@ static inline TSLength ts_length_min(TSLength len1, TSLength len2) { static inline TSLength ts_length_add(TSLength len1, TSLength len2) { TSLength result; result.chars = len1.chars + len2.chars; + result.bytes = len1.bytes + len2.bytes; + result.extent = ts_point_add(len1.extent, len2.extent); if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { - result.bytes = 0; - result.extent = (TSPoint){0, result.chars}; + result.chars = 0; } else { - result.bytes = len1.bytes + len2.bytes; - result.extent = ts_point_add(len1.extent, len2.extent); + result.chars = 
len1.chars + len2.chars; } return result; @@ -48,14 +54,13 @@ static inline TSLength ts_length_add(TSLength len1, TSLength len2) { static inline TSLength ts_length_sub(TSLength len1, TSLength len2) { TSLength result; - result.chars = len1.chars - len2.chars; + result.bytes = len1.bytes - len2.bytes; + result.extent = ts_point_sub(len1.extent, len2.extent); if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { - result.bytes = 0; - result.extent = (TSPoint){0, result.chars}; + result.chars = 0; } else { - result.bytes = len1.bytes - len2.bytes; - result.extent = ts_point_sub(len1.extent, len2.extent); + result.chars = len1.chars - len2.chars; } return result; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 04c2246c..7e3c9a0d 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -144,7 +144,7 @@ error: } static void parser__pop_reusable_node(ReusableNode *reusable_node) { - reusable_node->char_index += ts_tree_total_chars(reusable_node->tree); + reusable_node->byte_index += ts_tree_total_bytes(reusable_node->tree); while (reusable_node->tree) { TSTree *parent = reusable_node->tree->context.parent; size_t next_index = reusable_node->tree->context.index + 1; @@ -270,7 +270,7 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { break; } - if (self->lexer.current_position.chars == position.chars) { + if (self->lexer.current_position.bytes == position.bytes) { if (!skipped_error) { error_start_position = self->lexer.current_position; first_error_character = self->lexer.lookahead; @@ -317,15 +317,15 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, TSLength position = ts_stack_top_position(self->stack, version); while (reusable_node->tree) { - if (reusable_node->char_index > position.chars) { + if (reusable_node->byte_index > position.bytes) { LOG("before_reusable sym:%s, pos:%lu", - SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); + SYM_NAME(reusable_node->tree->symbol), 
reusable_node->byte_index); break; } - if (reusable_node->char_index < position.chars) { + if (reusable_node->byte_index < position.bytes) { LOG("past_reusable sym:%s, pos:%lu", - SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); + SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index); parser__pop_reusable_node(reusable_node); continue; } @@ -333,7 +333,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, if (reusable_node->tree->has_changes) { LOG("cant_reuse_changed tree:%s, size:%lu", SYM_NAME(reusable_node->tree->symbol), - reusable_node->tree->size.chars); + reusable_node->tree->size.bytes); if (!parser__breakdown_reusable_node(reusable_node)) { parser__pop_reusable_node(reusable_node); CHECK(parser__breakdown_top_of_stack(self, version)); @@ -344,7 +344,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, if (reusable_node->tree->symbol == ts_builtin_sym_error) { LOG("cant_reuse_error tree:%s, size:%lu", SYM_NAME(reusable_node->tree->symbol), - reusable_node->tree->size.chars); + reusable_node->tree->size.bytes); if (!parser__breakdown_reusable_node(reusable_node)) { parser__pop_reusable_node(reusable_node); CHECK(parser__breakdown_top_of_stack(self, version)); @@ -357,7 +357,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, return result; } - if (self->cached_token && position.chars == self->cached_token_char_index) { + if (self->cached_token && position.bytes == self->cached_token_byte_index) { ts_tree_retain(self->cached_token); return self->cached_token; } @@ -1073,7 +1073,7 @@ static bool parser__advance(Parser *self, StackVersion version, validated_lookahead = true; LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), - lookahead->size.chars); + lookahead->size.bytes); } bool reduction_stopped_at_error = false; diff --git a/src/runtime/parser.h b/src/runtime/parser.h index ac621ca1..146ee6a4 100644 --- a/src/runtime/parser.h +++ 
b/src/runtime/parser.h @@ -11,7 +11,7 @@ extern "C" { typedef struct { TSTree *tree; - size_t char_index; + size_t byte_index; } ReusableNode; typedef struct { @@ -24,7 +24,7 @@ typedef struct { bool print_debugging_graphs; TSTree scratch_tree; TSTree *cached_token; - size_t cached_token_char_index; + size_t cached_token_byte_index; ReusableNode reusable_node; } Parser; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index dc75d827..85c545d7 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -307,34 +307,48 @@ static inline long min(long a, long b) { return a <= b ? a : b; } -void ts_tree_edit(TSTree *self, TSInputEdit edit) { - size_t start = edit.position; - size_t new_end = edit.position + edit.chars_inserted; - size_t old_end = edit.position + edit.chars_removed; - assert(old_end <= ts_tree_total_chars(self)); + +void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { + size_t old_end_byte = edit->start_byte + edit->bytes_removed; + size_t new_end_byte = edit->start_byte + edit->bytes_added; + TSPoint old_end_point = ts_point_add(edit->start_point, edit->extent_removed); + TSPoint new_end_point = ts_point_add(edit->start_point, edit->extent_added); + + assert(old_end_byte <= ts_tree_total_bytes(self)); self->has_changes = true; - if (start < self->padding.chars) { + if (edit->start_byte < self->padding.bytes) { ts_length_set_unknown(&self->padding); - long remaining_padding = self->padding.chars - old_end; - if (remaining_padding >= 0) { - self->padding.chars = new_end + remaining_padding; + if (self->padding.bytes >= old_end_byte) { + size_t trailing_padding_bytes = self->padding.bytes - old_end_byte; + TSPoint trailing_padding_extent = ts_point_sub(self->padding.extent, old_end_point); + self->padding.bytes = new_end_byte + trailing_padding_bytes; + self->padding.extent = ts_point_add(new_end_point, trailing_padding_extent); } else { - self->padding.chars = new_end; - self->size.chars += remaining_padding; ts_length_set_unknown(&self->size); 
+ size_t removed_content_bytes = old_end_byte - self->padding.bytes; + TSPoint removed_content_extent = ts_point_sub(old_end_point, self->padding.extent); + self->size.bytes = self->size.bytes - removed_content_bytes; + self->size.extent = ts_point_sub(self->size.extent, removed_content_extent); + self->padding.bytes = new_end_byte; + self->padding.extent = new_end_point; } - } else if (start == self->padding.chars && edit.chars_removed == 0) { - self->padding.chars += edit.chars_inserted; + } else if (edit->start_byte == self->padding.bytes && edit->bytes_removed == 0) { ts_length_set_unknown(&self->padding); + self->padding.bytes = self->padding.bytes + edit->bytes_added; + self->padding.extent = ts_point_add(self->padding.extent, edit->extent_added); } else { - self->size.chars += (edit.chars_inserted - edit.chars_removed); ts_length_set_unknown(&self->size); + size_t trailing_content_bytes = ts_tree_total_bytes(self) - old_end_byte; + TSPoint trailing_content_extent = ts_point_sub(ts_tree_total_extent(self), old_end_point); + self->size.bytes = new_end_byte + trailing_content_bytes - self->padding.bytes; + self->size.extent = ts_point_sub(ts_point_add(new_end_point, trailing_content_extent), self->padding.extent); } bool found_first_child = false; - long remainder_to_delete = edit.chars_removed - edit.chars_inserted; + long remaining_bytes_to_delete = 0; + TSPoint remaining_extent_to_delete = {0, 0}; TSLength child_left, child_right = ts_length_zero(); for (size_t i = 0; i < self->child_count; i++) { TSTree *child = self->children[i]; @@ -342,30 +356,41 @@ void ts_tree_edit(TSTree *self, TSInputEdit edit) { if (!found_first_child) { child_right = ts_length_add(child_left, ts_tree_total_size(child)); - if (child_right.chars >= start) { + if (child_right.bytes >= edit->start_byte) { found_first_child = true; - size_t chars_removed = min(edit.chars_removed, child_right.chars - start); - remainder_to_delete -= (chars_removed - edit.chars_inserted); - 
ts_tree_edit(child, (TSInputEdit){ - .position = start - child_left.chars, - .chars_inserted = edit.chars_inserted, - .chars_removed = chars_removed, - }); - child_right = ts_length_add(child_left, ts_tree_total_size(child)); + TSInputEdit child_edit = { + .start_byte = edit->start_byte - child_left.bytes, + .bytes_added = edit->bytes_added, + .bytes_removed = edit->bytes_removed, + .start_point = ts_point_sub(edit->start_point, child_left.extent), + .extent_added = edit->extent_added, + .extent_removed = edit->extent_removed, + }; + + if (old_end_byte > child_right.bytes) { + child_edit.bytes_removed = child_right.bytes - edit->start_byte; + child_edit.extent_removed = ts_point_sub(child_right.extent, edit->start_point); + remaining_bytes_to_delete = old_end_byte - child_right.bytes; + remaining_extent_to_delete = ts_point_sub(old_end_point, child_right.extent); + } + + ts_tree_edit(child, &child_edit); } - } else { - if (remainder_to_delete > 0) { - size_t chars_removed = min(remainder_to_delete, ts_tree_total_chars(child)); - remainder_to_delete -= chars_removed; - ts_tree_edit( - child, - (TSInputEdit){ - .position = 0, .chars_inserted = 0, .chars_removed = chars_removed, - }); - } - child_right = ts_length_add(child_right, ts_tree_total_size(child)); + } else if (remaining_bytes_to_delete > 0) { + TSInputEdit child_edit = { + .start_byte = 0, + .bytes_added = 0, + .bytes_removed = min(remaining_bytes_to_delete, ts_tree_total_bytes(child)), + .start_point = {0, 0}, + .extent_added = {0, 0}, + .extent_removed = ts_point_min(remaining_extent_to_delete, ts_tree_total_size(child).extent), + }; + remaining_bytes_to_delete -= child_edit.bytes_removed; + remaining_extent_to_delete = ts_point_sub(remaining_extent_to_delete, child_edit.extent_removed); + ts_tree_edit(child, &child_edit); } + child_right = ts_length_add(child_left, ts_tree_total_size(child)); child->context.offset = child_left; } } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 
7121b2b5..af8d50d9 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -68,7 +68,7 @@ size_t ts_tree_start_column(const TSTree *self); size_t ts_tree_end_column(const TSTree *self); void ts_tree_set_children(TSTree *, size_t, TSTree **); void ts_tree_assign_parents(TSTree *); -void ts_tree_edit(TSTree *, TSInputEdit); +void ts_tree_edit(TSTree *, const TSInputEdit *edit); char *ts_tree_string(const TSTree *, const TSLanguage *, bool include_all); void ts_tree_print_dot_graph(const TSTree *, const TSLanguage *, FILE *); @@ -76,10 +76,18 @@ static inline size_t ts_tree_total_chars(const TSTree *self) { return self->padding.chars + self->size.chars; } +static inline size_t ts_tree_total_bytes(const TSTree *self) { + return self->padding.bytes + self->size.bytes; +} + static inline TSLength ts_tree_total_size(const TSTree *self) { return ts_length_add(self->padding, self->size); } +static inline TSPoint ts_tree_total_extent(const TSTree *self) { + return ts_point_add(self->padding.extent, self->size.extent); +} + static inline bool ts_tree_is_fragile(const TSTree *tree) { return tree->fragile_left || tree->fragile_right || ts_tree_total_chars(tree) == 0; From fcf9293d35ff7d87ce72488a3fa78e050426199e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 14 Sep 2016 09:46:41 -0700 Subject: [PATCH 07/16] Use explicit stack for assigning trees' parent pointers --- src/runtime/parser.c | 5 ++++- src/runtime/parser.h | 1 + src/runtime/tree.c | 35 ++++++++++++++++++----------------- src/runtime/tree.h | 2 +- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 7e3c9a0d..09a8757a 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1224,6 +1224,7 @@ bool parser_init(Parser *self) { self->finished_tree = NULL; self->stack = NULL; array_init(&self->reduce_actions); + array_init(&self->tree_stack); self->stack = ts_stack_new(); if (!self->stack) @@ -1249,6 +1250,8 @@ void parser_destroy(Parser 
*self) { ts_stack_delete(self->stack); if (self->reduce_actions.contents) array_delete(&self->reduce_actions); + if (self->tree_stack.contents) + array_delete(&self->tree_stack); } TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { @@ -1294,7 +1297,7 @@ TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { LOG_TREE(); ts_stack_clear(self->stack); parser__clear_cached_token(self); - ts_tree_assign_parents(self->finished_tree); + CHECK(ts_tree_assign_parents(self->finished_tree, &self->tree_stack)); return self->finished_tree; error: diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 146ee6a4..4bc22697 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -26,6 +26,7 @@ typedef struct { TSTree *cached_token; size_t cached_token_byte_index; ReusableNode reusable_node; + TreeArray tree_stack; } Parser; bool parser_init(Parser *); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 85c545d7..2bf46d0f 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -91,28 +91,29 @@ TSTree *ts_tree_make_copy(TSTree *self) { return result; } -void ts_tree_assign_parents(TSTree *self) { - TSLength offset; - -recur: - offset = ts_length_zero(); - for (size_t i = 0; i < self->child_count; i++) { - TSTree *child = self->children[i]; - if (child->context.parent != self || child->context.index != i) { - child->context.parent = self; - child->context.index = i; - child->context.offset = offset; - if (i == self->child_count - 1) { - self = child; - goto recur; +bool ts_tree_assign_parents(TSTree *self, TreeArray *stack) { + array_clear(stack); + if (!array_push(stack, self)) + return false; + while (stack->size > 0) { + TSTree *tree = array_pop(stack); + TSLength offset = ts_length_zero(); + for (size_t i = 0; i < tree->child_count; i++) { + TSTree *child = tree->children[i]; + if (child->context.parent != tree || child->context.index != i) { + child->context.parent = tree; + child->context.index = i; + 
child->context.offset = offset; + if (!array_push(stack, child)) + return false; } - - ts_tree_assign_parents(child); + offset = ts_length_add(offset, ts_tree_total_size(child)); } - offset = ts_length_add(offset, ts_tree_total_size(child)); } + return true; } + void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { if (self->child_count > 0) ts_free(self->children); diff --git a/src/runtime/tree.h b/src/runtime/tree.h index af8d50d9..146897d8 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -67,7 +67,7 @@ int ts_tree_compare(const TSTree *tree1, const TSTree *tree2); size_t ts_tree_start_column(const TSTree *self); size_t ts_tree_end_column(const TSTree *self); void ts_tree_set_children(TSTree *, size_t, TSTree **); -void ts_tree_assign_parents(TSTree *); +bool ts_tree_assign_parents(TSTree *, TreeArray *); void ts_tree_edit(TSTree *, const TSInputEdit *edit); char *ts_tree_string(const TSTree *, const TSLanguage *, bool include_all); void ts_tree_print_dot_graph(const TSTree *, const TSLanguage *, FILE *); From 744eb7b356cd5881ec0c0aaec10cae6b98c9bce8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 14 Sep 2016 22:33:37 -0700 Subject: [PATCH 08/16] Assert consistency of trees' sizes in bytes --- spec/integration/corpus_specs.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index ec01574f..1909c280 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -19,6 +19,8 @@ static void expect_the_correct_tree(TSNode node, TSDocument *document, string tr static void expect_a_consistent_tree(TSNode node, TSDocument *document) { size_t child_count = ts_node_child_count(node); + size_t start_byte = ts_node_start_byte(node); + size_t end_byte = ts_node_end_byte(node); size_t start_char = ts_node_start_char(node); size_t end_char = ts_node_end_char(node); TSPoint start_point = 
ts_node_start_point(node); @@ -26,21 +28,27 @@ static void expect_a_consistent_tree(TSNode node, TSDocument *document) { bool has_changes = ts_node_has_changes(node); bool some_child_has_changes = false; + AssertThat(start_byte, !IsGreaterThan(end_byte)); AssertThat(start_char, !IsGreaterThan(end_char)); AssertThat(start_point, !IsGreaterThan(end_point)); + size_t last_child_end_byte = start_byte; size_t last_child_end_char = start_char; TSPoint last_child_end_point = start_point; for (size_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); + size_t child_start_byte = ts_node_start_byte(child); + size_t child_end_byte = ts_node_end_byte(child); size_t child_start_char = ts_node_start_char(child); size_t child_end_char = ts_node_end_char(child); TSPoint child_start_point = ts_node_start_point(child); TSPoint child_end_point = ts_node_end_point(child); + AssertThat(child_start_byte, !IsLessThan(last_child_end_byte)); AssertThat(child_start_char, !IsLessThan(last_child_end_char)); AssertThat(child_start_point, !IsLessThan(last_child_end_point)); + last_child_end_byte = child_end_byte; last_child_end_char = child_end_char; last_child_end_point = child_end_point; @@ -51,10 +59,11 @@ static void expect_a_consistent_tree(TSNode node, TSDocument *document) { } if (child_count > 0) { - AssertThat(end_char, !IsLessThan(last_child_end_char)); + AssertThat(end_byte, !IsLessThan(last_child_end_byte)); + AssertThat(end_point, !IsLessThan(last_child_end_point)); if (!has_changes) - AssertThat(end_point, !IsLessThan(last_child_end_point)); + AssertThat(end_char, !IsLessThan(last_child_end_char)); AssertThat(has_changes, Equals(some_child_has_changes)); } From ae3b912ddc1bacaea451813ed7e9894cd345b58e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 19 Sep 2016 13:34:24 -0700 Subject: [PATCH 09/16] Escape unexpected characters when pretty-printing error nodes --- src/runtime/tree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/runtime/tree.c 
b/src/runtime/tree.c index 2bf46d0f..930ff0a3 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -399,6 +399,12 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) { if (c == 0) return snprintf(s, n, "EOF"); + else if (c == '\n') + return snprintf(s, n, "'\\n'"); + else if (c == '\t') + return snprintf(s, n, "'\\t'"); + else if (c == '\r') + return snprintf(s, n, "'\\r'"); else if (c < 128) return snprintf(s, n, "'%c'", c); else From 3014101104d1632d983bdd83a025b9592f49f7ee Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 19 Sep 2016 13:35:08 -0700 Subject: [PATCH 10/16] Fix inconsistencies in nodes sizes after edits --- spec/integration/corpus_specs.cc | 67 +++++++++++++++++--------------- spec/runtime/document_spec.cc | 2 +- spec/runtime/node_spec.cc | 13 +------ spec/runtime/parser_spec.cc | 15 +++++-- src/runtime/document.c | 2 +- src/runtime/parser.c | 38 +++++++++--------- 6 files changed, 70 insertions(+), 67 deletions(-) diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index 1909c280..b4612f9d 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -10,63 +10,69 @@ #include "helpers/random_helpers.h" #include -static void expect_the_correct_tree(TSNode node, TSDocument *document, string tree_string) { - const char *node_string = ts_node_string(node, document); +static void assert_correct_tree_shape(const TSDocument *document, string tree_string) { + TSNode root_node = ts_document_root_node(document); + const char *node_string = ts_node_string(root_node, document); string result(node_string); ts_free((void *)node_string); AssertThat(result, Equals(tree_string)); } -static void expect_a_consistent_tree(TSNode node, TSDocument *document) { +static void assert_consistent_sizes(TSNode node) { size_t child_count = ts_node_child_count(node); size_t start_byte = ts_node_start_byte(node); size_t end_byte 
= ts_node_end_byte(node); - size_t start_char = ts_node_start_char(node); - size_t end_char = ts_node_end_char(node); TSPoint start_point = ts_node_start_point(node); TSPoint end_point = ts_node_end_point(node); - bool has_changes = ts_node_has_changes(node); bool some_child_has_changes = false; AssertThat(start_byte, !IsGreaterThan(end_byte)); - AssertThat(start_char, !IsGreaterThan(end_char)); AssertThat(start_point, !IsGreaterThan(end_point)); size_t last_child_end_byte = start_byte; - size_t last_child_end_char = start_char; TSPoint last_child_end_point = start_point; for (size_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); size_t child_start_byte = ts_node_start_byte(child); - size_t child_end_byte = ts_node_end_byte(child); - size_t child_start_char = ts_node_start_char(child); - size_t child_end_char = ts_node_end_char(child); TSPoint child_start_point = ts_node_start_point(child); - TSPoint child_end_point = ts_node_end_point(child); AssertThat(child_start_byte, !IsLessThan(last_child_end_byte)); - AssertThat(child_start_char, !IsLessThan(last_child_end_char)); AssertThat(child_start_point, !IsLessThan(last_child_end_point)); - last_child_end_byte = child_end_byte; - last_child_end_char = child_end_char; - last_child_end_point = child_end_point; - - expect_a_consistent_tree(child, document); - + assert_consistent_sizes(child); if (ts_node_has_changes(child)) some_child_has_changes = true; + + last_child_end_byte = ts_node_end_byte(child); + last_child_end_point = ts_node_end_point(child); } if (child_count > 0) { AssertThat(end_byte, !IsLessThan(last_child_end_byte)); AssertThat(end_point, !IsLessThan(last_child_end_point)); - - if (!has_changes) - AssertThat(end_char, !IsLessThan(last_child_end_char)); - - AssertThat(has_changes, Equals(some_child_has_changes)); } + + if (some_child_has_changes) { + AssertThat(ts_node_has_changes(node), IsTrue()); + } +} + +static void assert_correct_tree_size(TSDocument *document, string content) 
{ + TSNode root_node = ts_document_root_node(document); + size_t expected_size = content.size(); + + // In the JSON grammar, the start rule (`_value`) is hidden, so the node + // returned from `ts_document_root_node` (e.g. an `object` node), does not + // actually point to the root of the tree. In this weird case, trailing + // whitespace is not included in the root node's size. + // + // TODO: Fix this inconsistency. Maybe disallow the start rule being hidden? + if (ts_document_language(document) == get_test_language("json") && + string(ts_node_type(root_node, document)) != "ERROR") + expected_size = content.find_last_not_of("\n ") + 1; + + AssertThat(ts_node_end_byte(root_node), Equals(expected_size)); + assert_consistent_sizes(root_node); } START_TEST @@ -105,9 +111,8 @@ describe("The Corpus", []() { input = new SpyInput(entry.input, 3); ts_document_set_input(document, input->input()); edit_sequence(); - TSNode root_node = ts_document_root_node(document); - expect_the_correct_tree(root_node, document, entry.tree_string); - expect_a_consistent_tree(root_node, document); + assert_correct_tree_shape(document, entry.tree_string); + assert_correct_tree_size(document, input->content); delete input; }); }; @@ -130,10 +135,10 @@ describe("The Corpus", []() { it_handles_edit_sequence("repairing an insertion of " + description, [&]() { ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); ts_document_parse(document); - - expect_a_consistent_tree(ts_document_root_node(document), document); + assert_correct_tree_size(document, input->content); ts_document_edit(document, input->undo()); + assert_correct_tree_size(document, input->content); ts_document_parse(document); }); } @@ -144,10 +149,10 @@ describe("The Corpus", []() { it_handles_edit_sequence("repairing a deletion of " + desription, [&]() { ts_document_edit(document, input->replace(edit_position, deletion_size, "")); ts_document_parse(document); - - 
expect_a_consistent_tree(ts_document_root_node(document), document); + assert_correct_tree_size(document, input->content); ts_document_edit(document, input->undo()); + assert_correct_tree_size(document, input->content); ts_document_parse(document); }); } diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index c36c5aa3..bd07ff3c 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -109,7 +109,7 @@ describe("Document", [&]() { assert_node_string_equals( new_root, "(object (pair (string) (array (null) (number))))"); - AssertThat(spy_input->strings_read, Equals(vector({" [null, 2", ""}))); + AssertThat(spy_input->strings_read, Equals(vector({" [null, 2"}))); }); it("reads from the new input correctly when the old input was blank", [&]() { diff --git a/spec/runtime/node_spec.cc b/spec/runtime/node_spec.cc index 056362a6..085e4d31 100644 --- a/spec/runtime/node_spec.cc +++ b/spec/runtime/node_spec.cc @@ -138,19 +138,8 @@ describe("Node", []() { it("returns an iterator that yields each of the node's symbols", [&]() { const TSLanguage *language = ts_document_language(document); - TSSymbolIterator iterator = ts_node_symbols(array_node); - AssertThat(iterator.done, Equals(false)); - AssertThat(ts_language_symbol_name(language, iterator.value), Equals("array")); - - ts_symbol_iterator_next(&iterator); - AssertThat(iterator.done, Equals(false)); - AssertThat(ts_language_symbol_name(language, iterator.value), Equals("_value")); - - ts_symbol_iterator_next(&iterator); - AssertThat(iterator.done, Equals(true)); - TSNode false_node = ts_node_descendant_for_char_range(array_node, false_index, false_index + 1); - iterator = ts_node_symbols(false_node); + TSSymbolIterator iterator = ts_node_symbols(false_node); AssertThat(iterator.done, Equals(false)); AssertThat(ts_language_symbol_name(language, iterator.value), Equals("false")); diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 0f0b5fa1..0e6bb97b 
100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -127,7 +127,6 @@ describe("Parser", [&]() { TSNode error = ts_node_named_child(root, 1); AssertThat(ts_node_symbol(error), Equals(ts_builtin_sym_error)); AssertThat(ts_node_type(error, doc), Equals("ERROR")); - AssertThat(get_node_text(error), Equals(", faaaaalse")); AssertThat(ts_node_child_count(error), Equals(2)); TSNode comma = ts_node_child(error, 0); @@ -162,6 +161,15 @@ describe("Parser", [&]() { }); }); + describe("when there is an unexpected string at the end of a token", [&]() { + it("computes the error's size and position correctly", [&]() { + set_text(" [123, \"hi\n, true]"); + + assert_root_node( + "(array (number) (ERROR (UNEXPECTED '\\n')) (true))"); + }); + }); + describe("when there is an unterminated error", [&]() { it("maintains a consistent tree", [&]() { ts_document_set_language(doc, get_test_language("javascript")); @@ -244,7 +252,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (member_access (identifier) (identifier))))))"); - AssertThat(input->strings_read, Equals(vector({ " + abc.d)", "" }))); + AssertThat(input->strings_read, Equals(vector({ " + abc.d)" }))); }); }); @@ -268,7 +276,7 @@ describe("Parser", [&]() { "(number) " "(math_op (number) (math_op (number) (identifier)))))))"); - AssertThat(input->strings_read, Equals(vector({ "123 || 5 +", "" }))); + AssertThat(input->strings_read, Equals(vector({ "123 || 5 +" }))); }); }); @@ -517,7 +525,6 @@ describe("Parser", [&]() { ts_document_free(doc); doc = nullptr; - AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); } record_alloc::stop(); diff --git a/src/runtime/document.c b/src/runtime/document.c index edb31ca4..325534b6 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -82,7 +82,7 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { size_t max_bytes = ts_tree_total_bytes(self->tree); if (edit.start_byte > max_bytes) - edit.start_byte = 
max_bytes; + return; if (edit.bytes_removed > max_bytes - edit.start_byte) edit.bytes_removed = max_bytes - edit.start_byte; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 09a8757a..75b2b4df 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -247,7 +247,6 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { TSStateId start_state = self->language->lex_states[parse_state]; TSStateId current_state = start_state; TSLength start_position = self->lexer.current_position; - TSLength position = start_position; LOG("lex state:%d", start_state); bool skipped_error = false; @@ -259,34 +258,32 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { while (!self->language->lex_fn(&self->lexer, current_state)) { if (current_state != TS_STATE_ERROR) { LOG("retry_in_error_mode"); - ts_lexer_reset(&self->lexer, position); - ts_lexer_start(&self->lexer, start_state); current_state = TS_STATE_ERROR; + ts_lexer_reset(&self->lexer, start_position); + ts_lexer_start(&self->lexer, current_state); continue; } - if (self->lexer.lookahead == 0) { - self->lexer.result_symbol = ts_builtin_sym_error; - break; + if (!skipped_error) { + error_start_position = self->lexer.token_start_position; + first_error_character = self->lexer.lookahead; } - if (self->lexer.current_position.bytes == position.bytes) { - if (!skipped_error) { - error_start_position = self->lexer.current_position; - first_error_character = self->lexer.lookahead; + if (self->lexer.current_position.bytes == error_end_position.bytes) { + if (self->lexer.lookahead == 0) { + self->lexer.result_symbol = ts_builtin_sym_error; + break; } - skipped_error = true; self->lexer.advance(&self->lexer, TS_STATE_ERROR, false); - error_end_position = self->lexer.current_position; } - position = self->lexer.current_position; + skipped_error = true; + error_end_position = self->lexer.current_position; } TSTree *result; if (skipped_error) { - error_start_position = 
ts_length_min(error_start_position, self->lexer.token_start_position); TSLength padding = ts_length_sub(error_start_position, start_position); TSLength size = ts_length_sub(error_end_position, error_start_position); ts_lexer_reset(&self->lexer, error_end_position); @@ -804,7 +801,10 @@ static void parser__start(Parser *self, TSInput input, TSTree *previous_tree) { self->finished_tree = NULL; } -static bool parser__accept(Parser *self, StackVersion version) { +static bool parser__accept(Parser *self, StackVersion version, TSTree *lookahead) { + lookahead->extra = true; + assert(lookahead->symbol == ts_builtin_sym_end); + CHECK(ts_stack_push(self->stack, version, lookahead, false, 1)); StackPopResult pop = ts_stack_pop_all(self->stack, version); CHECK(pop.status); CHECK(pop.slices.size); @@ -821,7 +821,7 @@ static bool parser__accept(Parser *self, StackVersion version) { for (size_t j = trees.size - 1; j + 1 > 0; j--) { TSTree *child = trees.contents[j]; if (!child->extra) { - root = ts_tree_make_copy(child); + CHECK(root = ts_tree_make_copy(child)); root->child_count = 0; for (size_t k = 0; k < child->child_count; k++) ts_tree_retain(child->children[k]); @@ -848,6 +848,8 @@ static bool parser__accept(Parser *self, StackVersion version) { return true; error: + for (size_t i = 0; i < pop.slices.size; i++) + ts_tree_array_delete(&pop.slices.contents[i].trees); return false; } @@ -1021,7 +1023,7 @@ static bool parser__recover(Parser *self, StackVersion version, TSStateId state, TreeArray children = array_new(); TSTree *parent = ts_tree_make_error_node(&children); CHECK(parser__push(self, version, parent, 1)); - return parser__accept(self, version); + return parser__accept(self, version, lookahead); } LOG("recover state:%u", state); @@ -1159,7 +1161,7 @@ static bool parser__advance(Parser *self, StackVersion version, continue; LOG("accept"); - CHECK(parser__accept(self, version)); + CHECK(parser__accept(self, version, lookahead)); ts_tree_release(lookahead); return true; 
From b3140b2689f85e700074fb63b4a81df2638fcd5e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 22 Sep 2016 18:02:11 -0700 Subject: [PATCH 11/16] Implement ts_document_parse_and_get_changed_ranges --- spec/helpers/scope_sequence.cc | 17 +- spec/integration/corpus_specs.cc | 12 +- spec/runtime/document_spec.cc | 15 +- src/runtime/document.c | 306 ++++++++++++++++++++----------- src/runtime/length.h | 19 ++ src/runtime/parser.c | 11 +- src/runtime/parser.h | 3 +- src/runtime/tree.c | 21 ++- src/runtime/tree.h | 11 +- 9 files changed, 274 insertions(+), 141 deletions(-) diff --git a/spec/helpers/scope_sequence.cc b/spec/helpers/scope_sequence.cc index 32a77bc6..87e059dc 100644 --- a/spec/helpers/scope_sequence.cc +++ b/spec/helpers/scope_sequence.cc @@ -23,23 +23,18 @@ static void append_to_scope_sequence(ScopeSequence *sequence, ScopeStack *current_scopes, TSNode node, TSDocument *document, const std::string &text) { + append_text_to_scope_sequence(sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size()); + string scope = ts_node_type(node, document); current_scopes->push_back(scope); size_t child_count = ts_node_child_count(node); if (child_count > 0) { - size_t previous_child_end = ts_node_start_char(node); for (size_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); - size_t child_start = ts_node_start_char(child); - size_t spacing = child_start - previous_child_end; - append_text_to_scope_sequence(sequence, current_scopes, text, spacing); append_to_scope_sequence(sequence, current_scopes, child, document, text); - previous_child_end = ts_node_end_char(child); } - size_t spacing = ts_node_end_char(node) - previous_child_end; - append_text_to_scope_sequence(sequence, current_scopes, text, spacing); } else { - size_t length = ts_node_end_char(node) - ts_node_start_char(node); + size_t length = ts_node_end_byte(node) - ts_node_start_byte(node); append_text_to_scope_sequence(sequence, current_scopes, text, length); } 
current_scopes->pop_back(); @@ -50,7 +45,6 @@ ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text ScopeStack current_scopes; TSNode node = ts_document_root_node(document); append_to_scope_sequence(&sequence, &current_scopes, node, document, text); - AssertThat(sequence.size(), Equals(text.size())); return sequence; } @@ -66,7 +60,7 @@ bool operator<=(const TSPoint &left, const TSPoint &right) { void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequence &new_sequence, const string &text, TSRange *ranges, size_t range_count) { TSPoint current_position = {0, 0}; - for (size_t i = 0; i < text.size(); i++) { + for (size_t i = 0; i < old_sequence.size(); i++) { if (text[i] == '\n') { current_position.row++; current_position.column = 0; @@ -89,6 +83,7 @@ void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequenc std::stringstream message_stream; message_stream << "Found changed scope outside of any invalidated range;\n"; message_stream << "Position: " << current_position << "\n"; + message_stream << "Byte index: " << i << "\n"; size_t line_start_index = i - current_position.column; size_t line_end_index = text.find_first_of('\n', i); message_stream << "Line: " << text.substr(line_start_index, line_end_index - line_start_index) << "\n"; @@ -99,7 +94,7 @@ void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequenc message_stream << "New scopes: " << new_scopes << "\n"; message_stream << "Invalidated ranges:\n"; for (size_t j = 0; j < range_count; j++) { - message_stream << " " << ranges[i] << "\n"; + message_stream << " " << ranges[j] << "\n"; } Assert::Failure(message_stream.str()); } diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index b4612f9d..d9701d2e 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -8,6 +8,7 @@ #include "helpers/encoding_helpers.h" #include "helpers/record_alloc.h" #include
"helpers/random_helpers.h" +#include "helpers/scope_sequence.h" #include static void assert_correct_tree_shape(const TSDocument *document, string tree_string) { @@ -139,7 +140,16 @@ describe("The Corpus", []() { ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); - ts_document_parse(document); + + TSRange *ranges; + size_t range_count; + ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); + ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); + + ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content); + verify_changed_ranges(old_scope_sequence, new_scope_sequence, + input->content, ranges, range_count); + ts_free(ranges); }); } diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index bd07ff3c..417c8d31 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -76,7 +76,6 @@ describe("Document", [&]() { ts_document_set_input(doc, spy_input->input()); ts_document_invalidate(doc); ts_document_parse(doc); - TSNode root_node = ts_document_root_node(doc); }); it("allows the input to be retrieved later", [&]() { @@ -211,7 +210,7 @@ describe("Document", [&]() { }); }); - describe("parse_and_get_changed_ranges()", [&]() { + describe("parse_and_get_changed_ranges()", [&]() { SpyInput *input; before_each([&]() { @@ -234,6 +233,7 @@ describe("Document", [&]() { TSRange *ranges; size_t range_count = 0; + ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count); vector result; @@ -333,13 +333,18 @@ describe("Document", [&]() { it("reports changes when trees have been wrapped", [&]() { // Wrap the object in an assignment expression. 
auto ranges = get_ranges([&]() { - return input->replace(0, 0, "x.y = "); + return input->replace(input->content.find("null"), 0, "b === "); }); + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (rel_op (identifier) (null))))))"); + AssertThat(ranges, Equals(vector({ TSRange{ - TSPoint{0, 0}, - TSPoint{0, input->content.find(";")}, + TSPoint{0, input->content.find("b ===")}, + TSPoint{0, input->content.find("}")}, }, }))); }); diff --git a/src/runtime/document.c b/src/runtime/document.c index 325534b6..9afa33c2 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -91,118 +91,210 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { typedef Array(TSRange) RangeArray; -#define NAME(t) ((t) ? (ts_language_symbol_name(doc->parser.language, ((TSTree *)(t))->symbol)) : "") -// #define PRINT(msg, ...) for (size_t k = 0; k < depth; k++) { printf(" "); } printf(msg "\n", __VA_ARGS__); -#define PRINT(msg, ...) +#define NAME(t) \ + ((t) \ + ? 
(ts_language_symbol_name(doc->parser.language, ((TSTree *)(t))->symbol)) \ + : "") -static bool push_diff(RangeArray *results, TSNode *node, bool *extend_last_change) { - TSPoint start = ts_node_start_point(*node); - TSPoint end = ts_node_end_point(*node); - if (*extend_last_change) { +static bool push_change(RangeArray *results, TSPoint start, TSPoint end) { + if (results->size > 0) { TSRange *last_range = array_back(results); - last_range->end = end; - return true; - } - *extend_last_change = true; - return array_push(results, ((TSRange){start, end})); -} - -static bool ts_tree_get_changes(TSDocument *doc, TSTree *old, TSNode *new_node, - size_t depth, RangeArray *results, - bool *extend_last_change) { - TSTree *new = (TSTree *)(new_node->data); - - PRINT("At %lu, ('%s', %lu) vs ('%s', %lu) {", - ts_node_start_byte(*new_node), - NAME(old), old->size.bytes, - NAME(new), new->size.bytes); - - if (old->visible) { - if (old == new || (old->symbol == new->symbol && - old->size.bytes == new->size.bytes && !old->has_changes)) { - *extend_last_change = false; - PRINT("}", NULL); - return true; - } - - if (old->symbol != new->symbol) { - PRINT("}", NULL); - return push_diff(results, new_node, extend_last_change); - } - - TSNode child = ts_node_child(*new_node, 0); - if (child.data) { - *new_node = child; - } else { - PRINT("}", NULL); + if (ts_point_lte(start, last_range->end)) { + last_range->end = end; return true; } } - depth++; - size_t old_child_start; - size_t old_child_end = ts_node_start_byte(*new_node) - old->padding.bytes; - - for (size_t j = 0; j < old->child_count; j++) { - TSTree *old_child = old->children[j]; - if (old_child->padding.bytes == 0 && old_child->size.bytes == 0) - continue; - - old_child_start = old_child_end + old_child->padding.bytes; - old_child_end = old_child_start + old_child->size.bytes; - - while (true) { - size_t new_child_start = ts_node_start_byte(*new_node); - if (new_child_start < old_child_start) { - PRINT("skip new:('%s', %lu), 
old:('%s', %lu), old_parent:%s", - NAME(new_node->data), ts_node_start_byte(*new_node), NAME(old_child), - old_child_start, NAME(old)); - - if (!push_diff(results, new_node, extend_last_change)) - return false; - - TSNode next = ts_node_next_sibling(*new_node); - if (next.data) { - PRINT("advance before diff ('%s', %lu) -> ('%s', %lu)", - NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data), - ts_node_start_byte(next)); - - *new_node = next; - } else { - break; - } - } else if (new_child_start == old_child_start) { - if (!ts_tree_get_changes(doc, old_child, new_node, depth, results, extend_last_change)) - return false; - - if (old_child->visible) { - TSNode next = ts_node_next_sibling(*new_node); - if (next.data) { - PRINT("advance after diff ('%s', %lu) -> ('%s', %lu)", - NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data), - ts_node_start_byte(next)); - *new_node = next; - } - } - break; - } else { - break; - } - } + if (ts_point_lt(start, end)) { + TSRange range = { start, end }; + return array_push(results, range); } - depth--; - if (old->visible) { - *new_node = ts_node_parent(*new_node); - } - - PRINT("}", NULL); return true; } -int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, size_t *range_count) { - if (ranges) *ranges = NULL; - if (range_count) *range_count = 0; +static bool tree_path_descend(TreePath *path, TSPoint position) { + bool did_descend; + do { + did_descend = false; + TreePathEntry entry = *array_back(path); + TSLength child_position = entry.position; + for (size_t i = 0; i < entry.tree->child_count; i++) { + TSTree *child = entry.tree->children[i]; + TSLength child_right_position = + ts_length_add(child_position, ts_tree_total_size(child)); + if (ts_point_lt(position, child_right_position.extent)) { + TreePathEntry child_entry = { child, child_position, i }; + if (child->visible) { + array_push(path, child_entry); + return true; + } else if (child->visible_child_count > 0) { + 
array_push(path, child_entry); + did_descend = true; + break; + } + } + child_position = child_right_position; + } + } while (did_descend); + return false; +} + +static size_t tree_path_advance(TreePath *path) { + size_t ascend_count = 0; + while (path->size > 0) { + TreePathEntry entry = array_pop(path); + if (path->size == 0) + break; + TreePathEntry parent_entry = *array_back(path); + if (parent_entry.tree->visible) { + ascend_count++; + } + TSLength position = + ts_length_add(entry.position, ts_tree_total_size(entry.tree)); + for (size_t i = entry.child_index + 1, n = parent_entry.tree->child_count; + i < n; i++) { + TSTree *next_child = parent_entry.tree->children[i]; + if (next_child->visible || next_child->visible_child_count > 0) { + if (parent_entry.tree->visible) { + ascend_count--; + } + array_push(path, + ((TreePathEntry){ + .tree = next_child, .child_index = i, .position = position, + })); + if (!next_child->visible) + tree_path_descend(path, (TSPoint){ 0, 0 }); + return ascend_count; + } + position = ts_length_add(position, ts_tree_total_size(next_child)); + } + } + return ascend_count; +} + +static void tree_path_ascend(TreePath *path, size_t count) { + for (size_t i = 0; i < count; i++) { + do { + array_pop(path); + } while (path->size > 0 && !array_back(path)->tree->visible); + } +} + +static void tree_path_init(TreePath *path, TSTree *tree) { + array_clear(path); + array_push(path, + ((TreePathEntry){ + .tree = tree, .position = { 0, 0, { 0, 0 } }, .child_index = 0, + })); + if (!tree->visible) + tree_path_descend(path, (TSPoint){ 0, 0 }); +} + +static bool ts_tree_get_changes(TSDocument *doc, TreePath *old_path, + TreePath *new_path, size_t depth, + RangeArray *results) { + TSPoint position = { 0, 0 }; + + while (old_path->size && new_path->size) { + bool is_different = false; + TSPoint next_position = position; + + TreePathEntry old_entry = *array_back(old_path); + TreePathEntry new_entry = *array_back(new_path); + TSTree *old_tree = 
old_entry.tree; + TSTree *new_tree = new_entry.tree; + TSSymbol old_symbol = old_tree->symbol; + TSSymbol new_symbol = new_tree->symbol; + size_t old_start_byte = old_entry.position.bytes; + size_t new_start_byte = new_entry.position.bytes; + size_t old_end_byte = old_start_byte + ts_tree_total_bytes(old_tree); + size_t new_end_byte = new_start_byte + ts_tree_total_bytes(new_tree); + TSPoint old_start_point = + ts_point_add(old_entry.position.extent, old_tree->padding.extent); + TSPoint new_start_point = + ts_point_add(new_entry.position.extent, new_tree->padding.extent); + TSPoint old_end_point = ts_point_add(old_start_point, old_tree->size.extent); + TSPoint new_end_point = ts_point_add(new_start_point, new_tree->size.extent); + + // printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\t", + // position.row, position.column, NAME(old_tree), old_start_point.row, + // old_start_point.column, old_end_point.row, old_end_point.column, + // NAME(new_tree), new_start_point.row, new_start_point.column, + // new_end_point.row, new_end_point.column); + + if (ts_point_lt(position, old_start_point)) { + if (ts_point_lt(position, new_start_point)) { + next_position = ts_point_min(old_start_point, new_start_point); + } else { + is_different = true; + next_position = old_start_point; + } + } else if (ts_point_lt(position, new_start_point)) { + is_different = true; + next_position = new_start_point; + } else { + if (old_tree == new_tree || + (!old_tree->has_changes && old_symbol == new_symbol && + old_start_byte == new_start_byte && old_end_byte == new_end_byte && + old_tree->parse_state != TS_TREE_STATE_NONE && + new_tree->parse_state != TS_TREE_STATE_NONE)) { + next_position = old_end_point; + } else if (old_symbol == new_symbol) { + bool old_descended = tree_path_descend(old_path, position); + bool new_descended = tree_path_descend(new_path, position); + if (old_descended) { + if (!new_descended) { + 
tree_path_ascend(old_path, 1); + is_different = true; + next_position = new_end_point; + } + } else if (new_descended) { + tree_path_ascend(new_path, 1); + is_different = true; + next_position = old_end_point; + } else { + next_position = ts_point_min(old_end_point, new_end_point); + } + } else { + is_different = true; + next_position = ts_point_min(old_end_point, new_end_point); + } + } + + bool advance_old = ts_point_lte(old_end_point, next_position); + bool advance_new = ts_point_lte(new_end_point, next_position); + + if (advance_new && advance_old) { + size_t old_ascend_count = tree_path_advance(old_path); + size_t new_ascend_count = tree_path_advance(new_path); + if (old_ascend_count > new_ascend_count) { + tree_path_ascend(new_path, old_ascend_count - new_ascend_count); + } else if (new_ascend_count > old_ascend_count) { + tree_path_ascend(old_path, new_ascend_count - old_ascend_count); + } + } else if (advance_new) { + size_t ascend_count = tree_path_advance(new_path); + tree_path_ascend(old_path, ascend_count); + } else if (advance_old) { + size_t ascend_count = tree_path_advance(old_path); + tree_path_ascend(new_path, ascend_count); + } + + if (is_different) + push_change(results, position, next_position); + position = next_position; + } + + return true; +} + +int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, + size_t *range_count) { + if (ranges) + *ranges = NULL; + if (range_count) + *range_count = 0; if (!self->input.read || !self->parser.language) return -1; @@ -218,15 +310,13 @@ int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, if (self->tree) { TSTree *old_tree = self->tree; self->tree = tree; - TSNode new_root = ts_document_root_node(self); - - // ts_tree_print_dot_graph(old_tree, self->parser.language, stderr); - // ts_tree_print_dot_graph(tree, self->parser.language, stderr); if (ranges && range_count) { - bool extend_last_change = false; - RangeArray result = {0, 0, 0}; - if 
(!ts_tree_get_changes(self, old_tree, &new_root, 0, &result, &extend_last_change)) + RangeArray result = { 0, 0, 0 }; + tree_path_init(&self->parser.tree_path1, old_tree); + tree_path_init(&self->parser.tree_path2, tree); + if (!ts_tree_get_changes(self, &self->parser.tree_path1, + &self->parser.tree_path2, 0, &result)) return -1; *ranges = result.contents; *range_count = result.size; diff --git a/src/runtime/length.h b/src/runtime/length.h index e4840d32..5a6ae20a 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -18,6 +18,18 @@ static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) { return (TSPoint){0, a.column - b.column}; } +static inline bool ts_point_lte(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column <= b.column); +} + +static inline bool ts_point_lt(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column < b.column); +} + +static inline bool ts_point_eq(TSPoint a, TSPoint b) { + return a.row == b.row && a.column == b.column; +} + static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { if (a.row < b.row || (a.row == b.row && a.column < b.column)) return a; @@ -25,6 +37,13 @@ static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { return b; } +static inline TSPoint ts_point_max(TSPoint a, TSPoint b) { + if (a.row > b.row || (a.row == b.row && a.column > b.column)) + return a; + else + return b; +} + static inline bool ts_length_is_unknown(TSLength self) { return self.bytes > 0 && self.chars == 0; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 75b2b4df..cfe70439 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1226,7 +1226,8 @@ bool parser_init(Parser *self) { self->finished_tree = NULL; self->stack = NULL; array_init(&self->reduce_actions); - array_init(&self->tree_stack); + array_init(&self->tree_path1); + array_init(&self->tree_path2); self->stack = ts_stack_new(); if (!self->stack) @@ -1252,8 +1253,10 @@ void parser_destroy(Parser *self) { 
ts_stack_delete(self->stack); if (self->reduce_actions.contents) array_delete(&self->reduce_actions); - if (self->tree_stack.contents) - array_delete(&self->tree_stack); + if (self->tree_path1.contents) + array_delete(&self->tree_path1); + if (self->tree_path2.contents) + array_delete(&self->tree_path2); } TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { @@ -1299,7 +1302,7 @@ TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { LOG_TREE(); ts_stack_clear(self->stack); parser__clear_cached_token(self); - CHECK(ts_tree_assign_parents(self->finished_tree, &self->tree_stack)); + CHECK(ts_tree_assign_parents(self->finished_tree, &self->tree_path1)); return self->finished_tree; error: diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 4bc22697..a5bd60f0 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -26,7 +26,8 @@ typedef struct { TSTree *cached_token; size_t cached_token_byte_index; ReusableNode reusable_node; - TreeArray tree_stack; + TreePath tree_path1; + TreePath tree_path2; } Parser; bool parser_init(Parser *); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 930ff0a3..1aae1393 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -29,6 +29,7 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, .visible = metadata.visible, .named = metadata.named, .first_leaf.symbol = sym, + .has_changes = false, }; return result; @@ -91,12 +92,12 @@ TSTree *ts_tree_make_copy(TSTree *self) { return result; } -bool ts_tree_assign_parents(TSTree *self, TreeArray *stack) { - array_clear(stack); - if (!array_push(stack, self)) +bool ts_tree_assign_parents(TSTree *self, TreePath *path) { + array_clear(path); + if (!array_push(path, ((TreePathEntry){self, ts_length_zero(), 0}))) return false; - while (stack->size > 0) { - TSTree *tree = array_pop(stack); + while (path->size > 0) { + TSTree *tree = array_pop(path).tree; TSLength offset = ts_length_zero(); for (size_t i = 0; i < 
tree->child_count; i++) { TSTree *child = tree->children[i]; @@ -104,7 +105,7 @@ bool ts_tree_assign_parents(TSTree *self, TreeArray *stack) { child->context.parent = tree; child->context.index = i; child->context.offset = offset; - if (!array_push(stack, child)) + if (!array_push(path, ((TreePathEntry){child, ts_length_zero(), 0}))) return false; } offset = ts_length_add(offset, ts_tree_total_size(child)); @@ -458,7 +459,7 @@ char *ts_tree_string(const TSTree *self, const TSLanguage *language, return result; } -void ts_tree__print_dot_graph(const TSTree *self, size_t offset, +void ts_tree__print_dot_graph(const TSTree *self, size_t byte_offset, const TSLanguage *language, FILE *f) { fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, self->symbol)); @@ -469,13 +470,13 @@ void ts_tree__print_dot_graph(const TSTree *self, size_t offset, fprintf(f, ", fontcolor=gray"); fprintf(f, ", tooltip=\"range:%lu - %lu\nstate:%d\nerror-cost:%u\"]\n", - offset, offset + ts_tree_total_chars(self), self->parse_state, + byte_offset, byte_offset + ts_tree_total_bytes(self), self->parse_state, self->error_cost); for (size_t i = 0; i < self->child_count; i++) { const TSTree *child = self->children[i]; - ts_tree__print_dot_graph(child, offset, language, f); + ts_tree__print_dot_graph(child, byte_offset, language, f); fprintf(f, "tree_%p -> tree_%p [tooltip=%lu]\n", self, child, i); - offset += ts_tree_total_chars(child); + byte_offset += ts_tree_total_bytes(child); } } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 146897d8..11ef85aa 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -49,7 +49,16 @@ typedef struct TSTree { bool has_changes : 1; } TSTree; +typedef struct { + TSTree *tree; + TSLength position; + size_t child_index; +} TreePathEntry; + typedef Array(TSTree *) TreeArray; + +typedef Array(TreePathEntry) TreePath; + bool ts_tree_array_copy(TreeArray, TreeArray *); void ts_tree_array_delete(TreeArray *); size_t 
ts_tree_array_essential_count(const TreeArray *); @@ -67,7 +76,7 @@ int ts_tree_compare(const TSTree *tree1, const TSTree *tree2); size_t ts_tree_start_column(const TSTree *self); size_t ts_tree_end_column(const TSTree *self); void ts_tree_set_children(TSTree *, size_t, TSTree **); -bool ts_tree_assign_parents(TSTree *, TreeArray *); +bool ts_tree_assign_parents(TSTree *, TreePath *); void ts_tree_edit(TSTree *, const TSInputEdit *edit); char *ts_tree_string(const TSTree *, const TSLanguage *, bool include_all); void ts_tree_print_dot_graph(const TSTree *, const TSLanguage *, FILE *); From 4bad58b407bfd3a572ae9db1b6122eac5a806bf1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 16 Oct 2016 14:54:59 -0700 Subject: [PATCH 12/16] Remove outdated test --- spec/runtime/parser_spec.cc | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 0e6bb97b..174b4110 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -340,22 +340,6 @@ describe("Parser", [&]() { }); }); - describe("with non-ascii characters", [&]() { - it("inserts the text according to the UTF8 character index", [&]() { - // 'αβδ' + '1' - set_text("'\u03b1\u03b2\u03b4' + '1';"); - - assert_root_node( - "(program (expression_statement (math_op (string) (string))))"); - - // 'αβδ' + 'ψ1' - insert_text(strlen("'abd' + '"), "\u03c8"); - - assert_root_node( - "(program (expression_statement (math_op (string) (string))))"); - }); - }); - describe("into a node containing a extra token", [&]() { it("updates the parse tree", [&]() { set_text("123 *\n" From 1412419434fa590175671b16ed8925efe1442e6b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 16 Oct 2016 14:55:51 -0700 Subject: [PATCH 13/16] Verify changed ranges for random deletions as well as random insertions --- spec/integration/corpus_specs.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/spec/integration/corpus_specs.cc 
b/spec/integration/corpus_specs.cc index d9701d2e..c9ec9876 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -163,7 +163,16 @@ describe("The Corpus", []() { ts_document_edit(document, input->undo()); assert_correct_tree_size(document, input->content); - ts_document_parse(document); + + TSRange *ranges; + size_t range_count; + ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); + ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); + + ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content); + verify_changed_ranges(old_scope_sequence, new_scope_sequence, + input->content, ranges, range_count); + ts_free(ranges); }); } } From 25d63d68f7c0300a250c10f159598b8344afe0bc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 16 Oct 2016 20:42:55 -0700 Subject: [PATCH 14/16] Move TreePath functions into their own file --- src/runtime/document.c | 214 +--------------------------------------- src/runtime/tree_path.h | 208 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 209 deletions(-) create mode 100644 src/runtime/tree_path.h diff --git a/src/runtime/document.c b/src/runtime/document.c index 9afa33c2..ec664c45 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -5,6 +5,7 @@ #include "runtime/parser.h" #include "runtime/string_input.h" #include "runtime/document.h" +#include "runtime/tree_path.h" TSDocument *ts_document_new() { TSDocument *self = ts_calloc(1, sizeof(TSDocument)); @@ -89,212 +90,10 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { ts_tree_edit(self->tree, &edit); } -typedef Array(TSRange) RangeArray; - -#define NAME(t) \ - ((t) \ - ? 
(ts_language_symbol_name(doc->parser.language, ((TSTree *)(t))->symbol)) \ - : "") - -static bool push_change(RangeArray *results, TSPoint start, TSPoint end) { - if (results->size > 0) { - TSRange *last_range = array_back(results); - if (ts_point_lte(start, last_range->end)) { - last_range->end = end; - return true; - } - } - - if (ts_point_lt(start, end)) { - TSRange range = { start, end }; - return array_push(results, range); - } - - return true; -} - -static bool tree_path_descend(TreePath *path, TSPoint position) { - bool did_descend; - do { - did_descend = false; - TreePathEntry entry = *array_back(path); - TSLength child_position = entry.position; - for (size_t i = 0; i < entry.tree->child_count; i++) { - TSTree *child = entry.tree->children[i]; - TSLength child_right_position = - ts_length_add(child_position, ts_tree_total_size(child)); - if (ts_point_lt(position, child_right_position.extent)) { - TreePathEntry child_entry = { child, child_position, i }; - if (child->visible) { - array_push(path, child_entry); - return true; - } else if (child->visible_child_count > 0) { - array_push(path, child_entry); - did_descend = true; - break; - } - } - child_position = child_right_position; - } - } while (did_descend); - return false; -} - -static size_t tree_path_advance(TreePath *path) { - size_t ascend_count = 0; - while (path->size > 0) { - TreePathEntry entry = array_pop(path); - if (path->size == 0) - break; - TreePathEntry parent_entry = *array_back(path); - if (parent_entry.tree->visible) { - ascend_count++; - } - TSLength position = - ts_length_add(entry.position, ts_tree_total_size(entry.tree)); - for (size_t i = entry.child_index + 1, n = parent_entry.tree->child_count; - i < n; i++) { - TSTree *next_child = parent_entry.tree->children[i]; - if (next_child->visible || next_child->visible_child_count > 0) { - if (parent_entry.tree->visible) { - ascend_count--; - } - array_push(path, - ((TreePathEntry){ - .tree = next_child, .child_index = i, .position = 
position, - })); - if (!next_child->visible) - tree_path_descend(path, (TSPoint){ 0, 0 }); - return ascend_count; - } - position = ts_length_add(position, ts_tree_total_size(next_child)); - } - } - return ascend_count; -} - -static void tree_path_ascend(TreePath *path, size_t count) { - for (size_t i = 0; i < count; i++) { - do { - array_pop(path); - } while (path->size > 0 && !array_back(path)->tree->visible); - } -} - -static void tree_path_init(TreePath *path, TSTree *tree) { - array_clear(path); - array_push(path, - ((TreePathEntry){ - .tree = tree, .position = { 0, 0, { 0, 0 } }, .child_index = 0, - })); - if (!tree->visible) - tree_path_descend(path, (TSPoint){ 0, 0 }); -} - -static bool ts_tree_get_changes(TSDocument *doc, TreePath *old_path, - TreePath *new_path, size_t depth, - RangeArray *results) { - TSPoint position = { 0, 0 }; - - while (old_path->size && new_path->size) { - bool is_different = false; - TSPoint next_position = position; - - TreePathEntry old_entry = *array_back(old_path); - TreePathEntry new_entry = *array_back(new_path); - TSTree *old_tree = old_entry.tree; - TSTree *new_tree = new_entry.tree; - TSSymbol old_symbol = old_tree->symbol; - TSSymbol new_symbol = new_tree->symbol; - size_t old_start_byte = old_entry.position.bytes; - size_t new_start_byte = new_entry.position.bytes; - size_t old_end_byte = old_start_byte + ts_tree_total_bytes(old_tree); - size_t new_end_byte = new_start_byte + ts_tree_total_bytes(new_tree); - TSPoint old_start_point = - ts_point_add(old_entry.position.extent, old_tree->padding.extent); - TSPoint new_start_point = - ts_point_add(new_entry.position.extent, new_tree->padding.extent); - TSPoint old_end_point = ts_point_add(old_start_point, old_tree->size.extent); - TSPoint new_end_point = ts_point_add(new_start_point, new_tree->size.extent); - - // printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\t", - // position.row, position.column, 
NAME(old_tree), old_start_point.row, - // old_start_point.column, old_end_point.row, old_end_point.column, - // NAME(new_tree), new_start_point.row, new_start_point.column, - // new_end_point.row, new_end_point.column); - - if (ts_point_lt(position, old_start_point)) { - if (ts_point_lt(position, new_start_point)) { - next_position = ts_point_min(old_start_point, new_start_point); - } else { - is_different = true; - next_position = old_start_point; - } - } else if (ts_point_lt(position, new_start_point)) { - is_different = true; - next_position = new_start_point; - } else { - if (old_tree == new_tree || - (!old_tree->has_changes && old_symbol == new_symbol && - old_start_byte == new_start_byte && old_end_byte == new_end_byte && - old_tree->parse_state != TS_TREE_STATE_NONE && - new_tree->parse_state != TS_TREE_STATE_NONE)) { - next_position = old_end_point; - } else if (old_symbol == new_symbol) { - bool old_descended = tree_path_descend(old_path, position); - bool new_descended = tree_path_descend(new_path, position); - if (old_descended) { - if (!new_descended) { - tree_path_ascend(old_path, 1); - is_different = true; - next_position = new_end_point; - } - } else if (new_descended) { - tree_path_ascend(new_path, 1); - is_different = true; - next_position = old_end_point; - } else { - next_position = ts_point_min(old_end_point, new_end_point); - } - } else { - is_different = true; - next_position = ts_point_min(old_end_point, new_end_point); - } - } - - bool advance_old = ts_point_lte(old_end_point, next_position); - bool advance_new = ts_point_lte(new_end_point, next_position); - - if (advance_new && advance_old) { - size_t old_ascend_count = tree_path_advance(old_path); - size_t new_ascend_count = tree_path_advance(new_path); - if (old_ascend_count > new_ascend_count) { - tree_path_ascend(new_path, old_ascend_count - new_ascend_count); - } else if (new_ascend_count > old_ascend_count) { - tree_path_ascend(old_path, new_ascend_count - old_ascend_count); - } - } 
else if (advance_new) { - size_t ascend_count = tree_path_advance(new_path); - tree_path_ascend(old_path, ascend_count); - } else if (advance_old) { - size_t ascend_count = tree_path_advance(old_path); - tree_path_ascend(new_path, ascend_count); - } - - if (is_different) - push_change(results, position, next_position); - position = next_position; - } - - return true; -} - int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, size_t *range_count) { - if (ranges) - *ranges = NULL; - if (range_count) - *range_count = 0; + if (ranges) *ranges = NULL; + if (range_count) *range_count = 0; if (!self->input.read || !self->parser.language) return -1; @@ -312,14 +111,11 @@ int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, self->tree = tree; if (ranges && range_count) { - RangeArray result = { 0, 0, 0 }; tree_path_init(&self->parser.tree_path1, old_tree); tree_path_init(&self->parser.tree_path2, tree); - if (!ts_tree_get_changes(self, &self->parser.tree_path1, - &self->parser.tree_path2, 0, &result)) + if (!tree_path_get_changes(&self->parser.tree_path1, + &self->parser.tree_path2, ranges, range_count)) return -1; - *ranges = result.contents; - *range_count = result.size; } ts_tree_release(old_tree); diff --git a/src/runtime/tree_path.h b/src/runtime/tree_path.h new file mode 100644 index 00000000..52100092 --- /dev/null +++ b/src/runtime/tree_path.h @@ -0,0 +1,208 @@ +#ifndef RUNTIME_TREE_PATH_H_ +#define RUNTIME_TREE_PATH_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "runtime/tree.h" + +typedef Array(TSRange) RangeArray; + +static bool range_array_add(RangeArray *results, TSPoint start, TSPoint end) { + if (results->size > 0) { + TSRange *last_range = array_back(results); + if (ts_point_lte(start, last_range->end)) { + last_range->end = end; + return true; + } + } + + if (ts_point_lt(start, end)) { + TSRange range = { start, end }; + return array_push(results, range); + } + + return true; +} + +static 
bool tree_path_descend(TreePath *path, TSPoint position) { + bool did_descend; + do { + did_descend = false; + TreePathEntry entry = *array_back(path); + TSLength child_position = entry.position; + for (size_t i = 0; i < entry.tree->child_count; i++) { + TSTree *child = entry.tree->children[i]; + TSLength child_right_position = + ts_length_add(child_position, ts_tree_total_size(child)); + if (ts_point_lt(position, child_right_position.extent)) { + TreePathEntry child_entry = { child, child_position, i }; + if (child->visible) { + array_push(path, child_entry); + return true; + } else if (child->visible_child_count > 0) { + array_push(path, child_entry); + did_descend = true; + break; + } + } + child_position = child_right_position; + } + } while (did_descend); + return false; +} + +static size_t tree_path_advance(TreePath *path) { + size_t ascend_count = 0; + while (path->size > 0) { + TreePathEntry entry = array_pop(path); + if (path->size == 0) + break; + TreePathEntry parent_entry = *array_back(path); + if (parent_entry.tree->visible) ascend_count++; + TSLength position = + ts_length_add(entry.position, ts_tree_total_size(entry.tree)); + for (size_t i = entry.child_index + 1; i < parent_entry.tree->child_count; i++) { + TSTree *next_child = parent_entry.tree->children[i]; + if (next_child->visible || next_child->visible_child_count > 0) { + if (parent_entry.tree->visible) ascend_count--; + array_push(path, ((TreePathEntry){ + .tree = next_child, + .child_index = i, + .position = position, + })); + if (!next_child->visible) + tree_path_descend(path, (TSPoint){ 0, 0 }); + return ascend_count; + } + position = ts_length_add(position, ts_tree_total_size(next_child)); + } + } + return ascend_count; +} + +static void tree_path_ascend(TreePath *path, size_t count) { + for (size_t i = 0; i < count; i++) { + do { + array_pop(path); + } while (path->size > 0 && !array_back(path)->tree->visible); + } +} + +static void tree_path_init(TreePath *path, TSTree *tree) { + 
array_clear(path); + array_push(path, + ((TreePathEntry){ + .tree = tree, .position = { 0, 0, { 0, 0 } }, .child_index = 0, + })); + if (!tree->visible) + tree_path_descend(path, (TSPoint){ 0, 0 }); +} + +static bool tree_must_eq(TSTree *old_tree, TSTree *new_tree) { + return old_tree == new_tree || ( + !old_tree->has_changes && + old_tree->symbol == new_tree->symbol && + old_tree->size.bytes == new_tree->size.bytes && + old_tree->parse_state != TS_TREE_STATE_NONE && + new_tree->parse_state != TS_TREE_STATE_NONE && + (old_tree->parse_state == TS_STATE_ERROR) == + (new_tree->parse_state == TS_STATE_ERROR) + ); +} + +static bool tree_path_get_changes(TreePath *old_path, TreePath *new_path, + TSRange **ranges, size_t *range_count) { + TSPoint position = { 0, 0 }; + RangeArray results = array_new(); + + while (old_path->size && new_path->size) { + bool is_changed = false; + TSPoint next_position = position; + + TreePathEntry old_entry = *array_back(old_path); + TreePathEntry new_entry = *array_back(new_path); + TSTree *old_tree = old_entry.tree; + TSTree *new_tree = new_entry.tree; + size_t old_start_byte = old_entry.position.bytes + old_tree->padding.bytes; + size_t new_start_byte = new_entry.position.bytes + new_tree->padding.bytes; + TSPoint old_start_point = + ts_point_add(old_entry.position.extent, old_tree->padding.extent); + TSPoint new_start_point = + ts_point_add(new_entry.position.extent, new_tree->padding.extent); + TSPoint old_end_point = ts_point_add(old_start_point, old_tree->size.extent); + TSPoint new_end_point = ts_point_add(new_start_point, new_tree->size.extent); + + // #define NAME(t) (ts_language_symbol_name(language, ((TSTree *)(t))->symbol)) + // printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\n", + // position.row, position.column, NAME(old_tree), old_start_point.row, + // old_start_point.column, old_end_point.row, old_end_point.column, + // NAME(new_tree), new_start_point.row, 
new_start_point.column, + // new_end_point.row, new_end_point.column); + + if (ts_point_lt(position, old_start_point)) { + if (ts_point_lt(position, new_start_point)) { + next_position = ts_point_min(old_start_point, new_start_point); + } else { + is_changed = true; + next_position = old_start_point; + } + } else if (ts_point_lt(position, new_start_point)) { + is_changed = true; + next_position = new_start_point; + } else if (old_start_byte == new_start_byte && + tree_must_eq(old_tree, new_tree)) { + next_position = old_end_point; + } else if (old_tree->symbol == new_tree->symbol) { + if (tree_path_descend(old_path, position)) { + if (!tree_path_descend(new_path, position)) { + tree_path_ascend(old_path, 1); + is_changed = true; + next_position = new_end_point; + } + } else if (tree_path_descend(new_path, position)) { + tree_path_ascend(new_path, 1); + is_changed = true; + next_position = old_end_point; + } else { + next_position = ts_point_min(old_end_point, new_end_point); + } + } else { + is_changed = true; + next_position = ts_point_min(old_end_point, new_end_point); + } + + bool at_old_end = ts_point_lte(old_end_point, next_position); + bool at_new_end = ts_point_lte(new_end_point, next_position); + + if (at_new_end && at_old_end) { + size_t old_ascend_count = tree_path_advance(old_path); + size_t new_ascend_count = tree_path_advance(new_path); + if (old_ascend_count > new_ascend_count) { + tree_path_ascend(new_path, old_ascend_count - new_ascend_count); + } else if (new_ascend_count > old_ascend_count) { + tree_path_ascend(old_path, new_ascend_count - old_ascend_count); + } + } else if (at_new_end) { + size_t ascend_count = tree_path_advance(new_path); + tree_path_ascend(old_path, ascend_count); + } else if (at_old_end) { + size_t ascend_count = tree_path_advance(old_path); + tree_path_ascend(new_path, ascend_count); + } + + if (is_changed) range_array_add(&results, position, next_position); + position = next_position; + } + + *ranges = results.contents; + 
*range_count = results.size; + return true; +} + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_TREE_H_ From 44b0c96579d3e7fbd3476b1257fc424238607061 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 16 Oct 2016 21:21:21 -0700 Subject: [PATCH 15/16] Move point functions to their own file --- src/runtime/length.h | 41 +----------------------------------- src/runtime/point.h | 46 +++++++++++++++++++++++++++++++++++++++++ src/runtime/tree_path.h | 10 +-------- 3 files changed, 48 insertions(+), 49 deletions(-) create mode 100644 src/runtime/point.h diff --git a/src/runtime/length.h b/src/runtime/length.h index 16590ca0..d4ce259e 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -3,6 +3,7 @@ #include #include +#include "runtime/point.h" #include "tree_sitter/runtime.h" typedef struct { @@ -11,46 +12,6 @@ typedef struct { TSPoint extent; } TSLength; -static inline TSPoint ts_point_add(TSPoint a, TSPoint b) { - if (b.row > 0) - return (TSPoint){a.row + b.row, b.column}; - else - return (TSPoint){a.row, a.column + b.column}; -} - -static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) { - if (a.row > b.row) - return (TSPoint){a.row - b.row, a.column}; - else - return (TSPoint){0, a.column - b.column}; -} - -static inline bool ts_point_lte(TSPoint a, TSPoint b) { - return (a.row < b.row) || (a.row == b.row && a.column <= b.column); -} - -static inline bool ts_point_lt(TSPoint a, TSPoint b) { - return (a.row < b.row) || (a.row == b.row && a.column < b.column); -} - -static inline bool ts_point_eq(TSPoint a, TSPoint b) { - return a.row == b.row && a.column == b.column; -} - -static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { - if (a.row < b.row || (a.row == b.row && a.column < b.column)) - return a; - else - return b; -} - -static inline TSPoint ts_point_max(TSPoint a, TSPoint b) { - if (a.row > b.row || (a.row == b.row && a.column > b.column)) - return a; - else - return b; -} - static inline bool ts_length_is_unknown(TSLength self) { 
return self.bytes > 0 && self.chars == 0; } diff --git a/src/runtime/point.h b/src/runtime/point.h new file mode 100644 index 00000000..8909a758 --- /dev/null +++ b/src/runtime/point.h @@ -0,0 +1,46 @@ +#ifndef RUNTIME_POINT_H_ +#define RUNTIME_POINT_H_ + +#include "tree_sitter/runtime.h" + +static inline TSPoint ts_point_add(TSPoint a, TSPoint b) { + if (b.row > 0) + return (TSPoint){a.row + b.row, b.column}; + else + return (TSPoint){a.row, a.column + b.column}; +} + +static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) { + if (a.row > b.row) + return (TSPoint){a.row - b.row, a.column}; + else + return (TSPoint){0, a.column - b.column}; +} + +static inline bool ts_point_lte(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column <= b.column); +} + +static inline bool ts_point_lt(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column < b.column); +} + +static inline bool ts_point_eq(TSPoint a, TSPoint b) { + return a.row == b.row && a.column == b.column; +} + +static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { + if (a.row < b.row || (a.row == b.row && a.column < b.column)) + return a; + else + return b; +} + +static inline TSPoint ts_point_max(TSPoint a, TSPoint b) { + if (a.row > b.row || (a.row == b.row && a.column > b.column)) + return a; + else + return b; +} + +#endif diff --git a/src/runtime/tree_path.h b/src/runtime/tree_path.h index 30b0b990..f6bf2366 100644 --- a/src/runtime/tree_path.h +++ b/src/runtime/tree_path.h @@ -1,10 +1,6 @@ #ifndef RUNTIME_TREE_PATH_H_ #define RUNTIME_TREE_PATH_H_ -#ifdef __cplusplus -extern "C" { -#endif - #include "runtime/tree.h" #include "runtime/error_costs.h" @@ -202,8 +198,4 @@ static bool tree_path_get_changes(TreePath *old_path, TreePath *new_path, return true; } -#ifdef __cplusplus -} -#endif - -#endif // RUNTIME_TREE_H_ +#endif // RUNTIME_TREE_PATH_H_ From 5638fea12067c06e5ccd8ec7d10396a85260556d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 16 Oct 
2016 21:21:53 -0700 Subject: [PATCH 16/16] Rename length_set_unknown -> length_set_unknown_chars --- src/runtime/length.h | 8 ++++---- src/runtime/tree.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/runtime/length.h b/src/runtime/length.h index d4ce259e..9df859d8 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -12,11 +12,11 @@ typedef struct { TSPoint extent; } TSLength; -static inline bool ts_length_is_unknown(TSLength self) { +static inline bool ts_length_has_unknown_chars(TSLength self) { return self.bytes > 0 && self.chars == 0; } -static inline void ts_length_set_unknown(TSLength *self) { +static inline void ts_length_set_unknown_chars(TSLength *self) { self->chars = 0; } @@ -30,7 +30,7 @@ static inline TSLength ts_length_add(TSLength len1, TSLength len2) { result.bytes = len1.bytes + len2.bytes; result.extent = ts_point_add(len1.extent, len2.extent); - if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { + if (ts_length_has_unknown_chars(len1) || ts_length_has_unknown_chars(len2)) { result.chars = 0; } else { result.chars = len1.chars + len2.chars; @@ -44,7 +44,7 @@ static inline TSLength ts_length_sub(TSLength len1, TSLength len2) { result.bytes = len1.bytes - len2.bytes; result.extent = ts_point_sub(len1.extent, len2.extent); - if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { + if (ts_length_has_unknown_chars(len1) || ts_length_has_unknown_chars(len2)) { result.chars = 0; } else { result.chars = len1.chars - len2.chars; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index f35b372f..2f0760c6 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -320,14 +320,14 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { self->has_changes = true; if (edit->start_byte < self->padding.bytes) { - ts_length_set_unknown(&self->padding); + ts_length_set_unknown_chars(&self->padding); if (self->padding.bytes >= old_end_byte) { size_t trailing_padding_bytes = self->padding.bytes 
- old_end_byte; TSPoint trailing_padding_extent = ts_point_sub(self->padding.extent, old_end_point); self->padding.bytes = new_end_byte + trailing_padding_bytes; self->padding.extent = ts_point_add(new_end_point, trailing_padding_extent); } else { - ts_length_set_unknown(&self->size); + ts_length_set_unknown_chars(&self->size); size_t removed_content_bytes = old_end_byte - self->padding.bytes; TSPoint removed_content_extent = ts_point_sub(old_end_point, self->padding.extent); self->size.bytes = self->size.bytes - removed_content_bytes; @@ -336,11 +336,11 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { self->padding.extent = new_end_point; } } else if (edit->start_byte == self->padding.bytes && edit->bytes_removed == 0) { - ts_length_set_unknown(&self->padding); + ts_length_set_unknown_chars(&self->padding); self->padding.bytes = self->padding.bytes + edit->bytes_added; self->padding.extent = ts_point_add(self->padding.extent, edit->extent_added); } else { - ts_length_set_unknown(&self->size); + ts_length_set_unknown_chars(&self->size); size_t trailing_content_bytes = ts_tree_total_bytes(self) - old_end_byte; TSPoint trailing_content_extent = ts_point_sub(ts_tree_total_extent(self), old_end_point); self->size.bytes = new_end_byte + trailing_content_bytes - self->padding.bytes;