diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index a1cd7c92..5ee5ba6f 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -34,17 +34,25 @@ typedef struct { void (*log)(void *payload, TSLogType, const char *); } TSLogger; -typedef struct { - size_t position; - size_t chars_inserted; - size_t chars_removed; -} TSInputEdit; - typedef struct { size_t row; size_t column; } TSPoint; +typedef struct { + size_t start_byte; + size_t bytes_removed; + size_t bytes_added; + TSPoint start_point; + TSPoint extent_removed; + TSPoint extent_added; +} TSInputEdit; + +typedef struct { + TSPoint start; + TSPoint end; +} TSRange; + typedef struct { const void *data; size_t offset[3]; @@ -98,6 +106,7 @@ void ts_document_set_logger(TSDocument *, TSLogger); void ts_document_print_debugging_graphs(TSDocument *, bool); void ts_document_edit(TSDocument *, TSInputEdit); int ts_document_parse(TSDocument *); +int ts_document_parse_and_get_changed_ranges(TSDocument *, TSRange **, size_t *); void ts_document_invalidate(TSDocument *); TSNode ts_document_root_node(const TSDocument *); size_t ts_document_parse_count(const TSDocument *); diff --git a/spec/helpers/point_helpers.cc b/spec/helpers/point_helpers.cc index 014a4c1c..e61faf3e 100644 --- a/spec/helpers/point_helpers.cc +++ b/spec/helpers/point_helpers.cc @@ -1,3 +1,4 @@ +#include "./point_helpers.h" #include #include #include "runtime/length.h" @@ -9,8 +10,12 @@ bool operator==(const TSPoint &left, const TSPoint &right) { return left.row == right.row && left.column == right.column; } -std::ostream &operator<<(std::ostream &stream, const TSPoint &point) { - return stream << "{" << point.row << ", " << point.column << "}"; +bool operator==(const TSRange &left, const TSRange &right) { + return left.start == right.start && left.end == right.end; +} + +bool operator==(const TSLength &left, const TSLength &right) { + return ts_length_eq(left, right); } bool operator<(const TSPoint &left, const TSPoint &right) { @@ -23,3 +28,16 @@ bool operator<(const TSPoint &left, const TSPoint &right) { bool operator>(const TSPoint &left, const TSPoint &right) { return right < left; } + +std::ostream &operator<<(std::ostream &stream, const TSPoint &point) { + return stream << "{" << point.row << ", " << point.column << "}"; +} + +std::ostream &operator<<(std::ostream &stream, const TSRange &range) { + return stream << "{" << range.start << ", " << range.end << "}"; +} + +ostream &operator<<(ostream &stream, const TSLength &length) { + return stream << "{chars:" << length.chars << ", bytes:" << + length.bytes << ", extent:" << length.extent << "}"; +} diff --git a/spec/helpers/point_helpers.h b/spec/helpers/point_helpers.h index 321f05ad..de5edd32 100644 --- a/spec/helpers/point_helpers.h +++ b/spec/helpers/point_helpers.h @@ -1,12 +1,23 @@ #ifndef HELPERS_POINT_HELPERS_H_ #define HELPERS_POINT_HELPERS_H_ +#include "runtime/length.h" +#include + bool operator==(const TSPoint &left, const TSPoint &right); bool operator<(const TSPoint &left, const TSPoint &right); bool operator>(const TSPoint &left, const TSPoint &right); +bool operator==(const TSRange &left, const TSRange &right); + +bool operator==(const TSLength &left, const TSLength &right); + std::ostream &operator<<(std::ostream &stream, const TSPoint &point); +std::ostream &operator<<(std::ostream &stream, const TSRange &range); + +std::ostream &operator<<(std::ostream &stream, const TSLength &length); + #endif // HELPERS_POINT_HELPERS_H_ diff --git a/spec/helpers/scope_sequence.cc b/spec/helpers/scope_sequence.cc new file mode 100644 index 00000000..87e059dc --- /dev/null +++ b/spec/helpers/scope_sequence.cc @@ -0,0 +1,105 @@ +#include "./scope_sequence.h" + +#include "bandit/bandit.h" +#include +#include "helpers/stream_methods.h" +#include "helpers/point_helpers.h" + +using std::string; +using std::cout; + +static void append_text_to_scope_sequence(ScopeSequence *sequence, + ScopeStack *current_scopes, + const std::string &text, + size_t length) { + for (size_t i = 0; i < length; i++) { + string character(1, text[sequence->size()]); + sequence->push_back(*current_scopes); + sequence->back().push_back("'" + character + "'"); + } +} + +static void append_to_scope_sequence(ScopeSequence *sequence, + ScopeStack *current_scopes, + TSNode node, TSDocument *document, + const std::string &text) { + append_text_to_scope_sequence(sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size()); + + string scope = ts_node_type(node, document); + current_scopes->push_back(scope); + size_t child_count = ts_node_child_count(node); + if (child_count > 0) { + for (size_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + append_to_scope_sequence(sequence, current_scopes, child, document, text); + } + } else { + size_t length = ts_node_end_byte(node) - ts_node_start_byte(node); + append_text_to_scope_sequence(sequence, current_scopes, text, length); + } + current_scopes->pop_back(); +} + +ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text) { + ScopeSequence sequence; + ScopeStack current_scopes; + TSNode node = ts_document_root_node(document); + append_to_scope_sequence(&sequence, ¤t_scopes, node, document, text); + return sequence; +} + +bool operator<=(const TSPoint &left, const TSPoint &right) { + if (left.row < right.row) + return true; + else if (left.row == right.row) + return left.column <= right.column; + else + return false; +} + +void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequence &new_sequence, + const string &text, TSRange *ranges, size_t range_count) { + TSPoint current_position = {0, 0}; + for (size_t i = 0; i < old_sequence.size(); i++) { + if (text[i] == '\n') { + current_position.row++; + current_position.column = 0; + continue; + } + + const ScopeStack &old_scopes = old_sequence[i]; + const ScopeStack &new_scopes = new_sequence[i]; + if (old_scopes != new_scopes) { + bool found_containing_range = false; + for (size_t j = 0; j < range_count; j++) { + TSRange range = ranges[j]; + if (range.start <= current_position && current_position <= range.end) { + found_containing_range = true; + break; + } + } + + if (!found_containing_range) { + std::stringstream message_stream; + message_stream << "Found changed scope outside of any invalidated range;\n"; + message_stream << "Position: " << current_position << "\n"; + message_stream << "Byte index: " << i << "\n"; + size_t line_start_index = i - current_position.column; + size_t line_end_index = text.find_first_of('\n', i); + message_stream << "Line: " << text.substr(line_start_index, line_end_index - line_start_index) << "\n"; + for (size_t j = 0; j < current_position.column + string("Line: ").size(); j++) + message_stream << " "; + message_stream << "^\n"; + message_stream << "Old scopes: " << old_scopes << "\n"; + message_stream << "New scopes: " << new_scopes << "\n"; + message_stream << "Invalidated ranges:\n"; + for (size_t j = 0; j < range_count; j++) { + message_stream << " " << ranges[j] << "\n"; + } + Assert::Failure(message_stream.str()); + } + } + + current_position.column++; + } +} diff --git a/spec/helpers/scope_sequence.h b/spec/helpers/scope_sequence.h new file mode 100644 index 00000000..c83ad597 --- /dev/null +++ b/spec/helpers/scope_sequence.h @@ -0,0 +1,16 @@ +#ifndef HELPERS_SCOPE_SEQUENCE_H_ +#define HELPERS_SCOPE_SEQUENCE_H_ + +#include +#include +#include "tree_sitter/runtime.h" + +typedef std::string Scope; +typedef std::vector ScopeStack; +typedef std::vector ScopeSequence; + +ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text); + +void verify_changed_ranges(const ScopeSequence &old, const ScopeSequence &new_sequence, const std::string &text, TSRange *ranges, size_t range_count); + +#endif // HELPERS_SCOPE_SEQUENCE_H_ diff --git a/spec/helpers/spy_input.cc b/spec/helpers/spy_input.cc index d5d4bf96..c56ab67e 100644 --- a/spec/helpers/spy_input.cc +++ b/spec/helpers/spy_input.cc @@ -4,6 +4,7 @@ #include #include +using std::pair; using std::string; static const size_t UTF8_MAX_CHAR_SIZE = 4; @@ -68,34 +69,63 @@ TSInput SpyInput::input() { return result; } -TSInputEdit SpyInput::replace(size_t start_char, size_t chars_removed, string text) { - string text_removed = swap_substr(start_char, chars_removed, text); - size_t chars_inserted = string_char_count(encoding, text); - undo_stack.push_back(SpyInputEdit{start_char, chars_inserted, text_removed}); - return {start_char, chars_inserted, chars_removed}; +static TSPoint get_extent(string text) { + TSPoint result = {0, 0}; + for (auto i = text.begin(); i != text.end(); i++) { + if (*i == '\n') { + result.row++; + result.column = 0; + } else { + result.column++; + } + } + return result; +} + +TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) { + auto swap = swap_substr(start_byte, bytes_removed, text); + size_t bytes_added = text.size(); + undo_stack.push_back(SpyInputEdit{start_byte, bytes_added, swap.first}); + TSInputEdit result = {}; + result.start_byte = start_byte; + result.bytes_added = bytes_added; + result.bytes_removed = bytes_removed; + result.start_point = swap.second; + result.extent_removed = get_extent(swap.first); + result.extent_added = get_extent(text); + return result; } TSInputEdit SpyInput::undo() { SpyInputEdit entry = undo_stack.back(); undo_stack.pop_back(); - swap_substr(entry.position, entry.chars_removed, entry.text_inserted); - size_t chars_inserted = string_char_count(encoding, entry.text_inserted); - return TSInputEdit{entry.position, chars_inserted, entry.chars_removed}; + auto swap = swap_substr(entry.start_byte, entry.bytes_removed, entry.text_inserted); + TSInputEdit result; + result.start_byte = entry.start_byte; + result.bytes_removed = entry.bytes_removed; + result.bytes_added = entry.text_inserted.size(); + result.start_point = swap.second; + result.extent_removed = get_extent(swap.first); + result.extent_added = get_extent(entry.text_inserted); + return result; } -string SpyInput::swap_substr(size_t start_char, size_t chars_removed, string text) { - long start_byte = string_byte_for_character(encoding, content, 0, start_char); - assert(start_byte >= 0); - - long bytes_removed = string_byte_for_character(encoding, content, start_byte, chars_removed); - if (bytes_removed < 0) - bytes_removed = content.size() - start_byte; +pair SpyInput::swap_substr(size_t start_byte, size_t bytes_removed, string text) { + TSPoint start_position = {0, 0}; + for (auto i = content.begin(), n = content.begin() + start_byte; i < n; i++) { + if (*i == '\n') { + start_position.row++; + start_position.column = 0; + } else { + start_position.column++; + } + } string text_removed = content.substr(start_byte, bytes_removed); content.erase(start_byte, bytes_removed); content.insert(start_byte, text); - return text_removed; + return {text_removed, start_position}; } void SpyInput::clear() { diff --git a/spec/helpers/spy_input.h b/spec/helpers/spy_input.h index 24caee6a..a91fc165 100644 --- a/spec/helpers/spy_input.h +++ b/spec/helpers/spy_input.h @@ -6,8 +6,8 @@ #include "tree_sitter/runtime.h" struct SpyInputEdit { - size_t position; - size_t chars_removed; + size_t start_byte; + size_t bytes_removed; std::string text_inserted; }; @@ -20,7 +20,7 @@ class SpyInput { static const char * read(void *, size_t *); static int seek(void *, size_t, size_t); - std::string swap_substr(size_t, size_t, std::string); + std::pair swap_substr(size_t, size_t, std::string); public: SpyInput(std::string content, size_t chars_per_chunk); diff --git a/spec/helpers/tree_helpers.cc b/spec/helpers/tree_helpers.cc index 682ec2c7..04e39194 100644 --- a/spec/helpers/tree_helpers.cc +++ b/spec/helpers/tree_helpers.cc @@ -40,10 +40,6 @@ bool operator==(const TSNode &left, const TSNode &right) { return ts_node_eq(left, right); } -bool operator==(const TSLength &left, const TSLength &right) { - return ts_length_eq(left, right); -} - bool operator==(const std::vector &vec, const TreeArray &array) { if (vec.size() != array.size) return false; @@ -52,8 +48,3 @@ bool operator==(const std::vector &vec, const TreeArray &array) { return false; return true; } - -ostream &operator<<(ostream &stream, const TSLength &length) { - return stream << "{chars:" << length.chars << ", bytes:" << - length.bytes << ", rows:" << length.rows << ", columns:" << length.columns << "}"; -} diff --git a/spec/helpers/tree_helpers.h b/spec/helpers/tree_helpers.h index 18d62b80..e8420dc0 100644 --- a/spec/helpers/tree_helpers.h +++ b/spec/helpers/tree_helpers.h @@ -10,9 +10,7 @@ TSTree ** tree_array(std::vector trees); std::ostream &operator<<(std::ostream &stream, const TSTree *tree); std::ostream &operator<<(std::ostream &stream, const TSNode &node); -std::ostream &operator<<(std::ostream &stream, const TSLength &length); bool operator==(const TSNode &left, const TSNode &right); -bool operator==(const TSLength &left, const TSLength &right); bool operator==(const std::vector &right, const TreeArray &array); #endif // HELPERS_TREE_HELPERS_H_ diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index 3653809d..c9ec9876 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -8,57 +8,72 @@ #include "helpers/encoding_helpers.h" #include "helpers/record_alloc.h" #include "helpers/random_helpers.h" +#include "helpers/scope_sequence.h" #include -static void expect_the_correct_tree(TSNode node, TSDocument *document, string tree_string) { - const char *node_string = ts_node_string(node, document); +static void assert_correct_tree_shape(const TSDocument *document, string tree_string) { + TSNode root_node = ts_document_root_node(document); + const char *node_string = ts_node_string(root_node, document); string result(node_string); ts_free((void *)node_string); AssertThat(result, Equals(tree_string)); } -static void expect_a_consistent_tree(TSNode node, TSDocument *document) { +static void assert_consistent_sizes(TSNode node) { size_t child_count = ts_node_child_count(node); - size_t start_char = ts_node_start_char(node); - size_t end_char = ts_node_end_char(node); + size_t start_byte = ts_node_start_byte(node); + size_t end_byte = ts_node_end_byte(node); TSPoint start_point = ts_node_start_point(node); TSPoint end_point = ts_node_end_point(node); - bool has_changes = ts_node_has_changes(node); bool some_child_has_changes = false; - AssertThat(start_char, !IsGreaterThan(end_char)); + AssertThat(start_byte, !IsGreaterThan(end_byte)); AssertThat(start_point, !IsGreaterThan(end_point)); - size_t last_child_end_char = 0; - TSPoint last_child_end_point = {0, 0}; + size_t last_child_end_byte = start_byte; + TSPoint last_child_end_point = start_point; for (size_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); - size_t child_start_char = ts_node_start_char(child); - size_t child_end_char = ts_node_end_char(child); + size_t child_start_byte = ts_node_start_byte(child); TSPoint child_start_point = ts_node_start_point(child); - TSPoint child_end_point = ts_node_end_point(child); - - if (i > 0) { - AssertThat(child_start_char, !IsLessThan(last_child_end_char)); - AssertThat(child_start_point, !IsLessThan(last_child_end_point)); - last_child_end_char = child_end_char; - last_child_end_point = child_end_point; - } - - AssertThat(child_start_char, !IsLessThan(start_char)); - AssertThat(child_end_char, !IsGreaterThan(end_char)); - AssertThat(child_start_point, !IsLessThan(start_point)); - AssertThat(child_end_point, !IsGreaterThan(end_point)); - - expect_a_consistent_tree(child, document); + AssertThat(child_start_byte, !IsLessThan(last_child_end_byte)); + AssertThat(child_start_point, !IsLessThan(last_child_end_point)); + assert_consistent_sizes(child); if (ts_node_has_changes(child)) some_child_has_changes = true; + + last_child_end_byte = ts_node_end_byte(child); + last_child_end_point = ts_node_end_point(child); } - if (child_count > 0) - AssertThat(has_changes, Equals(some_child_has_changes)); + if (child_count > 0) { + AssertThat(end_byte, !IsLessThan(last_child_end_byte)); + AssertThat(end_point, !IsLessThan(last_child_end_point)); + } + + if (some_child_has_changes) { + AssertThat(ts_node_has_changes(node), IsTrue()); + } +} + +static void assert_correct_tree_size(TSDocument *document, string content) { + TSNode root_node = ts_document_root_node(document); + size_t expected_size = content.size(); + + // In the JSON grammar, the start rule (`_value`) is hidden, so the node + // returned from `ts_document_root_node` (e.g. an `object` node), does not + // actually point to the root of the tree. In this weird case, trailing + // whitespace is not included in the root node's size. + // + // TODO: Fix this inconsistency. Maybe disallow the start rule being hidden? + if (ts_document_language(document) == get_test_language("json") && + string(ts_node_type(root_node, document)) != "ERROR") + expected_size = content.find_last_not_of("\n ") + 1; + + AssertThat(ts_node_end_byte(root_node), Equals(expected_size)); + assert_consistent_sizes(root_node); } START_TEST @@ -97,9 +112,8 @@ describe("The Corpus", []() { input = new SpyInput(entry.input, 3); ts_document_set_input(document, input->input()); edit_sequence(); - TSNode root_node = ts_document_root_node(document); - expect_the_correct_tree(root_node, document, entry.tree_string); - expect_a_consistent_tree(root_node, document); + assert_correct_tree_shape(document, entry.tree_string); + assert_correct_tree_size(document, input->content); delete input; }); }; @@ -122,11 +136,20 @@ describe("The Corpus", []() { it_handles_edit_sequence("repairing an insertion of " + description, [&]() { ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); ts_document_parse(document); - - expect_a_consistent_tree(ts_document_root_node(document), document); + assert_correct_tree_size(document, input->content); ts_document_edit(document, input->undo()); - ts_document_parse(document); + assert_correct_tree_size(document, input->content); + + TSRange *ranges; + size_t range_count; + ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); + ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); + + ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content); + verify_changed_ranges(old_scope_sequence, new_scope_sequence, + input->content, ranges, range_count); + ts_free(ranges); }); } @@ -136,11 +159,20 @@ describe("The Corpus", []() { it_handles_edit_sequence("repairing a deletion of " + desription, [&]() { ts_document_edit(document, input->replace(edit_position, deletion_size, "")); ts_document_parse(document); - - expect_a_consistent_tree(ts_document_root_node(document), document); + assert_correct_tree_size(document, input->content); ts_document_edit(document, input->undo()); - ts_document_parse(document); + assert_correct_tree_size(document, input->content); + + TSRange *ranges; + size_t range_count; + ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content); + ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count); + + ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content); + verify_changed_ranges(old_scope_sequence, new_scope_sequence, + input->content, ranges, range_count); + ts_free(ranges); }); } } diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index bbbc67c9..78a53ee2 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -3,6 +3,7 @@ #include "helpers/record_alloc.h" #include "helpers/stream_methods.h" #include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" #include "helpers/spy_logger.h" #include "helpers/spy_input.h" #include "helpers/load_language.h" @@ -66,6 +67,17 @@ describe("Document", [&]() { "(array (true) (false))"); }); + it("allows columns to be measured in either bytes or characters", [&]() { + const char16_t content[] = u"[true, false]"; + spy_input->content = string((const char *)content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + // spy_input->measure_columns_in_bytes + + ts_document_set_input(doc, spy_input->input()); + ts_document_invalidate(doc); + ts_document_parse(doc); + }); + it("allows the input to be retrieved later", [&]() { ts_document_set_input(doc, spy_input->input()); AssertThat(ts_document_input(doc).payload, Equals(spy_input)); @@ -84,14 +96,19 @@ describe("Document", [&]() { ts_document_set_input(doc, spy_input->input()); // Insert 'null', delete '1'. - ts_document_edit(doc, {strlen("{\"key\": ["), 4, 1}); + TSInputEdit edit = {}; + edit.start_point.column = edit.start_byte = strlen("{\"key\": ["); + edit.extent_added.column = edit.bytes_added = 4; + edit.extent_removed.column = edit.bytes_removed = 1; + + ts_document_edit(doc, edit); ts_document_parse(doc); TSNode new_root = ts_document_root_node(doc); assert_node_string_equals( new_root, "(object (pair (string) (array (null) (number))))"); - AssertThat(spy_input->strings_read, Equals(vector({" [null, 2", ""}))); + AssertThat(spy_input->strings_read, Equals(vector({" [null, 2"}))); }); it("reads from the new input correctly when the old input was blank", [&]() { @@ -192,6 +209,146 @@ describe("Document", [&]() { }); }); }); + + describe("parse_and_get_changed_ranges()", [&]() { + SpyInput *input; + + before_each([&]() { + ts_document_set_language(doc, get_test_language("javascript")); + input = new SpyInput("{a: null};", 3); + ts_document_set_input(doc, input->input()); + ts_document_parse(doc); + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object (pair (identifier) (null)))))"); + }); + + after_each([&]() { + delete input; + }); + + auto get_ranges = [&](std::function callback) -> vector { + TSInputEdit edit = callback(); + ts_document_edit(doc, edit); + + TSRange *ranges; + size_t range_count = 0; + + ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count); + + vector result; + for (size_t i = 0; i < range_count; i++) + result.push_back(ranges[i]); + ts_free(ranges); + + return result; + }; + + it("reports changes when one token has been updated", [&]() { + // Replace `null` with `nothing` + auto ranges = get_ranges([&]() { + return input->replace(input->content.find("ull"), 1, "othing"); + }); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find("nothing")}, + TSPoint{0, input->content.find("}")} + }, + }))); + + // Replace `nothing` with `null` again + ranges = get_ranges([&]() { + return input->undo(); + }); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find("null")}, + TSPoint{0, input->content.find("}")} + }, + }))); + }); + + it("reports changes when tokens have been appended", [&]() { + // Add a second key-value pair + auto ranges = get_ranges([&]() { + return input->replace(input->content.find("}"), 0, ", b: false"); + }); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find(",")}, + TSPoint{0, input->content.find("}")}, + }, + }))); + + // Add a third key-value pair in between the first two + ranges = get_ranges([&]() { + return input->replace(input->content.find(", b"), 0, ", c: 1"); + }); + + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (null)) " + "(pair (identifier) (number)) " + "(pair (identifier) (false)))))"); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find(", c")}, + TSPoint{0, input->content.find(", b")}, + }, + }))); + + // Delete the middle pair. + ranges = get_ranges([&]() { + return input->undo(); + }); + + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (null)) " + "(pair (identifier) (false)))))"); + + AssertThat(ranges, Equals(vector({ + }))); + + // Delete the second pair. + ranges = get_ranges([&]() { + return input->undo(); + }); + + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (null)))))"); + + AssertThat(ranges, Equals(vector({ + }))); + }); + + it("reports changes when trees have been wrapped", [&]() { + // Wrap the object in an assignment expression. + auto ranges = get_ranges([&]() { + return input->replace(input->content.find("null"), 0, "b === "); + }); + + assert_node_string_equals( + ts_document_root_node(doc), + "(program (expression_statement (object " + "(pair (identifier) (rel_op (identifier) (null))))))"); + + AssertThat(ranges, Equals(vector({ + TSRange{ + TSPoint{0, input->content.find("b ===")}, + TSPoint{0, input->content.find("}")}, + }, + }))); + }); + }); }); END_TEST diff --git a/spec/runtime/node_spec.cc b/spec/runtime/node_spec.cc index 056362a6..085e4d31 100644 --- a/spec/runtime/node_spec.cc +++ b/spec/runtime/node_spec.cc @@ -138,19 +138,8 @@ describe("Node", []() { it("returns an iterator that yields each of the node's symbols", [&]() { const TSLanguage *language = ts_document_language(document); - TSSymbolIterator iterator = ts_node_symbols(array_node); - AssertThat(iterator.done, Equals(false)); - AssertThat(ts_language_symbol_name(language, iterator.value), Equals("array")); - - ts_symbol_iterator_next(&iterator); - AssertThat(iterator.done, Equals(false)); - AssertThat(ts_language_symbol_name(language, iterator.value), Equals("_value")); - - ts_symbol_iterator_next(&iterator); - AssertThat(iterator.done, Equals(true)); - TSNode false_node = ts_node_descendant_for_char_range(array_node, false_index, false_index + 1); - iterator = ts_node_symbols(false_node); + TSSymbolIterator iterator = ts_node_symbols(false_node); AssertThat(iterator.done, Equals(false)); AssertThat(ts_language_symbol_name(language, iterator.value), Equals("false")); diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 03a27db4..00acdd85 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -126,7 +126,6 @@ describe("Parser", [&]() { TSNode error = ts_node_named_child(root, 1); AssertThat(ts_node_type(error, doc), Equals("ERROR")); - AssertThat(get_node_text(error), Equals(", faaaaalse")); AssertThat(ts_node_child_count(error), Equals(2)); TSNode comma = ts_node_child(error, 0); @@ -161,6 +160,15 @@ describe("Parser", [&]() { }); }); + describe("when there is an unexpected string at the end of a token", [&]() { + it("computes the error's size and position correctly", [&]() { + set_text(" [123, \"hi\n, true]"); + + assert_root_node( + "(array (number) (ERROR (UNEXPECTED '\\n')) (true))"); + }); + }); + describe("when there is an unterminated error", [&]() { it("maintains a consistent tree", [&]() { ts_document_set_language(doc, get_test_language("javascript")); @@ -243,7 +251,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (member_access (identifier) (identifier))))))"); - AssertThat(input->strings_read, Equals(vector({ " + abc.d)", "" }))); + AssertThat(input->strings_read, Equals(vector({ " + abc.d)" }))); }); }); @@ -267,7 +275,7 @@ describe("Parser", [&]() { "(number) " "(math_op (number) (math_op (number) (identifier)))))))"); - AssertThat(input->strings_read, Equals(vector({ "123 || 5 +", "" }))); + AssertThat(input->strings_read, Equals(vector({ "123 || 5 +" }))); }); }); @@ -331,22 +339,6 @@ describe("Parser", [&]() { }); }); - describe("with non-ascii characters", [&]() { - it("inserts the text according to the UTF8 character index", [&]() { - // 'αβδ' + '1' - set_text("'\u03b1\u03b2\u03b4' + '1';"); - - assert_root_node( - "(program (expression_statement (math_op (string) (string))))"); - - // 'αβδ' + 'ψ1' - insert_text(strlen("'abd' + '"), "\u03c8"); - - assert_root_node( - "(program (expression_statement (math_op (string) (string))))"); - }); - }); - describe("into a node containing a extra token", [&]() { it("updates the parse tree", [&]() { set_text("123 *\n" @@ -516,7 +508,6 @@ describe("Parser", [&]() { ts_document_free(doc); doc = nullptr; - AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); } record_alloc::stop(); diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 4227b049..7738f2fe 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -1,5 +1,6 @@ #include "spec_helper.h" #include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" #include "helpers/record_alloc.h" #include "helpers/stream_methods.h" #include "runtime/stack.h" @@ -19,7 +20,7 @@ enum { }; TSLength operator*(const TSLength &length, size_t factor) { - return {length.bytes * factor, length.chars * factor, 0, length.columns * factor}; + return {length.bytes * factor, length.chars * factor, {0, length.extent.column * factor}}; } void free_slice_array(StackSliceArray *slices) { @@ -69,7 +70,7 @@ describe("Stack", [&]() { Stack *stack; const size_t tree_count = 11; TSTree *trees[tree_count]; - TSLength tree_len = {2, 3, 0, 3}; + TSLength tree_len = {2, 3, {0, 3}}; before_each([&]() { record_alloc::start(); diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 6420a206..79395d09 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -1,8 +1,25 @@ #include "spec_helper.h" #include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" #include "runtime/tree.h" #include "runtime/length.h" +void assert_consistent(const TSTree *tree) { + if (tree->child_count == 0) + return; + AssertThat(tree->children[0]->padding, Equals(tree->padding)); + + TSLength total_children_size = ts_length_zero(); + for (size_t i = 0; i < tree->child_count; i++) { + TSTree *child = tree->children[i]; + AssertThat(child->context.offset, Equals(total_children_size)); + assert_consistent(child); + total_children_size = ts_length_add(total_children_size, ts_tree_total_size(child)); + } + + AssertThat(total_children_size, Equals(ts_tree_total_size(tree))); +}; + START_TEST enum { @@ -20,8 +37,8 @@ describe("Tree", []() { TSSymbolMetadata invisible = {false, false, false, true}; before_each([&]() { - tree1 = ts_tree_make_leaf(cat, {2, 1, 0, 1}, {5, 4, 0, 4}, visible); - tree2 = ts_tree_make_leaf(cat, {1, 1, 0, 1}, {3, 3, 0, 3}, visible); + tree1 = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible); + tree2 = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible); ts_tree_retain(tree1); ts_tree_retain(tree2); @@ -150,77 +167,89 @@ describe("Tree", []() { before_each([&]() { tree = ts_tree_make_node(cat, 3, tree_array({ - ts_tree_make_leaf(dog, {2, 2, 0, 2}, {3, 3, 0, 3}, visible), - ts_tree_make_leaf(eel, {2, 2, 0, 2}, {3, 3, 0, 3}, visible), - ts_tree_make_leaf(fox, {2, 2, 0, 2}, {3, 3, 0, 3}, visible), + ts_tree_make_leaf(dog, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible), + ts_tree_make_leaf(eel, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible), + ts_tree_make_leaf(fox, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible), }), visible); - AssertThat(tree->padding, Equals({2, 2, 0, 2})); - AssertThat(tree->size, Equals({13, 13, 0, 13})); + AssertThat(tree->padding, Equals({2, 2, {0, 2}})); + AssertThat(tree->size, Equals({13, 13, {0, 13}})); }); after_each([&]() { ts_tree_release(tree); }); - auto assert_consistent = [&](const TSTree *tree) { - AssertThat(tree->children[0]->padding, Equals(tree->padding)); - - TSLength total_children_size = ts_length_zero(); - for (size_t i = 0; i < tree->child_count; i++) - total_children_size = ts_length_add(total_children_size, ts_tree_total_size(tree->children[i])); - AssertThat(total_children_size, Equals(ts_tree_total_size(tree))); - }; describe("edits within a tree's padding", [&]() { it("resizes the padding of the tree and its leftmost descendants", [&]() { - ts_tree_edit(tree, {1, 1, 0}); - + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 0; + edit.bytes_added = 1; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 0}; + edit.extent_added = {0, 1}; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 3, 0, 0})); - AssertThat(tree->size, Equals({13, 13, 0, 13})); + AssertThat(tree->padding, Equals({3, 0, {0, 3}})); + AssertThat(tree->size, Equals({13, 13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 3, 0, 0})); - AssertThat(tree->children[0]->size, Equals({3, 3, 0, 3})); + AssertThat(tree->children[0]->padding, Equals({3, 0, {0, 3}})); + AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); - AssertThat(tree->children[1]->padding, Equals({2, 2, 0, 2})); - AssertThat(tree->children[1]->size, Equals({3, 3, 0, 3})); + AssertThat(tree->children[1]->padding, Equals({2, 2, {0, 2}})); + AssertThat(tree->children[1]->size, Equals({3, 3, {0, 3}})); }); }); describe("edits that start in a tree's padding but extend into its content", [&]() { it("shrinks the content to compensate for the expanded padding", [&]() { - ts_tree_edit(tree, {1, 4, 3}); - + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 3; + edit.bytes_added = 4; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 3}; + edit.extent_added = {0, 4}; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 5, 0, 0})); - AssertThat(tree->size, Equals({0, 11, 0, 0})); + AssertThat(tree->padding, Equals({5, 0, {0, 5}})); + AssertThat(tree->size, Equals({11, 0, {0, 11}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 5, 0, 0})); - AssertThat(tree->children[0]->size, Equals({0, 1, 0, 0})); + AssertThat(tree->children[0]->padding, Equals({5, 0, {0, 5}})); + AssertThat(tree->children[0]->size, Equals({1, 0, {0, 1}})); }); }); describe("insertions at the edge of a tree's padding", [&]() { it("expands the tree's padding", [&]() { - ts_tree_edit(tree, {2, 2, 0}); + TSInputEdit edit; + edit.start_byte = 2; + edit.bytes_removed = 0; + edit.bytes_added = 2; + edit.start_point = {0, 2}; + edit.extent_removed = {0, 0}; + edit.extent_added = {0, 2}; + ts_tree_edit(tree, &edit); + assert_consistent(tree); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 4, 0, 0})); - AssertThat(tree->size, Equals({13, 13, 0, 13})); + AssertThat(tree->padding, Equals({4, 0, {0, 4}})); + AssertThat(tree->size, Equals({13, 13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 4, 0, 0})); - AssertThat(tree->children[0]->size, Equals({3, 3, 0, 3})); + AssertThat(tree->children[0]->padding, Equals({4, 0, {0, 4}})); + AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); }); @@ -228,17 +257,23 @@ describe("Tree", []() { describe("replacements starting at the edge of a tree's padding", [&]() { it("resizes the content and not the padding", [&]() { - ts_tree_edit(tree, {2, 5, 2}); - + TSInputEdit edit; + edit.start_byte = 2; + edit.bytes_removed = 2; + edit.bytes_added = 5; + edit.start_point = {0, 2}; + edit.extent_removed = {0, 2}; + edit.extent_added = {0, 5}; + ts_tree_edit(tree, &edit); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({2, 2, 0, 2})); - AssertThat(tree->size, Equals({0, 16, 0, 0})); + AssertThat(tree->padding, Equals({2, 2, {0, 2}})); + AssertThat(tree->size, Equals({16, 0, {0, 16}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({2, 2, 0, 2})); - AssertThat(tree->children[0]->size, Equals({0, 6, 0, 0})); + AssertThat(tree->children[0]->padding, Equals({2, 2, {0, 2}})); + AssertThat(tree->children[0]->size, Equals({6, 0, {0, 6}})); AssertThat(tree->children[1]->has_changes, IsFalse()); }); @@ -246,35 +281,43 @@ describe("Tree", []() { describe("deletions that span more than one child node", [&]() { it("shrinks subsequent child nodes", [&]() { - ts_tree_edit(tree, {1, 3, 10}); + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 10; + edit.bytes_added = 3; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 10}; + edit.extent_added = {0, 3}; + ts_tree_edit(tree, &edit); + assert_consistent(tree); assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({0, 4, 0, 0})); - AssertThat(tree->size, Equals({0, 4, 0, 0})); + AssertThat(tree->padding, Equals({4, 0, {0, 4}})); + AssertThat(tree->size, Equals({4, 0, {0, 4}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({0, 4, 0, 0})); - AssertThat(tree->children[0]->size, Equals({0, 0, 0, 0})); + AssertThat(tree->children[0]->padding, Equals({4, 0, {0, 4}})); + AssertThat(tree->children[0]->size, Equals({0, 0, {0, 0}})); AssertThat(tree->children[1]->has_changes, IsTrue()); - AssertThat(tree->children[1]->padding, Equals({0, 0, 0, 0})); - AssertThat(tree->children[1]->size, Equals({0, 0, 0, 0})); + AssertThat(tree->children[1]->padding, Equals({0, 0, {0, 0}})); + AssertThat(tree->children[1]->size, Equals({0, 0, {0, 0}})); AssertThat(tree->children[2]->has_changes, IsTrue()); - AssertThat(tree->children[2]->padding, Equals({0, 1, 0, 0})); - AssertThat(tree->children[2]->size, Equals({3, 3, 0, 3})); + AssertThat(tree->children[2]->padding, Equals({1, 0, {0, 1}})); + AssertThat(tree->children[2]->size, Equals({3, 3, {0, 3}})); }); }); }); describe("equality", [&]() { it("returns true for identical trees", [&]() { - TSTree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, 1, 1}, {5, 4, 1, 4}, visible); + TSTree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {1, 1}}, {5, 4, {1, 4}}, visible); AssertThat(ts_tree_eq(tree1, tree1_copy), IsTrue()); - TSTree *tree2_copy = ts_tree_make_leaf(cat, {1, 1, 0, 1}, {3, 3, 0, 3}, visible); + TSTree *tree2_copy = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible); AssertThat(ts_tree_eq(tree2, tree2_copy), IsTrue()); TSTree *parent2 = ts_tree_make_node(dog, 2, tree_array({ @@ -305,11 +348,11 @@ describe("Tree", []() { }); it("returns false for trees with different sizes", [&]() { - TSTree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, 0, 1}, tree1->size, invisible); + TSTree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, tree1->size, invisible); AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse()); ts_tree_release(tree1_copy); - tree1_copy = ts_tree_make_leaf(cat, tree1->padding, {5, 4, 1, 10}, invisible); + tree1_copy = ts_tree_make_leaf(cat, tree1->padding, {5, 4, {1, 10}}, invisible); AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse()); ts_tree_release(tree1_copy); }); diff --git a/src/runtime/document.c b/src/runtime/document.c index ae0537c0..2ccb8d6f 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -4,6 +4,7 @@ #include "runtime/parser.h" #include "runtime/string_input.h" #include "runtime/document.h" +#include "runtime/tree_path.h" TSDocument *ts_document_new() { TSDocument *self = ts_calloc(1, sizeof(TSDocument)); @@ -79,16 +80,20 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { if (!self->tree) return; - size_t max_chars = ts_tree_total_chars(self->tree); - if (edit.position > max_chars) - edit.position = max_chars; - if (edit.chars_removed > max_chars - edit.position) - edit.chars_removed = max_chars - edit.position; + size_t max_bytes = ts_tree_total_bytes(self->tree); + if (edit.start_byte > max_bytes) + return; + if (edit.bytes_removed > max_bytes - edit.start_byte) + edit.bytes_removed = max_bytes - edit.start_byte; - ts_tree_edit(self->tree, edit); + ts_tree_edit(self->tree, &edit); } -int ts_document_parse(TSDocument *self) { +int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, + size_t *range_count) { + if (ranges) *ranges = NULL; + if (range_count) *range_count = 0; + if (!self->input.read || !self->parser.language) return -1; @@ -100,14 +105,31 @@ int ts_document_parse(TSDocument *self) { if (!tree) return -1; - if (self->tree) - ts_tree_release(self->tree); + if (self->tree) { + TSTree *old_tree = self->tree; + self->tree = tree; + + if (ranges && range_count) { + tree_path_init(&self->parser.tree_path1, old_tree); + tree_path_init(&self->parser.tree_path2, tree); + if (!tree_path_get_changes(&self->parser.tree_path1, + &self->parser.tree_path2, ranges, range_count)) + return -1; + } + + ts_tree_release(old_tree); + } + self->tree = tree; self->parse_count++; self->valid = true; return 0; } +int ts_document_parse(TSDocument *self) { + return ts_document_parse_and_get_changed_ranges(self, NULL, NULL); +} + void ts_document_invalidate(TSDocument *self) { self->valid = false; } diff --git a/src/runtime/length.h b/src/runtime/length.h index 398ef418..9df859d8 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -3,22 +3,21 @@ #include #include +#include "runtime/point.h" +#include "tree_sitter/runtime.h" typedef struct { size_t bytes; size_t chars; - size_t rows; - size_t columns; + TSPoint extent; } TSLength; -static inline bool ts_length_is_unknown(TSLength self) { - return self.chars > 0 && self.bytes == 0; +static inline bool ts_length_has_unknown_chars(TSLength self) { + return self.bytes > 0 && self.chars == 0; } -static inline void ts_length_set_unknown(TSLength *self) { - self->bytes = 0; - self->rows = 0; - self->columns = 0; +static inline void ts_length_set_unknown_chars(TSLength *self) { + self->chars = 0; } static inline TSLength ts_length_min(TSLength len1, TSLength len2) { @@ -28,20 +27,13 @@ static inline TSLength ts_length_min(TSLength len1, TSLength len2) { static inline TSLength ts_length_add(TSLength len1, TSLength len2) { TSLength result; result.chars = len1.chars + len2.chars; + result.bytes = len1.bytes + len2.bytes; + result.extent = ts_point_add(len1.extent, len2.extent); - if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { - result.bytes = 0; - result.rows = 0; - result.columns = result.chars; + if (ts_length_has_unknown_chars(len1) || ts_length_has_unknown_chars(len2)) { + result.chars = 0; } else { - result.bytes = len1.bytes + len2.bytes; - if (len2.rows == 0) { - result.rows = len1.rows; - result.columns = len1.columns + len2.columns; - } else { - result.rows = len1.rows + len2.rows; - result.columns = len2.columns; - } + result.chars = len1.chars + len2.chars; } return result; @@ -49,33 +41,26 @@ static inline TSLength ts_length_add(TSLength len1, TSLength len2) { static inline TSLength ts_length_sub(TSLength len1, TSLength len2) { TSLength result; - result.chars = len1.chars - len2.chars; + result.bytes = len1.bytes - len2.bytes; + result.extent = ts_point_sub(len1.extent, len2.extent); - if (ts_length_is_unknown(len1) || ts_length_is_unknown(len2)) { - result.bytes = 0; - result.rows = 0; - result.columns = result.chars; + if (ts_length_has_unknown_chars(len1) || ts_length_has_unknown_chars(len2)) { + result.chars = 0; } else { - result.bytes = len1.bytes - len2.bytes; - if (len1.rows == len2.rows) { - result.rows = 0; - result.columns = len1.columns - len2.columns; - } else { - result.rows = len1.rows - len2.rows; - result.columns = len1.columns; - } + result.chars = len1.chars - len2.chars; } return result; } static inline TSLength ts_length_zero() { - return (TSLength){ 0, 0, 0, 0 }; + return (TSLength){ 0, 0, {0, 0} }; } static inline bool ts_length_eq(TSLength self, TSLength other) { return self.bytes == other.bytes && self.chars == other.chars && - self.rows == other.rows && self.columns == other.columns; + self.extent.row == other.extent.row && + self.extent.column == other.extent.column; } #endif diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 6296c6d5..f8a023f2 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -55,10 +55,10 @@ static void ts_lexer__advance(void *payload, TSStateId state, bool skip) { self->current_position.bytes += self->lookahead_size; self->current_position.chars++; if (self->data.lookahead == '\n') { - self->current_position.rows++; - self->current_position.columns = 0; + self->current_position.extent.row++; + self->current_position.extent.column = 0; } else { - self->current_position.columns++; + self->current_position.extent.column++; } } diff --git a/src/runtime/node.c b/src/runtime/node.c index e51350e2..ea767ffc 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -48,7 +48,7 @@ static inline TSNode ts_node__direct_parent(TSNode self, size_t *index) { return ts_node_make(tree->context.parent, ts_node__offset_char(self) - tree->context.offset.chars, ts_node__offset_byte(self) - tree->context.offset.bytes, - ts_node__offset_row(self) - tree->context.offset.rows); + ts_node__offset_row(self) - tree->context.offset.extent.row); } static inline TSNode ts_node__direct_child(TSNode self, size_t i) { @@ -56,7 +56,7 @@ static inline TSNode ts_node__direct_child(TSNode self, size_t i) { return ts_node_make( child_tree, ts_node__offset_char(self) + child_tree->context.offset.chars, ts_node__offset_byte(self) + child_tree->context.offset.bytes, - ts_node__offset_row(self) + child_tree->context.offset.rows); + ts_node__offset_row(self) + child_tree->context.offset.extent.row); } static inline TSNode ts_node__child(TSNode self, size_t child_index, @@ -244,14 +244,14 @@ size_t ts_node_end_byte(TSNode self) { TSPoint ts_node_start_point(TSNode self) { const TSTree *tree = ts_node__tree(self); - return (TSPoint){ ts_node__offset_row(self) + tree->padding.rows, + return (TSPoint){ ts_node__offset_row(self) + tree->padding.extent.row, ts_tree_start_column(tree) }; } TSPoint ts_node_end_point(TSNode self) { const TSTree *tree = ts_node__tree(self); - return (TSPoint){ ts_node__offset_row(self) + tree->padding.rows + - tree->size.rows, + return (TSPoint){ ts_node__offset_row(self) + tree->padding.extent.row + + tree->size.extent.row, ts_tree_end_column(tree) }; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 03426f42..8040f5af 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -144,7 +144,7 @@ error: } static void parser__pop_reusable_node(ReusableNode *reusable_node) { - reusable_node->char_index += ts_tree_total_chars(reusable_node->tree); + reusable_node->byte_index += ts_tree_total_bytes(reusable_node->tree); while (reusable_node->tree) { TSTree *parent = reusable_node->tree->context.parent; size_t next_index = reusable_node->tree->context.index + 1; @@ -246,7 +246,6 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { TSStateId start_state = self->language->lex_states[parse_state]; TSStateId current_state = start_state; TSLength start_position = self->lexer.current_position; - TSLength position = start_position; LOG("lex state:%d", start_state); bool skipped_error = false; @@ -258,35 +257,32 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { while (!self->language->lex_fn(&self->lexer.data, current_state)) { if (current_state != ERROR_STATE) { LOG("retry_in_error_mode"); - ts_lexer_reset(&self->lexer, position); - ts_lexer_start(&self->lexer, start_state); current_state = ERROR_STATE; + ts_lexer_reset(&self->lexer, start_position); + ts_lexer_start(&self->lexer, current_state); continue; } - if (self->lexer.data.lookahead == 0) { - self->lexer.data.result_symbol = ts_builtin_sym_error; - break; + if (!skipped_error) { + error_start_position = self->lexer.token_start_position; + first_error_character = self->lexer.data.lookahead; } - if (self->lexer.current_position.chars == position.chars) { - if (!skipped_error) { - error_start_position = self->lexer.current_position; - first_error_character = self->lexer.data.lookahead; + if (self->lexer.current_position.bytes == error_end_position.bytes) { + if (self->lexer.data.lookahead == 0) { + self->lexer.data.result_symbol = ts_builtin_sym_error; + break; } - skipped_error = true; self->lexer.data.advance(&self->lexer, ERROR_STATE, false); - error_end_position = self->lexer.current_position; } - position = self->lexer.current_position; + skipped_error = true; + error_end_position = self->lexer.current_position; } TSTree *result; if (skipped_error) { - error_start_position = - ts_length_min(error_start_position, self->lexer.token_start_position); TSLength padding = ts_length_sub(error_start_position, start_position); TSLength size = ts_length_sub(error_end_position, error_start_position); ts_lexer_reset(&self->lexer, error_end_position); @@ -320,15 +316,15 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, TSLength position = ts_stack_top_position(self->stack, version); while (reusable_node->tree) { - if (reusable_node->char_index > position.chars) { + if (reusable_node->byte_index > position.bytes) { LOG("before_reusable sym:%s, pos:%lu", - SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); + SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index); break; } - if (reusable_node->char_index < position.chars) { + if (reusable_node->byte_index < position.bytes) { LOG("past_reusable sym:%s, pos:%lu", - SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); + SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index); parser__pop_reusable_node(reusable_node); continue; } @@ -336,7 +332,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, if (reusable_node->tree->has_changes) { LOG("cant_reuse_changed tree:%s, size:%lu", SYM_NAME(reusable_node->tree->symbol), - reusable_node->tree->size.chars); + reusable_node->tree->size.bytes); if (!parser__breakdown_reusable_node(reusable_node)) { parser__pop_reusable_node(reusable_node); CHECK(parser__breakdown_top_of_stack(self, version)); @@ -347,7 +343,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, if (reusable_node->tree->symbol == ts_builtin_sym_error) { LOG("cant_reuse_error tree:%s, size:%lu", SYM_NAME(reusable_node->tree->symbol), - reusable_node->tree->size.chars); + reusable_node->tree->size.bytes); if (!parser__breakdown_reusable_node(reusable_node)) { parser__pop_reusable_node(reusable_node); CHECK(parser__breakdown_top_of_stack(self, version)); @@ -360,7 +356,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, return result; } - if (self->cached_token && position.chars == self->cached_token_char_index) { + if (self->cached_token && position.bytes == self->cached_token_byte_index) { ts_tree_retain(self->cached_token); return self->cached_token; } @@ -808,7 +804,10 @@ static void parser__start(Parser *self, TSInput input, TSTree *previous_tree) { self->finished_tree = NULL; } -static bool parser__accept(Parser *self, StackVersion version) { +static bool parser__accept(Parser *self, StackVersion version, TSTree *lookahead) { + lookahead->extra = true; + assert(lookahead->symbol == ts_builtin_sym_end); + CHECK(ts_stack_push(self->stack, version, lookahead, false, 1)); StackPopResult pop = ts_stack_pop_all(self->stack, version); CHECK(pop.status); CHECK(pop.slices.size); @@ -825,7 +824,7 @@ static bool parser__accept(Parser *self, StackVersion version) { for (size_t j = trees.size - 1; j + 1 > 0; j--) { TSTree *child = trees.contents[j]; if (!child->extra) { - root = ts_tree_make_copy(child); + CHECK(root = ts_tree_make_copy(child)); root->child_count = 0; for (size_t k = 0; k < child->child_count; k++) ts_tree_retain(child->children[k]); @@ -852,6 +851,8 @@ static bool parser__accept(Parser *self, StackVersion version) { return true; error: + for (size_t i = 0; i < pop.slices.size; i++) + ts_tree_array_delete(&pop.slices.contents[i].trees); return false; } @@ -1025,7 +1026,7 @@ static bool parser__recover(Parser *self, StackVersion version, TSStateId state, TreeArray children = array_new(); TSTree *parent = ts_tree_make_error_node(&children); CHECK(parser__push(self, version, parent, 1)); - return parser__accept(self, version); + return parser__accept(self, version, lookahead); } LOG("recover state:%u", state); @@ -1077,7 +1078,7 @@ static bool parser__advance(Parser *self, StackVersion version, validated_lookahead = true; LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), - lookahead->size.chars); + lookahead->size.bytes); } bool reduction_stopped_at_error = false; @@ -1163,7 +1164,7 @@ static bool parser__advance(Parser *self, StackVersion version, continue; LOG("accept"); - CHECK(parser__accept(self, version)); + CHECK(parser__accept(self, version, lookahead)); ts_tree_release(lookahead); return true; @@ -1228,6 +1229,8 @@ bool parser_init(Parser *self) { self->finished_tree = NULL; self->stack = NULL; array_init(&self->reduce_actions); + array_init(&self->tree_path1); + array_init(&self->tree_path2); self->stack = ts_stack_new(); if (!self->stack) @@ -1253,6 +1256,10 @@ void parser_destroy(Parser *self) { ts_stack_delete(self->stack); if (self->reduce_actions.contents) array_delete(&self->reduce_actions); + if (self->tree_path1.contents) + array_delete(&self->tree_path1); + if (self->tree_path2.contents) + array_delete(&self->tree_path2); } TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { @@ -1276,8 +1283,8 @@ TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { LOG("process version:%d, version_count:%lu, state:%d, row:%lu, col:%lu", version, ts_stack_version_count(self->stack), ts_stack_top_state(self->stack, version), - ts_stack_top_position(self->stack, version).rows + 1, - ts_stack_top_position(self->stack, version).columns + 1); + ts_stack_top_position(self->stack, version).extent.row + 1, + ts_stack_top_position(self->stack, version).extent.column + 1); CHECK(parser__advance(self, version, &reusable_node)); LOG_STACK(); @@ -1298,7 +1305,7 @@ TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) { LOG_TREE(); ts_stack_clear(self->stack); parser__clear_cached_token(self); - ts_tree_assign_parents(self->finished_tree); + CHECK(ts_tree_assign_parents(self->finished_tree, &self->tree_path1)); return self->finished_tree; error: diff --git a/src/runtime/parser.h b/src/runtime/parser.h index cce04393..c27e652e 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -12,7 +12,7 @@ extern "C" { typedef struct { TSTree *tree; - size_t char_index; + size_t byte_index; } ReusableNode; typedef struct { @@ -25,8 +25,10 @@ typedef struct { bool print_debugging_graphs; TSTree scratch_tree; TSTree *cached_token; - size_t cached_token_char_index; + size_t cached_token_byte_index; ReusableNode reusable_node; + TreePath tree_path1; + TreePath tree_path2; } Parser; bool parser_init(Parser *); diff --git a/src/runtime/point.h b/src/runtime/point.h new file mode 100644 index 00000000..8909a758 --- /dev/null +++ b/src/runtime/point.h @@ -0,0 +1,46 @@ +#ifndef RUNTIME_POINT_H_ +#define RUNTIME_POINT_H_ + +#include "tree_sitter/runtime.h" + +static inline TSPoint ts_point_add(TSPoint a, TSPoint b) { + if (b.row > 0) + return (TSPoint){a.row + b.row, b.column}; + else + return (TSPoint){a.row, a.column + b.column}; +} + +static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) { + if (a.row > b.row) + return (TSPoint){a.row - b.row, a.column}; + else + return (TSPoint){0, a.column - b.column}; +} + +static inline bool ts_point_lte(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column <= b.column); +} + +static inline bool ts_point_lt(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column < b.column); +} + +static inline bool ts_point_eq(TSPoint a, TSPoint b) { + return a.row == b.row && a.column == b.column; +} + +static inline TSPoint ts_point_min(TSPoint a, TSPoint b) { + if (a.row < b.row || (a.row == b.row && a.column < b.column)) + return a; + else + return b; +} + +static inline TSPoint ts_point_max(TSPoint a, TSPoint b) { + if (a.row > b.row || (a.row == b.row && a.column > b.column)) + return a; + else + return b; +} + +#endif diff --git a/src/runtime/stack.c b/src/runtime/stack.c index f3fa3bfa..c1863a47 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -122,7 +122,7 @@ static StackNode *stack_node_new(StackNode *next, TSTree *tree, bool is_pending, ERROR_COST_PER_SKIPPED_CHAR * (tree->padding.chars + tree->size.chars) + ERROR_COST_PER_SKIPPED_LINE * - (tree->padding.rows + tree->size.rows); + (tree->padding.extent.row + tree->size.extent.row); } } } else { @@ -600,11 +600,10 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { else fprintf(f, "label=\"%d\"", node->state); - fprintf( - f, - " tooltip=\"position: %lu,%lu\nerror_count: %u\nerror_cost: %u\"];\n", - node->position.rows, node->position.columns, node->error_count, - node->error_cost); + fprintf(f, + " tooltip=\"position: %lu,%lu\nerror_count: %u\nerror_cost: %u\"];\n", + node->position.extent.row, node->position.extent.column, node->error_count, + node->error_cost); for (int j = 0; j < node->link_count; j++) { StackLink link = node->links[j]; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index a8d08a3b..2f0760c6 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -28,6 +28,7 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, .visible = metadata.visible, .named = metadata.named, .first_leaf.symbol = sym, + .has_changes = false, }; return result; @@ -90,28 +91,29 @@ TSTree *ts_tree_make_copy(TSTree *self) { return result; } -void ts_tree_assign_parents(TSTree *self) { - TSLength offset; - -recur: - offset = ts_length_zero(); - for (size_t i = 0; i < self->child_count; i++) { - TSTree *child = self->children[i]; - if (child->context.parent != self || child->context.index != i) { - child->context.parent = self; - child->context.index = i; - child->context.offset = offset; - if (i == self->child_count - 1) { - self = child; - goto recur; +bool ts_tree_assign_parents(TSTree *self, TreePath *path) { + array_clear(path); + if (!array_push(path, ((TreePathEntry){self, ts_length_zero(), 0}))) + return false; + while (path->size > 0) { + TSTree *tree = array_pop(path).tree; + TSLength offset = ts_length_zero(); + for (size_t i = 0; i < tree->child_count; i++) { + TSTree *child = tree->children[i]; + if (child->context.parent != tree || child->context.index != i) { + child->context.parent = tree; + child->context.index = i; + child->context.offset = offset; + if (!array_push(path, ((TreePathEntry){child, ts_length_zero(), 0}))) + return false; } - - ts_tree_assign_parents(child); + offset = ts_length_add(offset, ts_tree_total_size(child)); } - offset = ts_length_add(offset, ts_tree_total_size(child)); } + return true; } + void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { if (self->child_count > 0) ts_free(self->children); @@ -151,7 +153,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { if (self->symbol == ts_builtin_sym_error) { self->error_cost += ERROR_COST_PER_SKIPPED_CHAR * self->size.chars + - ERROR_COST_PER_SKIPPED_LINE * self->size.rows; + ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row; for (size_t i = 0; i < child_count; i++) if (!self->children[i]->extra) self->error_cost += ERROR_COST_PER_SKIPPED_TREE; @@ -232,20 +234,20 @@ recur: } size_t ts_tree_start_column(const TSTree *self) { - size_t column = self->padding.columns; - if (self->padding.rows > 0) + size_t column = self->padding.extent.column; + if (self->padding.extent.row > 0) return column; for (const TSTree *tree = self; tree != NULL; tree = tree->context.parent) { - column += tree->context.offset.columns; - if (tree->context.offset.rows > 0) + column += tree->context.offset.extent.column; + if (tree->context.offset.extent.row > 0) break; } return column; } size_t ts_tree_end_column(const TSTree *self) { - size_t result = self->size.columns; - if (self->size.rows == 0) + size_t result = self->size.extent.column; + if (self->size.extent.row == 0) result += ts_tree_start_column(self); return result; } @@ -306,71 +308,103 @@ static inline long min(long a, long b) { return a <= b ? a : b; } -void ts_tree_edit(TSTree *self, TSInputEdit edit) { - size_t start = edit.position; - size_t new_end = edit.position + edit.chars_inserted; - size_t old_end = edit.position + edit.chars_removed; - assert(old_end <= ts_tree_total_chars(self)); + +void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { + size_t old_end_byte = edit->start_byte + edit->bytes_removed; + size_t new_end_byte = edit->start_byte + edit->bytes_added; + TSPoint old_end_point = ts_point_add(edit->start_point, edit->extent_removed); + TSPoint new_end_point = ts_point_add(edit->start_point, edit->extent_added); + + assert(old_end_byte <= ts_tree_total_bytes(self)); self->has_changes = true; - if (start < self->padding.chars) { - ts_length_set_unknown(&self->padding); - long remaining_padding = self->padding.chars - old_end; - if (remaining_padding >= 0) { - self->padding.chars = new_end + remaining_padding; + if (edit->start_byte < self->padding.bytes) { + ts_length_set_unknown_chars(&self->padding); + if (self->padding.bytes >= old_end_byte) { + size_t trailing_padding_bytes = self->padding.bytes - old_end_byte; + TSPoint trailing_padding_extent = ts_point_sub(self->padding.extent, old_end_point); + self->padding.bytes = new_end_byte + trailing_padding_bytes; + self->padding.extent = ts_point_add(new_end_point, trailing_padding_extent); } else { - self->padding.chars = new_end; - self->size.chars += remaining_padding; - ts_length_set_unknown(&self->size); + ts_length_set_unknown_chars(&self->size); + size_t removed_content_bytes = old_end_byte - self->padding.bytes; + TSPoint removed_content_extent = ts_point_sub(old_end_point, self->padding.extent); + self->size.bytes = self->size.bytes - removed_content_bytes; + self->size.extent = ts_point_sub(self->size.extent, removed_content_extent); + self->padding.bytes = new_end_byte; + self->padding.extent = new_end_point; } - } else if (start == self->padding.chars && edit.chars_removed == 0) { - self->padding.chars += edit.chars_inserted; - ts_length_set_unknown(&self->padding); + } else if (edit->start_byte == self->padding.bytes && edit->bytes_removed == 0) { + ts_length_set_unknown_chars(&self->padding); + self->padding.bytes = self->padding.bytes + edit->bytes_added; + self->padding.extent = ts_point_add(self->padding.extent, edit->extent_added); } else { - self->size.chars += (edit.chars_inserted - edit.chars_removed); - ts_length_set_unknown(&self->size); + ts_length_set_unknown_chars(&self->size); + size_t trailing_content_bytes = ts_tree_total_bytes(self) - old_end_byte; + TSPoint trailing_content_extent = ts_point_sub(ts_tree_total_extent(self), old_end_point); + self->size.bytes = new_end_byte + trailing_content_bytes - self->padding.bytes; + self->size.extent = ts_point_sub(ts_point_add(new_end_point, trailing_content_extent), self->padding.extent); } bool found_first_child = false; - long remainder_to_delete = edit.chars_removed - edit.chars_inserted; - size_t child_left = 0, child_right = 0; + long remaining_bytes_to_delete = 0; + TSPoint remaining_extent_to_delete = {0, 0}; + TSLength child_left, child_right = ts_length_zero(); for (size_t i = 0; i < self->child_count; i++) { TSTree *child = self->children[i]; - size_t child_size = ts_tree_total_chars(child); child_left = child_right; - child_right += child_size; if (!found_first_child) { - if (child_right >= start) { + child_right = ts_length_add(child_left, ts_tree_total_size(child)); + if (child_right.bytes >= edit->start_byte) { found_first_child = true; - size_t chars_removed = min(edit.chars_removed, child_right - start); - remainder_to_delete -= (chars_removed - edit.chars_inserted); - ts_tree_edit(child, (TSInputEdit){ - .position = start - child_left, - .chars_inserted = edit.chars_inserted, - .chars_removed = chars_removed, - }); - } - } else { - if (remainder_to_delete > 0) { - size_t chars_removed = min(remainder_to_delete, child_size); - remainder_to_delete -= chars_removed; - ts_tree_edit( - child, - (TSInputEdit){ - .position = 0, .chars_inserted = 0, .chars_removed = chars_removed, - }); - } else { - break; + TSInputEdit child_edit = { + .start_byte = edit->start_byte - child_left.bytes, + .bytes_added = edit->bytes_added, + .bytes_removed = edit->bytes_removed, + .start_point = ts_point_sub(edit->start_point, child_left.extent), + .extent_added = edit->extent_added, + .extent_removed = edit->extent_removed, + }; + + if (old_end_byte > child_right.bytes) { + child_edit.bytes_removed = child_right.bytes - edit->start_byte; + child_edit.extent_removed = ts_point_sub(child_right.extent, edit->start_point); + remaining_bytes_to_delete = old_end_byte - child_right.bytes; + remaining_extent_to_delete = ts_point_sub(old_end_point, child_right.extent); + } + + ts_tree_edit(child, &child_edit); } + } else if (remaining_bytes_to_delete > 0) { + TSInputEdit child_edit = { + .start_byte = 0, + .bytes_added = 0, + .bytes_removed = min(remaining_bytes_to_delete, ts_tree_total_bytes(child)), + .start_point = {0, 0}, + .extent_added = {0, 0}, + .extent_removed = ts_point_min(remaining_extent_to_delete, ts_tree_total_size(child).extent), + }; + remaining_bytes_to_delete -= child_edit.bytes_removed; + remaining_extent_to_delete = ts_point_sub(remaining_extent_to_delete, child_edit.extent_removed); + ts_tree_edit(child, &child_edit); } + + child_right = ts_length_add(child_left, ts_tree_total_size(child)); + child->context.offset = child_left; } } static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) { if (c == 0) return snprintf(s, n, "EOF"); + else if (c == '\n') + return snprintf(s, n, "'\\n'"); + else if (c == '\t') + return snprintf(s, n, "'\\t'"); + else if (c == '\r') + return snprintf(s, n, "'\\r'"); else if (c < 128) return snprintf(s, n, "'%c'", c); else @@ -425,7 +459,7 @@ char *ts_tree_string(const TSTree *self, const TSLanguage *language, return result; } -void ts_tree__print_dot_graph(const TSTree *self, size_t offset, +void ts_tree__print_dot_graph(const TSTree *self, size_t byte_offset, const TSLanguage *language, FILE *f) { fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, self->symbol)); @@ -436,13 +470,13 @@ void ts_tree__print_dot_graph(const TSTree *self, size_t offset, fprintf(f, ", fontcolor=gray"); fprintf(f, ", tooltip=\"range:%lu - %lu\nstate:%d\nerror-cost:%u\"]\n", - offset, offset + ts_tree_total_chars(self), self->parse_state, + byte_offset, byte_offset + ts_tree_total_bytes(self), self->parse_state, self->error_cost); for (size_t i = 0; i < self->child_count; i++) { const TSTree *child = self->children[i]; - ts_tree__print_dot_graph(child, offset, language, f); + ts_tree__print_dot_graph(child, byte_offset, language, f); fprintf(f, "tree_%p -> tree_%p [tooltip=%lu]\n", self, child, i); - offset += ts_tree_total_chars(child); + byte_offset += ts_tree_total_bytes(child); } } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 34c0ba8c..a274c186 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -50,7 +50,16 @@ typedef struct TSTree { bool has_changes : 1; } TSTree; +typedef struct { + TSTree *tree; + TSLength position; + size_t child_index; +} TreePathEntry; + typedef Array(TSTree *) TreeArray; + +typedef Array(TreePathEntry) TreePath; + bool ts_tree_array_copy(TreeArray, TreeArray *); void ts_tree_array_delete(TreeArray *); size_t ts_tree_array_essential_count(const TreeArray *); @@ -68,8 +77,8 @@ int ts_tree_compare(const TSTree *tree1, const TSTree *tree2); size_t ts_tree_start_column(const TSTree *self); size_t ts_tree_end_column(const TSTree *self); void ts_tree_set_children(TSTree *, size_t, TSTree **); -void ts_tree_assign_parents(TSTree *); -void ts_tree_edit(TSTree *, TSInputEdit); +bool ts_tree_assign_parents(TSTree *, TreePath *); +void ts_tree_edit(TSTree *, const TSInputEdit *edit); char *ts_tree_string(const TSTree *, const TSLanguage *, bool include_all); void ts_tree_print_dot_graph(const TSTree *, const TSLanguage *, FILE *); @@ -77,10 +86,18 @@ static inline size_t ts_tree_total_chars(const TSTree *self) { return self->padding.chars + self->size.chars; } +static inline size_t ts_tree_total_bytes(const TSTree *self) { + return self->padding.bytes + self->size.bytes; +} + static inline TSLength ts_tree_total_size(const TSTree *self) { return ts_length_add(self->padding, self->size); } +static inline TSPoint ts_tree_total_extent(const TSTree *self) { + return ts_point_add(self->padding.extent, self->size.extent); +} + static inline bool ts_tree_is_fragile(const TSTree *tree) { return tree->fragile_left || tree->fragile_right || ts_tree_total_chars(tree) == 0; diff --git a/src/runtime/tree_path.h b/src/runtime/tree_path.h new file mode 100644 index 00000000..f6bf2366 --- /dev/null +++ b/src/runtime/tree_path.h @@ -0,0 +1,201 @@ +#ifndef RUNTIME_TREE_PATH_H_ +#define RUNTIME_TREE_PATH_H_ + +#include "runtime/tree.h" +#include "runtime/error_costs.h" + +typedef Array(TSRange) RangeArray; + +static bool range_array_add(RangeArray *results, TSPoint start, TSPoint end) { + if (results->size > 0) { + TSRange *last_range = array_back(results); + if (ts_point_lte(start, last_range->end)) { + last_range->end = end; + return true; + } + } + + if (ts_point_lt(start, end)) { + TSRange range = { start, end }; + return array_push(results, range); + } + + return true; +} + +static bool tree_path_descend(TreePath *path, TSPoint position) { + bool did_descend; + do { + did_descend = false; + TreePathEntry entry = *array_back(path); + TSLength child_position = entry.position; + for (size_t i = 0; i < entry.tree->child_count; i++) { + TSTree *child = entry.tree->children[i]; + TSLength child_right_position = + ts_length_add(child_position, ts_tree_total_size(child)); + if (ts_point_lt(position, child_right_position.extent)) { + TreePathEntry child_entry = { child, child_position, i }; + if (child->visible) { + array_push(path, child_entry); + return true; + } else if (child->visible_child_count > 0) { + array_push(path, child_entry); + did_descend = true; + break; + } + } + child_position = child_right_position; + } + } while (did_descend); + return false; +} + +static size_t tree_path_advance(TreePath *path) { + size_t ascend_count = 0; + while (path->size > 0) { + TreePathEntry entry = array_pop(path); + if (path->size == 0) + break; + TreePathEntry parent_entry = *array_back(path); + if (parent_entry.tree->visible) ascend_count++; + TSLength position = + ts_length_add(entry.position, ts_tree_total_size(entry.tree)); + for (size_t i = entry.child_index + 1; i < parent_entry.tree->child_count; i++) { + TSTree *next_child = parent_entry.tree->children[i]; + if (next_child->visible || next_child->visible_child_count > 0) { + if (parent_entry.tree->visible) ascend_count--; + array_push(path, ((TreePathEntry){ + .tree = next_child, + .child_index = i, + .position = position, + })); + if (!next_child->visible) + tree_path_descend(path, (TSPoint){ 0, 0 }); + return ascend_count; + } + position = ts_length_add(position, ts_tree_total_size(next_child)); + } + } + return ascend_count; +} + +static void tree_path_ascend(TreePath *path, size_t count) { + for (size_t i = 0; i < count; i++) { + do { + array_pop(path); + } while (path->size > 0 && !array_back(path)->tree->visible); + } +} + +static void tree_path_init(TreePath *path, TSTree *tree) { + array_clear(path); + array_push(path, + ((TreePathEntry){ + .tree = tree, .position = { 0, 0, { 0, 0 } }, .child_index = 0, + })); + if (!tree->visible) + tree_path_descend(path, (TSPoint){ 0, 0 }); +} + +static bool tree_must_eq(TSTree *old_tree, TSTree *new_tree) { + return old_tree == new_tree || ( + !old_tree->has_changes && + old_tree->symbol == new_tree->symbol && + old_tree->size.bytes == new_tree->size.bytes && + old_tree->parse_state != TS_TREE_STATE_NONE && + new_tree->parse_state != TS_TREE_STATE_NONE && + (old_tree->parse_state == ERROR_STATE) == + (new_tree->parse_state == ERROR_STATE) + ); +} + +static bool tree_path_get_changes(TreePath *old_path, TreePath *new_path, + TSRange **ranges, size_t *range_count) { + TSPoint position = { 0, 0 }; + RangeArray results = array_new(); + + while (old_path->size && new_path->size) { + bool is_changed = false; + TSPoint next_position = position; + + TreePathEntry old_entry = *array_back(old_path); + TreePathEntry new_entry = *array_back(new_path); + TSTree *old_tree = old_entry.tree; + TSTree *new_tree = new_entry.tree; + size_t old_start_byte = old_entry.position.bytes + old_tree->padding.bytes; + size_t new_start_byte = new_entry.position.bytes + new_tree->padding.bytes; + TSPoint old_start_point = + ts_point_add(old_entry.position.extent, old_tree->padding.extent); + TSPoint new_start_point = + ts_point_add(new_entry.position.extent, new_tree->padding.extent); + TSPoint old_end_point = ts_point_add(old_start_point, old_tree->size.extent); + TSPoint new_end_point = ts_point_add(new_start_point, new_tree->size.extent); + + // #define NAME(t) (ts_language_symbol_name(language, ((TSTree *)(t))->symbol)) + // printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\n", + // position.row, position.column, NAME(old_tree), old_start_point.row, + // old_start_point.column, old_end_point.row, old_end_point.column, + // NAME(new_tree), new_start_point.row, new_start_point.column, + // new_end_point.row, new_end_point.column); + + if (ts_point_lt(position, old_start_point)) { + if (ts_point_lt(position, new_start_point)) { + next_position = ts_point_min(old_start_point, new_start_point); + } else { + is_changed = true; + next_position = old_start_point; + } + } else if (ts_point_lt(position, new_start_point)) { + is_changed = true; + next_position = new_start_point; + } else if (old_start_byte == new_start_byte && + tree_must_eq(old_tree, new_tree)) { + next_position = old_end_point; + } else if (old_tree->symbol == new_tree->symbol) { + if (tree_path_descend(old_path, position)) { + if (!tree_path_descend(new_path, position)) { + tree_path_ascend(old_path, 1); + is_changed = true; + next_position = new_end_point; + } + } else if (tree_path_descend(new_path, position)) { + tree_path_ascend(new_path, 1); + is_changed = true; + next_position = old_end_point; + } else { + next_position = ts_point_min(old_end_point, new_end_point); + } + } else { + is_changed = true; + next_position = ts_point_min(old_end_point, new_end_point); + } + + bool at_old_end = ts_point_lte(old_end_point, next_position); + bool at_new_end = ts_point_lte(new_end_point, next_position); + + if (at_new_end && at_old_end) { + size_t old_ascend_count = tree_path_advance(old_path); + size_t new_ascend_count = tree_path_advance(new_path); + if (old_ascend_count > new_ascend_count) { + tree_path_ascend(new_path, old_ascend_count - new_ascend_count); + } else if (new_ascend_count > old_ascend_count) { + tree_path_ascend(old_path, new_ascend_count - old_ascend_count); + } + } else if (at_new_end) { + size_t ascend_count = tree_path_advance(new_path); + tree_path_ascend(old_path, ascend_count); + } else if (at_old_end) { + size_t ascend_count = tree_path_advance(old_path); + tree_path_ascend(new_path, ascend_count); + } + + if (is_changed) range_array_add(&results, position, next_position); + position = next_position; + } + + *ranges = results.contents; + *range_count = results.size; + return true; +} + +#endif // RUNTIME_TREE_PATH_H_