diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 1d869dc7..c639bd37 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -29,9 +29,8 @@ typedef enum { typedef struct { void *payload; const char *(*read)(void *payload, uint32_t *bytes_read); - int (*seek)(void *payload, uint32_t character_index, uint32_t byte_index); + int (*seek)(void *payload, uint32_t byte_index); TSInputEncoding encoding; - bool measure_columns_in_bytes; } TSInput; typedef enum { @@ -65,7 +64,7 @@ typedef struct { typedef struct { const void *data; - uint32_t offset[3]; + uint32_t offset[2]; } TSNode; typedef struct { @@ -74,10 +73,8 @@ typedef struct { void *data; } TSSymbolIterator; -uint32_t ts_node_start_char(TSNode); uint32_t ts_node_start_byte(TSNode); TSPoint ts_node_start_point(TSNode); -uint32_t ts_node_end_char(TSNode); uint32_t ts_node_end_byte(TSNode); TSPoint ts_node_end_point(TSNode); TSSymbol ts_node_symbol(TSNode); @@ -98,8 +95,6 @@ TSNode ts_node_next_sibling(TSNode); TSNode ts_node_next_named_sibling(TSNode); TSNode ts_node_prev_sibling(TSNode); TSNode ts_node_prev_named_sibling(TSNode); -TSNode ts_node_descendant_for_char_range(TSNode, uint32_t, uint32_t); -TSNode ts_node_named_descendant_for_char_range(TSNode, uint32_t, uint32_t); TSNode ts_node_descendant_for_byte_range(TSNode, uint32_t, uint32_t); TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t); TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint); diff --git a/src/runtime/document.c b/src/runtime/document.c index c013e8cf..94dd99dc 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -30,7 +30,6 @@ void ts_document_free(TSDocument *self) { NULL, NULL, TSInputEncodingUTF8, - false }); ts_free(self); } @@ -159,7 +158,7 @@ void ts_document_invalidate(TSDocument *self) { } TSNode ts_document_root_node(const TSDocument *self) { - return ts_node_make(self->tree, 0, 0, 0); + return ts_node_make(self->tree, 0, 0); } uint32_t ts_document_parse_count(const TSDocument *self) { diff --git a/src/runtime/length.h b/src/runtime/length.h index 7d2c0b01..2afbc42b 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -8,16 +8,13 @@ typedef struct { uint32_t bytes; - uint32_t chars; TSPoint extent; } Length; -static inline bool length_has_unknown_chars(Length self) { - return self.bytes > 0 && self.chars == 0; -} +static const Length LENGTH_UNDEFINED = {0, {0, 1}}; -static inline void length_set_unknown_chars(Length *self) { - self->chars = 0; +static inline bool length_is_undefined(Length length) { + return length.bytes == 0 && length.extent.column != 0; } static inline Length length_min(Length len1, Length len2) { @@ -28,13 +25,6 @@ static inline Length length_add(Length len1, Length len2) { Length result; result.bytes = len1.bytes + len2.bytes; result.extent = point_add(len1.extent, len2.extent); - - if (length_has_unknown_chars(len1) || length_has_unknown_chars(len2)) { - result.chars = 0; - } else { - result.chars = len1.chars + len2.chars; - } - return result; } @@ -42,18 +32,11 @@ static inline Length length_sub(Length len1, Length len2) { Length result; result.bytes = len1.bytes - len2.bytes; result.extent = point_sub(len1.extent, len2.extent); - - if (length_has_unknown_chars(len1) || length_has_unknown_chars(len2)) { - result.chars = 0; - } else { - result.chars = len1.chars - len2.chars; - } - return result; } static inline Length length_zero() { - Length result = {0, 0, {0, 0}}; + Length result = {0, {0, 0}}; return result; } diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index c15e6ab1..65da088e 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -16,19 +16,16 @@ static const char empty_chunk[2] = { 0, 0 }; -static Length unknown_length = {UINT32_MAX, 0, {0, 0}}; - static void ts_lexer__get_chunk(Lexer *self) { TSInput input = self->input; if (!self->chunk || - self->current_position.bytes != self->chunk_start + self->chunk_size) - input.seek(input.payload, self->current_position.chars, - self->current_position.bytes); + self->current_position.bytes != self->chunk_start + self->chunk_size) { + input.seek(input.payload, self->current_position.bytes); + } self->chunk_start = self->current_position.bytes; self->chunk = input.read(input.payload, &self->chunk_size); - if (!self->chunk_size) - self->chunk = empty_chunk; + if (!self->chunk_size) self->chunk = empty_chunk; } static void ts_lexer__get_lookahead(Lexer *self) { @@ -62,14 +59,11 @@ static void ts_lexer__advance(void *payload, bool skip) { if (self->lookahead_size) { self->current_position.bytes += self->lookahead_size; - self->current_position.chars++; if (self->data.lookahead == '\n') { self->current_position.extent.row++; self->current_position.extent.column = 0; - } else if (self->input.measure_columns_in_bytes) { - self->current_position.extent.column += self->lookahead_size; } else { - self->current_position.extent.column++; + self->current_position.extent.column += self->lookahead_size; } } @@ -93,7 +87,22 @@ static void ts_lexer__mark_end(void *payload) { static uint32_t ts_lexer__get_column(void *payload) { Lexer *self = (Lexer *)payload; - return self->current_position.extent.column; + uint32_t goal_byte = self->current_position.bytes; + + self->current_position.bytes -= self->current_position.extent.column; + self->current_position.extent.column = 0; + + if (self->current_position.bytes < self->chunk_start) { + ts_lexer__get_chunk(self); + } + + uint32_t result = 0; + while (self->current_position.bytes < goal_byte) { + ts_lexer__advance(self, false); + result++; + } + + return result; } /* @@ -122,7 +131,7 @@ void ts_lexer_init(Lexer *self) { static inline void ts_lexer__reset(Lexer *self, Length position) { self->token_start_position = position; - self->token_end_position = unknown_length; + self->token_end_position = LENGTH_UNDEFINED; self->current_position = position; if (self->chunk && (position.bytes < self->chunk_start || @@ -152,7 +161,7 @@ void ts_lexer_reset(Lexer *self, Length position) { void ts_lexer_start(Lexer *self) { self->token_start_position = self->current_position; - self->token_end_position = unknown_length; + self->token_end_position = LENGTH_UNDEFINED; self->data.result_symbol = 0; if (!self->chunk) diff --git a/src/runtime/node.c b/src/runtime/node.c index f701c047..ce01db31 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -3,8 +3,8 @@ #include "runtime/tree.h" #include "runtime/document.h" -TSNode ts_node_make(const Tree *tree, uint32_t chars, uint32_t byte, uint32_t row) { - return (TSNode){.data = tree, .offset = { chars, byte, row } }; +TSNode ts_node_make(const Tree *tree, uint32_t byte, uint32_t row) { + return (TSNode){.data = tree, .offset = { byte, row } }; } /* @@ -12,23 +12,19 @@ TSNode ts_node_make(const Tree *tree, uint32_t chars, uint32_t byte, uint32_t ro */ static inline TSNode ts_node__null() { - return ts_node_make(NULL, 0, 0, 0); + return ts_node_make(NULL, 0, 0); } static inline const Tree *ts_node__tree(TSNode self) { return self.data; } -static inline uint32_t ts_node__offset_char(TSNode self) { +static inline uint32_t ts_node__offset_byte(TSNode self) { return self.offset[0]; } -static inline uint32_t ts_node__offset_byte(TSNode self) { - return self.offset[1]; -} - static inline uint32_t ts_node__offset_row(TSNode self) { - return self.offset[2]; + return self.offset[1]; } static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { @@ -57,18 +53,20 @@ static inline uint32_t ts_node__relevant_child_count(TSNode self, static inline TSNode ts_node__direct_parent(TSNode self, uint32_t *index) { const Tree *tree = ts_node__tree(self); *index = tree->context.index; - return ts_node_make(tree->context.parent, - ts_node__offset_char(self) - tree->context.offset.chars, - ts_node__offset_byte(self) - tree->context.offset.bytes, - ts_node__offset_row(self) - tree->context.offset.extent.row); + return ts_node_make( + tree->context.parent, + ts_node__offset_byte(self) - tree->context.offset.bytes, + ts_node__offset_row(self) - tree->context.offset.extent.row + ); } static inline TSNode ts_node__direct_child(TSNode self, uint32_t i) { const Tree *child_tree = ts_node__tree(self)->children[i]; return ts_node_make( - child_tree, ts_node__offset_char(self) + child_tree->context.offset.chars, + child_tree, ts_node__offset_byte(self) + child_tree->context.offset.bytes, - ts_node__offset_row(self) + child_tree->context.offset.extent.row); + ts_node__offset_row(self) + child_tree->context.offset.extent.row + ); } static inline TSNode ts_node__child(TSNode self, uint32_t child_index, @@ -154,33 +152,6 @@ static inline bool point_gt(TSPoint a, TSPoint b) { return a.row > b.row || (a.row == b.row && a.column > b.column); } -static inline TSNode ts_node__descendant_for_char_range(TSNode self, uint32_t min, - uint32_t max, - bool include_anonymous) { - TSNode node = self; - TSNode last_visible_node = self; - - bool did_descend = true; - while (did_descend) { - did_descend = false; - - for (uint32_t i = 0; i < ts_node__tree(node)->child_count; i++) { - TSNode child = ts_node__direct_child(node, i); - if (ts_node_start_char(child) > min) - break; - if (ts_node_end_char(child) > max) { - node = child; - if (ts_node__is_relevant(node, include_anonymous)) - last_visible_node = node; - did_descend = true; - break; - } - } - } - - return last_visible_node; -} - static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t min, uint32_t max, bool include_anonymous) { @@ -193,12 +164,10 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t mi for (uint32_t i = 0; i < ts_node__tree(node)->child_count; i++) { TSNode child = ts_node__direct_child(node, i); - if (ts_node_start_byte(child) > min) - break; if (ts_node_end_byte(child) > max) { + if (ts_node_start_byte(child) > min) break; node = child; - if (ts_node__is_relevant(node, include_anonymous)) - last_visible_node = node; + if (ts_node__is_relevant(node, include_anonymous)) last_visible_node = node; did_descend = true; break; } @@ -208,8 +177,9 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t mi return last_visible_node; } -static inline TSNode ts_node__descendant_for_point_range( - TSNode self, TSPoint min, TSPoint max, bool include_anonymous) { +static inline TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint min, + TSPoint max, + bool include_anonymous) { TSNode node = self; TSNode last_visible_node = self; @@ -219,12 +189,10 @@ static inline TSNode ts_node__descendant_for_point_range( for (uint32_t i = 0; i < ts_node__tree(node)->child_count; i++) { TSNode child = ts_node__direct_child(node, i); - if (point_gt(ts_node_start_point(child), min)) - break; if (point_gt(ts_node_end_point(child), max)) { + if (point_gt(ts_node_start_point(child), min)) break; node = child; - if (ts_node__is_relevant(node, include_anonymous)) - last_visible_node = node; + if (ts_node__is_relevant(node, include_anonymous)) last_visible_node = node; did_descend = true; break; } @@ -238,14 +206,6 @@ static inline TSNode ts_node__descendant_for_point_range( * Public */ -uint32_t ts_node_start_char(TSNode self) { - return ts_node__offset_char(self) + ts_node__tree(self)->padding.chars; -} - -uint32_t ts_node_end_char(TSNode self) { - return ts_node_start_char(self) + ts_node__tree(self)->size.chars; -} - uint32_t ts_node_start_byte(TSNode self) { return ts_node__offset_byte(self) + ts_node__tree(self)->padding.bytes; } @@ -301,9 +261,10 @@ char *ts_node_string(TSNode self, const TSDocument *document) { } bool ts_node_eq(TSNode self, TSNode other) { - return ts_tree_eq(ts_node__tree(self), ts_node__tree(other)) && - self.offset[0] == other.offset[0] && - self.offset[1] == other.offset[1] && self.offset[2] == other.offset[2]; + return + ts_tree_eq(ts_node__tree(self), ts_node__tree(other)) && + self.offset[0] == other.offset[0] && + self.offset[1] == other.offset[1]; } bool ts_node_is_named(TSNode self) { @@ -374,15 +335,6 @@ TSNode ts_node_prev_named_sibling(TSNode self) { return ts_node__prev_sibling(self, false); } -TSNode ts_node_descendant_for_char_range(TSNode self, uint32_t min, uint32_t max) { - return ts_node__descendant_for_char_range(self, min, max, true); -} - -TSNode ts_node_named_descendant_for_char_range(TSNode self, uint32_t min, - uint32_t max) { - return ts_node__descendant_for_char_range(self, min, max, false); -} - TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t min, uint32_t max) { return ts_node__descendant_for_byte_range(self, min, max, true); } diff --git a/src/runtime/node.h b/src/runtime/node.h index 671d9835..ee184c9a 100644 --- a/src/runtime/node.h +++ b/src/runtime/node.h @@ -3,6 +3,6 @@ #include "runtime/tree.h" -TSNode ts_node_make(const Tree *, uint32_t character, uint32_t byte, uint32_t row); +TSNode ts_node_make(const Tree *, uint32_t byte, uint32_t row); #endif diff --git a/src/runtime/parser.c b/src/runtime/parser.c index ef6592ef..b89eca39 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -341,7 +341,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta &self->lexer.data, valid_external_tokens )) { - if (length_has_unknown_chars(self->lexer.token_end_position)) { + if (length_is_undefined(self->lexer.token_end_position)) { self->lexer.token_end_position = self->lexer.current_position; } @@ -367,7 +367,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta ); ts_lexer_start(&self->lexer); if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) { - if (length_has_unknown_chars(self->lexer.token_end_position)) { + if (length_is_undefined(self->lexer.token_end_position)) { self->lexer.token_end_position = self->lexer.current_position; } break; @@ -912,7 +912,7 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) unsigned new_cost = depth * ERROR_COST_PER_SKIPPED_TREE + - (position.chars - entry.position.chars) * ERROR_COST_PER_SKIPPED_CHAR + + (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; if (parser__better_version_exists(self, version, false, new_cost)) break; diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 46dfe8d0..a5fdc94c 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -134,11 +134,11 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p if (state == ERROR_STATE && !tree->extra) { node->error_cost += ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + - ERROR_COST_PER_SKIPPED_CHAR * tree->size.chars + + ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes + ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; if (previous_node->links[0].tree) { node->error_cost += - ERROR_COST_PER_SKIPPED_CHAR * tree->padding.chars + + ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes + ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; } } @@ -568,7 +568,7 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version return !head1->is_halted && !head2->is_halted && head1->node->state == head2->node->state && - head1->node->position.chars == head2->node->position.chars && + head1->node->position.bytes == head2->node->position.bytes && head1->node->depth == head2->node->depth && ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); } diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index 6cbf5b2c..cccef94e 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -8,7 +8,7 @@ typedef struct { uint32_t length; } TSStringInput; -const char *ts_string_input_read(void *payload, uint32_t *bytes_read) { +static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) { TSStringInput *input = (TSStringInput *)payload; if (input->position >= input->length) { *bytes_read = 0; @@ -20,7 +20,7 @@ const char *ts_string_input_read(void *payload, uint32_t *bytes_read) { return input->string + previous_position; } -int ts_string_input_seek(void *payload, uint32_t character, uint32_t byte) { +static int ts_string_input__seek(void *payload, uint32_t byte) { TSStringInput *input = (TSStringInput *)payload; input->position = byte; return (byte < input->length); @@ -40,12 +40,11 @@ TSInput ts_string_input_make_with_length(const char *string, uint32_t length) { input->length = length; return (TSInput){ .payload = input, - .read = ts_string_input_read, - .seek = ts_string_input_seek, + .read = ts_string_input__read, + .seek = ts_string_input__seek, .encoding = TSInputEncodingUTF8, - .measure_columns_in_bytes = false, }; error: - return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8, false }; + return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 }; } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index b1e1c8ca..9e16a81b 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -248,7 +248,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children, } if (self->symbol == ts_builtin_sym_error) { - self->error_cost += ERROR_COST_PER_SKIPPED_CHAR * self->size.chars + + self->error_cost += ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes + ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row; for (uint32_t i = 0; i < child_count; i++) if (!self->children[i]->extra) @@ -408,6 +408,9 @@ bool ts_tree_invalidate_lookahead(Tree *self, uint32_t edit_byte_offset) { return true; } +static inline TSPoint ts_tree_total_extent(const Tree *self) { + return point_add(self->padding.extent, self->size.extent); +} void ts_tree_edit(Tree *self, const TSInputEdit *edit) { uint32_t old_end_byte = edit->start_byte + edit->bytes_removed; @@ -420,14 +423,12 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) { self->has_changes = true; if (edit->start_byte < self->padding.bytes) { - length_set_unknown_chars(&self->padding); if (self->padding.bytes >= old_end_byte) { uint32_t trailing_padding_bytes = self->padding.bytes - old_end_byte; TSPoint trailing_padding_extent = point_sub(self->padding.extent, old_end_point); self->padding.bytes = new_end_byte + trailing_padding_bytes; self->padding.extent = point_add(new_end_point, trailing_padding_extent); } else { - length_set_unknown_chars(&self->size); uint32_t removed_content_bytes = old_end_byte - self->padding.bytes; TSPoint removed_content_extent = point_sub(old_end_point, self->padding.extent); self->size.bytes = self->size.bytes - removed_content_bytes; @@ -436,11 +437,9 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) { self->padding.extent = new_end_point; } } else if (edit->start_byte == self->padding.bytes && edit->bytes_removed == 0) { - length_set_unknown_chars(&self->padding); self->padding.bytes = self->padding.bytes + edit->bytes_added; self->padding.extent = point_add(self->padding.extent, edit->extent_added); } else { - length_set_unknown_chars(&self->size); uint32_t trailing_content_bytes = ts_tree_total_bytes(self) - old_end_byte; TSPoint trailing_content_extent = point_sub(ts_tree_total_extent(self), old_end_point); self->size.bytes = new_end_byte + trailing_content_bytes - self->padding.bytes; @@ -545,7 +544,7 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu } if (visible) { - if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.chars > 0) { + if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.bytes > 0) { cursor += snprintf(*writer, limit, "(UNEXPECTED "); cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char); } else { diff --git a/src/runtime/tree.h b/src/runtime/tree.h index a05edb12..243239a8 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -116,10 +116,6 @@ static inline Length ts_tree_total_size(const Tree *self) { return length_add(self->padding, self->size); } -static inline TSPoint ts_tree_total_extent(const Tree *self) { - return point_add(self->padding.extent, self->size.extent); -} - static inline bool ts_tree_is_fragile(const Tree *tree) { return tree->fragile_left || tree->fragile_right || ts_tree_total_bytes(tree) == 0; diff --git a/test/helpers/point_helpers.cc b/test/helpers/point_helpers.cc index 60f4f9a7..40dd67fd 100644 --- a/test/helpers/point_helpers.cc +++ b/test/helpers/point_helpers.cc @@ -15,9 +15,7 @@ bool operator==(const TSRange &left, const TSRange &right) { } bool operator==(const Length &left, const Length &right) { - return left.bytes == right.bytes && - left.chars == right.chars && - left.extent == right.extent; + return left.bytes == right.bytes && left.extent == right.extent; } bool operator<(const TSPoint &left, const TSPoint &right) { @@ -40,6 +38,5 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range) { } ostream &operator<<(ostream &stream, const Length &length) { - return stream << "{chars:" << length.chars << ", bytes:" << - length.bytes << ", extent:" << length.extent << "}"; + return stream << "{bytes:" << length.bytes << ", extent:" << length.extent << "}"; } diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index 01554e85..4ccde383 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -70,7 +70,7 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) { return spy->buffer; } -int SpyInput::seek(void *payload, uint32_t character, uint32_t byte) { +int SpyInput::seek(void *payload, uint32_t byte) { auto spy = static_cast(payload); spy->byte_offset = byte; return 0; @@ -90,7 +90,6 @@ TSInput SpyInput::input() { result.encoding = encoding; result.seek = seek; result.read = read; - result.measure_columns_in_bytes = true; return result; } diff --git a/test/helpers/spy_input.h b/test/helpers/spy_input.h index 2de1a301..fb77b5ae 100644 --- a/test/helpers/spy_input.h +++ b/test/helpers/spy_input.h @@ -17,7 +17,7 @@ class SpyInput { std::vector undo_stack; static const char * read(void *, uint32_t *); - static int seek(void *, uint32_t, uint32_t); + static int seek(void *, uint32_t); std::pair swap_substr(size_t, size_t, std::string); public: diff --git a/test/helpers/tree_helpers.cc b/test/helpers/tree_helpers.cc index bae33cf7..abd6bc10 100644 --- a/test/helpers/tree_helpers.cc +++ b/test/helpers/tree_helpers.cc @@ -35,7 +35,7 @@ ostream &operator<<(std::ostream &stream, const Tree *tree) { ostream &operator<<(ostream &stream, const TSNode &node) { return stream << string("{") << (const Tree *)node.data << - string(", ") << to_string(ts_node_start_char(node)) << string("}"); + string(", ") << to_string(ts_node_start_byte(node)) << string("}"); } bool operator==(const TSNode &left, const TSNode &right) { diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc index 7917a152..9cf5a14c 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -82,21 +82,12 @@ describe("Document", [&]() { ts_document_parse(document); }); - it("allows columns to be measured in either bytes or characters", [&]() { + it("measures columns in bytes", [&]() { const char16_t content[] = u"[true, false]"; spy_input->content = string((const char *)content, sizeof(content)); spy_input->encoding = TSInputEncodingUTF16; TSInput input = spy_input->input(); - input.measure_columns_in_bytes = false; - ts_document_set_input(document, input); - ts_document_invalidate(document); - ts_document_parse(document); - - TSNode root = ts_document_root_node(document); - AssertThat(ts_node_end_point(root), Equals({0, 14})); - - input.measure_columns_in_bytes = true; ts_document_set_input(document, input); ts_document_invalidate(document); ts_document_parse(document); @@ -142,7 +133,7 @@ describe("Document", [&]() { ts_document_set_input_string_with_length(document, content, 1); ts_document_parse(document); TSNode new_root = ts_document_root_node(document); - AssertThat(ts_node_end_char(new_root), Equals(1)); + AssertThat(ts_node_end_byte(new_root), Equals(1)); assert_node_string_equals( new_root, "(value (number))"); @@ -152,7 +143,7 @@ describe("Document", [&]() { ts_document_set_input_string(document, ""); ts_document_parse(document); TSNode new_root = ts_document_root_node(document); - AssertThat(ts_node_end_char(new_root), Equals(0)); + AssertThat(ts_node_end_byte(new_root), Equals(0)); assert_node_string_equals( new_root, "(ERROR)"); @@ -160,7 +151,7 @@ describe("Document", [&]() { ts_document_set_input_string(document, "1"); ts_document_parse(document); new_root = ts_document_root_node(document); - AssertThat(ts_node_end_char(new_root), Equals(1)); + AssertThat(ts_node_end_byte(new_root), Equals(1)); assert_node_string_equals( new_root, "(value (number))"); @@ -445,7 +436,6 @@ describe("Document", [&]() { root, "(ERROR (number) (null) (UNEXPECTED 'e'))"); - AssertThat(ts_node_end_char(root), Equals(input_string.size())); AssertThat(ts_node_end_byte(root), Equals(input_string.size())); }); diff --git a/test/runtime/node_test.cc b/test/runtime/node_test.cc index a18aed6d..08783522 100644 --- a/test/runtime/node_test.cc +++ b/test/runtime/node_test.cc @@ -99,8 +99,6 @@ describe("Node", [&]() { AssertThat(ts_node_named_child_count(root_node), Equals(3)); AssertThat(ts_node_start_byte(root_node), Equals(array_index)); AssertThat(ts_node_end_byte(root_node), Equals(array_end_index)); - AssertThat(ts_node_start_char(root_node), Equals(array_index)); - AssertThat(ts_node_end_char(root_node), Equals(array_end_index)); AssertThat(ts_node_start_point(root_node), Equals({ 2, 0 })); AssertThat(ts_node_end_point(root_node), Equals({ 8, 1 })); @@ -114,8 +112,6 @@ describe("Node", [&]() { AssertThat(ts_node_start_byte(number_node), Equals(number_index)); AssertThat(ts_node_end_byte(number_node), Equals(number_end_index)); - AssertThat(ts_node_start_char(number_node), Equals(number_index)); - AssertThat(ts_node_end_char(number_node), Equals(number_end_index)); AssertThat(ts_node_start_point(number_node), Equals({ 3, 2 })); AssertThat(ts_node_end_point(number_node), Equals({ 3, 5 })); @@ -191,7 +187,7 @@ describe("Node", [&]() { it("returns an iterator that yields each of the node's symbols", [&]() { const TSLanguage *language = ts_document_language(document); - TSNode false_node = ts_node_descendant_for_char_range(root_node, false_index, false_index + 1); + TSNode false_node = ts_node_descendant_for_byte_range(root_node, false_index, false_index + 1); TSSymbolIterator iterator = ts_node_symbols(false_node); AssertThat(iterator.done, Equals(false)); AssertThat(ts_language_symbol_name(language, iterator.value), Equals("false")); @@ -203,7 +199,7 @@ describe("Node", [&]() { ts_symbol_iterator_next(&iterator); AssertThat(iterator.done, Equals(true)); - TSNode comma_node = ts_node_descendant_for_char_range(root_node, number_end_index, number_end_index); + TSNode comma_node = ts_node_descendant_for_byte_range(root_node, number_end_index, number_end_index); iterator = ts_node_symbols(comma_node); AssertThat(iterator.done, Equals(false)); AssertThat(ts_language_symbol_name(language, iterator.value), Equals(",")); @@ -375,17 +371,17 @@ describe("Node", [&]() { }); }); - describe("named_descendant_for_char_range(start, end)", [&]() { + describe("named_descendant_for_byte_range(start, end)", [&]() { describe("when there is a leaf node that spans the given range exactly", [&]() { it("returns that leaf node", [&]() { - TSNode leaf = ts_node_named_descendant_for_char_range(root_node, string_index, string_end_index - 1); + TSNode leaf = ts_node_named_descendant_for_byte_range(root_node, string_index, string_end_index - 1); AssertThat(ts_node_type(leaf, document), Equals("string")); AssertThat(ts_node_start_byte(leaf), Equals(string_index)); AssertThat(ts_node_end_byte(leaf), Equals(string_end_index)); AssertThat(ts_node_start_point(leaf), Equals({ 6, 4 })); AssertThat(ts_node_end_point(leaf), Equals({ 6, 7 })); - leaf = ts_node_named_descendant_for_char_range(root_node, number_index, number_end_index - 1); + leaf = ts_node_named_descendant_for_byte_range(root_node, number_index, number_end_index - 1); AssertThat(ts_node_type(leaf, document), Equals("number")); AssertThat(ts_node_start_byte(leaf), Equals(number_index)); AssertThat(ts_node_end_byte(leaf), Equals(number_end_index)); @@ -396,14 +392,14 @@ describe("Node", [&]() { describe("when there is a leaf node that extends beyond the given range", [&]() { it("returns that leaf node", [&]() { - TSNode leaf = ts_node_named_descendant_for_char_range(root_node, string_index, string_index + 1); + TSNode leaf = ts_node_named_descendant_for_byte_range(root_node, string_index, string_index + 1); AssertThat(ts_node_type(leaf, document), Equals("string")); AssertThat(ts_node_start_byte(leaf), Equals(string_index)); AssertThat(ts_node_end_byte(leaf), Equals(string_end_index)); AssertThat(ts_node_start_point(leaf), Equals({ 6, 4 })); AssertThat(ts_node_end_point(leaf), Equals({ 6, 7 })); - leaf = ts_node_named_descendant_for_char_range(root_node, string_index + 1, string_index + 2); + leaf = ts_node_named_descendant_for_byte_range(root_node, string_index + 1, string_index + 2); AssertThat(ts_node_type(leaf, document), Equals("string")); AssertThat(ts_node_start_byte(leaf), Equals(string_index)); AssertThat(ts_node_end_byte(leaf), Equals(string_end_index)); @@ -414,7 +410,7 @@ describe("Node", [&]() { describe("when there is no leaf node that spans the given range", [&]() { it("returns the smallest node that does span the range", [&]() { - TSNode pair_node = ts_node_named_descendant_for_char_range(root_node, string_index, string_index + 3); + TSNode pair_node = ts_node_named_descendant_for_byte_range(root_node, string_index, string_index + 3); AssertThat(ts_node_type(pair_node, document), Equals("pair")); AssertThat(ts_node_start_byte(pair_node), Equals(string_index)); AssertThat(ts_node_end_byte(pair_node), Equals(null_end_index)); @@ -423,7 +419,7 @@ describe("Node", [&]() { }); it("does not return invisible nodes (repeats)", [&]() { - TSNode node = ts_node_named_descendant_for_char_range(root_node, number_end_index, number_end_index + 1); + TSNode node = ts_node_named_descendant_for_byte_range(root_node, number_end_index, number_end_index + 1); AssertThat(ts_node_type(node, document), Equals("array")); AssertThat(ts_node_start_byte(node), Equals(array_index)); AssertThat(ts_node_end_byte(node), Equals(array_end_index)); @@ -433,31 +429,31 @@ describe("Node", [&]() { }); }); - describe("descendant_for_char_range(start, end)", [&]() { - it("returns the smallest node that spans the given range", [&]() { - TSNode node1 = ts_node_descendant_for_char_range(root_node, colon_index, colon_index); + describe("descendant_for_byte_range(start, end)", [&]() { + it("returns the smallest node that spans the given byte offsets", [&]() { + TSNode node1 = ts_node_descendant_for_byte_range(root_node, colon_index, colon_index); AssertThat(ts_node_type(node1, document), Equals(":")); AssertThat(ts_node_start_byte(node1), Equals(colon_index)); AssertThat(ts_node_end_byte(node1), Equals(colon_index + 1)); AssertThat(ts_node_start_point(node1), Equals({ 6, 7 })); AssertThat(ts_node_end_point(node1), Equals({ 6, 8 })); - TSNode node2 = ts_node_descendant_for_char_range(root_node, string_index + 2, string_index + 4); + TSNode node2 = ts_node_descendant_for_byte_range(root_node, string_index + 2, string_index + 4); AssertThat(ts_node_type(node2, document), Equals("pair")); AssertThat(ts_node_start_byte(node2), Equals(string_index)); AssertThat(ts_node_end_byte(node2), Equals(null_end_index)); AssertThat(ts_node_start_point(node2), Equals({ 6, 4 })); AssertThat(ts_node_end_point(node2), Equals({ 6, 13 })); }); - }); - describe("descendant_for_byte_range(start, end)", [&]() { - it("returns the smallest concrete node that spans the given range", [&]() { - ts_document_set_input_string(document, "[\"αβγδ\", \"αβγδ\"]"); + it("works in the presence of multi-byte characters", [&]() { + string input_string = "[\"αβγδ\", \"αβγδ\"]"; + ts_document_set_input_string(document, input_string.c_str()); ts_document_parse(document); TSNode root_node = ts_document_root_node(document); - TSNode node1 = ts_node_descendant_for_char_range(root_node, 7, 7); + uint32_t comma_position = input_string.find(","); + TSNode node1 = ts_node_descendant_for_byte_range(root_node, comma_position, comma_position); AssertThat(ts_node_type(node1, document), Equals(",")); TSNode node2 = ts_node_descendant_for_byte_range(root_node, 6, 10); diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 420cf092..5d1bbe30 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -335,7 +335,7 @@ describe("Parser", [&]() { assert_root_node( "(program (expression_statement (binary_expression (identifier) (number))))"); - TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1); + TSNode node = ts_node_named_descendant_for_byte_range(root, 1, 1); AssertThat(ts_node_type(node, document), Equals("identifier")); AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc"))); }); @@ -354,7 +354,7 @@ describe("Parser", [&]() { assert_root_node( "(program (expression_statement (binary_expression (identifier) (number))))"); - TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1); + TSNode node = ts_node_named_descendant_for_byte_range(root, 1, 1); AssertThat(ts_node_type(node, document), Equals("identifier")); AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ"))); }); @@ -487,7 +487,6 @@ describe("Parser", [&]() { assert_root_node( "(program (expression_statement (string)))"); - AssertThat(ts_node_end_char(root), Equals(strlen("'OOO - DD';"))); AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';"))); }); diff --git a/test/runtime/stack_test.cc b/test/runtime/stack_test.cc index cb5abf2d..b40972dc 100644 --- a/test/runtime/stack_test.cc +++ b/test/runtime/stack_test.cc @@ -20,7 +20,7 @@ enum { }; Length operator*(const Length &length, uint32_t factor) { - return {length.bytes * factor, length.chars * factor, {0, length.extent.column * factor}}; + return {length.bytes * factor, {0, length.extent.column * factor}}; } void free_slice_array(StackSliceArray *slices) { @@ -71,7 +71,7 @@ describe("Stack", [&]() { Stack *stack; const size_t tree_count = 11; Tree *trees[tree_count]; - Length tree_len = {2, 3, {0, 3}}; + Length tree_len = {3, {0, 3}}; before_each([&]() { record_alloc::start(); diff --git a/test/runtime/tree_test.cc b/test/runtime/tree_test.cc index fad9b9c1..a3191ac3 100644 --- a/test/runtime/tree_test.cc +++ b/test/runtime/tree_test.cc @@ -42,7 +42,7 @@ describe("Tree", []() { describe("make_leaf", [&]() { it("does not mark the tree as fragile", [&]() { - Tree *tree = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, &language); + Tree *tree = ts_tree_make_leaf(symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); AssertThat(tree->fragile_left, IsFalse()); AssertThat(tree->fragile_right, IsFalse()); }); @@ -68,8 +68,8 @@ describe("Tree", []() { Tree *tree1, *tree2, *parent1; before_each([&]() { - tree1 = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, &language); - tree2 = ts_tree_make_leaf(symbol2, {1, 1, {0, 1}}, {3, 3, {0, 3}}, &language); + tree1 = ts_tree_make_leaf(symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); + tree2 = ts_tree_make_leaf(symbol2, {1, {0, 1}}, {3, {0, 3}}, &language); ts_tree_retain(tree1); ts_tree_retain(tree2); @@ -87,12 +87,9 @@ describe("Tree", []() { it("computes its size and padding based on its child nodes", [&]() { AssertThat(parent1->size.bytes, Equals( - tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes)); - AssertThat(parent1->size.chars, Equals( - tree1->size.chars + tree2->padding.chars + tree2->size.chars)); - + tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes + )); AssertThat(parent1->padding.bytes, Equals(tree1->padding.bytes)); - AssertThat(parent1->padding.chars, Equals(tree1->padding.chars)); }); describe("when the first node is fragile on the left side", [&]() { @@ -174,13 +171,13 @@ describe("Tree", []() { before_each([&]() { tree = ts_tree_make_node(symbol1, 3, tree_array({ - ts_tree_make_leaf(symbol2, {2, 2, {0, 2}}, {3, 3, {0, 3}}, &language), - ts_tree_make_leaf(symbol3, {2, 2, {0, 2}}, {3, 3, {0, 3}}, &language), - ts_tree_make_leaf(symbol4, {2, 2, {0, 2}}, {3, 3, {0, 3}}, &language), + ts_tree_make_leaf(symbol2, {2, {0, 2}}, {3, {0, 3}}, &language), + ts_tree_make_leaf(symbol3, {2, {0, 2}}, {3, {0, 3}}, &language), + ts_tree_make_leaf(symbol4, {2, {0, 2}}, {3, {0, 3}}, &language), }), 0, &language); - AssertThat(tree->padding, Equals({2, 2, {0, 2}})); - AssertThat(tree->size, Equals({13, 13, {0, 13}})); + AssertThat(tree->padding, Equals({2, {0, 2}})); + AssertThat(tree->size, Equals({13, {0, 13}})); }); after_each([&]() { @@ -200,16 +197,16 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({3, 0, {0, 3}})); - AssertThat(tree->size, Equals({13, 13, {0, 13}})); + AssertThat(tree->padding, Equals({3, {0, 3}})); + AssertThat(tree->size, Equals({13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({3, 0, {0, 3}})); - AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); + AssertThat(tree->children[0]->padding, Equals({3, {0, 3}})); + AssertThat(tree->children[0]->size, Equals({3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); - AssertThat(tree->children[1]->padding, Equals({2, 2, {0, 2}})); - AssertThat(tree->children[1]->size, Equals({3, 3, {0, 3}})); + AssertThat(tree->children[1]->padding, Equals({2, {0, 2}})); + AssertThat(tree->children[1]->size, Equals({3, {0, 3}})); }); }); @@ -226,12 +223,12 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({5, 0, {0, 5}})); - AssertThat(tree->size, Equals({11, 0, {0, 11}})); + AssertThat(tree->padding, Equals({5, {0, 5}})); + AssertThat(tree->size, Equals({11, {0, 11}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({5, 0, {0, 5}})); - AssertThat(tree->children[0]->size, Equals({1, 0, {0, 1}})); + AssertThat(tree->children[0]->padding, Equals({5, {0, 5}})); + AssertThat(tree->children[0]->size, Equals({1, {0, 1}})); }); }); @@ -250,12 +247,12 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({4, 0, {0, 4}})); - AssertThat(tree->size, Equals({13, 13, {0, 13}})); + AssertThat(tree->padding, Equals({4, {0, 4}})); + AssertThat(tree->size, Equals({13, {0, 13}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({4, 0, {0, 4}})); - AssertThat(tree->children[0]->size, Equals({3, 3, {0, 3}})); + AssertThat(tree->children[0]->padding, Equals({4, {0, 4}})); + AssertThat(tree->children[0]->size, Equals({3, {0, 3}})); AssertThat(tree->children[1]->has_changes, IsFalse()); }); @@ -274,12 +271,12 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({2, 2, {0, 2}})); - AssertThat(tree->size, Equals({16, 0, {0, 16}})); + AssertThat(tree->padding, Equals({2, {0, 2}})); + AssertThat(tree->size, Equals({16, {0, 16}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({2, 2, {0, 2}})); - AssertThat(tree->children[0]->size, Equals({6, 0, {0, 6}})); + AssertThat(tree->children[0]->padding, Equals({2, {0, 2}})); + AssertThat(tree->children[0]->size, Equals({6, {0, 6}})); AssertThat(tree->children[1]->has_changes, IsFalse()); }); @@ -300,20 +297,20 @@ describe("Tree", []() { assert_consistent(tree); AssertThat(tree->has_changes, IsTrue()); - AssertThat(tree->padding, Equals({4, 0, {0, 4}})); - AssertThat(tree->size, Equals({4, 0, {0, 4}})); + AssertThat(tree->padding, Equals({4, {0, 4}})); + AssertThat(tree->size, Equals({4, {0, 4}})); AssertThat(tree->children[0]->has_changes, IsTrue()); - AssertThat(tree->children[0]->padding, Equals({4, 0, {0, 4}})); - AssertThat(tree->children[0]->size, Equals({0, 0, {0, 0}})); + AssertThat(tree->children[0]->padding, Equals({4, {0, 4}})); + AssertThat(tree->children[0]->size, Equals({0, {0, 0}})); AssertThat(tree->children[1]->has_changes, IsTrue()); - AssertThat(tree->children[1]->padding, Equals({0, 0, {0, 0}})); - AssertThat(tree->children[1]->size, Equals({0, 0, {0, 0}})); + AssertThat(tree->children[1]->padding, Equals({0, {0, 0}})); + AssertThat(tree->children[1]->size, Equals({0, {0, 0}})); AssertThat(tree->children[2]->has_changes, IsTrue()); - AssertThat(tree->children[2]->padding, Equals({1, 0, {0, 1}})); - AssertThat(tree->children[2]->size, Equals({3, 3, {0, 3}})); + AssertThat(tree->children[2]->padding, Equals({1, {0, 1}})); + AssertThat(tree->children[2]->size, Equals({3, {0, 3}})); }); }); @@ -340,7 +337,7 @@ describe("Tree", []() { Tree *leaf; before_each([&]() { - leaf = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, &language); + leaf = ts_tree_make_leaf(symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); }); after_each([&]() { @@ -348,7 +345,7 @@ describe("Tree", []() { }); it("returns true for identical trees", [&]() { - Tree *leaf_copy = ts_tree_make_leaf(symbol1, {2, 1, {1, 1}}, {5, 4, {1, 4}}, &language); + Tree *leaf_copy = ts_tree_make_leaf(symbol1, {2, {1, 1}}, {5, {1, 4}}, &language); AssertThat(ts_tree_eq(leaf, leaf_copy), IsTrue()); Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({ @@ -402,7 +399,7 @@ describe("Tree", []() { }); it("returns false for trees with different children", [&]() { - Tree *leaf2 = ts_tree_make_leaf(symbol2, {1, 1, {0, 1}}, {3, 3, {0, 3}}, &language); + Tree *leaf2 = ts_tree_make_leaf(symbol2, {1, {0, 1}}, {3, {0, 3}}, &language); Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({ leaf, @@ -428,8 +425,8 @@ describe("Tree", []() { }); describe("last_external_token", [&]() { - Length padding = {1, 1, {0, 1}}; - Length size = {2, 2, {0, 2}}; + Length padding = {1, {0, 1}}; + Length size = {2, {0, 2}}; auto make_external = [](Tree *tree) { tree->has_external_tokens = true;