Return a character count from the lexer's get_column method

This commit is contained in:
Max Brunsfeld 2017-12-20 16:26:38 -08:00
parent fcff16cb86
commit 0e69da37a5
20 changed files with 143 additions and 233 deletions

View file

@ -30,7 +30,6 @@ void ts_document_free(TSDocument *self) {
NULL,
NULL,
TSInputEncodingUTF8,
false
});
ts_free(self);
}
@ -159,7 +158,7 @@ void ts_document_invalidate(TSDocument *self) {
}
// Returns a node handle for the document's root tree (self->tree) with every
// position offset set to zero.
TSNode ts_document_root_node(const TSDocument *self) {
// NOTE(review): this listing shows diff lines without +/- markers. The
// four-argument call looks like the pre-change line and the three-argument
// call like its replacement -- confirm against the real file.
return ts_node_make(self->tree, 0, 0, 0);
return ts_node_make(self->tree, 0, 0);
}
uint32_t ts_document_parse_count(const TSDocument *self) {

View file

@ -8,16 +8,13 @@
// A text length/position: a byte count plus a TSPoint extent.
typedef struct {
uint32_t bytes;
// NOTE(review): `chars` looks like a line removed by this change (the
// two-field initializers elsewhere in this listing have no slot for it) --
// confirm against the real file.
uint32_t chars;
TSPoint extent;
} Length;
// Returns true when the length covers at least one byte but its character
// count is zero, i.e. the character count has not been computed.
// NOTE(review): appears to be a helper removed by this change -- confirm.
static inline bool length_has_unknown_chars(Length self) {
return self.bytes > 0 && self.chars == 0;
}
// Sentinel Length: zero bytes paired with a non-zero extent component.
// NOTE(review): presumably the `1` lands in extent.column (TSPoint's field
// order is not visible here) -- confirm.
static const Length LENGTH_UNDEFINED = {0, {0, 1}};
// NOTE(review): two definitions are interleaved here because this listing
// shows diff lines without +/- markers. length_set_unknown_chars (header plus
// `self->chars = 0;`) looks like the removed helper and length_is_undefined
// like the added one; they share the closing brace below -- confirm.
//
// length_set_unknown_chars: resets the character count of *self to zero.
static inline void length_set_unknown_chars(Length *self) {
self->chars = 0;
// length_is_undefined: returns true when `length` has zero bytes but a
// non-zero extent column.
static inline bool length_is_undefined(Length length) {
return length.bytes == 0 && length.extent.column != 0;
}
static inline Length length_min(Length len1, Length len2) {
@ -28,13 +25,6 @@ static inline Length length_add(Length len1, Length len2) {
Length result;
result.bytes = len1.bytes + len2.bytes;
result.extent = point_add(len1.extent, len2.extent);
if (length_has_unknown_chars(len1) || length_has_unknown_chars(len2)) {
result.chars = 0;
} else {
result.chars = len1.chars + len2.chars;
}
return result;
}
@ -42,18 +32,11 @@ static inline Length length_sub(Length len1, Length len2) {
Length result;
result.bytes = len1.bytes - len2.bytes;
result.extent = point_sub(len1.extent, len2.extent);
if (length_has_unknown_chars(len1) || length_has_unknown_chars(len2)) {
result.chars = 0;
} else {
result.chars = len1.chars - len2.chars;
}
return result;
}
// Returns a Length with every field set to zero.
static inline Length length_zero() {
// NOTE(review): the three-element initializer looks like the pre-change line
// (with a `chars` slot) and the two-element one like its replacement; only
// one of them can exist in the real file -- confirm.
Length result = {0, 0, {0, 0}};
Length result = {0, {0, 0}};
return result;
}

View file

@ -16,19 +16,16 @@
// Two zero bytes, used as the lexer's chunk when the input yields no data.
static const char empty_chunk[2] = { 0, 0 };
// Length initialised with UINT32_MAX in its first field and zeros elsewhere.
// NOTE(review): appears to be removed by this change; its visible uses in
// this listing sit next to LENGTH_UNDEFINED replacements -- confirm.
static Length unknown_length = {UINT32_MAX, 0, {0, 0}};
// Loads the input chunk that starts at the lexer's current byte position.
//
// If no chunk is loaded, or the current position is not exactly the end of
// the loaded chunk, the input is asked to seek first. The chunk is then read
// and its start byte recorded.
//
// NOTE(review): this listing shows diff lines without +/- markers, so the
// seek condition and the empty-chunk check each appear twice. The forms that
// pass `current_position.chars` to seek and lack braces look like the
// pre-change lines -- confirm against the real file.
static void ts_lexer__get_chunk(Lexer *self) {
TSInput input = self->input;
if (!self->chunk ||
self->current_position.bytes != self->chunk_start + self->chunk_size)
input.seek(input.payload, self->current_position.chars,
self->current_position.bytes);
self->current_position.bytes != self->chunk_start + self->chunk_size) {
input.seek(input.payload, self->current_position.bytes);
}
// Record where the new chunk begins, then fetch it; read() reports the
// chunk's size through chunk_size.
self->chunk_start = self->current_position.bytes;
self->chunk = input.read(input.payload, &self->chunk_size);
// A zero-sized read is replaced by the static empty chunk.
if (!self->chunk_size)
self->chunk = empty_chunk;
if (!self->chunk_size) self->chunk = empty_chunk;
}
static void ts_lexer__get_lookahead(Lexer *self) {
@ -62,14 +59,11 @@ static void ts_lexer__advance(void *payload, bool skip) {
if (self->lookahead_size) {
self->current_position.bytes += self->lookahead_size;
self->current_position.chars++;
if (self->data.lookahead == '\n') {
self->current_position.extent.row++;
self->current_position.extent.column = 0;
} else if (self->input.measure_columns_in_bytes) {
self->current_position.extent.column += self->lookahead_size;
} else {
self->current_position.extent.column++;
self->current_position.extent.column += self->lookahead_size;
}
}
@ -93,7 +87,22 @@ static void ts_lexer__mark_end(void *payload) {
// Returns the lexer's column on the current line.
//
// The newer body rewinds the current position to the start of the line by
// subtracting extent.column from the byte offset, then steps forward with
// ts_lexer__advance until it is back at the original byte offset. The result
// is the number of advance calls made; per the commit title this is meant to
// be a character count.
//
// NOTE(review): the rewind assumes extent.column holds a byte count since
// the start of the line -- confirm.
static uint32_t ts_lexer__get_column(void *payload) {
Lexer *self = (Lexer *)payload;
// NOTE(review): this early return looks like the pre-change body (diff lines
// are shown without +/- markers); taken literally it would make everything
// below unreachable -- confirm it is absent from the real file.
return self->current_position.extent.column;
// Byte offset to return to once the line has been re-scanned.
uint32_t goal_byte = self->current_position.bytes;
self->current_position.bytes -= self->current_position.extent.column;
self->current_position.extent.column = 0;
// The line start may lie before the loaded chunk; reload if so.
if (self->current_position.bytes < self->chunk_start) {
ts_lexer__get_chunk(self);
}
uint32_t result = 0;
// NOTE(review): this loop ends only if each ts_lexer__advance call moves
// current_position.bytes forward. Confirm that holds after the rewind above
// (is the lookahead refreshed?) and when no lookahead is available.
while (self->current_position.bytes < goal_byte) {
ts_lexer__advance(self, false);
result++;
}
return result;
}
/*
@ -122,7 +131,7 @@ void ts_lexer_init(Lexer *self) {
static inline void ts_lexer__reset(Lexer *self, Length position) {
self->token_start_position = position;
self->token_end_position = unknown_length;
self->token_end_position = LENGTH_UNDEFINED;
self->current_position = position;
if (self->chunk && (position.bytes < self->chunk_start ||
@ -152,7 +161,7 @@ void ts_lexer_reset(Lexer *self, Length position) {
void ts_lexer_start(Lexer *self) {
self->token_start_position = self->current_position;
self->token_end_position = unknown_length;
self->token_end_position = LENGTH_UNDEFINED;
self->data.result_symbol = 0;
if (!self->chunk)

View file

@ -3,8 +3,8 @@
#include "runtime/tree.h"
#include "runtime/document.h"
// Builds a TSNode that refers to `tree` and stores the given position values
// in its `offset` array, in parameter order.
//
// NOTE(review): two versions are interleaved because this listing shows diff
// lines without +/- markers. The one taking (chars, byte, row) looks like the
// pre-change definition and the one taking (byte, row) like its replacement;
// they share the closing brace below -- confirm.
TSNode ts_node_make(const Tree *tree, uint32_t chars, uint32_t byte, uint32_t row) {
return (TSNode){.data = tree, .offset = { chars, byte, row } };
TSNode ts_node_make(const Tree *tree, uint32_t byte, uint32_t row) {
return (TSNode){.data = tree, .offset = { byte, row } };
}
/*
@ -12,23 +12,19 @@ TSNode ts_node_make(const Tree *tree, uint32_t chars, uint32_t byte, uint32_t ro
*/
// Returns a node with a NULL tree pointer and all offsets zero.
static inline TSNode ts_node__null() {
// NOTE(review): the four-argument call looks like the pre-change line and
// the three-argument call like its replacement -- confirm.
return ts_node_make(NULL, 0, 0, 0);
return ts_node_make(NULL, 0, 0);
}
// Returns the Tree pointer stored in the node's `data` field.
static inline const Tree *ts_node__tree(TSNode self) {
return self.data;
}
// Accessors for the position values stored in a node's `offset` array.
//
// NOTE(review): old and new definitions are interleaved because this listing
// shows diff lines without +/- markers. Judging by the two ts_node_make
// initializers in this listing, the layout was { chars, byte, row } before
// the change and { byte, row } after it. So ts_node__offset_char and the
// accessors reading index [1] for bytes / [2] for rows look like the
// pre-change lines -- confirm against the real file.
static inline uint32_t ts_node__offset_char(TSNode self) {
static inline uint32_t ts_node__offset_byte(TSNode self) {
return self.offset[0];
}
static inline uint32_t ts_node__offset_byte(TSNode self) {
return self.offset[1];
}
static inline uint32_t ts_node__offset_row(TSNode self) {
return self.offset[2];
return self.offset[1];
}
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
@ -57,18 +53,20 @@ static inline uint32_t ts_node__relevant_child_count(TSNode self,
// Returns a node for the immediate parent of `self`'s tree and writes the
// tree's index (tree->context.index) to *index.
//
// The parent's offsets are computed by subtracting this tree's context
// offset from the node's own offsets.
//
// NOTE(review): two return statements are interleaved because this listing
// shows diff lines without +/- markers. The one that also passes a char
// offset looks like the pre-change version -- confirm.
static inline TSNode ts_node__direct_parent(TSNode self, uint32_t *index) {
const Tree *tree = ts_node__tree(self);
*index = tree->context.index;
return ts_node_make(tree->context.parent,
ts_node__offset_char(self) - tree->context.offset.chars,
ts_node__offset_byte(self) - tree->context.offset.bytes,
ts_node__offset_row(self) - tree->context.offset.extent.row);
return ts_node_make(
tree->context.parent,
ts_node__offset_byte(self) - tree->context.offset.bytes,
ts_node__offset_row(self) - tree->context.offset.extent.row
);
}
// Returns a node for the i-th entry of `self`'s tree's children array.
//
// The child's offsets are computed by adding the child tree's context offset
// to the node's own offsets. No bounds check on `i` is performed here.
//
// NOTE(review): old and new argument lines are interleaved inside the call
// because this listing shows diff lines without +/- markers. The line that
// passes a char offset and the line ending in `);` directly after the row
// expression look like the pre-change lines -- confirm.
static inline TSNode ts_node__direct_child(TSNode self, uint32_t i) {
const Tree *child_tree = ts_node__tree(self)->children[i];
return ts_node_make(
child_tree, ts_node__offset_char(self) + child_tree->context.offset.chars,
child_tree,
ts_node__offset_byte(self) + child_tree->context.offset.bytes,
ts_node__offset_row(self) + child_tree->context.offset.extent.row);
ts_node__offset_row(self) + child_tree->context.offset.extent.row
);
}
static inline TSNode ts_node__child(TSNode self, uint32_t child_index,
@ -154,33 +152,6 @@ static inline bool point_gt(TSPoint a, TSPoint b) {
return a.row > b.row || (a.row == b.row && a.column > b.column);
}
// Walks down from `self` looking for a descendant located by the character
// positions `min` and `max`.
//
// At each level the children are scanned in order. The scan stops at the
// first child that starts after `min`; otherwise it descends into the first
// child whose end is greater than `max`. The most recent node for which
// ts_node__is_relevant(node, include_anonymous) held is remembered.
//
// Returns that remembered node, or `self` if no relevant descendant was
// entered.
//
// NOTE(review): appears to be removed by this change, along with the other
// character-based node functions -- confirm.
static inline TSNode ts_node__descendant_for_char_range(TSNode self, uint32_t min,
uint32_t max,
bool include_anonymous) {
TSNode node = self;
TSNode last_visible_node = self;
bool did_descend = true;
// Repeat until a full pass over the current node's children finds nothing
// to descend into.
while (did_descend) {
did_descend = false;
for (uint32_t i = 0; i < ts_node__tree(node)->child_count; i++) {
TSNode child = ts_node__direct_child(node, i);
// This child starts after `min`; stop scanning this level.
if (ts_node_start_char(child) > min)
break;
// This child ends past `max`: descend into it.
if (ts_node_end_char(child) > max) {
node = child;
if (ts_node__is_relevant(node, include_anonymous))
last_visible_node = node;
did_descend = true;
break;
}
}
}
return last_visible_node;
}
static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t min,
uint32_t max,
bool include_anonymous) {
@ -193,12 +164,10 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t mi
for (uint32_t i = 0; i < ts_node__tree(node)->child_count; i++) {
TSNode child = ts_node__direct_child(node, i);
if (ts_node_start_byte(child) > min)
break;
if (ts_node_end_byte(child) > max) {
if (ts_node_start_byte(child) > min) break;
node = child;
if (ts_node__is_relevant(node, include_anonymous))
last_visible_node = node;
if (ts_node__is_relevant(node, include_anonymous)) last_visible_node = node;
did_descend = true;
break;
}
@ -208,8 +177,9 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t mi
return last_visible_node;
}
static inline TSNode ts_node__descendant_for_point_range(
TSNode self, TSPoint min, TSPoint max, bool include_anonymous) {
static inline TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint min,
TSPoint max,
bool include_anonymous) {
TSNode node = self;
TSNode last_visible_node = self;
@ -219,12 +189,10 @@ static inline TSNode ts_node__descendant_for_point_range(
for (uint32_t i = 0; i < ts_node__tree(node)->child_count; i++) {
TSNode child = ts_node__direct_child(node, i);
if (point_gt(ts_node_start_point(child), min))
break;
if (point_gt(ts_node_end_point(child), max)) {
if (point_gt(ts_node_start_point(child), min)) break;
node = child;
if (ts_node__is_relevant(node, include_anonymous))
last_visible_node = node;
if (ts_node__is_relevant(node, include_anonymous)) last_visible_node = node;
did_descend = true;
break;
}
@ -238,14 +206,6 @@ static inline TSNode ts_node__descendant_for_point_range(
* Public
*/
// Returns the node's start position in characters: its char offset plus the
// character count of its tree's padding.
// NOTE(review): appears to be removed by this change -- confirm.
uint32_t ts_node_start_char(TSNode self) {
return ts_node__offset_char(self) + ts_node__tree(self)->padding.chars;
}
// Returns the node's end position in characters: its start character plus
// the character count of its tree's size.
// NOTE(review): appears to be removed by this change -- confirm.
uint32_t ts_node_end_char(TSNode self) {
return ts_node_start_char(self) + ts_node__tree(self)->size.chars;
}
// Returns the node's start position in bytes: its byte offset plus the byte
// count of its tree's padding.
uint32_t ts_node_start_byte(TSNode self) {
return ts_node__offset_byte(self) + ts_node__tree(self)->padding.bytes;
}
@ -301,9 +261,10 @@ char *ts_node_string(TSNode self, const TSDocument *document) {
}
// Returns true when both nodes have equal trees (per ts_tree_eq) and equal
// stored offsets.
//
// NOTE(review): two return statements are interleaved because this listing
// shows diff lines without +/- markers. The one that compares offset[2] looks
// like the pre-change version (three-element offset array) and the one that
// stops at offset[1] like its replacement -- confirm.
bool ts_node_eq(TSNode self, TSNode other) {
return ts_tree_eq(ts_node__tree(self), ts_node__tree(other)) &&
self.offset[0] == other.offset[0] &&
self.offset[1] == other.offset[1] && self.offset[2] == other.offset[2];
return
ts_tree_eq(ts_node__tree(self), ts_node__tree(other)) &&
self.offset[0] == other.offset[0] &&
self.offset[1] == other.offset[1];
}
bool ts_node_is_named(TSNode self) {
@ -374,15 +335,6 @@ TSNode ts_node_prev_named_sibling(TSNode self) {
return ts_node__prev_sibling(self, false);
}
// Public wrapper: character-range descendant lookup with include_anonymous
// set to true.
// NOTE(review): appears to be removed by this change -- confirm.
TSNode ts_node_descendant_for_char_range(TSNode self, uint32_t min, uint32_t max) {
return ts_node__descendant_for_char_range(self, min, max, true);
}
// Public wrapper: character-range descendant lookup with include_anonymous
// set to false.
// NOTE(review): appears to be removed by this change -- confirm.
TSNode ts_node_named_descendant_for_char_range(TSNode self, uint32_t min,
uint32_t max) {
return ts_node__descendant_for_char_range(self, min, max, false);
}
// Public wrapper: byte-range descendant lookup with include_anonymous set to
// true.
TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t min, uint32_t max) {
return ts_node__descendant_for_byte_range(self, min, max, true);
}

View file

@ -3,6 +3,6 @@
#include "runtime/tree.h"
// Constructs a TSNode from a tree pointer and position values.
// NOTE(review): both prototypes appear because this listing shows diff lines
// without +/- markers. The one with the `character` parameter looks like the
// pre-change declaration; only one can exist in the real header -- confirm.
TSNode ts_node_make(const Tree *, uint32_t character, uint32_t byte, uint32_t row);
TSNode ts_node_make(const Tree *, uint32_t byte, uint32_t row);
#endif

View file

@ -341,7 +341,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta
&self->lexer.data,
valid_external_tokens
)) {
if (length_has_unknown_chars(self->lexer.token_end_position)) {
if (length_is_undefined(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
}
@ -367,7 +367,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta
);
ts_lexer_start(&self->lexer);
if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) {
if (length_has_unknown_chars(self->lexer.token_end_position)) {
if (length_is_undefined(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
}
break;
@ -912,7 +912,7 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
unsigned new_cost =
depth * ERROR_COST_PER_SKIPPED_TREE +
(position.chars - entry.position.chars) * ERROR_COST_PER_SKIPPED_CHAR +
(position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
(position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
if (parser__better_version_exists(self, version, false, new_cost)) break;

View file

@ -134,11 +134,11 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p
if (state == ERROR_STATE && !tree->extra) {
node->error_cost +=
ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) +
ERROR_COST_PER_SKIPPED_CHAR * tree->size.chars +
ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row;
if (previous_node->links[0].tree) {
node->error_cost +=
ERROR_COST_PER_SKIPPED_CHAR * tree->padding.chars +
ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes +
ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row;
}
}
@ -568,7 +568,7 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version
return
!head1->is_halted && !head2->is_halted &&
head1->node->state == head2->node->state &&
head1->node->position.chars == head2->node->position.chars &&
head1->node->position.bytes == head2->node->position.bytes &&
head1->node->depth == head2->node->depth &&
ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token);
}

View file

@ -8,7 +8,7 @@ typedef struct {
uint32_t length;
} TSStringInput;
const char *ts_string_input_read(void *payload, uint32_t *bytes_read) {
static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) {
TSStringInput *input = (TSStringInput *)payload;
if (input->position >= input->length) {
*bytes_read = 0;
@ -20,7 +20,7 @@ const char *ts_string_input_read(void *payload, uint32_t *bytes_read) {
return input->string + previous_position;
}
int ts_string_input_seek(void *payload, uint32_t character, uint32_t byte) {
static int ts_string_input__seek(void *payload, uint32_t byte) {
TSStringInput *input = (TSStringInput *)payload;
input->position = byte;
return (byte < input->length);
@ -40,12 +40,11 @@ TSInput ts_string_input_make_with_length(const char *string, uint32_t length) {
input->length = length;
return (TSInput){
.payload = input,
.read = ts_string_input_read,
.seek = ts_string_input_seek,
.read = ts_string_input__read,
.seek = ts_string_input__seek,
.encoding = TSInputEncodingUTF8,
.measure_columns_in_bytes = false,
};
error:
return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8, false };
return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 };
}

View file

@ -248,7 +248,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children,
}
if (self->symbol == ts_builtin_sym_error) {
self->error_cost += ERROR_COST_PER_SKIPPED_CHAR * self->size.chars +
self->error_cost += ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row;
for (uint32_t i = 0; i < child_count; i++)
if (!self->children[i]->extra)
@ -408,6 +408,9 @@ bool ts_tree_invalidate_lookahead(Tree *self, uint32_t edit_byte_offset) {
return true;
}
// Returns the tree's padding extent and size extent combined with point_add.
static inline TSPoint ts_tree_total_extent(const Tree *self) {
return point_add(self->padding.extent, self->size.extent);
}
void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
uint32_t old_end_byte = edit->start_byte + edit->bytes_removed;
@ -420,14 +423,12 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
self->has_changes = true;
if (edit->start_byte < self->padding.bytes) {
length_set_unknown_chars(&self->padding);
if (self->padding.bytes >= old_end_byte) {
uint32_t trailing_padding_bytes = self->padding.bytes - old_end_byte;
TSPoint trailing_padding_extent = point_sub(self->padding.extent, old_end_point);
self->padding.bytes = new_end_byte + trailing_padding_bytes;
self->padding.extent = point_add(new_end_point, trailing_padding_extent);
} else {
length_set_unknown_chars(&self->size);
uint32_t removed_content_bytes = old_end_byte - self->padding.bytes;
TSPoint removed_content_extent = point_sub(old_end_point, self->padding.extent);
self->size.bytes = self->size.bytes - removed_content_bytes;
@ -436,11 +437,9 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
self->padding.extent = new_end_point;
}
} else if (edit->start_byte == self->padding.bytes && edit->bytes_removed == 0) {
length_set_unknown_chars(&self->padding);
self->padding.bytes = self->padding.bytes + edit->bytes_added;
self->padding.extent = point_add(self->padding.extent, edit->extent_added);
} else {
length_set_unknown_chars(&self->size);
uint32_t trailing_content_bytes = ts_tree_total_bytes(self) - old_end_byte;
TSPoint trailing_content_extent = point_sub(ts_tree_total_extent(self), old_end_point);
self->size.bytes = new_end_byte + trailing_content_bytes - self->padding.bytes;
@ -545,7 +544,7 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *langu
}
if (visible) {
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.chars > 0) {
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.bytes > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
} else {

View file

@ -116,10 +116,6 @@ static inline Length ts_tree_total_size(const Tree *self) {
return length_add(self->padding, self->size);
}
// Returns the tree's padding extent and size extent combined with point_add.
// NOTE(review): the hunk header shows this file section shrinking by four
// lines, so this definition appears to be removed here -- confirm.
static inline TSPoint ts_tree_total_extent(const Tree *self) {
return point_add(self->padding.extent, self->size.extent);
}
static inline bool ts_tree_is_fragile(const Tree *tree) {
return tree->fragile_left || tree->fragile_right ||
ts_tree_total_bytes(tree) == 0;