Fix behavior of Lexer.get_column when at EOF

This commit is contained in:
Max Brunsfeld 2021-03-11 11:25:10 -08:00
parent 57036b4f8a
commit e29d3714f7
4 changed files with 337 additions and 56 deletions

View file

@ -102,6 +102,56 @@ static void ts_lexer__get_lookahead(Lexer *self) {
}
}
static void ts_lexer_goto(Lexer *self, Length position) {
self->current_position = position;
bool found_included_range = false;
// Move to the first valid position at or after the given position.
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *included_range = &self->included_ranges[i];
if (included_range->end_byte > position.bytes) {
if (included_range->start_byte > position.bytes) {
self->current_position = (Length) {
.bytes = included_range->start_byte,
.extent = included_range->start_point,
};
}
self->current_included_range_index = i;
found_included_range = true;
break;
}
}
if (found_included_range) {
// If the current position is outside of the current chunk of text,
// then clear out the current chunk of text.
if (self->chunk && (
position.bytes < self->chunk_start ||
position.bytes >= self->chunk_start + self->chunk_size
)) {
ts_lexer__clear_chunk(self);
}
self->lookahead_size = 0;
self->data.lookahead = '\0';
}
// If the given position is beyond any of included ranges, move to the EOF
// state - past the end of the included ranges.
else {
self->current_included_range_index = self->included_range_count;
TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
self->current_position = (Length) {
.bytes = last_included_range->end_byte,
.extent = last_included_range->end_point,
};
ts_lexer__clear_chunk(self);
self->lookahead_size = 1;
self->data.lookahead = '\0';
}
}
// Advance to the next character in the source code, retrieving a new
// chunk of source code if needed.
static void ts_lexer__advance(TSLexer *_self, bool skip) {
@ -185,12 +235,15 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) {
Lexer *self = (Lexer *)_self;
uint32_t goal_byte = self->current_position.bytes;
self->current_position.bytes -= self->current_position.extent.column;
self->current_position.extent.column = 0;
if (self->current_position.bytes < self->chunk_start) {
ts_lexer__get_chunk(self);
}
ts_lexer_goto(self, (Length) {
.bytes = self->current_position.bytes - self->current_position.extent.column,
.extent = {
.row = self->current_position.extent.row,
.column = 0,
}
});
if (!self->chunk_size) ts_lexer__get_chunk(self);
if (!self->lookahead_size) ts_lexer__get_lookahead(self);
uint32_t result = 0;
while (self->current_position.bytes < goal_byte) {
@ -247,56 +300,6 @@ void ts_lexer_delete(Lexer *self) {
ts_free(self->included_ranges);
}
static void ts_lexer_goto(Lexer *self, Length position) {
self->current_position = position;
bool found_included_range = false;
// Move to the first valid position at or after the given position.
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *included_range = &self->included_ranges[i];
if (included_range->end_byte > position.bytes) {
if (included_range->start_byte > position.bytes) {
self->current_position = (Length) {
.bytes = included_range->start_byte,
.extent = included_range->start_point,
};
}
self->current_included_range_index = i;
found_included_range = true;
break;
}
}
if (found_included_range) {
// If the current position is outside of the current chunk of text,
// then clear out the current chunk of text.
if (self->chunk && (
position.bytes < self->chunk_start ||
position.bytes >= self->chunk_start + self->chunk_size
)) {
ts_lexer__clear_chunk(self);
}
self->lookahead_size = 0;
self->data.lookahead = '\0';
}
// If the given position is beyond any of included ranges, move to the EOF
// state - past the end of the included ranges.
else {
self->current_included_range_index = self->included_range_count;
TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
self->current_position = (Length) {
.bytes = last_included_range->end_byte,
.extent = last_included_range->end_point,
};
ts_lexer__clear_chunk(self);
self->lookahead_size = 1;
self->data.lookahead = '\0';
}
}
void ts_lexer_set_input(Lexer *self, TSInput input) {
self->input = input;
ts_lexer__clear_chunk(self);