Fix behavior of Lexer.get_column when at EOF
This commit is contained in:
parent
57036b4f8a
commit
e29d3714f7
4 changed files with 337 additions and 56 deletions
115
lib/src/lexer.c
115
lib/src/lexer.c
|
|
@ -102,6 +102,56 @@ static void ts_lexer__get_lookahead(Lexer *self) {
|
|||
}
|
||||
}
|
||||
|
||||
static void ts_lexer_goto(Lexer *self, Length position) {
|
||||
self->current_position = position;
|
||||
bool found_included_range = false;
|
||||
|
||||
// Move to the first valid position at or after the given position.
|
||||
for (unsigned i = 0; i < self->included_range_count; i++) {
|
||||
TSRange *included_range = &self->included_ranges[i];
|
||||
if (included_range->end_byte > position.bytes) {
|
||||
if (included_range->start_byte > position.bytes) {
|
||||
self->current_position = (Length) {
|
||||
.bytes = included_range->start_byte,
|
||||
.extent = included_range->start_point,
|
||||
};
|
||||
}
|
||||
|
||||
self->current_included_range_index = i;
|
||||
found_included_range = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_included_range) {
|
||||
// If the current position is outside of the current chunk of text,
|
||||
// then clear out the current chunk of text.
|
||||
if (self->chunk && (
|
||||
position.bytes < self->chunk_start ||
|
||||
position.bytes >= self->chunk_start + self->chunk_size
|
||||
)) {
|
||||
ts_lexer__clear_chunk(self);
|
||||
}
|
||||
|
||||
self->lookahead_size = 0;
|
||||
self->data.lookahead = '\0';
|
||||
}
|
||||
|
||||
// If the given position is beyond any of included ranges, move to the EOF
|
||||
// state - past the end of the included ranges.
|
||||
else {
|
||||
self->current_included_range_index = self->included_range_count;
|
||||
TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
|
||||
self->current_position = (Length) {
|
||||
.bytes = last_included_range->end_byte,
|
||||
.extent = last_included_range->end_point,
|
||||
};
|
||||
ts_lexer__clear_chunk(self);
|
||||
self->lookahead_size = 1;
|
||||
self->data.lookahead = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
// Advance to the next character in the source code, retrieving a new
|
||||
// chunk of source code if needed.
|
||||
static void ts_lexer__advance(TSLexer *_self, bool skip) {
|
||||
|
|
@ -185,12 +235,15 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) {
|
|||
Lexer *self = (Lexer *)_self;
|
||||
uint32_t goal_byte = self->current_position.bytes;
|
||||
|
||||
self->current_position.bytes -= self->current_position.extent.column;
|
||||
self->current_position.extent.column = 0;
|
||||
|
||||
if (self->current_position.bytes < self->chunk_start) {
|
||||
ts_lexer__get_chunk(self);
|
||||
}
|
||||
ts_lexer_goto(self, (Length) {
|
||||
.bytes = self->current_position.bytes - self->current_position.extent.column,
|
||||
.extent = {
|
||||
.row = self->current_position.extent.row,
|
||||
.column = 0,
|
||||
}
|
||||
});
|
||||
if (!self->chunk_size) ts_lexer__get_chunk(self);
|
||||
if (!self->lookahead_size) ts_lexer__get_lookahead(self);
|
||||
|
||||
uint32_t result = 0;
|
||||
while (self->current_position.bytes < goal_byte) {
|
||||
|
|
@ -247,56 +300,6 @@ void ts_lexer_delete(Lexer *self) {
|
|||
ts_free(self->included_ranges);
|
||||
}
|
||||
|
||||
static void ts_lexer_goto(Lexer *self, Length position) {
|
||||
self->current_position = position;
|
||||
bool found_included_range = false;
|
||||
|
||||
// Move to the first valid position at or after the given position.
|
||||
for (unsigned i = 0; i < self->included_range_count; i++) {
|
||||
TSRange *included_range = &self->included_ranges[i];
|
||||
if (included_range->end_byte > position.bytes) {
|
||||
if (included_range->start_byte > position.bytes) {
|
||||
self->current_position = (Length) {
|
||||
.bytes = included_range->start_byte,
|
||||
.extent = included_range->start_point,
|
||||
};
|
||||
}
|
||||
|
||||
self->current_included_range_index = i;
|
||||
found_included_range = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_included_range) {
|
||||
// If the current position is outside of the current chunk of text,
|
||||
// then clear out the current chunk of text.
|
||||
if (self->chunk && (
|
||||
position.bytes < self->chunk_start ||
|
||||
position.bytes >= self->chunk_start + self->chunk_size
|
||||
)) {
|
||||
ts_lexer__clear_chunk(self);
|
||||
}
|
||||
|
||||
self->lookahead_size = 0;
|
||||
self->data.lookahead = '\0';
|
||||
}
|
||||
|
||||
// If the given position is beyond any of included ranges, move to the EOF
|
||||
// state - past the end of the included ranges.
|
||||
else {
|
||||
self->current_included_range_index = self->included_range_count;
|
||||
TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
|
||||
self->current_position = (Length) {
|
||||
.bytes = last_included_range->end_byte,
|
||||
.extent = last_included_range->end_point,
|
||||
};
|
||||
ts_lexer__clear_chunk(self);
|
||||
self->lookahead_size = 1;
|
||||
self->data.lookahead = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
void ts_lexer_set_input(Lexer *self, TSInput input) {
|
||||
self->input = input;
|
||||
ts_lexer__clear_chunk(self);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue