Merge pull request #1581 from tlaplus-community/get-codepoint-column
`get_column` now counts codepoints instead of bytes
This commit is contained in:
commit
9df064c9fe
6 changed files with 232 additions and 14 deletions
|
|
@ -152,18 +152,8 @@ static void ts_lexer_goto(Lexer *self, Length position) {
|
|||
}
|
||||
}
|
||||
|
||||
// Advance to the next character in the source code, retrieving a new
|
||||
// chunk of source code if needed.
|
||||
static void ts_lexer__advance(TSLexer *_self, bool skip) {
|
||||
Lexer *self = (Lexer *)_self;
|
||||
if (!self->chunk) return;
|
||||
|
||||
if (skip) {
|
||||
LOG("skip", self->data.lookahead);
|
||||
} else {
|
||||
LOG("consume", self->data.lookahead);
|
||||
}
|
||||
|
||||
// Intended to be called only from functions that control logging.
|
||||
static void ts_lexer__do_advance(Lexer *self, bool skip) {
|
||||
if (self->lookahead_size) {
|
||||
self->current_position.bytes += self->lookahead_size;
|
||||
if (self->data.lookahead == '\n') {
|
||||
|
|
@ -205,6 +195,21 @@ static void ts_lexer__advance(TSLexer *_self, bool skip) {
|
|||
}
|
||||
}
|
||||
|
||||
// Advance to the next character in the source code, retrieving a new
|
||||
// chunk of source code if needed.
|
||||
static void ts_lexer__advance(TSLexer *_self, bool skip) {
|
||||
Lexer *self = (Lexer *)_self;
|
||||
if (!self->chunk) return;
|
||||
|
||||
if (skip) {
|
||||
LOG("skip", self->data.lookahead);
|
||||
} else {
|
||||
LOG("consume", self->data.lookahead);
|
||||
}
|
||||
|
||||
ts_lexer__do_advance(self, skip);
|
||||
}
|
||||
|
||||
// Mark that a token match has completed. This can be called multiple
|
||||
// times if a longer match is found later.
|
||||
static void ts_lexer__mark_end(TSLexer *_self) {
|
||||
|
|
@ -233,8 +238,25 @@ static void ts_lexer__mark_end(TSLexer *_self) {
|
|||
|
||||
static uint32_t ts_lexer__get_column(TSLexer *_self) {
|
||||
Lexer *self = (Lexer *)_self;
|
||||
|
||||
uint32_t goal_byte = self->current_position.bytes;
|
||||
|
||||
self->did_get_column = true;
|
||||
return self->current_position.extent.column;
|
||||
self->current_position.bytes -= self->current_position.extent.column;
|
||||
self->current_position.extent.column = 0;
|
||||
|
||||
if (self->current_position.bytes < self->chunk_start) {
|
||||
ts_lexer__get_chunk(self);
|
||||
}
|
||||
|
||||
uint32_t result = 0;
|
||||
ts_lexer__get_lookahead(self);
|
||||
while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) {
|
||||
ts_lexer__do_advance(self, false);
|
||||
result++;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Is the lexer at a boundary between two disjoint included ranges of
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue