Merge pull request #1581 from tlaplus-community/get-codepoint-column

`get_column` now counts codepoints instead of bytes
This commit is contained in:
Max Brunsfeld 2022-01-12 10:49:44 -08:00 committed by GitHub
commit 9df064c9fe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 232 additions and 14 deletions

View file

@ -152,18 +152,8 @@ static void ts_lexer_goto(Lexer *self, Length position) {
}
}
// Advance to the next character in the source code, retrieving a new
// chunk of source code if needed.
static void ts_lexer__advance(TSLexer *_self, bool skip) {
Lexer *self = (Lexer *)_self;
if (!self->chunk) return;
if (skip) {
LOG("skip", self->data.lookahead);
} else {
LOG("consume", self->data.lookahead);
}
// Intended to be called only from functions that control logging.
static void ts_lexer__do_advance(Lexer *self, bool skip) {
if (self->lookahead_size) {
self->current_position.bytes += self->lookahead_size;
if (self->data.lookahead == '\n') {
@ -205,6 +195,21 @@ static void ts_lexer__advance(TSLexer *_self, bool skip) {
}
}
// Advance to the next character in the source code, retrieving a new
// chunk of source code if needed.
static void ts_lexer__advance(TSLexer *_self, bool skip) {
Lexer *self = (Lexer *)_self;
if (!self->chunk) return;
if (skip) {
LOG("skip", self->data.lookahead);
} else {
LOG("consume", self->data.lookahead);
}
ts_lexer__do_advance(self, skip);
}
// Mark that a token match has completed. This can be called multiple
// times if a longer match is found later.
static void ts_lexer__mark_end(TSLexer *_self) {
@ -233,8 +238,25 @@ static void ts_lexer__mark_end(TSLexer *_self) {
static uint32_t ts_lexer__get_column(TSLexer *_self) {
Lexer *self = (Lexer *)_self;
uint32_t goal_byte = self->current_position.bytes;
self->did_get_column = true;
return self->current_position.extent.column;
self->current_position.bytes -= self->current_position.extent.column;
self->current_position.extent.column = 0;
if (self->current_position.bytes < self->chunk_start) {
ts_lexer__get_chunk(self);
}
uint32_t result = 0;
ts_lexer__get_lookahead(self);
while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) {
ts_lexer__do_advance(self, false);
result++;
}
return result;
}
// Is the lexer at a boundary between two disjoint included ranges of