fix(lib): advance the lookahead end byte by 4 when there's an invalid code point

This helps in the case where an edit was made in the middle of a code
point but bytes 1-3 are valid: up to 4 bytes may be read to identify the
invalid code point, so the lookahead end byte is advanced by 4.
This commit is contained in:
Amaan Qureshi 2024-04-30 19:48:04 -04:00
parent 61d0395543
commit 4c083252ec
2 changed files with 24 additions and 1 deletions

View file

@@ -365,7 +365,7 @@ void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
// Therefore, the next byte *after* the current (invalid) character
// affects the interpretation of the current character.
if (self->data.lookahead == TS_DECODE_ERROR) {
-current_lookahead_end_byte++;
+current_lookahead_end_byte += 4; // the maximum number of bytes read to identify an invalid code point
}
if (current_lookahead_end_byte > *lookahead_end_byte) {