diff --git a/lib/src/lexer.c b/lib/src/lexer.c index c75b7e56..acaf3e8c 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -346,6 +346,13 @@ void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { ts_lexer__mark_end(&self->data); } + // If the token ended at an included range boundary, then its end position + // will have been reset to the end of the preceding range. Reset the start + // position to match. + if (self->token_end_position.bytes < self->token_start_position.bytes) { + self->token_start_position = self->token_end_position; + } + uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; // In order to determine that a byte sequence is invalid UTF8 or UTF16, diff --git a/lib/src/parser.c b/lib/src/parser.c index 75305d4f..f186ce33 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -447,8 +447,14 @@ static Subtree ts_parser__lex( // avoid infinite loops which could otherwise occur, because the lexer is // looking for any possible token, instead of looking for the specific set of // tokens that are valid in some parse state. + // + // Note that it's possible that the token end position may be *before* the + // original position of the lexer because of the way that tokens are positioned + // at included range boundaries: when a token is terminated at the start of + // an included range, it is marked as ending at the *end* of the preceding + // included range. if ( - self->lexer.token_end_position.bytes == current_position.bytes && + self->lexer.token_end_position.bytes <= current_position.bytes && (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) && !external_scanner_state_changed ) { @@ -525,10 +531,6 @@ static Subtree ts_parser__lex( self->language ); } else { - if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) { - self->lexer.token_start_position = self->lexer.token_end_position; - } - bool is_keyword = false; TSSymbol symbol = self->lexer.data.result_symbol; Length padding = length_sub(self->lexer.token_start_position, start_position);