From 5cbd50c7d72677618037b1f7264af21d8b4967c1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 29 Aug 2017 14:44:24 -0700 Subject: [PATCH] Remember how far ahead the lexer looked on failed calls This needs to be included in the 'bytes_scanned' property of the token that is ultimately produced. --- src/runtime/parser.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 66e83596..c89a46a8 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -265,6 +265,7 @@ static void parser__restore_external_scanner(Parser *self, Tree *external_token) static Tree *parser__lex(Parser *self, StackVersion version) { TSStateId parse_state = ts_stack_top_state(self->stack, version); Length start_position = ts_stack_top_position(self->stack, version); + Tree *external_token = ts_stack_last_external_token(self->stack, version); TSLexMode lex_mode = self->language->lex_modes[parse_state]; const bool *valid_external_tokens = ts_language_enabled_external_tokens( self->language, @@ -275,46 +276,51 @@ static Tree *parser__lex(Parser *self, StackVersion version) { bool skipped_error = false; int32_t first_error_character = 0; Length error_start_position, error_end_position; + uint32_t last_byte_scanned = start_position.bytes; ts_lexer_reset(&self->lexer, start_position); for (;;) { Length current_position = self->lexer.current_position; if (valid_external_tokens) { - LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state, - current_position.extent.row, current_position.extent.column); - - Tree *external_token = ts_stack_last_external_token(self->stack, version); - parser__restore_external_scanner(self, external_token); - + LOG( + "lex_external state:%d, row:%u, column:%u", + lex_mode.external_lex_state, + current_position.extent.row, + current_position.extent.column + ); ts_lexer_start(&self->lexer); - found_external_token = self->language->external_scanner.scan( + parser__restore_external_scanner(self, external_token); + if (self->language->external_scanner.scan( self->external_scanner_payload, &self->lexer.data, valid_external_tokens - ); - - if (found_external_token) { + )) { if (length_has_unknown_chars(self->lexer.token_end_position)) { self->lexer.token_end_position = self->lexer.current_position; } - // Don't allow zero-length external tokens durring error recovery. if (lex_mode.lex_state == ERROR_STATE && self->lexer.token_end_position.bytes <= current_position.bytes) { LOG("disregard_empty_token"); - parser__restore_external_scanner(self, external_token); - found_external_token = false; } else { + found_external_token = true; break; } } + if (self->lexer.current_position.bytes > last_byte_scanned) { + last_byte_scanned = self->lexer.current_position.bytes; + } ts_lexer_reset(&self->lexer, current_position); } - LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state, - current_position.extent.row, current_position.extent.column); + LOG( + "lex_internal state:%d, row:%u, column:%u", + lex_mode.lex_state, + current_position.extent.row, + current_position.extent.column + ); ts_lexer_start(&self->lexer); if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) { if (length_has_unknown_chars(self->lexer.token_end_position)) { @@ -330,6 +336,9 @@ static Tree *parser__lex(Parser *self, StackVersion version) { self->language, lex_mode.external_lex_state ); + if (self->lexer.current_position.bytes > last_byte_scanned) { + last_byte_scanned = self->lexer.current_position.bytes; + } ts_lexer_reset(&self->lexer, start_position); continue; } @@ -378,7 +387,10 @@ static Tree *parser__lex(Parser *self, StackVersion version) { } } - result->bytes_scanned = self->lexer.current_position.bytes - start_position.bytes + 1; + if (self->lexer.current_position.bytes > last_byte_scanned) { + last_byte_scanned = self->lexer.current_position.bytes; + } + result->bytes_scanned = last_byte_scanned - start_position.bytes + 1; result->parse_state = parse_state; result->first_leaf.lex_mode = lex_mode;