From d07f864815ecb1e0f1f0bab17fec80438eb4c455 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Nov 2022 16:34:57 -0800 Subject: [PATCH] Fix parse error when reusing a node at the end of an included range --- cli/src/tests/parser_test.rs | 24 ++++++++++++++---- lib/src/lexer.c | 47 +++++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 0a6bf68d..cf3b6437 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -951,7 +951,9 @@ fn test_parsing_with_included_range_containing_mismatched_positions() { parser.set_included_ranges(&[range_to_parse]).unwrap(); - let html_tree = parser.parse(source_code, None).unwrap(); + let html_tree = parser + .parse_with(&mut chunked_input(source_code, 3), None) + .unwrap(); assert_eq!(html_tree.root_node().range(), range_to_parse); @@ -1078,7 +1080,9 @@ fn test_parsing_with_a_newly_excluded_range() { // Parse HTML including the template directive, which will cause an error let mut parser = Parser::new(); parser.set_language(get_language("html")).unwrap(); - let mut first_tree = parser.parse(&source_code, None).unwrap(); + let mut first_tree = parser + .parse_with(&mut chunked_input(&source_code, 3), None) + .unwrap(); // Insert code at the beginning of the document. let prefix = "a very very long line of plain text. "; @@ -1113,7 +1117,9 @@ fn test_parsing_with_a_newly_excluded_range() { }, ]) .unwrap(); - let tree = parser.parse(&source_code, Some(&first_tree)).unwrap(); + let tree = parser + .parse_with(&mut chunked_input(&source_code, 3), Some(&first_tree)) + .unwrap(); assert_eq!( tree.root_node().to_sexp(), @@ -1164,7 +1170,9 @@ fn test_parsing_with_a_newly_included_range() { parser .set_included_ranges(&[simple_range(range1_start, range1_end)]) .unwrap(); - let tree = parser.parse(source_code, None).unwrap(); + let tree = parser + .parse_with(&mut chunked_input(&source_code, 3), None) + .unwrap(); assert_eq!( tree.root_node().to_sexp(), concat!( @@ -1181,7 +1189,9 @@ fn test_parsing_with_a_newly_included_range() { simple_range(range3_start, range3_end), ]) .unwrap(); - let tree2 = parser.parse(&source_code, Some(&tree)).unwrap(); + let tree2 = parser + .parse_with(&mut chunked_input(&source_code, 3), Some(&tree)) + .unwrap(); assert_eq!( tree2.root_node().to_sexp(), concat!( @@ -1289,3 +1299,7 @@ fn simple_range(start: usize, end: usize) -> Range { end_point: Point::new(0, end), } } + +fn chunked_input<'a>(text: &'a str, size: usize) -> impl FnMut(usize, Point) -> &'a [u8] { + move |offset, _| text[offset..text.len().min(offset + size)].as_bytes() +} diff --git a/lib/src/lexer.c b/lib/src/lexer.c index f2c10fbd..c75b7e56 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -104,13 +104,16 @@ static void ts_lexer__get_lookahead(Lexer *self) { static void ts_lexer_goto(Lexer *self, Length position) { self->current_position = position; - bool found_included_range = false; // Move to the first valid position at or after the given position. + bool found_included_range = false; for (unsigned i = 0; i < self->included_range_count; i++) { TSRange *included_range = &self->included_ranges[i]; - if (included_range->end_byte > position.bytes) { - if (included_range->start_byte >= position.bytes) { + if ( + included_range->end_byte > self->current_position.bytes && + included_range->end_byte > included_range->start_byte + ) { + if (included_range->start_byte >= self->current_position.bytes) { self->current_position = (Length) { .bytes = included_range->start_byte, .extent = included_range->start_point, @@ -127,8 +130,8 @@ static void ts_lexer_goto(Lexer *self, Length position) { // If the current position is outside of the current chunk of text, // then clear out the current chunk of text. if (self->chunk && ( - position.bytes < self->chunk_start || - position.bytes >= self->chunk_start + self->chunk_size + self->current_position.bytes < self->chunk_start || + self->current_position.bytes >= self->chunk_start + self->chunk_size )) { ts_lexer__clear_chunk(self); } @@ -164,27 +167,31 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) { } } - const TSRange *current_range = NULL; - if (self->current_included_range_index < self->included_range_count) { - current_range = &self->included_ranges[self->current_included_range_index]; - if (self->current_position.bytes == current_range->end_byte) { - self->current_included_range_index++; - if (self->current_included_range_index < self->included_range_count) { - current_range++; - self->current_position = (Length) { - current_range->start_byte, - current_range->start_point, - }; - } else { - current_range = NULL; - } + const TSRange *current_range = &self->included_ranges[self->current_included_range_index]; + while ( + self->current_position.bytes >= current_range->end_byte || + current_range->end_byte == current_range->start_byte + ) { + self->current_included_range_index++; + if (self->current_included_range_index < self->included_range_count) { + current_range++; + self->current_position = (Length) { + current_range->start_byte, + current_range->start_point, + }; + } else { + current_range = NULL; + break; } } if (skip) self->token_start_position = self->current_position; if (current_range) { - if (self->current_position.bytes >= self->chunk_start + self->chunk_size) { + if ( + self->current_position.bytes < self->chunk_start || + self->current_position.bytes >= self->chunk_start + self->chunk_size + ) { ts_lexer__get_chunk(self); } ts_lexer__get_lookahead(self);