Fix parse error when reusing a node at the end of an included range

This commit is contained in:
Max Brunsfeld 2022-11-11 16:34:57 -08:00
parent 618146260e
commit d07f864815
2 changed files with 46 additions and 25 deletions

View file

@ -951,7 +951,9 @@ fn test_parsing_with_included_range_containing_mismatched_positions() {
parser.set_included_ranges(&[range_to_parse]).unwrap();
let html_tree = parser.parse(source_code, None).unwrap();
let html_tree = parser
.parse_with(&mut chunked_input(source_code, 3), None)
.unwrap();
assert_eq!(html_tree.root_node().range(), range_to_parse);
@ -1078,7 +1080,9 @@ fn test_parsing_with_a_newly_excluded_range() {
// Parse HTML including the template directive, which will cause an error
let mut parser = Parser::new();
parser.set_language(get_language("html")).unwrap();
let mut first_tree = parser.parse(&source_code, None).unwrap();
let mut first_tree = parser
.parse_with(&mut chunked_input(&source_code, 3), None)
.unwrap();
// Insert code at the beginning of the document.
let prefix = "a very very long line of plain text. ";
@ -1113,7 +1117,9 @@ fn test_parsing_with_a_newly_excluded_range() {
},
])
.unwrap();
let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
let tree = parser
.parse_with(&mut chunked_input(&source_code, 3), Some(&first_tree))
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -1164,7 +1170,9 @@ fn test_parsing_with_a_newly_included_range() {
parser
.set_included_ranges(&[simple_range(range1_start, range1_end)])
.unwrap();
let tree = parser.parse(source_code, None).unwrap();
let tree = parser
.parse_with(&mut chunked_input(&source_code, 3), None)
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
concat!(
@ -1181,7 +1189,9 @@ fn test_parsing_with_a_newly_included_range() {
simple_range(range3_start, range3_end),
])
.unwrap();
let tree2 = parser.parse(&source_code, Some(&tree)).unwrap();
let tree2 = parser
.parse_with(&mut chunked_input(&source_code, 3), Some(&tree))
.unwrap();
assert_eq!(
tree2.root_node().to_sexp(),
concat!(
@ -1289,3 +1299,7 @@ fn simple_range(start: usize, end: usize) -> Range {
end_point: Point::new(0, end),
}
}
/// Builds a tree-sitter input callback that serves `text` in chunks of at
/// most `size` bytes, starting at the requested byte `offset`.
///
/// The `Point` argument supplied by the parser is ignored; chunking is purely
/// byte-offset based. Returns an empty slice once `offset` reaches (or
/// passes) the end of the text, which signals end-of-input to the parser.
///
/// Slices `text.as_bytes()` rather than the `str` itself, so a chunk boundary
/// landing inside a multi-byte UTF-8 character cannot panic, and clamps both
/// ends of the range so an out-of-range `offset` yields an empty slice
/// instead of panicking.
fn chunked_input<'a>(text: &'a str, size: usize) -> impl FnMut(usize, Point) -> &'a [u8] {
    let bytes = text.as_bytes();
    move |offset, _| {
        let start = offset.min(bytes.len());
        let end = (offset + size).min(bytes.len());
        &bytes[start..end]
    }
}

View file

@ -104,13 +104,16 @@ static void ts_lexer__get_lookahead(Lexer *self) {
static void ts_lexer_goto(Lexer *self, Length position) {
self->current_position = position;
bool found_included_range = false;
// Move to the first valid position at or after the given position.
bool found_included_range = false;
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *included_range = &self->included_ranges[i];
if (included_range->end_byte > position.bytes) {
if (included_range->start_byte >= position.bytes) {
if (
included_range->end_byte > self->current_position.bytes &&
included_range->end_byte > included_range->start_byte
) {
if (included_range->start_byte >= self->current_position.bytes) {
self->current_position = (Length) {
.bytes = included_range->start_byte,
.extent = included_range->start_point,
@ -127,8 +130,8 @@ static void ts_lexer_goto(Lexer *self, Length position) {
// If the current position is outside of the current chunk of text,
// then clear out the current chunk of text.
if (self->chunk && (
position.bytes < self->chunk_start ||
position.bytes >= self->chunk_start + self->chunk_size
self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->chunk_start + self->chunk_size
)) {
ts_lexer__clear_chunk(self);
}
@ -164,27 +167,31 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) {
}
}
const TSRange *current_range = NULL;
if (self->current_included_range_index < self->included_range_count) {
current_range = &self->included_ranges[self->current_included_range_index];
if (self->current_position.bytes == current_range->end_byte) {
self->current_included_range_index++;
if (self->current_included_range_index < self->included_range_count) {
current_range++;
self->current_position = (Length) {
current_range->start_byte,
current_range->start_point,
};
} else {
current_range = NULL;
}
const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
while (
self->current_position.bytes >= current_range->end_byte ||
current_range->end_byte == current_range->start_byte
) {
self->current_included_range_index++;
if (self->current_included_range_index < self->included_range_count) {
current_range++;
self->current_position = (Length) {
current_range->start_byte,
current_range->start_point,
};
} else {
current_range = NULL;
break;
}
}
if (skip) self->token_start_position = self->current_position;
if (current_range) {
if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
if (
self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->chunk_start + self->chunk_size
) {
ts_lexer__get_chunk(self);
}
ts_lexer__get_lookahead(self);