From d07f864815ecb1e0f1f0bab17fec80438eb4c455 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Fri, 11 Nov 2022 16:34:57 -0800
Subject: [PATCH] Fix parse error when reusing a node at the end of an included
 range

---
 cli/src/tests/parser_test.rs | 24 ++++++++++++++----
 lib/src/lexer.c              | 47 +++++++++++++++++++++---------------
 2 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs
index 0a6bf68d..cf3b6437 100644
--- a/cli/src/tests/parser_test.rs
+++ b/cli/src/tests/parser_test.rs
@@ -951,7 +951,9 @@ fn test_parsing_with_included_range_containing_mismatched_positions() {
 
     parser.set_included_ranges(&[range_to_parse]).unwrap();
 
-    let html_tree = parser.parse(source_code, None).unwrap();
+    let html_tree = parser
+        .parse_with(&mut chunked_input(source_code, 3), None)
+        .unwrap();
 
     assert_eq!(html_tree.root_node().range(), range_to_parse);
 
@@ -1078,7 +1080,9 @@ fn test_parsing_with_a_newly_excluded_range() {
     // Parse HTML including the template directive, which will cause an error
     let mut parser = Parser::new();
     parser.set_language(get_language("html")).unwrap();
-    let mut first_tree = parser.parse(&source_code, None).unwrap();
+    let mut first_tree = parser
+        .parse_with(&mut chunked_input(&source_code, 3), None)
+        .unwrap();
 
     // Insert code at the beginning of the document.
     let prefix = "a very very long line of plain text. ";
@@ -1113,7 +1117,9 @@ fn test_parsing_with_a_newly_excluded_range() {
             },
         ])
         .unwrap();
-    let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
+    let tree = parser
+        .parse_with(&mut chunked_input(&source_code, 3), Some(&first_tree))
+        .unwrap();
 
     assert_eq!(
         tree.root_node().to_sexp(),
@@ -1164,7 +1170,9 @@ fn test_parsing_with_a_newly_included_range() {
     parser
         .set_included_ranges(&[simple_range(range1_start, range1_end)])
         .unwrap();
-    let tree = parser.parse(source_code, None).unwrap();
+    let tree = parser
+        .parse_with(&mut chunked_input(&source_code, 3), None)
+        .unwrap();
     assert_eq!(
         tree.root_node().to_sexp(),
         concat!(
@@ -1181,7 +1189,9 @@ fn test_parsing_with_a_newly_included_range() {
             simple_range(range3_start, range3_end),
         ])
         .unwrap();
-    let tree2 = parser.parse(&source_code, Some(&tree)).unwrap();
+    let tree2 = parser
+        .parse_with(&mut chunked_input(&source_code, 3), Some(&tree))
+        .unwrap();
     assert_eq!(
         tree2.root_node().to_sexp(),
         concat!(
@@ -1289,3 +1299,12 @@ fn simple_range(start: usize, end: usize) -> Range {
         end_point: Point::new(0, end),
     }
 }
+
+/// Returns a `parse_with` input callback that hands out `text` in pieces of
+/// at most `size` bytes, starting at the requested byte offset.
+///
+/// The slice is taken on the underlying bytes rather than on the `str`, so a
+/// piece may end in the middle of a multi-byte character without panicking.
+fn chunked_input<'a>(text: &'a str, size: usize) -> impl FnMut(usize, Point) -> &'a [u8] {
+    move |offset, _| &text.as_bytes()[offset..text.len().min(offset + size)]
+}
diff --git a/lib/src/lexer.c b/lib/src/lexer.c
index f2c10fbd..c75b7e56 100644
--- a/lib/src/lexer.c
+++ b/lib/src/lexer.c
@@ -104,13 +104,16 @@ static void ts_lexer__get_lookahead(Lexer *self) {
 
 static void ts_lexer_goto(Lexer *self, Length position) {
   self->current_position = position;
-  bool found_included_range = false;
 
   // Move to the first valid position at or after the given position.
+  bool found_included_range = false;
   for (unsigned i = 0; i < self->included_range_count; i++) {
     TSRange *included_range = &self->included_ranges[i];
-    if (included_range->end_byte > position.bytes) {
-      if (included_range->start_byte >= position.bytes) {
+    if (
+      included_range->end_byte > self->current_position.bytes &&
+      included_range->end_byte > included_range->start_byte
+    ) {
+      if (included_range->start_byte >= self->current_position.bytes) {
         self->current_position = (Length) {
           .bytes = included_range->start_byte,
           .extent = included_range->start_point,
@@ -127,8 +130,8 @@ static void ts_lexer_goto(Lexer *self, Length position) {
     // If the current position is outside of the current chunk of text,
     // then clear out the current chunk of text.
     if (self->chunk && (
-      position.bytes < self->chunk_start ||
-      position.bytes >= self->chunk_start + self->chunk_size
+      self->current_position.bytes < self->chunk_start ||
+      self->current_position.bytes >= self->chunk_start + self->chunk_size
     )) {
       ts_lexer__clear_chunk(self);
     }
@@ -164,27 +167,31 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) {
     }
   }
 
-  const TSRange *current_range = NULL;
-  if (self->current_included_range_index < self->included_range_count) {
-    current_range = &self->included_ranges[self->current_included_range_index];
-    if (self->current_position.bytes == current_range->end_byte) {
-      self->current_included_range_index++;
-      if (self->current_included_range_index < self->included_range_count) {
-        current_range++;
-        self->current_position = (Length) {
-          current_range->start_byte,
-          current_range->start_point,
-        };
-      } else {
-        current_range = NULL;
-      }
+  const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
+  while (
+    self->current_position.bytes >= current_range->end_byte ||
+    current_range->end_byte == current_range->start_byte
+  ) {
+    self->current_included_range_index++;
+    if (self->current_included_range_index < self->included_range_count) {
+      current_range++;
+      self->current_position = (Length) {
+        current_range->start_byte,
+        current_range->start_point,
+      };
+    } else {
+      current_range = NULL;
+      break;
     }
   }
 
   if (skip) self->token_start_position = self->current_position;
 
   if (current_range) {
-    if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
+    if (
+      self->current_position.bytes < self->chunk_start ||
+      self->current_position.bytes >= self->chunk_start + self->chunk_size
+    ) {
       ts_lexer__get_chunk(self);
     }
     ts_lexer__get_lookahead(self);