Merge pull request #1952 from tree-sitter/tree-included-ranges

Allow retrieving a tree's list of included ranges, fix some included range bugs
2022-11-14 16:54:58 -08:00 · 2022-11-14 16:54:58 -08:00 · 36b5b6c89e
commit 36b5b6c89e
parent fb5fbdd787 8e3dc7cd7a
13 changed files with 369 additions and 104 deletions
--- a/cli/src/tests/corpus_test.rs
+++ b/cli/src/tests/corpus_test.rs
@ -14,7 +14,7 @@ use crate::{
    util,
 };
 use std::fs;
-use tree_sitter::{LogType, Node, Parser, Tree};
+use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};

 #[test]
 fn test_bash_corpus() {
@ -79,40 +79,49 @@ fn test_rust_corpus() {
 fn test_language_corpus(language_name: &str) {
    let grammars_dir = fixtures_dir().join("grammars");
    let error_corpus_dir = fixtures_dir().join("error_corpus");
+    let template_corpus_dir = fixtures_dir().join("template_corpus");
    let mut corpus_dir = grammars_dir.join(language_name).join("corpus");
    if !corpus_dir.is_dir() {
        corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
    }

    let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
+    let template_corpus_file =
+        template_corpus_dir.join(&format!("{}_templates.txt", language_name));
    let main_tests = parse_tests(&corpus_dir).unwrap();
    let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
+    let template_tests = parse_tests(&template_corpus_file).unwrap_or(TestEntry::default());
    let mut tests = flatten_tests(main_tests);
    tests.extend(flatten_tests(error_tests));
+    tests.extend(flatten_tests(template_tests).into_iter().map(|mut t| {
+        t.template_delimiters = Some(("<%", "%>"));
+        t
+    }));

    let language = get_language(language_name);
    let mut failure_count = 0;
-    for (example_name, input, expected_output, has_fields) in tests {
-        println!("  {} example - {}", language_name, example_name);
+    for test in tests {
+        println!("  {} example - {}", language_name, test.name);

        let passed = allocations::record(|| {
            let mut log_session = None;
            let mut parser = get_parser(&mut log_session, "log.html");
            parser.set_language(language).unwrap();
+            set_included_ranges(&mut parser, &test.input, test.template_delimiters);

-            let tree = parser.parse(&input, None).unwrap();
+            let tree = parser.parse(&test.input, None).unwrap();
            let mut actual_output = tree.root_node().to_sexp();
-            if !has_fields {
+            if !test.has_fields {
                actual_output = strip_sexp_fields(actual_output);
            }

-            if actual_output != expected_output {
+            if actual_output != test.output {
                println!(
                    "Incorrect initial parse for {} - {}",
-                    language_name, example_name,
+                    language_name, test.name,
                );
                print_diff_key();
-                print_diff(&actual_output, &expected_output);
+                print_diff(&actual_output, &test.output);
                println!("");
                return false;
            }
@ -127,7 +136,7 @@ fn test_language_corpus(language_name: &str) {

        let mut parser = Parser::new();
        parser.set_language(language).unwrap();
-        let tree = parser.parse(&input, None).unwrap();
+        let tree = parser.parse(&test.input, None).unwrap();
        drop(parser);

        for trial in 0..*ITERATION_COUNT {
@ -138,7 +147,7 @@ fn test_language_corpus(language_name: &str) {
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(language).unwrap();
                let mut tree = tree.clone();
-                let mut input = input.clone();
+                let mut input = test.input.clone();

                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -158,6 +167,7 @@ fn test_language_corpus(language_name: &str) {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

+                set_included_ranges(&mut parser, &input, test.template_delimiters);
                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();

                // Check that the new tree is consistent.
@ -178,21 +188,22 @@ fn test_language_corpus(language_name: &str) {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

+                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();

                // Verify that the final tree matches the expectation from the corpus.
                let mut actual_output = tree3.root_node().to_sexp();
-                if !has_fields {
+                if !test.has_fields {
                    actual_output = strip_sexp_fields(actual_output);
                }

-                if actual_output != expected_output {
+                if actual_output != test.output {
                    println!(
                        "Incorrect parse for {} - {} - seed {}",
-                        language_name, example_name, seed
+                        language_name, test.name, seed
                    );
                    print_diff_key();
-                    print_diff(&actual_output, &expected_output);
+                    print_diff(&actual_output, &test.output);
                    println!("");
                    return false;
                }
@ -293,23 +304,23 @@ fn test_feature_corpus_files() {
                eprintln!("test language: {:?}", language_name);
            }

-            for (name, input, expected_output, has_fields) in tests {
-                eprintln!("  example: {:?}", name);
+            for test in tests {
+                eprintln!("  example: {:?}", test.name);

                let passed = allocations::record(|| {
                    let mut log_session = None;
                    let mut parser = get_parser(&mut log_session, "log.html");
                    parser.set_language(language).unwrap();
-                    let tree = parser.parse(&input, None).unwrap();
+                    let tree = parser.parse(&test.input, None).unwrap();
                    let mut actual_output = tree.root_node().to_sexp();
-                    if !has_fields {
+                    if !test.has_fields {
                        actual_output = strip_sexp_fields(actual_output);
                    }
-                    if actual_output == expected_output {
+                    if actual_output == test.output {
                        true
                    } else {
                        print_diff_key();
-                        print_diff(&actual_output, &expected_output);
+                        print_diff(&actual_output, &test.output);
                        println!("");
                        false
                    }
@ -390,6 +401,7 @@ fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Re

    let old_range = old_tree.root_node().range();
    let new_range = new_tree.root_node().range();
+
    let byte_range =
        old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
    let point_range = old_range.start_point.min(new_range.start_point)
@ -407,6 +419,45 @@ fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Re
    old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
 }

+/// Configure `parser` to parse only the regions of `input` that lie between
+/// template delimiters.
+///
+/// When `delimiters` is `Some((start, end))`, one included range is produced for
+/// every occurrence of `start`: it begins immediately after the start delimiter
+/// and stops at the next occurrence of `end`, or at the end of `input` if the
+/// region is never closed. When `delimiters` is `None`, an empty range list is
+/// passed to the parser.
+///
+/// # Panics
+///
+/// Panics if the parser rejects the computed ranges, or if either delimiter is
+/// empty (`slice::windows` does not accept a width of zero).
+fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
+    if let Some((start, end)) = delimiters {
+        let (start, end) = (start.as_bytes(), end.as_bytes());
+        let mut ranges = Vec::new();
+        let mut ix = 0;
+        while ix < input.len() {
+            // The window width follows the delimiter length, so delimiters that
+            // are not exactly two bytes long are matched correctly as well.
+            let Some(found) = input[ix..]
+                .windows(start.len())
+                .position(|win| win == start)
+            else {
+                break;
+            };
+            let start_ix = ix + found + start.len();
+            let end_ix = input[start_ix..]
+                .windows(end.len())
+                .position(|win| win == end)
+                .map_or(input.len(), |ix| start_ix + ix);
+            // Resume the search at the end delimiter; `start_ix` is always past
+            // the previous `ix`, so the loop makes progress on every iteration.
+            ix = end_ix;
+            ranges.push(Range {
+                start_byte: start_ix,
+                end_byte: end_ix,
+                start_point: point_for_offset(input, start_ix),
+                end_point: point_for_offset(input, end_ix),
+            });
+        }
+
+        parser.set_included_ranges(&ranges).unwrap();
+    } else {
+        parser.set_included_ranges(&[]).unwrap();
+    }
+}
+
+/// Compute the row/column position reached after the first `offset` bytes of `text`.
+///
+/// Each `\n` byte starts a new row and resets the column to zero; every other
+/// byte advances the column by one. Panics if `offset` exceeds `text.len()`.
+fn point_for_offset(text: &[u8], offset: usize) -> Point {
+    text[..offset]
+        .iter()
+        .fold(Point::default(), |mut position, &byte| {
+            if byte == b'\n' {
+                position.row += 1;
+                position.column = 0;
+            } else {
+                position.column += 1;
+            }
+            position
+        })
+}
+
 fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
    let mut parser = Parser::new();

@ -425,13 +476,16 @@ fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Par
    parser
 }

-fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
-    fn helper(
-        test: TestEntry,
-        is_root: bool,
-        prefix: &str,
-        result: &mut Vec<(String, Vec<u8>, String, bool)>,
-    ) {
+/// A single corpus example produced by `flatten_tests`.
+struct FlattenedTest {
+    /// Name of the example, printed when the example runs or fails.
+    name: String,
+    /// Source text handed to the parser.
+    input: Vec<u8>,
+    /// Expected S-expression that the parsed tree is compared against.
+    output: String,
+    /// When `false`, field names are stripped from the actual S-expression
+    /// before it is compared with `output`.
+    has_fields: bool,
+    /// Start/end delimiters of the regions to parse, forwarded to
+    /// `set_included_ranges`. `flatten_tests` leaves this as `None`; the corpus
+    /// runner sets it for template corpus examples.
+    template_delimiters: Option<(&'static str, &'static str)>,
+}
+
+fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
+    fn helper(test: TestEntry, is_root: bool, prefix: &str, result: &mut Vec<FlattenedTest>) {
        match test {
            TestEntry::Example {
                mut name,
@ -448,7 +502,13 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
                        return;
                    }
                }
-                result.push((name, input, output, has_fields));
+                result.push(FlattenedTest {
+                    name,
+                    input,
+                    output,
+                    has_fields,
+                    template_delimiters: None,
+                });
            }
            TestEntry::Group {
                mut name, children, ..
--- a/cli/src/tests/helpers/random.rs
+++ b/cli/src/tests/helpers/random.rs
@ -4,7 +4,7 @@ use rand::{
 };

 const OPERATORS: &[char] = &[
-    '+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.',
+    '+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%',
 ];

 pub struct Rand(StdRng);
--- a/cli/src/tests/helpers/scope_sequence.rs
+++ b/cli/src/tests/helpers/scope_sequence.rs
@ -44,20 +44,10 @@ impl ScopeSequence {
        text: &Vec<u8>,
        known_changed_ranges: &Vec<Range>,
    ) -> Result<(), String> {
-        if self.0.len() != text.len() {
-            panic!(
-                "Inconsistent scope sequence: {:?}",
-                self.0
-                    .iter()
-                    .zip(text.iter().map(|c| *c as char))
-                    .collect::<Vec<_>>()
-            );
-        }
-
-        assert_eq!(self.0.len(), other.0.len());
        let mut position = Point { row: 0, column: 0 };
-        for (i, stack) in self.0.iter().enumerate() {
-            let other_stack = &other.0[i];
+        for i in 0..(self.0.len().max(other.0.len())) {
+            let stack = &self.0.get(i);
+            let other_stack = &other.0.get(i);
            if *stack != *other_stack && ![b'\r', b'\n'].contains(&text[i]) {
                let containing_range = known_changed_ranges
                    .iter()
--- a/cli/src/tests/parser_test.rs
+++ b/cli/src/tests/parser_test.rs
@ -829,6 +829,7 @@ fn test_parsing_with_one_included_range() {
        js_tree.root_node().start_position(),
        Point::new(0, source_code.find("console").unwrap())
    );
+    assert_eq!(js_tree.included_ranges(), &[script_content_node.range()]);
 }

 #[test]
@ -853,28 +854,27 @@ fn test_parsing_with_multiple_included_ranges() {
    let close_quote_node = template_string_node.child(3).unwrap();

    parser.set_language(get_language("html")).unwrap();
-    parser
-        .set_included_ranges(&[
-            Range {
-                start_byte: open_quote_node.end_byte(),
-                start_point: open_quote_node.end_position(),
-                end_byte: interpolation_node1.start_byte(),
-                end_point: interpolation_node1.start_position(),
-            },
-            Range {
-                start_byte: interpolation_node1.end_byte(),
-                start_point: interpolation_node1.end_position(),
-                end_byte: interpolation_node2.start_byte(),
-                end_point: interpolation_node2.start_position(),
-            },
-            Range {
-                start_byte: interpolation_node2.end_byte(),
-                start_point: interpolation_node2.end_position(),
-                end_byte: close_quote_node.start_byte(),
-                end_point: close_quote_node.start_position(),
-            },
-        ])
-        .unwrap();
+    let html_ranges = &[
+        Range {
+            start_byte: open_quote_node.end_byte(),
+            start_point: open_quote_node.end_position(),
+            end_byte: interpolation_node1.start_byte(),
+            end_point: interpolation_node1.start_position(),
+        },
+        Range {
+            start_byte: interpolation_node1.end_byte(),
+            start_point: interpolation_node1.end_position(),
+            end_byte: interpolation_node2.start_byte(),
+            end_point: interpolation_node2.start_position(),
+        },
+        Range {
+            start_byte: interpolation_node2.end_byte(),
+            start_point: interpolation_node2.end_position(),
+            end_byte: close_quote_node.start_byte(),
+            end_point: close_quote_node.start_position(),
+        },
+    ];
+    parser.set_included_ranges(html_ranges).unwrap();
    let html_tree = parser.parse(source_code, None).unwrap();

    assert_eq!(
@ -888,6 +888,7 @@ fn test_parsing_with_multiple_included_ranges() {
            " (end_tag (tag_name))))",
        )
    );
+    assert_eq!(html_tree.included_ranges(), html_ranges);

    let div_element_node = html_tree.root_node().child(0).unwrap();
    let hello_text_node = div_element_node.child(1).unwrap();
@ -950,7 +951,9 @@ fn test_parsing_with_included_range_containing_mismatched_positions() {

    parser.set_included_ranges(&[range_to_parse]).unwrap();

-    let html_tree = parser.parse(source_code, None).unwrap();
+    let html_tree = parser
+        .parse_with(&mut chunked_input(source_code, 3), None)
+        .unwrap();

    assert_eq!(html_tree.root_node().range(), range_to_parse);

@ -1077,7 +1080,9 @@ fn test_parsing_with_a_newly_excluded_range() {
    // Parse HTML including the template directive, which will cause an error
    let mut parser = Parser::new();
    parser.set_language(get_language("html")).unwrap();
-    let mut first_tree = parser.parse(&source_code, None).unwrap();
+    let mut first_tree = parser
+        .parse_with(&mut chunked_input(&source_code, 3), None)
+        .unwrap();

    // Insert code at the beginning of the document.
    let prefix = "a very very long line of plain text. ";
@ -1112,7 +1117,9 @@ fn test_parsing_with_a_newly_excluded_range() {
            },
        ])
        .unwrap();
-    let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
+    let tree = parser
+        .parse_with(&mut chunked_input(&source_code, 3), Some(&first_tree))
+        .unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
@ -1163,7 +1170,9 @@ fn test_parsing_with_a_newly_included_range() {
    parser
        .set_included_ranges(&[simple_range(range1_start, range1_end)])
        .unwrap();
-    let tree = parser.parse(source_code, None).unwrap();
+    let tree = parser
+        .parse_with(&mut chunked_input(&source_code, 3), None)
+        .unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        concat!(
@ -1180,7 +1189,9 @@ fn test_parsing_with_a_newly_included_range() {
            simple_range(range3_start, range3_end),
        ])
        .unwrap();
-    let tree2 = parser.parse(&source_code, Some(&tree)).unwrap();
+    let tree2 = parser
+        .parse_with(&mut chunked_input(&source_code, 3), Some(&tree))
+        .unwrap();
    assert_eq!(
        tree2.root_node().to_sexp(),
        concat!(
@ -1288,3 +1299,7 @@ fn simple_range(start: usize, end: usize) -> Range {
        end_point: Point::new(0, end),
    }
 }
+
+/// Build a read callback for `Parser::parse_with` that serves `text` in pieces
+/// of at most `size` bytes.
+///
+/// The callback ignores the requested point and returns the bytes starting at
+/// the requested byte offset, truncated at the end of `text`.
+fn chunked_input<'a>(text: &'a str, size: usize) -> impl FnMut(usize, Point) -> &'a [u8] {
+    move |offset, _| {
+        let chunk_end = text.len().min(offset + size);
+        text[offset..chunk_end].as_bytes()
+    }
+}
--- a/cli/src/tests/tree_test.rs
+++ b/cli/src/tests/tree_test.rs
@ -232,6 +232,71 @@ fn test_tree_edit() {
    }
 }

+/// Editing a tree must also shift and clip the included ranges stored on it.
+#[test]
+fn test_tree_edit_with_included_ranges() {
+    // The source below is a single line, so a byte offset is also its column.
+    let single_line_range = |start: usize, end: usize| Range {
+        start_byte: start,
+        end_byte: end,
+        start_point: Point::new(0, start),
+        end_point: Point::new(0, end),
+    };
+
+    let mut parser = Parser::new();
+    parser.set_language(get_language("html")).unwrap();
+
+    let source = "<div><% if a %><span>a</span><% else %><span>b</span><% end %></div>";
+
+    // Only the HTML outside of the `<% ... %>` directives is parsed.
+    let included: Vec<Range> = [(0, 5), (15, 29), (39, 53), (62, 68)]
+        .iter()
+        .map(|&(start, end)| single_line_range(start, end))
+        .collect();
+    parser.set_included_ranges(&included).unwrap();
+
+    let mut tree = parser.parse(source, None).unwrap();
+
+    // Delete bytes 29..53: the `<% else %>` directive and the third range.
+    let deletion = InputEdit {
+        start_byte: 29,
+        old_end_byte: 53,
+        new_end_byte: 29,
+        start_position: Point::new(0, 29),
+        old_end_position: Point::new(0, 53),
+        new_end_position: Point::new(0, 29),
+    };
+    tree.edit(&deletion);
+
+    // The deleted range collapses to an empty range at the edit position and
+    // the final range moves left by the 24 removed bytes.
+    assert_eq!(
+        tree.included_ranges(),
+        &[
+            single_line_range(0, 5),
+            single_line_range(15, 29),
+            single_line_range(29, 29),
+            single_line_range(38, 44),
+        ]
+    );
+}
+
 #[test]
 fn test_tree_cursor() {
    let mut parser = Parser::new();
--- a/lib/binding_rust/bindings.rs
+++ b/lib/binding_rust/bindings.rs
@ -345,6 +345,9 @@ extern "C" {
    #[doc = " Get the language that was used to parse the syntax tree."]
    pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage;
 }
+extern "C" {
+    pub fn ts_tree_included_ranges(arg1: *const TSTree, length: *mut u32) -> *mut TSRange;
+}
 extern "C" {
    #[doc = " Edit the syntax tree to keep it in sync with source code that has been"]
    #[doc = " edited."]
--- a/lib/binding_rust/lib.rs
+++ b/lib/binding_rust/lib.rs
@ -763,6 +763,18 @@ impl Tree {
            util::CBufferIter::new(ptr, count as usize).map(|r| r.into())
        }
    }
+
+    /// Get the included ranges that were used to parse the syntax tree.
+    ///
+    /// The ranges are copied into an owned `Vec`; the buffer returned by the C
+    /// library is freed before this method returns.
+    pub fn included_ranges(&self) -> Vec<Range> {
+        let mut count = 0u32;
+        unsafe {
+            let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), &mut count as *mut u32);
+            // `slice::from_raw_parts` requires a non-null pointer even for an
+            // empty slice. The C side allocates with a calloc-style call, which
+            // may yield NULL when there are zero ranges, so bail out first.
+            if ptr.is_null() {
+                return Vec::new();
+            }
+            let ranges = slice::from_raw_parts(ptr, count as usize);
+            let result = ranges.iter().copied().map(|range| range.into()).collect();
+            (FREE_FN)(ptr as *mut c_void);
+            result
+        }
+    }
 }

 impl fmt::Debug for Tree {
--- a/lib/include/tree_sitter/api.h
+++ b/lib/include/tree_sitter/api.h
@ -381,6 +381,13 @@ TSNode ts_tree_root_node_with_offset(
 */
 const TSLanguage *ts_tree_language(const TSTree *);

+/**
+ * Get the array of included ranges that was used to parse the syntax tree.
+ *
+ * The returned pointer must be freed by the caller.
+ */
+TSRange *ts_tree_included_ranges(const TSTree *, uint32_t *length);
+
 /**
 * Edit the syntax tree to keep it in sync with source code that has been
 * edited.
--- a/lib/src/lexer.c
+++ b/lib/src/lexer.c
@ -104,13 +104,16 @@ static void ts_lexer__get_lookahead(Lexer *self) {

 static void ts_lexer_goto(Lexer *self, Length position) {
  self->current_position = position;
-  bool found_included_range = false;

  // Move to the first valid position at or after the given position.
+  bool found_included_range = false;
  for (unsigned i = 0; i < self->included_range_count; i++) {
    TSRange *included_range = &self->included_ranges[i];
-    if (included_range->end_byte > position.bytes) {
-      if (included_range->start_byte >= position.bytes) {
+    if (
+      included_range->end_byte > self->current_position.bytes &&
+      included_range->end_byte > included_range->start_byte
+    ) {
+      if (included_range->start_byte >= self->current_position.bytes) {
        self->current_position = (Length) {
          .bytes = included_range->start_byte,
          .extent = included_range->start_point,
@ -127,8 +130,8 @@ static void ts_lexer_goto(Lexer *self, Length position) {
    // If the current position is outside of the current chunk of text,
    // then clear out the current chunk of text.
    if (self->chunk && (
-      position.bytes < self->chunk_start ||
-      position.bytes >= self->chunk_start + self->chunk_size
+      self->current_position.bytes < self->chunk_start ||
+      self->current_position.bytes >= self->chunk_start + self->chunk_size
    )) {
      ts_lexer__clear_chunk(self);
    }
@ -164,27 +167,31 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) {
    }
  }

-  const TSRange *current_range = NULL;
-  if (self->current_included_range_index < self->included_range_count) {
-    current_range = &self->included_ranges[self->current_included_range_index];
-    if (self->current_position.bytes == current_range->end_byte) {
-      self->current_included_range_index++;
-      if (self->current_included_range_index < self->included_range_count) {
-        current_range++;
-        self->current_position = (Length) {
-          current_range->start_byte,
-          current_range->start_point,
-        };
-      } else {
-        current_range = NULL;
-      }
+  const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
+  while (
+    self->current_position.bytes >= current_range->end_byte ||
+    current_range->end_byte == current_range->start_byte
+  ) {
+    self->current_included_range_index++;
+    if (self->current_included_range_index < self->included_range_count) {
+      current_range++;
+      self->current_position = (Length) {
+        current_range->start_byte,
+        current_range->start_point,
+      };
+    } else {
+      current_range = NULL;
+      break;
    }
  }

  if (skip) self->token_start_position = self->current_position;

  if (current_range) {
-    if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
+    if (
+      self->current_position.bytes < self->chunk_start ||
+      self->current_position.bytes >= self->chunk_start + self->chunk_size
+    ) {
      ts_lexer__get_chunk(self);
    }
    ts_lexer__get_lookahead(self);
@ -339,6 +346,13 @@ void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
    ts_lexer__mark_end(&self->data);
  }

+  // If the token ended at an included range boundary, then its end position
+  // will have been reset to the end of the preceding range. Reset the start
+  // position to match.
+  if (self->token_end_position.bytes < self->token_start_position.bytes) {
+    self->token_start_position = self->token_end_position;
+  }
+
  uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;

  // In order to determine that a byte sequence is invalid UTF8 or UTF16,
--- a/lib/src/parser.c
+++ b/lib/src/parser.c
@ -447,8 +447,14 @@ static Subtree ts_parser__lex(
        // avoid infinite loops which could otherwise occur, because the lexer is
        // looking for any possible token, instead of looking for the specific set of
        // tokens that are valid in some parse state.
+        //
+        // Note that it's possible that the token end position may be *before* the
+        // original position of the lexer because of the way that tokens are positioned
+        // at included range boundaries: when a token is terminated at the start of
+        // an included range, it is marked as ending at the *end* of the preceding
+        // included range.
        if (
-          self->lexer.token_end_position.bytes == current_position.bytes &&
+          self->lexer.token_end_position.bytes <= current_position.bytes &&
          (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) &&
          !external_scanner_state_changed
        ) {
@ -525,10 +531,6 @@ static Subtree ts_parser__lex(
      self->language
    );
  } else {
-    if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
-      self->lexer.token_start_position = self->lexer.token_end_position;
-    }
-
    bool is_keyword = false;
    TSSymbol symbol = self->lexer.data.result_symbol;
    Length padding = length_sub(self->lexer.token_start_position, start_position);
--- a/lib/src/tree.c
+++ b/lib/src/tree.c
@ -66,17 +66,23 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
          range->end_point = POINT_MAX;
        }
      }
-      if (range->start_byte >= edit->old_end_byte) {
-        range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
-        range->start_point = point_add(
-          edit->new_end_point,
-          point_sub(range->start_point, edit->old_end_point)
-        );
-        if (range->start_byte < edit->new_end_byte) {
-          range->start_byte = UINT32_MAX;
-          range->start_point = POINT_MAX;
-        }
+    } else if (range->end_byte > edit->start_byte) {
+      range->end_byte = edit->start_byte;
+      range->end_point = edit->start_point;
+    }
+    if (range->start_byte >= edit->old_end_byte) {
+      range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
+      range->start_point = point_add(
+        edit->new_end_point,
+        point_sub(range->start_point, edit->old_end_point)
+      );
+      if (range->start_byte < edit->new_end_byte) {
+        range->start_byte = UINT32_MAX;
+        range->start_point = POINT_MAX;
      }
+    } else if (range->start_byte > edit->start_byte) {
+      range->start_byte = edit->start_byte;
+      range->start_point = edit->start_point;
    }
  }

@ -85,6 +91,13 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
  ts_subtree_pool_delete(&pool);
 }

+// Return a newly allocated copy of the tree's included ranges and store the
+// number of ranges in `*length`. The caller owns the returned buffer.
+TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
+  uint32_t count = self->included_range_count;
+  TSRange *copy = ts_calloc(count, sizeof(TSRange));
+  memcpy(copy, self->included_ranges, count * sizeof(TSRange));
+  *length = count;
+  return copy;
+}
+
 TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
  TreeCursor cursor1 = {NULL, array_new()};
  TreeCursor cursor2 = {NULL, array_new()};
--- a/test/fixtures/template_corpus/readme.md
+++ b/test/fixtures/template_corpus/readme.md
@ -0,0 +1,6 @@
+The Template Corpus
+===================
+
+This directory contains corpus tests that exercise parsing a set of disjoint ranges within a file.
+
+Each of these input files contains source code surrounded by the delimiters `<%` and `%>`. The content outside of these delimiters is meant to be ignored.
--- a/test/fixtures/template_corpus/ruby_templates.txt
+++ b/test/fixtures/template_corpus/ruby_templates.txt
@ -0,0 +1,78 @@
+==============================
+Templates with errors
+==============================
+
+<div>
+  <% if notice.present? %>
+    <p id="notice"><% notice %></p>
+  <% end %>
+  <div>
+    <h1>Foods</h1>
+    <div>
+      <% link_to 'New food', new_food_path, class: "block font-medium" %>
+      <% link_to 'Search Database', database_foods_search_path, class: "block font-medium" %>
+    </div>
+  </div>
+
+  <% . render partial: "form", locals: { food: @new_food } %>
+
+  <% form_with url: "/search", method: :get do |form| %>
+    <% form.label :previous_query, 'Search previous foods:' %>
+    <% form.text_field :previous_query %>
+    <% form.submit "Search" %>
+  <% end %>
+
+  <div id="recipes">
+    <% render @foods %>
+  </div>
+</div>
+
+---
+
+(program
+  (if
+    (call (identifier) (identifier))
+    (then (identifier)))
+  (call
+    (identifier)
+    (argument_list
+      (string (string_content))
+      (identifier)
+      (pair (hash_key_symbol) (string (string_content)))))
+  (call
+    (identifier)
+    (argument_list
+      (string (string_content))
+      (identifier)
+      (pair (hash_key_symbol) (string (string_content)))))
+  (ERROR)
+  (call
+    (identifier)
+    (argument_list
+      (pair (hash_key_symbol) (string (string_content)))
+      (pair (hash_key_symbol) (hash (pair (hash_key_symbol) (instance_variable))))))
+  (call
+    (identifier)
+    (argument_list
+      (pair (hash_key_symbol) (string (string_content)))
+      (pair (hash_key_symbol) (simple_symbol)))
+    (do_block
+      (block_parameters
+        (identifier))
+      (body_statement
+        (call
+          (identifier)
+          (identifier)
+          (argument_list (simple_symbol) (string (string_content))))
+  (call
+    (identifier)
+    (identifier)
+    (argument_list
+      (simple_symbol)))
+  (call
+    (identifier)
+    (identifier)
+    (argument_list (string (string_content)))))))
+  (call
+    (identifier)
+    (argument_list (instance_variable))))