From 477b6677537e89c7bdff14ce84dad6d23a6415bb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 18 Aug 2022 13:48:47 -0700 Subject: [PATCH 1/2] Add ts_tree_root_node_with_offset API --- cli/src/tests/node_test.rs | 28 ++++++++++++++++++++++++++++ lib/binding_rust/bindings.rs | 9 +++++++++ lib/binding_rust/lib.rs | 14 ++++++++++++++ lib/include/tree_sitter/api.h | 10 ++++++++++ lib/src/tree.c | 10 ++++++++++ 5 files changed, 71 insertions(+) diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 776ca2c7..6d5ed61d 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -529,6 +529,34 @@ fn test_node_edit() { } } +#[test] +fn test_root_node_with_offset() { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let tree = parser.parse(" if (a) b", None).unwrap(); + + let node = tree.root_node_with_offset(6, Point::new(2, 2)); + assert_eq!(node.byte_range(), 8..16); + assert_eq!(node.start_position(), Point::new(2, 4)); + assert_eq!(node.end_position(), Point::new(2, 12)); + + let child = node.child(0).unwrap().child(2).unwrap(); + assert_eq!(child.kind(), "expression_statement"); + assert_eq!(child.byte_range(), 15..16); + assert_eq!(child.start_position(), Point::new(2, 11)); + assert_eq!(child.end_position(), Point::new(2, 12)); + + let mut cursor = node.walk(); + cursor.goto_first_child(); + cursor.goto_first_child(); + cursor.goto_next_sibling(); + let child = cursor.node(); + assert_eq!(child.kind(), "parenthesized_expression"); + assert_eq!(child.byte_range(), 11..14); + assert_eq!(child.start_position(), Point::new(2, 7)); + assert_eq!(child.end_position(), Point::new(2, 10)); +} + #[test] fn test_node_is_extra() { let mut parser = Parser::new(); diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 1447d09d..0266521d 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -332,6 +332,15 @@ extern "C" { #[doc = " Get the root 
node of the syntax tree."] pub fn ts_tree_root_node(self_: *const TSTree) -> TSNode; } +extern "C" { + #[doc = " Get the root node of the syntax tree, but with its position"] + #[doc = " shifted forward by the given offset."] + pub fn ts_tree_root_node_with_offset( + self_: *const TSTree, + offset_bytes: u32, + offset_point: TSPoint, + ) -> TSNode; +} extern "C" { #[doc = " Get the language that was used to parse the syntax tree."] pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index f757b107..934915fe 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -708,6 +708,20 @@ impl Tree { Node::new(unsafe { ffi::ts_tree_root_node(self.0.as_ptr()) }).unwrap() } + /// Get the root node of the syntax tree, but with its position shifted + /// forward by the given offset. + #[doc(alias = "ts_tree_root_node_with_offset")] + pub fn root_node_with_offset(&self, offset_bytes: usize, offset_extent: Point) -> Node { + Node::new(unsafe { + ffi::ts_tree_root_node_with_offset( + self.0.as_ptr(), + offset_bytes as u32, + offset_extent.into(), + ) + }) + .unwrap() + } + /// Get the language that was used to parse the syntax tree. #[doc(alias = "ts_tree_language")] pub fn language(&self) -> Language { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index e2941532..727dded3 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -366,6 +366,16 @@ void ts_tree_delete(TSTree *self); */ TSNode ts_tree_root_node(const TSTree *self); +/** + * Get the root node of the syntax tree, but with its position + * shifted forward by the given offset. + */ +TSNode ts_tree_root_node_with_offset( + const TSTree *self, + uint32_t offset_bytes, + TSPoint offset_point +); + /** * Get the language that was used to parse the syntax tree. 
*/ diff --git a/lib/src/tree.c b/lib/src/tree.c index f2cc85ef..103ba84f 100644 --- a/lib/src/tree.c +++ b/lib/src/tree.c @@ -1,6 +1,7 @@ #include "tree_sitter/api.h" #include "./array.h" #include "./get_changed_ranges.h" +#include "./length.h" #include "./subtree.h" #include "./tree_cursor.h" #include "./tree.h" @@ -37,6 +38,15 @@ TSNode ts_tree_root_node(const TSTree *self) { return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); } +TSNode ts_tree_root_node_with_offset( + const TSTree *self, + uint32_t offset_bytes, + TSPoint offset_extent +) { + Length offset = {offset_bytes, offset_extent}; + return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); +} + const TSLanguage *ts_tree_language(const TSTree *self) { return self->language; } From 53ed4cf037aee1493f7136ae0f50e1f47625642a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Aug 2022 10:20:40 -0700 Subject: [PATCH 2/2] Tolerate tree edits whose old range extends beyond the end of the tree --- cli/src/tests/corpus_test.rs | 19 +++++++- cli/src/tests/tree_test.rs | 93 ++++++++++++++++++++++++++---------- lib/src/length.h | 8 ++++ lib/src/subtree.c | 34 ++++++------- 4 files changed, 108 insertions(+), 46 deletions(-) diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 7a6a9ba8..b0fedb9c 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -384,9 +384,26 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) { } fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String> { - let changed_ranges = old_tree.changed_ranges(new_tree).collect(); + let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>(); let old_scope_sequence = ScopeSequence::new(old_tree); let new_scope_sequence = ScopeSequence::new(new_tree); + + let old_range = old_tree.root_node().range(); + let new_range = new_tree.root_node().range(); + let byte_range = + 
old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte); + let point_range = old_range.start_point.min(new_range.start_point) + ..old_range.end_point.max(new_range.end_point); + + for range in &changed_ranges { + if range.end_byte > byte_range.end || range.end_point > point_range.end { + return Err(format!( + "changed range extends outside of the old and new trees {:?}", + range + )); + } + } + old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges) } diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index d2b1eb80..d5c54545 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -89,14 +89,11 @@ fn test_tree_edit() { let child2 = expr.child(1).unwrap(); assert!(expr.has_changes()); - assert_eq!(expr.start_byte(), 4); - assert_eq!(expr.end_byte(), 17); + assert_eq!(expr.byte_range(), 4..17); assert!(child1.has_changes()); - assert_eq!(child1.start_byte(), 4); - assert_eq!(child1.end_byte(), 7); + assert_eq!(child1.byte_range(), 4..7); assert!(!child2.has_changes()); - assert_eq!(child2.start_byte(), 9); - assert_eq!(child2.end_byte(), 12); + assert_eq!(child2.byte_range(), 9..12); } // replacement starting at the edge of the tree's padding: @@ -117,14 +114,11 @@ fn test_tree_edit() { let child2 = expr.child(1).unwrap(); assert!(expr.has_changes()); - assert_eq!(expr.start_byte(), 4); - assert_eq!(expr.end_byte(), 17); + assert_eq!(expr.byte_range(), 4..17); assert!(child1.has_changes()); - assert_eq!(child1.start_byte(), 4); - assert_eq!(child1.end_byte(), 7); + assert_eq!(child1.byte_range(), 4..7); assert!(!child2.has_changes()); - assert_eq!(child2.start_byte(), 9); - assert_eq!(child2.end_byte(), 12); + assert_eq!(child2.byte_range(), 9..12); } // deletion that spans more than one child node: @@ -146,17 +140,13 @@ fn test_tree_edit() { let child3 = expr.child(2).unwrap(); assert!(expr.has_changes()); - assert_eq!(expr.start_byte(), 4); - assert_eq!(expr.end_byte(), 
8); + assert_eq!(expr.byte_range(), 4..8); assert!(child1.has_changes()); - assert_eq!(child1.start_byte(), 4); - assert_eq!(child1.end_byte(), 4); + assert_eq!(child1.byte_range(), 4..4); assert!(child2.has_changes()); - assert_eq!(child2.start_byte(), 4); - assert_eq!(child2.end_byte(), 4); + assert_eq!(child2.byte_range(), 4..4); assert!(child3.has_changes()); - assert_eq!(child3.start_byte(), 5); - assert_eq!(child3.end_byte(), 8); + assert_eq!(child3.byte_range(), 5..8); } // insertion at the end of the tree: @@ -178,14 +168,67 @@ fn test_tree_edit() { let child3 = expr.child(2).unwrap(); assert!(expr.has_changes()); - assert_eq!(expr.start_byte(), 2); - assert_eq!(expr.end_byte(), 16); + assert_eq!(expr.byte_range(), 2..16); assert!(!child1.has_changes()); - assert_eq!(child1.end_byte(), 5); + assert_eq!(child1.byte_range(), 2..5); assert!(!child2.has_changes()); - assert_eq!(child2.end_byte(), 10); + assert_eq!(child2.byte_range(), 7..10); + assert!(child3.has_changes()); + assert_eq!(child3.byte_range(), 12..16); + } + + // replacement that starts within a token and extends beyond the end of the tree: + // resize the token and empty out any subsequent child nodes. 
+ { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 3, + old_end_byte: 90, + new_end_byte: 4, + start_position: Point::new(0, 3), + old_end_position: Point::new(0, 90), + new_end_position: Point::new(0, 4), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + let child3 = expr.child(2).unwrap(); + assert_eq!(expr.byte_range(), 2..4); + assert!(expr.has_changes()); + assert_eq!(child1.byte_range(), 2..4); + assert!(child1.has_changes()); + assert_eq!(child2.byte_range(), 4..4); + assert!(child2.has_changes()); + assert_eq!(child3.byte_range(), 4..4); + assert!(child3.has_changes()); + } + + // replacement that starts in whitespace and extends beyond the end of the tree: + // shift the token's start position and empty out its content. + { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 6, + old_end_byte: 90, + new_end_byte: 8, + start_position: Point::new(0, 6), + old_end_position: Point::new(0, 90), + new_end_position: Point::new(0, 8), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + let child3 = expr.child(2).unwrap(); + assert_eq!(expr.byte_range(), 2..8); + assert!(expr.has_changes()); + assert_eq!(child1.byte_range(), 2..5); + assert!(!child1.has_changes()); + assert_eq!(child2.byte_range(), 8..8); + assert!(child2.has_changes()); + assert_eq!(child3.byte_range(), 8..8); assert!(child3.has_changes()); - assert_eq!(child3.end_byte(), 16); } } diff --git a/lib/src/length.h b/lib/src/length.h index 61de9fc1..42d61ef3 100644 --- a/lib/src/length.h +++ b/lib/src/length.h @@ -41,4 +41,12 @@ static inline Length length_zero(void) { return result; } +static inline Length length_saturating_sub(Length len1, Length len2) { + if (len1.bytes > len2.bytes) { + return length_sub(len1, len2); + } else { + return length_zero(); + 
} +} + #endif diff --git a/lib/src/subtree.c b/lib/src/subtree.c index d6cd2d71..1e99c799 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -666,8 +666,9 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool Length size = ts_subtree_size(*entry.tree); Length padding = ts_subtree_padding(*entry.tree); + Length total_size = length_add(padding, size); uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); - uint32_t end_byte = padding.bytes + size.bytes + lookahead_bytes; + uint32_t end_byte = total_size.bytes + lookahead_bytes; if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; // If the edit is entirely within the space before this subtree, then shift this @@ -679,7 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool // If the edit starts in the space before this subtree and extends into this subtree, // shrink the subtree's content to compensate for the change in the space before it. else if (edit.start.bytes < padding.bytes) { - size = length_sub(size, length_sub(edit.old_end, padding)); + size = length_saturating_sub(size, length_sub(edit.old_end, padding)); padding = edit.new_end; } @@ -690,15 +691,14 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool } // If the edit is within this subtree, resize the subtree to reflect the edit. 
- else { - uint32_t total_bytes = padding.bytes + size.bytes; - if (edit.start.bytes < total_bytes || - (edit.start.bytes == total_bytes && is_pure_insertion)) { - size = length_add( - length_sub(edit.new_end, padding), - length_sub(size, length_sub(edit.old_end, padding)) - ); - } + else if ( + edit.start.bytes < total_size.bytes || + (edit.start.bytes == total_size.bytes && is_pure_insertion) + ) { + size = length_add( + length_sub(edit.new_end, padding), + length_saturating_sub(total_size, edit.old_end) + ); } MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); @@ -764,17 +764,11 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool // Transform edit into the child's coordinate space. Edit child_edit = { - .start = length_sub(edit.start, child_left), - .old_end = length_sub(edit.old_end, child_left), - .new_end = length_sub(edit.new_end, child_left), + .start = length_saturating_sub(edit.start, child_left), + .old_end = length_saturating_sub(edit.old_end, child_left), + .new_end = length_saturating_sub(edit.new_end, child_left), }; - // Clamp child_edit to the child's bounds. - if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero(); - if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero(); - if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero(); - if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size; - // Interpret all inserted text as applying to the *first* child that touches the edit. // Subsequent children are only never have any text inserted into them; they are only // shrunk to compensate for the edit.