From 53ed4cf037aee1493f7136ae0f50e1f47625642a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Aug 2022 10:20:40 -0700 Subject: [PATCH] Tolerate tree edits whose old range extends beyond the end of the tree --- cli/src/tests/corpus_test.rs | 19 +++++++- cli/src/tests/tree_test.rs | 93 ++++++++++++++++++++++++++---------- lib/src/length.h | 8 ++++ lib/src/subtree.c | 34 ++++++------- 4 files changed, 108 insertions(+), 46 deletions(-) diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 7a6a9ba8..b0fedb9c 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -384,9 +384,26 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) { } fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String> { - let changed_ranges = old_tree.changed_ranges(new_tree).collect(); + let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>(); let old_scope_sequence = ScopeSequence::new(old_tree); let new_scope_sequence = ScopeSequence::new(new_tree); + + let old_range = old_tree.root_node().range(); + let new_range = new_tree.root_node().range(); + let byte_range = + old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte); + let point_range = old_range.start_point.min(new_range.start_point) + ..old_range.end_point.max(new_range.end_point); + + for range in &changed_ranges { + if range.end_byte > byte_range.end || range.end_point > point_range.end { + return Err(format!( + "changed range extends outside of the old and new trees {:?}", + range + )); + } + } + old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges) } diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index d2b1eb80..d5c54545 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -89,14 +89,11 @@ fn test_tree_edit() { let child2 = expr.child(1).unwrap(); assert!(expr.has_changes()); - assert_eq!(expr.start_byte(), 4); - 
assert_eq!(expr.end_byte(), 17); + assert_eq!(expr.byte_range(), 4..17); assert!(child1.has_changes()); - assert_eq!(child1.start_byte(), 4); - assert_eq!(child1.end_byte(), 7); + assert_eq!(child1.byte_range(), 4..7); assert!(!child2.has_changes()); - assert_eq!(child2.start_byte(), 9); - assert_eq!(child2.end_byte(), 12); + assert_eq!(child2.byte_range(), 9..12); } // replacement starting at the edge of the tree's padding: @@ -117,14 +114,11 @@ fn test_tree_edit() { let child2 = expr.child(1).unwrap(); assert!(expr.has_changes()); - assert_eq!(expr.start_byte(), 4); - assert_eq!(expr.end_byte(), 17); + assert_eq!(expr.byte_range(), 4..17); assert!(child1.has_changes()); - assert_eq!(child1.start_byte(), 4); - assert_eq!(child1.end_byte(), 7); + assert_eq!(child1.byte_range(), 4..7); assert!(!child2.has_changes()); - assert_eq!(child2.start_byte(), 9); - assert_eq!(child2.end_byte(), 12); + assert_eq!(child2.byte_range(), 9..12); } // deletion that spans more than one child node: @@ -146,17 +140,13 @@ fn test_tree_edit() { let child3 = expr.child(2).unwrap(); assert!(expr.has_changes()); - assert_eq!(expr.start_byte(), 4); - assert_eq!(expr.end_byte(), 8); + assert_eq!(expr.byte_range(), 4..8); assert!(child1.has_changes()); - assert_eq!(child1.start_byte(), 4); - assert_eq!(child1.end_byte(), 4); + assert_eq!(child1.byte_range(), 4..4); assert!(child2.has_changes()); - assert_eq!(child2.start_byte(), 4); - assert_eq!(child2.end_byte(), 4); + assert_eq!(child2.byte_range(), 4..4); assert!(child3.has_changes()); - assert_eq!(child3.start_byte(), 5); - assert_eq!(child3.end_byte(), 8); + assert_eq!(child3.byte_range(), 5..8); } // insertion at the end of the tree: @@ -178,14 +168,67 @@ fn test_tree_edit() { let child3 = expr.child(2).unwrap(); assert!(expr.has_changes()); - assert_eq!(expr.start_byte(), 2); - assert_eq!(expr.end_byte(), 16); + assert_eq!(expr.byte_range(), 2..16); assert!(!child1.has_changes()); - assert_eq!(child1.end_byte(), 5); + 
assert_eq!(child1.byte_range(), 2..5); assert!(!child2.has_changes()); - assert_eq!(child2.end_byte(), 10); + assert_eq!(child2.byte_range(), 7..10); + assert!(child3.has_changes()); + assert_eq!(child3.byte_range(), 12..16); + } + + // replacement that starts within a token and extends beyond the end of the tree: + // resize the token and empty out any subsequent child nodes. + { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 3, + old_end_byte: 90, + new_end_byte: 4, + start_position: Point::new(0, 3), + old_end_position: Point::new(0, 90), + new_end_position: Point::new(0, 4), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + let child3 = expr.child(2).unwrap(); + assert_eq!(expr.byte_range(), 2..4); + assert!(expr.has_changes()); + assert_eq!(child1.byte_range(), 2..4); + assert!(child1.has_changes()); + assert_eq!(child2.byte_range(), 4..4); + assert!(child2.has_changes()); + assert_eq!(child3.byte_range(), 4..4); + assert!(child3.has_changes()); + } + + // replacement that starts in whitespace and extends beyond the end of the tree: + // shift the token's start position and empty out its content. 
+ { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 6, + old_end_byte: 90, + new_end_byte: 8, + start_position: Point::new(0, 6), + old_end_position: Point::new(0, 90), + new_end_position: Point::new(0, 8), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + let child3 = expr.child(2).unwrap(); + assert_eq!(expr.byte_range(), 2..8); + assert!(expr.has_changes()); + assert_eq!(child1.byte_range(), 2..5); + assert!(!child1.has_changes()); + assert_eq!(child2.byte_range(), 8..8); + assert!(child2.has_changes()); + assert_eq!(child3.byte_range(), 8..8); assert!(child3.has_changes()); - assert_eq!(child3.end_byte(), 16); } } diff --git a/lib/src/length.h b/lib/src/length.h index 61de9fc1..42d61ef3 100644 --- a/lib/src/length.h +++ b/lib/src/length.h @@ -41,4 +41,12 @@ static inline Length length_zero(void) { return result; } +static inline Length length_saturating_sub(Length len1, Length len2) { + if (len1.bytes > len2.bytes) { + return length_sub(len1, len2); + } else { + return length_zero(); + } +} + #endif diff --git a/lib/src/subtree.c b/lib/src/subtree.c index d6cd2d71..1e99c799 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -666,8 +666,9 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool Length size = ts_subtree_size(*entry.tree); Length padding = ts_subtree_padding(*entry.tree); + Length total_size = length_add(padding, size); uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); - uint32_t end_byte = padding.bytes + size.bytes + lookahead_bytes; + uint32_t end_byte = total_size.bytes + lookahead_bytes; if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; // If the edit is entirely within the space before this subtree, then shift this @@ -679,7 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool // If the edit 
starts in the space before this subtree and extends into this subtree, // shrink the subtree's content to compensate for the change in the space before it. else if (edit.start.bytes < padding.bytes) { - size = length_sub(size, length_sub(edit.old_end, padding)); + size = length_saturating_sub(size, length_sub(edit.old_end, padding)); padding = edit.new_end; } @@ -690,15 +691,14 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool } // If the edit is within this subtree, resize the subtree to reflect the edit. - else { - uint32_t total_bytes = padding.bytes + size.bytes; - if (edit.start.bytes < total_bytes || - (edit.start.bytes == total_bytes && is_pure_insertion)) { - size = length_add( - length_sub(edit.new_end, padding), - length_sub(size, length_sub(edit.old_end, padding)) - ); - } + else if ( + edit.start.bytes < total_size.bytes || + (edit.start.bytes == total_size.bytes && is_pure_insertion) + ) { + size = length_add( + length_sub(edit.new_end, padding), + length_saturating_sub(total_size, edit.old_end) + ); } MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); @@ -764,17 +764,11 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool // Transform edit into the child's coordinate space. Edit child_edit = { - .start = length_sub(edit.start, child_left), - .old_end = length_sub(edit.old_end, child_left), - .new_end = length_sub(edit.new_end, child_left), + .start = length_saturating_sub(edit.start, child_left), + .old_end = length_saturating_sub(edit.old_end, child_left), + .new_end = length_saturating_sub(edit.new_end, child_left), }; - // Clamp child_edit to the child's bounds. 
- if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero(); - if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero(); - if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero(); - if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size; - // Interpret all inserted text as applying to the *first* child that touches the edit. // Subsequent children are only never have any text inserted into them; they are only // shrunk to compensate for the edit.