Tolerate tree edits whose old range extends beyond the end of the tree

This commit is contained in:
Max Brunsfeld 2022-08-25 10:20:40 -07:00
parent 477b667753
commit 53ed4cf037
4 changed files with 108 additions and 46 deletions

View file

@ -384,9 +384,26 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
}
/// Validates the ranges reported by `old_tree.changed_ranges(new_tree)`.
///
/// Returns an `Err` describing the first changed range whose end (as a byte offset
/// or as a point) lies past the later of the two root nodes' ends. Otherwise returns
/// whatever `check_changes` on the old tree's `ScopeSequence` returns when given the
/// new tree's `ScopeSequence`, the input, and the changed ranges.
fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String> {
// NOTE(review): this un-annotated `collect()` is immediately shadowed by the next
// line; it looks like the pre-change side of the diff — confirm before keeping it.
let changed_ranges = old_tree.changed_ranges(new_tree).collect();
let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
let old_scope_sequence = ScopeSequence::new(old_tree);
let new_scope_sequence = ScopeSequence::new(new_tree);
// Combined extent of both root nodes: earliest start to latest end, tracked both
// in bytes and in points.
let old_range = old_tree.root_node().range();
let new_range = new_tree.root_node().range();
let byte_range =
old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
let point_range = old_range.start_point.min(new_range.start_point)
..old_range.end_point.max(new_range.end_point);
// Only the end of each changed range is compared against the combined extent;
// the start bounds computed above are not consulted here.
for range in &changed_ranges {
if range.end_byte > byte_range.end || range.end_point > point_range.end {
return Err(format!(
"changed range extends outside of the old and new trees {:?}",
range
));
}
}
old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
}

View file

@ -89,14 +89,11 @@ fn test_tree_edit() {
let child2 = expr.child(1).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.start_byte(), 4);
assert_eq!(expr.end_byte(), 17);
assert_eq!(expr.byte_range(), 4..17);
assert!(child1.has_changes());
assert_eq!(child1.start_byte(), 4);
assert_eq!(child1.end_byte(), 7);
assert_eq!(child1.byte_range(), 4..7);
assert!(!child2.has_changes());
assert_eq!(child2.start_byte(), 9);
assert_eq!(child2.end_byte(), 12);
assert_eq!(child2.byte_range(), 9..12);
}
// replacement starting at the edge of the tree's padding:
@ -117,14 +114,11 @@ fn test_tree_edit() {
let child2 = expr.child(1).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.start_byte(), 4);
assert_eq!(expr.end_byte(), 17);
assert_eq!(expr.byte_range(), 4..17);
assert!(child1.has_changes());
assert_eq!(child1.start_byte(), 4);
assert_eq!(child1.end_byte(), 7);
assert_eq!(child1.byte_range(), 4..7);
assert!(!child2.has_changes());
assert_eq!(child2.start_byte(), 9);
assert_eq!(child2.end_byte(), 12);
assert_eq!(child2.byte_range(), 9..12);
}
// deletion that spans more than one child node:
@ -146,17 +140,13 @@ fn test_tree_edit() {
let child3 = expr.child(2).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.start_byte(), 4);
assert_eq!(expr.end_byte(), 8);
assert_eq!(expr.byte_range(), 4..8);
assert!(child1.has_changes());
assert_eq!(child1.start_byte(), 4);
assert_eq!(child1.end_byte(), 4);
assert_eq!(child1.byte_range(), 4..4);
assert!(child2.has_changes());
assert_eq!(child2.start_byte(), 4);
assert_eq!(child2.end_byte(), 4);
assert_eq!(child2.byte_range(), 4..4);
assert!(child3.has_changes());
assert_eq!(child3.start_byte(), 5);
assert_eq!(child3.end_byte(), 8);
assert_eq!(child3.byte_range(), 5..8);
}
// insertion at the end of the tree:
@ -178,14 +168,67 @@ fn test_tree_edit() {
let child3 = expr.child(2).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.start_byte(), 2);
assert_eq!(expr.end_byte(), 16);
assert_eq!(expr.byte_range(), 2..16);
assert!(!child1.has_changes());
assert_eq!(child1.end_byte(), 5);
assert_eq!(child1.byte_range(), 2..5);
assert!(!child2.has_changes());
assert_eq!(child2.end_byte(), 10);
assert_eq!(child2.byte_range(), 7..10);
assert!(child3.has_changes());
assert_eq!(child3.byte_range(), 12..16);
}
// replacement that starts within a token and extends beyond the end of the tree:
// resize the token and empty out any subsequent child nodes.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 3,
old_end_byte: 90,
new_end_byte: 4,
start_position: Point::new(0, 3),
old_end_position: Point::new(0, 90),
new_end_position: Point::new(0, 4),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
let child3 = expr.child(2).unwrap();
assert_eq!(expr.byte_range(), 2..4);
assert!(expr.has_changes());
assert_eq!(child1.byte_range(), 2..4);
assert!(child1.has_changes());
assert_eq!(child2.byte_range(), 4..4);
assert!(child2.has_changes());
assert_eq!(child3.byte_range(), 4..4);
assert!(child3.has_changes());
}
// replacement that starts in whitespace and extends beyond the end of the tree:
// shift the token's start position and empty out its content.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 6,
old_end_byte: 90,
new_end_byte: 8,
start_position: Point::new(0, 6),
old_end_position: Point::new(0, 90),
new_end_position: Point::new(0, 8),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
let child3 = expr.child(2).unwrap();
assert_eq!(expr.byte_range(), 2..8);
assert!(expr.has_changes());
assert_eq!(child1.byte_range(), 2..5);
assert!(!child1.has_changes());
assert_eq!(child2.byte_range(), 8..8);
assert!(child2.has_changes());
assert_eq!(child3.byte_range(), 8..8);
assert!(child3.has_changes());
assert_eq!(child3.end_byte(), 16);
}
}

View file

@ -41,4 +41,12 @@ static inline Length length_zero(void) {
return result;
}
// Returns `length_sub(len1, len2)` when `len1` holds strictly more bytes than
// `len2`; in every other case returns `length_zero()`. Only the `bytes` fields
// are compared to make this decision.
static inline Length length_saturating_sub(Length len1, Length len2) {
  if (len1.bytes <= len2.bytes) return length_zero();
  return length_sub(len1, len2);
}
#endif

View file

@ -666,8 +666,9 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
Length size = ts_subtree_size(*entry.tree);
Length padding = ts_subtree_padding(*entry.tree);
Length total_size = length_add(padding, size);
uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
uint32_t end_byte = padding.bytes + size.bytes + lookahead_bytes;
uint32_t end_byte = total_size.bytes + lookahead_bytes;
if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
// If the edit is entirely within the space before this subtree, then shift this
@ -679,7 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
// If the edit starts in the space before this subtree and extends into this subtree,
// shrink the subtree's content to compensate for the change in the space before it.
else if (edit.start.bytes < padding.bytes) {
size = length_sub(size, length_sub(edit.old_end, padding));
size = length_saturating_sub(size, length_sub(edit.old_end, padding));
padding = edit.new_end;
}
@ -690,15 +691,14 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
}
// If the edit is within this subtree, resize the subtree to reflect the edit.
else {
uint32_t total_bytes = padding.bytes + size.bytes;
if (edit.start.bytes < total_bytes ||
(edit.start.bytes == total_bytes && is_pure_insertion)) {
size = length_add(
length_sub(edit.new_end, padding),
length_sub(size, length_sub(edit.old_end, padding))
);
}
else if (
edit.start.bytes < total_size.bytes ||
(edit.start.bytes == total_size.bytes && is_pure_insertion)
) {
size = length_add(
length_sub(edit.new_end, padding),
length_saturating_sub(total_size, edit.old_end)
);
}
MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
@ -764,17 +764,11 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
// Transform edit into the child's coordinate space.
Edit child_edit = {
.start = length_sub(edit.start, child_left),
.old_end = length_sub(edit.old_end, child_left),
.new_end = length_sub(edit.new_end, child_left),
.start = length_saturating_sub(edit.start, child_left),
.old_end = length_saturating_sub(edit.old_end, child_left),
.new_end = length_saturating_sub(edit.new_end, child_left),
};
// Clamp child_edit to the child's bounds.
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
// Interpret all inserted text as applying to the *first* child that touches the edit.
// Subsequent children never have any text inserted into them; they are only
// shrunk to compensate for the edit.