Avoid recursion in ts_subtree_edit
This prevents stack overflows when editing very large trees.

Co-Authored-By: Ashi Krishnan <queerviolet@github.com>
This commit is contained in:
parent
9e8bec458d
commit
0f0adfb681
2 changed files with 115 additions and 74 deletions
|
|
@@ -478,86 +478,128 @@ int ts_subtree_compare(const Subtree *left, const Subtree *right) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
const Subtree *ts_subtree_invalidate_lookahead(const Subtree *self, uint32_t edit_byte_offset,
|
||||
SubtreePool *pool) {
|
||||
if (edit_byte_offset >= self->bytes_scanned) return self;
|
||||
const Subtree *ts_subtree_edit(const Subtree *self, const TSInputEdit *edit, SubtreePool *pool) {
|
||||
typedef struct {
|
||||
const Subtree **tree;
|
||||
Edit edit;
|
||||
} StackEntry;
|
||||
|
||||
Subtree *result = ts_subtree_make_mut(pool, self);
|
||||
result->has_changes = true;
|
||||
Array(StackEntry) stack = array_new();
|
||||
array_push(&stack, ((StackEntry) {
|
||||
.tree = &self,
|
||||
.edit = (Edit) {
|
||||
.start = {edit->start_byte, edit->start_point},
|
||||
.old_end = {edit->old_end_byte, edit->old_end_point},
|
||||
.new_end = {edit->new_end_byte, edit->new_end_point},
|
||||
},
|
||||
}));
|
||||
|
||||
if (result->children.size > 0) {
|
||||
uint32_t child_start_byte = 0;
|
||||
while (stack.size) {
|
||||
StackEntry entry = array_pop(&stack);
|
||||
Edit edit = entry.edit;
|
||||
|
||||
// We use point edits to represent a subtree that may need to be marked dirty
|
||||
// because an edit has occurred within its lookahead.
|
||||
if (edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes) {
|
||||
if (edit.start.bytes >= (*entry.tree)->bytes_scanned) continue;
|
||||
|
||||
Subtree *result = ts_subtree_make_mut(pool, *entry.tree);
|
||||
result->has_changes = true;
|
||||
*entry.tree = result;
|
||||
|
||||
Length child_start = length_zero();
|
||||
for (uint32_t i = 0; i < result->children.size; i++) {
|
||||
const Subtree **child = &result->children.contents[i];
|
||||
if (child_start.bytes > edit.start.bytes) break;
|
||||
Length child_edit_location = length_sub(edit.start, child_start);
|
||||
array_push(&stack, ((StackEntry) {
|
||||
.tree = child,
|
||||
.edit = {child_edit_location, child_edit_location, child_edit_location}
|
||||
}));
|
||||
child_start = length_add(child_start, ts_subtree_total_size(*child));
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
Subtree *result = ts_subtree_make_mut(pool, *entry.tree);
|
||||
result->has_changes = true;
|
||||
*entry.tree = result;
|
||||
|
||||
bool pure_insertion = edit.old_end.bytes == edit.start.bytes;
|
||||
|
||||
if (edit.old_end.bytes <= result->padding.bytes) {
|
||||
// If the edit ends in the space before this subtree, then shift this
|
||||
// subtree according to the edit without changing its size.
|
||||
result->padding = length_add(edit.new_end, length_sub(result->padding, edit.old_end));
|
||||
} else if (edit.start.bytes < result->padding.bytes) {
|
||||
// Otherwise, if the edit starts in the space before this subtree, we know
|
||||
// it extends into this subtree, so shrink the subtree's content to compensate
|
||||
// for the change in whitespace before it.
|
||||
result->size = length_sub(result->size, length_sub(edit.old_end, result->padding));
|
||||
result->padding = edit.new_end;
|
||||
} else if (edit.start.bytes == result->padding.bytes && pure_insertion) {
|
||||
// Otherwise, if we're just inserting at the start of the subtree, just
|
||||
// shift the subtree over.
|
||||
result->padding = edit.new_end;
|
||||
} else {
|
||||
// Finally, we must be editing within the subtree's content, so stretch
|
||||
// the content to accommodate the edit.
|
||||
result->size = length_add(
|
||||
length_sub(edit.new_end, result->padding),
|
||||
length_sub(result->size, length_sub(edit.old_end, result->padding))
|
||||
);
|
||||
}
|
||||
|
||||
Length child_left, child_right = length_zero();
|
||||
for (uint32_t i = 0; i < result->children.size; i++) {
|
||||
const Subtree **child = &result->children.contents[i];
|
||||
if (child_start_byte > edit_byte_offset) break;
|
||||
*child = ts_subtree_invalidate_lookahead(*child, edit_byte_offset - child_start_byte, pool);
|
||||
child_start_byte += ts_subtree_total_bytes(*child);
|
||||
Length child_size = ts_subtree_total_size(*child);
|
||||
child_left = child_right;
|
||||
child_right = length_add(child_left, child_size);
|
||||
|
||||
// If this child starts after the edit, then we're done processing children.
|
||||
if (child_left.bytes > edit.old_end.bytes ||
|
||||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
|
||||
|
||||
// If the child ends after the start of the edit, or we're just inserting
|
||||
// into the end of the child's subtree, then recursively edit the child.
|
||||
if (child_right.bytes > edit.start.bytes ||
|
||||
(child_right.bytes == edit.start.bytes && pure_insertion)) {
|
||||
// Transform edit into the child's coordinate space.
|
||||
Edit child_edit = {
|
||||
.start = length_sub(edit.start, child_left),
|
||||
.old_end = length_sub(edit.old_end, child_left),
|
||||
.new_end = length_sub(edit.new_end, child_left),
|
||||
};
|
||||
|
||||
// Clamp child_edit to the child's bounds.
|
||||
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
|
||||
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
|
||||
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
|
||||
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
|
||||
|
||||
// Queue processing of this child's subtree.
|
||||
array_push(&stack, ((StackEntry) {
|
||||
.tree = child,
|
||||
.edit = child_edit,
|
||||
}));
|
||||
|
||||
// Clear out any insertion from the edit; we interpret all inserted text as applying
|
||||
// to one tree. Subsequent children are only shrunk to compensate for the insertion.
|
||||
edit.new_end = edit.start;
|
||||
} else {
|
||||
Length edit_location = length_sub(edit.start, child_left);
|
||||
array_push(&stack, ((StackEntry) {
|
||||
.tree = child,
|
||||
.edit = {edit_location, edit_location, edit_location},
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const Subtree *ts_subtree__edit(const Subtree *self, Edit edit, SubtreePool *pool) {
|
||||
Subtree *result = ts_subtree_make_mut(pool, self);
|
||||
result->has_changes = true;
|
||||
|
||||
bool pure_insertion = edit.old_end.bytes == edit.start.bytes;
|
||||
|
||||
if (edit.old_end.bytes <= result->padding.bytes) {
|
||||
result->padding = length_add(edit.new_end, length_sub(result->padding, edit.old_end));
|
||||
} else if (edit.start.bytes < result->padding.bytes) {
|
||||
result->size = length_sub(result->size, length_sub(edit.old_end, result->padding));
|
||||
result->padding = edit.new_end;
|
||||
} else if (edit.start.bytes == result->padding.bytes && pure_insertion) {
|
||||
result->padding = edit.new_end;
|
||||
} else {
|
||||
result->size = length_add(
|
||||
length_sub(edit.new_end, result->padding),
|
||||
length_sub(result->size, length_sub(edit.old_end, result->padding))
|
||||
);
|
||||
}
|
||||
|
||||
Length child_left, child_right = length_zero();
|
||||
for (uint32_t i = 0; i < result->children.size; i++) {
|
||||
const Subtree **child = &result->children.contents[i];
|
||||
Length child_size = ts_subtree_total_size(*child);
|
||||
child_left = child_right;
|
||||
child_right = length_add(child_left, child_size);
|
||||
|
||||
if (child_left.bytes > edit.old_end.bytes ||
|
||||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
|
||||
|
||||
if (child_right.bytes > edit.start.bytes ||
|
||||
(child_right.bytes == edit.start.bytes && pure_insertion)) {
|
||||
Edit child_edit = {
|
||||
.start = length_sub(edit.start, child_left),
|
||||
.old_end = length_sub(edit.old_end, child_left),
|
||||
.new_end = length_sub(edit.new_end, child_left),
|
||||
};
|
||||
|
||||
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
|
||||
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
|
||||
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
|
||||
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
|
||||
|
||||
edit.new_end = edit.start;
|
||||
|
||||
*child = ts_subtree__edit(*child, child_edit, pool);
|
||||
} else if (child_left.bytes <= edit.start.bytes) {
|
||||
*child = ts_subtree_invalidate_lookahead(*child, edit.start.bytes - child_left.bytes, pool);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const Subtree *ts_subtree_edit(const Subtree *self, const TSInputEdit *edit, SubtreePool *pool) {
|
||||
return ts_subtree__edit(self, (Edit) {
|
||||
.start = {edit->start_byte, edit->start_point},
|
||||
.old_end = {edit->old_end_byte, edit->old_end_point},
|
||||
.new_end = {edit->new_end_byte, edit->new_end_point},
|
||||
}, pool);
|
||||
array_delete(&stack);
|
||||
return self;
|
||||
}
|
||||
|
||||
const Subtree *ts_subtree_last_external_token(const Subtree *tree) {
|
||||
|
|
|
|||
|
|
@@ -908,7 +908,6 @@ describe("Parser", [&]() {
|
|||
2u * static_cast<uint32_t>(source_code.find(u"</script")),
|
||||
};
|
||||
|
||||
ts_parser_set_logger(parser, stderr_logger_new(true));
|
||||
ts_parser_set_included_ranges(parser, &included_range, 1);
|
||||
ts_parser_set_language(parser, load_real_language("javascript"));
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue