Avoid recursion in ts_subtree_edit

This prevents stack overflows when editing very large trees.

Co-Authored-By: Ashi Krishnan <queerviolet@github.com>
This commit is contained in:
Max Brunsfeld 2018-07-12 13:53:31 -07:00
parent 9e8bec458d
commit 0f0adfb681
2 changed files with 115 additions and 74 deletions

View file

@ -478,86 +478,128 @@ int ts_subtree_compare(const Subtree *left, const Subtree *right) {
return 0;
}
const Subtree *ts_subtree_invalidate_lookahead(const Subtree *self, uint32_t edit_byte_offset,
SubtreePool *pool) {
if (edit_byte_offset >= self->bytes_scanned) return self;
const Subtree *ts_subtree_edit(const Subtree *self, const TSInputEdit *edit, SubtreePool *pool) {
typedef struct {
const Subtree **tree;
Edit edit;
} StackEntry;
Subtree *result = ts_subtree_make_mut(pool, self);
result->has_changes = true;
Array(StackEntry) stack = array_new();
array_push(&stack, ((StackEntry) {
.tree = &self,
.edit = (Edit) {
.start = {edit->start_byte, edit->start_point},
.old_end = {edit->old_end_byte, edit->old_end_point},
.new_end = {edit->new_end_byte, edit->new_end_point},
},
}));
if (result->children.size > 0) {
uint32_t child_start_byte = 0;
while (stack.size) {
StackEntry entry = array_pop(&stack);
Edit edit = entry.edit;
// We use point edits to represent a subtree that may need to be marked dirty
// because an edit has occurred within its lookahead.
if (edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes) {
if (edit.start.bytes >= (*entry.tree)->bytes_scanned) continue;
Subtree *result = ts_subtree_make_mut(pool, *entry.tree);
result->has_changes = true;
*entry.tree = result;
Length child_start = length_zero();
for (uint32_t i = 0; i < result->children.size; i++) {
const Subtree **child = &result->children.contents[i];
if (child_start.bytes > edit.start.bytes) break;
Length child_edit_location = length_sub(edit.start, child_start);
array_push(&stack, ((StackEntry) {
.tree = child,
.edit = {child_edit_location, child_edit_location, child_edit_location}
}));
child_start = length_add(child_start, ts_subtree_total_size(*child));
}
continue;
}
Subtree *result = ts_subtree_make_mut(pool, *entry.tree);
result->has_changes = true;
*entry.tree = result;
bool pure_insertion = edit.old_end.bytes == edit.start.bytes;
if (edit.old_end.bytes <= result->padding.bytes) {
// If the edit ends in the space before this subtree, then shift this
// subtree according to the edit without changing its size.
result->padding = length_add(edit.new_end, length_sub(result->padding, edit.old_end));
} else if (edit.start.bytes < result->padding.bytes) {
// Otherwise, if the edit starts in the space before this subtree, we know
// it extends into this subtree, so shrink the subtree's content to compensate
// for the change in whitespace before it.
result->size = length_sub(result->size, length_sub(edit.old_end, result->padding));
result->padding = edit.new_end;
} else if (edit.start.bytes == result->padding.bytes && pure_insertion) {
// Otherwise, if we're just inserting at the start of the subtree, just
// shift the subtree over.
result->padding = edit.new_end;
} else {
// Finally, we must be editing within the subtree's content, so stretch
// the content to accommodate the edit.
result->size = length_add(
length_sub(edit.new_end, result->padding),
length_sub(result->size, length_sub(edit.old_end, result->padding))
);
}
Length child_left, child_right = length_zero();
for (uint32_t i = 0; i < result->children.size; i++) {
const Subtree **child = &result->children.contents[i];
if (child_start_byte > edit_byte_offset) break;
*child = ts_subtree_invalidate_lookahead(*child, edit_byte_offset - child_start_byte, pool);
child_start_byte += ts_subtree_total_bytes(*child);
Length child_size = ts_subtree_total_size(*child);
child_left = child_right;
child_right = length_add(child_left, child_size);
// If this child starts after the edit, then we're done processing children.
if (child_left.bytes > edit.old_end.bytes ||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
// If the child ends after the start of the edit, or we're just inserting
// into the end of the child's subtree, then recursively edit the child.
if (child_right.bytes > edit.start.bytes ||
(child_right.bytes == edit.start.bytes && pure_insertion)) {
// Transform edit into the child's coordinate space.
Edit child_edit = {
.start = length_sub(edit.start, child_left),
.old_end = length_sub(edit.old_end, child_left),
.new_end = length_sub(edit.new_end, child_left),
};
// Clamp child_edit to the child's bounds.
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
// Queue processing of this child's subtree.
array_push(&stack, ((StackEntry) {
.tree = child,
.edit = child_edit,
}));
// Clear out any insertion from the edit; we interpret all inserted text as applying
// to one tree. Subsequent children are only shrunk to compensate for the insertion.
edit.new_end = edit.start;
} else {
Length edit_location = length_sub(edit.start, child_left);
array_push(&stack, ((StackEntry) {
.tree = child,
.edit = {edit_location, edit_location, edit_location},
}));
}
}
}
return result;
}
// Apply `edit` (expressed relative to the start of `self`, padding included)
// to the subtree `self` and, recursively, to the children it touches.
//
// The subtree is first made mutable via `ts_subtree_make_mut` and flagged with
// `has_changes`; its padding/size are then adjusted for the edit, and each
// affected child is either edited recursively or handed to
// `ts_subtree_invalidate_lookahead`. Returns the mutable subtree.
//
// NOTE: this function calls itself once per level of the tree, so the call
// depth grows with the depth of the tree being edited.
const Subtree *ts_subtree__edit(const Subtree *self, Edit edit, SubtreePool *pool) {
Subtree *result = ts_subtree_make_mut(pool, self);
result->has_changes = true;
// The edit removes no bytes (old end == start), i.e. it only inserts.
bool pure_insertion = edit.old_end.bytes == edit.start.bytes;
if (edit.old_end.bytes <= result->padding.bytes) {
// If the edit ends in the space before this subtree, then shift this
// subtree according to the edit without changing its size.
result->padding = length_add(edit.new_end, length_sub(result->padding, edit.old_end));
} else if (edit.start.bytes < result->padding.bytes) {
// Otherwise, if the edit starts in the space before this subtree, we know
// it extends into this subtree, so shrink the subtree's content to compensate
// for the change in whitespace before it.
result->size = length_sub(result->size, length_sub(edit.old_end, result->padding));
result->padding = edit.new_end;
} else if (edit.start.bytes == result->padding.bytes && pure_insertion) {
// Otherwise, if we're just inserting at the start of the subtree, just
// shift the subtree over.
result->padding = edit.new_end;
} else {
// Finally, we must be editing within the subtree's content, so stretch
// the content to accommodate the edit.
result->size = length_add(
length_sub(edit.new_end, result->padding),
length_sub(result->size, length_sub(edit.old_end, result->padding))
);
}
// Extent of the current child relative to the start of this subtree.
// `child_left` is assigned at the top of every iteration before it is read;
// `child_right` starts at zero and accumulates each child's total size.
Length child_left, child_right = length_zero();
for (uint32_t i = 0; i < result->children.size; i++) {
const Subtree **child = &result->children.contents[i];
Length child_size = ts_subtree_total_size(*child);
child_left = child_right;
child_right = length_add(child_left, child_size);
// If this child starts after the edit, then we're done processing children.
if (child_left.bytes > edit.old_end.bytes ||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
// If the child ends after the start of the edit, or we're just inserting
// into the end of the child's subtree, then recursively edit the child.
if (child_right.bytes > edit.start.bytes ||
(child_right.bytes == edit.start.bytes && pure_insertion)) {
// Transform edit into the child's coordinate space.
Edit child_edit = {
.start = length_sub(edit.start, child_left),
.old_end = length_sub(edit.old_end, child_left),
.new_end = length_sub(edit.new_end, child_left),
};
// Clamp child_edit to the child's bounds.
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
// Clear out any insertion from the edit so that all inserted text is applied
// to one child only; later children see an edit whose new end equals its
// start. `child_edit` was already derived above, so it keeps the insertion.
edit.new_end = edit.start;
*child = ts_subtree__edit(*child, child_edit, pool);
} else if (child_left.bytes <= edit.start.bytes) {
// The child lies before the edit and is not resized, but it is still passed
// to ts_subtree_invalidate_lookahead with the edit's start offset expressed
// relative to the child's start.
*child = ts_subtree_invalidate_lookahead(*child, edit.start.bytes - child_left.bytes, pool);
}
}
return result;
}
const Subtree *ts_subtree_edit(const Subtree *self, const TSInputEdit *edit, SubtreePool *pool) {
return ts_subtree__edit(self, (Edit) {
.start = {edit->start_byte, edit->start_point},
.old_end = {edit->old_end_byte, edit->old_end_point},
.new_end = {edit->new_end_byte, edit->new_end_point},
}, pool);
array_delete(&stack);
return self;
}
const Subtree *ts_subtree_last_external_token(const Subtree *tree) {

View file

@ -908,7 +908,6 @@ describe("Parser", [&]() {
2u * static_cast<uint32_t>(source_code.find(u"</script")),
};
ts_parser_set_logger(parser, stderr_logger_new(true));
ts_parser_set_included_ranges(parser, &included_range, 1);
ts_parser_set_language(parser, load_real_language("javascript"));