Avoid recursion in ts_subtree_edit

This prevents stack overflows when editing very large trees.

Co-Authored-By: Ashi Krishnan <queerviolet@github.com>
2018-07-12 13:53:31 -07:00 · 2018-07-12 13:53:31 -07:00 · 0f0adfb681
commit 0f0adfb681
parent 9e8bec458d
2 changed files with 115 additions and 74 deletions
--- a/src/runtime/subtree.c
+++ b/src/runtime/subtree.c
@@ -478,86 +478,128 @@ int ts_subtree_compare(const Subtree *left, const Subtree *right) {
  return 0;
 }

-const Subtree *ts_subtree_invalidate_lookahead(const Subtree *self, uint32_t edit_byte_offset,
-                                               SubtreePool *pool) {
-  if (edit_byte_offset >= self->bytes_scanned) return self;
+const Subtree *ts_subtree_edit(const Subtree *self, const TSInputEdit *edit, SubtreePool *pool) {
+  typedef struct {
+    const Subtree **tree;
+    Edit edit;
+  } StackEntry;

-  Subtree *result = ts_subtree_make_mut(pool, self);
-  result->has_changes = true;
+  Array(StackEntry) stack = array_new();
+  array_push(&stack, ((StackEntry) {
+    .tree = &self,
+    .edit = (Edit) {
+      .start = {edit->start_byte, edit->start_point},
+      .old_end = {edit->old_end_byte, edit->old_end_point},
+      .new_end = {edit->new_end_byte, edit->new_end_point},
+    },
+  }));

-  if (result->children.size > 0) {
-    uint32_t child_start_byte = 0;
+  while (stack.size) {
+    StackEntry entry = array_pop(&stack);
+    Edit edit = entry.edit;
+
+    // We use point edits to represent a subtree that may need to be marked dirty
+    // because an edit has occurred within its lookahead.
+    if (edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes) {
+      if (edit.start.bytes >= (*entry.tree)->bytes_scanned) continue;
+
+      Subtree *result = ts_subtree_make_mut(pool, *entry.tree);
+      result->has_changes = true;
+      *entry.tree = result;
+
+      Length child_start = length_zero();
+      for (uint32_t i = 0; i < result->children.size; i++) {
+        const Subtree **child = &result->children.contents[i];
+        if (child_start.bytes > edit.start.bytes) break;
+        Length child_edit_location = length_sub(edit.start, child_start);
+        array_push(&stack, ((StackEntry) {
+          .tree = child,
+          .edit = {child_edit_location, child_edit_location, child_edit_location}
+        }));
+        child_start = length_add(child_start, ts_subtree_total_size(*child));
+      }
+
+      continue;
+    }
+
+    Subtree *result = ts_subtree_make_mut(pool, *entry.tree);
+    result->has_changes = true;
+    *entry.tree = result;
+
+    bool pure_insertion = edit.old_end.bytes == edit.start.bytes;
+
+    if (edit.old_end.bytes <= result->padding.bytes) {
+      // If the edit ends in the space before this subtree, then shift this
+      // subtree according to the edit without changing its size.
+      result->padding = length_add(edit.new_end, length_sub(result->padding, edit.old_end));
+    } else if (edit.start.bytes < result->padding.bytes) {
+      // Otherwise, if the edit starts in the space before this subtree, we know
+      // it extends into this subtree, so shrink the subtree's content to compensate
+      // for the change in whitespace before it.
+      result->size = length_sub(result->size, length_sub(edit.old_end, result->padding));
+      result->padding = edit.new_end;
+    } else if (edit.start.bytes == result->padding.bytes && pure_insertion) {
+      // Otherwise, if we're just inserting at the start of the subtree, just
+      // shift the subtree over.
+      result->padding = edit.new_end;
+    } else {
+      // Finally, we must be editing within the subtree's content, so stretch
+      // the content to accommodate the edit.
+      result->size = length_add(
+        length_sub(edit.new_end, result->padding),
+        length_sub(result->size, length_sub(edit.old_end, result->padding))
+      );
+    }
+
+    Length child_left, child_right = length_zero();
    for (uint32_t i = 0; i < result->children.size; i++) {
      const Subtree **child = &result->children.contents[i];
-      if (child_start_byte > edit_byte_offset) break;
-      *child = ts_subtree_invalidate_lookahead(*child, edit_byte_offset - child_start_byte, pool);
-      child_start_byte += ts_subtree_total_bytes(*child);
+      Length child_size = ts_subtree_total_size(*child);
+      child_left = child_right;
+      child_right = length_add(child_left, child_size);
+
+      // If this child starts after the edit, then we're done processing children.
+      if (child_left.bytes > edit.old_end.bytes ||
+          (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
+
+      // If the child ends after the start of the edit, or we're just inserting
+      // into the end of the child's subtree, then recursively edit the child.
+      if (child_right.bytes > edit.start.bytes ||
+          (child_right.bytes == edit.start.bytes && pure_insertion)) {
+        // Transform edit into the child's coordinate space.
+        Edit child_edit = {
+          .start = length_sub(edit.start, child_left),
+          .old_end = length_sub(edit.old_end, child_left),
+          .new_end = length_sub(edit.new_end, child_left),
+        };
+
+        // Clamp child_edit to the child's bounds.
+        if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
+        if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
+        if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
+        if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
+
+        // Queue processing of this child's subtree.
+        array_push(&stack, ((StackEntry) {
+          .tree = child,
+          .edit = child_edit,
+        }));
+
+        // Clear out any insertion from the edit; we interpret all inserted text as applying
+        // to one tree. Subsequent children are only shrunk to compensate for the insertion.
+        edit.new_end = edit.start;
+      } else {
+        Length edit_location = length_sub(edit.start, child_left);
+        array_push(&stack, ((StackEntry) {
+          .tree = child,
+          .edit = {edit_location, edit_location, edit_location},
+        }));
+      }
    }
  }

-  return result;
-}
-
-const Subtree *ts_subtree__edit(const Subtree *self, Edit edit, SubtreePool *pool) {
-  Subtree *result = ts_subtree_make_mut(pool, self);
-  result->has_changes = true;
-
-  bool pure_insertion = edit.old_end.bytes == edit.start.bytes;
-
-  if (edit.old_end.bytes <= result->padding.bytes) {
-    result->padding = length_add(edit.new_end, length_sub(result->padding, edit.old_end));
-  } else if (edit.start.bytes < result->padding.bytes) {
-    result->size = length_sub(result->size, length_sub(edit.old_end, result->padding));
-    result->padding = edit.new_end;
-  } else if (edit.start.bytes == result->padding.bytes && pure_insertion) {
-    result->padding = edit.new_end;
-  } else {
-    result->size = length_add(
-      length_sub(edit.new_end, result->padding),
-      length_sub(result->size, length_sub(edit.old_end, result->padding))
-    );
-  }
-
-  Length child_left, child_right = length_zero();
-  for (uint32_t i = 0; i < result->children.size; i++) {
-    const Subtree **child = &result->children.contents[i];
-    Length child_size = ts_subtree_total_size(*child);
-    child_left = child_right;
-    child_right = length_add(child_left, child_size);
-
-    if (child_left.bytes > edit.old_end.bytes ||
-        (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
-
-    if (child_right.bytes > edit.start.bytes ||
-        (child_right.bytes == edit.start.bytes && pure_insertion)) {
-      Edit child_edit = {
-        .start = length_sub(edit.start, child_left),
-        .old_end = length_sub(edit.old_end, child_left),
-        .new_end = length_sub(edit.new_end, child_left),
-      };
-
-      if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
-      if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
-      if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
-      if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
-
-      edit.new_end = edit.start;
-
-      *child = ts_subtree__edit(*child, child_edit, pool);
-    } else if (child_left.bytes <= edit.start.bytes) {
-      *child = ts_subtree_invalidate_lookahead(*child, edit.start.bytes - child_left.bytes, pool);
-    }
-  }
-
-  return result;
-}
-
-const Subtree *ts_subtree_edit(const Subtree *self, const TSInputEdit *edit, SubtreePool *pool) {
-  return ts_subtree__edit(self, (Edit) {
-    .start = {edit->start_byte, edit->start_point},
-    .old_end = {edit->old_end_byte, edit->old_end_point},
-    .new_end = {edit->new_end_byte, edit->new_end_point},
-  }, pool);
+  array_delete(&stack);
+  return self;
 }

 const Subtree *ts_subtree_last_external_token(const Subtree *tree) {
--- a/test/runtime/parser_test.cc
+++ b/test/runtime/parser_test.cc
@@ -908,7 +908,6 @@ describe("Parser", [&]() {
        2u * static_cast<uint32_t>(source_code.find(u"</script")),
      };

-      ts_parser_set_logger(parser, stderr_logger_new(true));
      ts_parser_set_included_ranges(parser, &included_range, 1);
      ts_parser_set_language(parser, load_real_language("javascript"));