tree-sitter/src/runtime/subtree.c
Max Brunsfeld 5dbd15f5f2 Loosen criteria for when to increment repeat depth
Sometimes, an internal repeat node may have children that are
not themselves repeat nodes, because of unit reduction elimination.
This happened with the embedded template parser.
2018-11-03 23:19:31 -07:00

908 lines
31 KiB
C

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include "runtime/alloc.h"
#include "runtime/atomic.h"
#include "runtime/subtree.h"
#include "runtime/length.h"
#include "runtime/language.h"
#include "runtime/error_costs.h"
#include <stddef.h>
typedef struct {
Length start;
Length old_end;
Length new_end;
} Edit;
TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
#ifdef TREE_SITTER_TEST
#define TS_MAX_INLINE_TREE_LENGTH 2
#define TS_MAX_TREE_POOL_SIZE 0
#else
#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
#define TS_MAX_TREE_POOL_SIZE 32
#endif
static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}};
// ExternalScannerState
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
self->length = length;
if (length > sizeof(self->short_data)) {
self->long_data = ts_malloc(length);
memcpy(self->long_data, data, length);
} else {
memcpy(self->short_data, data, length);
}
}
ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
ExternalScannerState result = *self;
if (self->length > sizeof(self->short_data)) {
result.long_data = ts_malloc(self->length);
memcpy(result.long_data, self->long_data, self->length);
}
return result;
}
void ts_external_scanner_state_delete(ExternalScannerState *self) {
if (self->length > sizeof(self->short_data)) {
ts_free(self->long_data);
}
}
const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
if (self->length > sizeof(self->short_data)) {
return self->long_data;
} else {
return self->short_data;
}
}
bool ts_external_scanner_state_eq(const ExternalScannerState *a, const ExternalScannerState *b) {
return a == b || (
a->length == b->length &&
!memcmp(ts_external_scanner_state_data(a), ts_external_scanner_state_data(b), a->length)
);
}
// SubtreeArray
void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
dest->size = self.size;
dest->capacity = self.capacity;
dest->contents = self.contents;
if (self.capacity > 0) {
dest->contents = ts_calloc(self.capacity, sizeof(Subtree));
memcpy(dest->contents, self.contents, self.size * sizeof(Subtree));
for (uint32_t i = 0; i < self.size; i++) {
ts_subtree_retain(dest->contents[i]);
}
}
}
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
for (uint32_t i = 0; i < self->size; i++) {
ts_subtree_release(pool, self->contents[i]);
}
array_delete(self);
}
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
SubtreeArray result = array_new();
uint32_t i = self->size - 1;
for (; i + 1 > 0; i--) {
Subtree child = self->contents[i];
if (!ts_subtree_extra(child)) break;
array_push(&result, child);
}
self->size = i + 1;
ts_subtree_array_reverse(&result);
return result;
}
void ts_subtree_array_reverse(SubtreeArray *self) {
for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
size_t reverse_index = self->size - 1 - i;
Subtree swap = self->contents[i];
self->contents[i] = self->contents[reverse_index];
self->contents[reverse_index] = swap;
}
}
// SubtreePool
SubtreePool ts_subtree_pool_new(uint32_t capacity) {
SubtreePool self = {array_new(), array_new()};
array_reserve(&self.free_trees, capacity);
return self;
}
void ts_subtree_pool_delete(SubtreePool *self) {
if (self->free_trees.contents) {
for (unsigned i = 0; i < self->free_trees.size; i++) {
ts_free(self->free_trees.contents[i].ptr);
}
array_delete(&self->free_trees);
}
if (self->tree_stack.contents) array_delete(&self->tree_stack);
}
static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
if (self->free_trees.size > 0) {
return array_pop(&self->free_trees).ptr;
} else {
return ts_malloc(sizeof(SubtreeHeapData));
}
}
static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) {
array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
} else {
ts_free(tree);
}
}
// Subtree
static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
return
padding.bytes < TS_MAX_INLINE_TREE_LENGTH &&
padding.extent.row < 16 &&
padding.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
size.extent.row == 0 &&
size.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
lookahead_bytes < 16;
}
Subtree ts_subtree_new_leaf(
SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
bool is_keyword, const TSLanguage *language
) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
bool extra = symbol == ts_builtin_sym_end;
bool is_inline = (
symbol <= UINT8_MAX &&
!has_external_tokens &&
ts_subtree_can_inline(padding, size, lookahead_bytes)
);
if (is_inline) {
return (Subtree) {{
.parse_state = parse_state,
.symbol = symbol,
.padding_bytes = padding.bytes,
.padding_rows = padding.extent.row,
.padding_columns = padding.extent.column,
.size_bytes = size.bytes,
.lookahead_bytes = lookahead_bytes,
.visible = metadata.visible,
.named = metadata.named,
.extra = extra,
.has_changes = false,
.is_missing = false,
.is_keyword = is_keyword,
.is_inline = true,
}};
} else {
SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
*data = (SubtreeHeapData) {
.ref_count = 1,
.padding = padding,
.size = size,
.lookahead_bytes = lookahead_bytes,
.error_cost = 0,
.child_count = 0,
.symbol = symbol,
.parse_state = parse_state,
.visible = metadata.visible,
.named = metadata.named,
.extra = extra,
.fragile_left = false,
.fragile_right = false,
.has_changes = false,
.has_external_tokens = has_external_tokens,
.is_missing = false,
.is_keyword = is_keyword,
.first_leaf = {.symbol = 0, .parse_state = 0},
};
return (Subtree) {.ptr = data};
}
}
Subtree ts_subtree_new_error(
SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
) {
Subtree result = ts_subtree_new_leaf(
pool, ts_builtin_sym_error, padding, size, bytes_scanned,
parse_state, false, false, language
);
SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
data->fragile_left = true;
data->fragile_right = true;
data->lookahead_char = lookahead_char;
return result;
}
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
if (self.data.is_inline) return (MutableSubtree) {self.data};
if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
SubtreeHeapData *result = ts_subtree_pool_allocate(pool);
memcpy(result, self.ptr, sizeof(SubtreeHeapData));
if (result->child_count > 0) {
result->children = ts_calloc(self.ptr->child_count, sizeof(Subtree));
memcpy(result->children, self.ptr->children, result->child_count * sizeof(Subtree));
for (uint32_t i = 0; i < result->child_count; i++) {
ts_subtree_retain(result->children[i]);
}
} else if (result->has_external_tokens) {
result->external_scanner_state = ts_external_scanner_state_copy(&self.ptr->external_scanner_state);
}
result->ref_count = 1;
ts_subtree_release(pool, self);
return (MutableSubtree) {.ptr = result};
}
static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language,
MutableSubtreeArray *stack) {
unsigned initial_stack_size = stack->size;
MutableSubtree tree = self;
TSSymbol symbol = tree.ptr->symbol;
for (unsigned i = 0; i < count; i++) {
if (tree.ptr->ref_count > 1 || tree.ptr->child_count != 2) break;
MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
if (
child.data.is_inline ||
child.ptr->child_count != 2 ||
child.ptr->ref_count > 1 ||
child.ptr->symbol != symbol
) break;
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]);
if (
grandchild.data.is_inline ||
grandchild.ptr->child_count != 2 ||
grandchild.ptr->ref_count > 1 ||
grandchild.ptr->symbol != symbol
) break;
tree.ptr->children[0] = ts_subtree_from_mut(grandchild);
child.ptr->children[0] = grandchild.ptr->children[1];
grandchild.ptr->children[1] = ts_subtree_from_mut(child);
array_push(stack, tree);
tree = grandchild;
}
while (stack->size > initial_stack_size) {
tree = array_pop(stack);
MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[1]);
ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language);
ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language);
ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language);
}
}
void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) {
array_clear(&pool->tree_stack);
if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) {
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
}
while (pool->tree_stack.size > 0) {
MutableSubtree tree = array_pop(&pool->tree_stack);
if (tree.ptr->repeat_depth > 0) {
Subtree child1 = tree.ptr->children[0];
Subtree child2 = tree.ptr->children[1];
if (
ts_subtree_child_count(child1) > 0 &&
ts_subtree_child_count(child2) > 0 &&
child1.ptr->repeat_depth > child2.ptr->repeat_depth
) {
unsigned n = child1.ptr->repeat_depth - child2.ptr->repeat_depth;
for (unsigned i = n / 2; i > 0; i /= 2) {
ts_subtree__compress(tree, i, language, &pool->tree_stack);
n -= i;
}
}
}
for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
Subtree child = tree.ptr->children[i];
if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
}
}
}
}
static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
return ts_subtree_child_count(self) ? self.ptr->repeat_depth : 0;
}
void ts_subtree_set_children(
MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language
) {
assert(!self.data.is_inline);
if (self.ptr->child_count > 0 && children != self.ptr->children) {
ts_free(self.ptr->children);
}
self.ptr->child_count = child_count;
self.ptr->children = children;
self.ptr->named_child_count = 0;
self.ptr->visible_child_count = 0;
self.ptr->error_cost = 0;
self.ptr->repeat_depth = 0;
self.ptr->node_count = 1;
self.ptr->has_external_tokens = false;
self.ptr->dynamic_precedence = 0;
uint32_t non_extra_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id);
uint32_t lookahead_end_byte = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
if (i == 0) {
self.ptr->padding = ts_subtree_padding(child);
self.ptr->size = ts_subtree_size(child);
} else {
self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
}
uint32_t child_lookahead_end_byte =
self.ptr->padding.bytes +
self.ptr->size.bytes +
ts_subtree_lookahead_bytes(child);
if (child_lookahead_end_byte > lookahead_end_byte) lookahead_end_byte = child_lookahead_end_byte;
if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
self.ptr->error_cost += ts_subtree_error_cost(child);
}
self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
self.ptr->node_count += ts_subtree_node_count(child);
if (alias_sequence && alias_sequence[non_extra_index] != 0 && !ts_subtree_extra(child)) {
self.ptr->visible_child_count++;
if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) {
self.ptr->named_child_count++;
}
} else if (ts_subtree_visible(child)) {
self.ptr->visible_child_count++;
if (ts_subtree_named(child)) self.ptr->named_child_count++;
} else if (ts_subtree_child_count(child) > 0) {
self.ptr->visible_child_count += child.ptr->visible_child_count;
self.ptr->named_child_count += child.ptr->named_child_count;
}
if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
if (ts_subtree_is_error(child)) {
self.ptr->fragile_left = self.ptr->fragile_right = true;
self.ptr->parse_state = TS_TREE_STATE_NONE;
}
if (!ts_subtree_extra(child)) non_extra_index++;
}
self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) {
self.ptr->error_cost +=
ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
uint32_t grandchild_count = ts_subtree_child_count(child);
if (ts_subtree_extra(child)) continue;
if (ts_subtree_is_error(child) && grandchild_count == 0) continue;
if (ts_subtree_visible(child)) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
} else if (grandchild_count > 0) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
}
}
}
if (self.ptr->child_count > 0) {
Subtree first_child = self.ptr->children[0];
Subtree last_child = self.ptr->children[self.ptr->child_count - 1];
self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
if (self.ptr->child_count == 2 && !self.ptr->visible && !self.ptr->named) {
if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
} else {
self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
}
}
}
}
MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
SubtreeArray *children, unsigned alias_sequence_id,
const TSLanguage *language) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
*data = (SubtreeHeapData) {
.ref_count = 1,
.symbol = symbol,
.alias_sequence_id = alias_sequence_id,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
.fragile_left = fragile,
.fragile_right = fragile,
.is_keyword = false,
.node_count = 0,
.first_leaf = {.symbol = 0, .parse_state = 0},
};
MutableSubtree result = {.ptr = data};
ts_subtree_set_children(result, children->contents, children->size, language);
return result;
}
Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
bool extra, const TSLanguage *language) {
MutableSubtree result = ts_subtree_new_node(
pool, ts_builtin_sym_error, children, 0, language
);
result.ptr->extra = extra;
return ts_subtree_from_mut(result);
}
Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
const TSLanguage *language) {
Subtree result = ts_subtree_new_leaf(
pool, symbol, padding, length_zero(), 0,
0, false, false, language
);
if (result.data.is_inline) {
result.data.is_missing = true;
} else {
((SubtreeHeapData *)result.ptr)->is_missing = true;
}
return result;
}
void ts_subtree_retain(Subtree self) {
if (self.data.is_inline) return;
assert(self.ptr->ref_count > 0);
atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
assert(self.ptr->ref_count != 0);
}
void ts_subtree_release(SubtreePool *pool, Subtree self) {
if (self.data.is_inline) return;
array_clear(&pool->tree_stack);
assert(self.ptr->ref_count > 0);
if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) {
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
}
while (pool->tree_stack.size > 0) {
MutableSubtree tree = array_pop(&pool->tree_stack);
if (tree.ptr->child_count > 0) {
for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
Subtree child = tree.ptr->children[i];
if (child.data.is_inline) continue;
assert(child.ptr->ref_count > 0);
if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
}
}
ts_free(tree.ptr->children);
} else if (tree.ptr->has_external_tokens) {
ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
}
ts_subtree_pool_free(pool, tree.ptr);
}
}
bool ts_subtree_eq(Subtree self, Subtree other) {
if (self.data.is_inline || other.data.is_inline) {
return memcmp(&self, &other, sizeof(SubtreeInlineData)) == 0;
}
if (self.ptr) {
if (!other.ptr) return false;
} else {
return !other.ptr;
}
if (self.ptr->symbol != other.ptr->symbol) return false;
if (self.ptr->visible != other.ptr->visible) return false;
if (self.ptr->named != other.ptr->named) return false;
if (self.ptr->padding.bytes != other.ptr->padding.bytes) return false;
if (self.ptr->size.bytes != other.ptr->size.bytes) return false;
if (self.ptr->symbol == ts_builtin_sym_error) return self.ptr->lookahead_char == other.ptr->lookahead_char;
if (self.ptr->child_count != other.ptr->child_count) return false;
if (self.ptr->child_count > 0) {
if (self.ptr->visible_child_count != other.ptr->visible_child_count) return false;
if (self.ptr->named_child_count != other.ptr->named_child_count) return false;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
if (!ts_subtree_eq(self.ptr->children[i], other.ptr->children[i])) {
return false;
}
}
}
return true;
}
int ts_subtree_compare(Subtree left, Subtree right) {
if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) return -1;
if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) return 1;
if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1;
if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1;
for (uint32_t i = 0, n = ts_subtree_child_count(left); i < n; i++) {
Subtree left_child = left.ptr->children[i];
Subtree right_child = right.ptr->children[i];
switch (ts_subtree_compare(left_child, right_child)) {
case -1: return -1;
case 1: return 1;
default: break;
}
}
return 0;
}
static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
if (self->data.is_inline) {
self->data.has_changes = true;
} else {
self->ptr->has_changes = true;
}
}
Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool) {
typedef struct {
Subtree *tree;
Edit edit;
} StackEntry;
Array(StackEntry) stack = array_new();
array_push(&stack, ((StackEntry) {
.tree = &self,
.edit = (Edit) {
.start = {edit->start_byte, edit->start_point},
.old_end = {edit->old_end_byte, edit->old_end_point},
.new_end = {edit->new_end_byte, edit->new_end_point},
},
}));
while (stack.size) {
StackEntry entry = array_pop(&stack);
Edit edit = entry.edit;
bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
Length size = ts_subtree_size(*entry.tree);
Length padding = ts_subtree_padding(*entry.tree);
uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
if (is_noop && edit.start.bytes >= padding.bytes + size.bytes + lookahead_bytes) continue;
// If the edit is entirely within the space before this subtree, then shift this
// subtree over according to the edit without changing its size.
if (edit.old_end.bytes <= padding.bytes) {
padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
}
// If the edit starts in the space before this subtree and extends into this subtree,
// shrink the subtree's content to compensate for the change in the space before it.
else if (edit.start.bytes < padding.bytes) {
size = length_sub(size, length_sub(edit.old_end, padding));
padding = edit.new_end;
}
// If the edit is a pure insertion right at the start of the subtree,
// shift the subtree over according to the insertion.
else if (edit.start.bytes == padding.bytes && is_pure_insertion) {
padding = edit.new_end;
}
// If the edit is within this subtree, resize the subtree to reflect the edit.
else {
uint32_t total_bytes = padding.bytes + size.bytes;
if (edit.start.bytes < total_bytes ||
(edit.start.bytes == total_bytes && is_pure_insertion)) {
size = length_add(
length_sub(edit.new_end, padding),
length_sub(size, length_sub(edit.old_end, padding))
);
}
}
MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
if (result.data.is_inline) {
if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
result.data.padding_bytes = padding.bytes;
result.data.padding_rows = padding.extent.row;
result.data.padding_columns = padding.extent.column;
result.data.size_bytes = size.bytes;
} else {
SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
data->ref_count = 1;
data->padding = padding;
data->size = size;
data->lookahead_bytes = lookahead_bytes;
data->error_cost = 0;
data->child_count = 0;
data->symbol = result.data.symbol;
data->parse_state = result.data.parse_state;
data->visible = result.data.visible;
data->named = result.data.named;
data->extra = result.data.extra;
data->fragile_left = false;
data->fragile_right = false;
data->has_changes = false;
data->has_external_tokens = false;
data->is_missing = result.data.is_missing;
data->is_keyword = result.data.is_keyword;
result.ptr = data;
}
} else {
result.ptr->padding = padding;
result.ptr->size = size;
}
ts_subtree_set_has_changes(&result);
*entry.tree = ts_subtree_from_mut(result);
Length child_left, child_right = length_zero();
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
Subtree *child = &result.ptr->children[i];
Length child_size = ts_subtree_total_size(*child);
child_left = child_right;
child_right = length_add(child_left, child_size);
// If this child starts after the edit, then we're done processing children.
if (child_left.bytes > edit.old_end.bytes ||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
// Transform edit into the child's coordinate space.
Edit child_edit = {
.start = length_sub(edit.start, child_left),
.old_end = length_sub(edit.old_end, child_left),
.new_end = length_sub(edit.new_end, child_left),
};
// Clamp child_edit to the child's bounds.
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
// Interpret all inserted text as applying to the *first* child that touches the edit.
// Subsequent children are only never have any text inserted into them; they are only
// shrunk to compensate for the edit.
if (child_right.bytes > edit.start.bytes ||
(child_right.bytes == edit.start.bytes && is_pure_insertion)) {
edit.new_end = edit.start;
}
// Children that occur before the edit are not reshaped by the edit.
else {
child_edit.old_end = child_edit.start;
child_edit.new_end = child_edit.start;
}
// Queue processing of this child's subtree.
array_push(&stack, ((StackEntry) {
.tree = child,
.edit = child_edit,
}));
}
}
array_delete(&stack);
return self;
}
Subtree ts_subtree_last_external_token(Subtree tree) {
if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
while (tree.ptr->child_count > 0) {
for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) {
Subtree child = tree.ptr->children[i];
if (ts_subtree_has_external_tokens(child)) {
tree = child;
break;
}
}
}
return tree;
}
static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) {
if (c == 0)
return snprintf(s, n, "EOF");
if (c == -1)
return snprintf(s, n, "INVALID");
else if (c == '\n')
return snprintf(s, n, "'\\n'");
else if (c == '\t')
return snprintf(s, n, "'\\t'");
else if (c == '\r')
return snprintf(s, n, "'\\r'");
else if (0 < c && c < 128 && isprint(c))
return snprintf(s, n, "'%c'", c);
else
return snprintf(s, n, "%d", c);
}
static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t limit,
const TSLanguage *language, bool is_root,
bool include_all, TSSymbol alias_symbol,
bool alias_is_named) {
if (!self.ptr) return snprintf(string, limit, "(NULL)");
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool visible =
include_all ||
is_root ||
ts_subtree_missing(self) ||
(ts_subtree_visible(self) && ts_subtree_named(self)) ||
alias_is_named;
if (visible && !is_root) {
cursor += snprintf(*writer, limit, " ");
}
if (visible) {
if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
} else if (ts_subtree_missing(self)) {
cursor += snprintf(*writer, limit, "(MISSING");
} else {
TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
const char *symbol_name = ts_language_symbol_name(language, symbol);
cursor += snprintf(*writer, limit, "(%s", symbol_name);
}
}
if (ts_subtree_child_count(self)) {
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id);
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
if (ts_subtree_extra(child)) {
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
0, false
);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
alias_symbol,
alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false
);
structural_child_index++;
}
}
}
if (visible) cursor += snprintf(*writer, limit, ")");
return cursor - string;
}
char *ts_subtree_string(Subtree self, const TSLanguage *language, bool include_all) {
char scratch_string[1];
size_t size = ts_subtree__write_to_string(
self, scratch_string, 0,
language, true,
include_all, 0, false
) + 1;
char *result = ts_malloc(size * sizeof(char));
ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false);
return result;
}
void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
const TSLanguage *language, TSSymbol alias_symbol,
FILE *f) {
TSSymbol subtree_symbol = ts_subtree_symbol(*self);
TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
fprintf(
f, "tree_%p [label=\"%s\"",
self, ts_language_symbol_name(language, symbol)
);
if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");
fprintf(f, ", tooltip=\""
"range: %u - %u\n"
"state: %d\n"
"error-cost: %u\n"
"has-changes: %u\n"
"repeat-depth: %u\n"
"lookahead-bytes: %u\"]\n",
start_offset, end_offset,
ts_subtree_parse_state(*self),
ts_subtree_error_cost(*self),
ts_subtree_has_changes(*self),
ts_subtree_repeat_depth(*self),
ts_subtree_lookahead_bytes(*self)
);
uint32_t child_start_offset = start_offset;
uint32_t structural_child_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
language,
ts_subtree_alias_sequence_id(*self)
);
for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
const Subtree *child = &self->ptr->children[i];
if (ts_subtree_extra(*child)) {
ts_subtree__print_dot_graph(child, child_start_offset, language, 0, f);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f);
structural_child_index++;
}
fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i);
child_start_offset += ts_subtree_total_bytes(*child);
}
}
void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
fprintf(f, "digraph tree {\n");
fprintf(f, "edge [arrowhead=none]\n");
ts_subtree__print_dot_graph(&self, 0, language, 0, f);
fprintf(f, "}\n");
}
bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
const ExternalScannerState *state1 = &empty_state;
const ExternalScannerState *state2 = &empty_state;
if (self.ptr && ts_subtree_has_external_tokens(self) && !self.ptr->child_count) {
state1 = &self.ptr->external_scanner_state;
}
if (other.ptr && ts_subtree_has_external_tokens(other) && !other.ptr->child_count) {
state2 = &other.ptr->external_scanner_state;
}
return ts_external_scanner_state_eq(state1, state2);
}