tree-sitter/src/runtime/subtree.c
2018-05-11 10:46:13 -07:00

703 lines
23 KiB
C

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include "runtime/alloc.h"
#include "runtime/atomic.h"
#include "runtime/subtree.h"
#include "runtime/length.h"
#include "runtime/language.h"
#include "runtime/error_costs.h"
typedef struct {
Length start;
Length added;
Length removed;
} Edit;
TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
static const uint32_t MAX_TREE_POOL_SIZE = 1024;
static const TSExternalTokenState empty_state = {.length = 0, .short_data = {0}};
// ExternalTokenState
void ts_external_token_state_init(TSExternalTokenState *self, const char *content, unsigned length) {
self->length = length;
if (length > sizeof(self->short_data)) {
self->long_data = ts_malloc(length);
memcpy(self->long_data, content, length);
} else {
memcpy(self->short_data, content, length);
}
}
void ts_external_token_state_delete(TSExternalTokenState *self) {
if (self->length > sizeof(self->short_data)) {
ts_free(self->long_data);
}
}
const char *ts_external_token_state_data(const TSExternalTokenState *self) {
if (self->length > sizeof(self->short_data)) {
return self->long_data;
} else {
return self->short_data;
}
}
bool ts_external_token_state_eq(const TSExternalTokenState *a, const TSExternalTokenState *b) {
return a == b ||
(a->length == b->length &&
memcmp(ts_external_token_state_data(a), ts_external_token_state_data(b), a->length) == 0);
}
// SubtreeArray
bool ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
Subtree **contents = NULL;
if (self.capacity > 0) {
contents = ts_calloc(self.capacity, sizeof(Subtree *));
memcpy(contents, self.contents, self.size * sizeof(Subtree *));
for (uint32_t i = 0; i < self.size; i++) {
ts_subtree_retain(contents[i]);
}
}
dest->size = self.size;
dest->capacity = self.capacity;
dest->contents = contents;
return true;
}
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
for (uint32_t i = 0; i < self->size; i++) {
ts_subtree_release(pool, self->contents[i]);
}
array_delete(self);
}
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
SubtreeArray result = array_new();
uint32_t i = self->size - 1;
for (; i + 1 > 0; i--) {
Subtree *child = self->contents[i];
if (!child->extra) break;
array_push(&result, child);
}
self->size = i + 1;
ts_subtree_array_reverse(&result);
return result;
}
void ts_subtree_array_reverse(SubtreeArray *self) {
for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
size_t reverse_index = self->size - 1 - i;
Subtree *swap = self->contents[i];
self->contents[i] = self->contents[reverse_index];
self->contents[reverse_index] = swap;
}
}
// SubtreePool
SubtreePool ts_subtree_pool_new(uint32_t capacity) {
SubtreePool self = {array_new(), array_new()};
array_reserve(&self.free_trees, capacity);
return self;
}
void ts_subtree_pool_delete(SubtreePool *self) {
if (self->free_trees.contents) {
for (unsigned i = 0; i < self->free_trees.size; i++) {
ts_free(self->free_trees.contents[i]);
}
array_delete(&self->free_trees);
}
if (self->tree_stack.contents) array_delete(&self->tree_stack);
}
Subtree *ts_subtree_pool_allocate(SubtreePool *self) {
if (self->free_trees.size > 0) {
return array_pop(&self->free_trees);
} else {
return ts_malloc(sizeof(Subtree));
}
}
void ts_subtree_pool_free(SubtreePool *self, Subtree *tree) {
if (self->free_trees.capacity > 0 && self->free_trees.size < MAX_TREE_POOL_SIZE) {
array_push(&self->free_trees, tree);
} else {
ts_free(tree);
}
}
// Subtree
Subtree *ts_subtree_make_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
const TSLanguage *language) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
Subtree *result = ts_subtree_pool_allocate(pool);
*result = (Subtree){
.ref_count = 1,
.symbol = symbol,
.size = size,
.visible_child_count = 0,
.named_child_count = 0,
.alias_sequence_id = 0,
.padding = padding,
.visible = metadata.visible,
.named = metadata.named,
.node_count = 1,
.has_changes = false,
.first_leaf = {
.symbol = symbol,
.lex_mode = {0, 0},
},
.has_external_tokens = false,
};
return result;
}
Subtree *ts_subtree_make_error(SubtreePool *pool, Length size, Length padding,
int32_t lookahead_char, const TSLanguage *language) {
Subtree *result = ts_subtree_make_leaf(pool, ts_builtin_sym_error, padding, size, language);
result->fragile_left = true;
result->fragile_right = true;
result->lookahead_char = lookahead_char;
return result;
}
Subtree *ts_subtree_make_copy(SubtreePool *pool, Subtree *self) {
Subtree *result = ts_subtree_pool_allocate(pool);
*result = *self;
if (result->children.size > 0) {
ts_subtree_array_copy(self->children, &result->children);
}
result->ref_count = 1;
return result;
}
static Subtree *ts_subtree_make_mut(SubtreePool *pool, Subtree *self) {
if (self->ref_count == 1) {
return self;
} else {
Subtree *result = ts_subtree_make_copy(pool, self);
ts_subtree_release(pool, self);
return result;
}
}
static void ts_subtree__compress(Subtree *self, unsigned count, const TSLanguage *language,
SubtreeArray *stack) {
unsigned initial_stack_size = stack->size;
Subtree *tree = self;
for (unsigned i = 0; i < count; i++) {
if (tree->ref_count > 1 || tree->children.size != 2) break;
Subtree *child = tree->children.contents[0];
if (
child->ref_count > 1 ||
child->children.size != 2 ||
child->symbol != tree->symbol
) break;
Subtree *grandchild = child->children.contents[0];
if (
grandchild->ref_count > 1 ||
grandchild->children.size != 2 ||
grandchild->symbol != tree->symbol
) break;
tree->children.contents[0] = grandchild;
child->children.contents[0] = grandchild->children.contents[1];
grandchild->children.contents[1] = child;
array_push(stack, tree);
tree = grandchild;
}
while (stack->size > initial_stack_size) {
tree = array_pop(stack);
assert(tree);
Subtree *child = tree->children.contents[0];
Subtree *grandchild = child->children.contents[1];
ts_subtree_set_children(grandchild, &grandchild->children, language);
ts_subtree_set_children(child, &child->children, language);
ts_subtree_set_children(tree, &tree->children, language);
}
}
void ts_subtree_balance(Subtree *self, SubtreePool *pool, const TSLanguage *language) {
array_clear(&pool->tree_stack);
array_push(&pool->tree_stack, self);
while (pool->tree_stack.size > 0) {
Subtree *tree = array_pop(&pool->tree_stack);
assert(tree);
if (tree->repeat_depth > 0) {
if (tree->children.contents[0]->repeat_depth > tree->children.contents[1]->repeat_depth) {
unsigned n = (
tree->children.contents[0]->repeat_depth -
tree->children.contents[1]->repeat_depth
);
for (unsigned i = n / 2; i > 0; i /= 2) {
ts_subtree__compress(tree, i, language, &pool->tree_stack);
n -= i;
}
}
}
for (uint32_t i = 0; i < tree->children.size; i++) {
Subtree *child = tree->children.contents[i];
if (child->ref_count == 1) {
array_push(&pool->tree_stack, child);
}
}
}
}
void ts_subtree_set_children(Subtree *self, SubtreeArray *children, const TSLanguage *language) {
if (self->children.size > 0 && children->contents != self->children.contents) {
array_delete(&self->children);
}
self->children = *children;
self->named_child_count = 0;
self->visible_child_count = 0;
self->error_cost = 0;
self->repeat_depth = 0;
self->node_count = 1;
self->has_external_tokens = false;
self->dynamic_precedence = 0;
uint32_t non_extra_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
for (uint32_t i = 0; i < self->children.size; i++) {
Subtree *child = self->children.contents[i];
if (i == 0) {
self->padding = child->padding;
self->size = child->size;
self->bytes_scanned = child->bytes_scanned;
} else {
uint32_t bytes_scanned = ts_subtree_total_bytes(self) + child->bytes_scanned;
if (bytes_scanned > self->bytes_scanned) self->bytes_scanned = bytes_scanned;
self->size = length_add(self->size, ts_subtree_total_size(child));
}
if (child->symbol != ts_builtin_sym_error_repeat) {
self->error_cost += child->error_cost;
}
self->dynamic_precedence += child->dynamic_precedence;
self->node_count += child->node_count;
if (alias_sequence && alias_sequence[non_extra_index] != 0 && !child->extra) {
self->visible_child_count++;
if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) {
self->named_child_count++;
}
} else if (child->visible) {
self->visible_child_count++;
if (child->named) self->named_child_count++;
} else if (child->children.size > 0) {
self->visible_child_count += child->visible_child_count;
self->named_child_count += child->named_child_count;
}
if (child->has_external_tokens) self->has_external_tokens = true;
if (child->symbol == ts_builtin_sym_error) {
self->fragile_left = self->fragile_right = true;
self->parse_state = TS_TREE_STATE_NONE;
}
if (!child->extra) non_extra_index++;
}
if (self->symbol == ts_builtin_sym_error || self->symbol == ts_builtin_sym_error_repeat) {
self->error_cost += ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row;
for (uint32_t i = 0; i < self->children.size; i++) {
Subtree *child = self->children.contents[i];
if (child->extra) continue;
if (child->symbol == ts_builtin_sym_error && child->children.size == 0) continue;
if (child->visible) {
self->error_cost += ERROR_COST_PER_SKIPPED_TREE;
} else {
self->error_cost += ERROR_COST_PER_SKIPPED_TREE * child->visible_child_count;
}
}
}
if (self->children.size > 0) {
Subtree *first_child = self->children.contents[0];
Subtree *last_child = self->children.contents[self->children.size - 1];
self->first_leaf = first_child->first_leaf;
if (first_child->fragile_left) self->fragile_left = true;
if (last_child->fragile_right) self->fragile_right = true;
if (
self->children.size == 2 &&
!self->visible && !self->named &&
first_child->symbol == self->symbol &&
last_child->symbol == self->symbol
) {
if (first_child->repeat_depth > last_child->repeat_depth) {
self->repeat_depth = first_child->repeat_depth + 1;
} else {
self->repeat_depth = last_child->repeat_depth + 1;
}
}
}
}
Subtree *ts_subtree_make_node(SubtreePool *pool, TSSymbol symbol, SubtreeArray *children,
unsigned alias_sequence_id, const TSLanguage *language) {
Subtree *result = ts_subtree_make_leaf(pool, symbol, length_zero(), length_zero(), language);
result->alias_sequence_id = alias_sequence_id;
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
result->fragile_left = true;
result->fragile_right = true;
}
ts_subtree_set_children(result, children, language);
return result;
}
Subtree *ts_subtree_make_error_node(SubtreePool *pool, SubtreeArray *children,
const TSLanguage *language) {
return ts_subtree_make_node(pool, ts_builtin_sym_error, children, 0, language);
}
Subtree *ts_subtree_make_missing_leaf(SubtreePool *pool, TSSymbol symbol,
const TSLanguage *language) {
Subtree *result = ts_subtree_make_leaf(pool, symbol, length_zero(), length_zero(), language);
result->is_missing = true;
result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
return result;
}
void ts_subtree_retain(Subtree *self) {
assert(self->ref_count > 0);
atomic_inc(&self->ref_count);
assert(self->ref_count != 0);
}
void ts_subtree_release(SubtreePool *pool, Subtree *self) {
array_clear(&pool->tree_stack);
array_push(&pool->tree_stack, self);
while (pool->tree_stack.size > 0) {
Subtree *tree = array_pop(&pool->tree_stack);
assert(tree->ref_count > 0);
if (atomic_dec(&tree->ref_count) == 0) {
if (tree->children.size > 0) {
for (uint32_t i = 0; i < tree->children.size; i++) {
array_push(&pool->tree_stack, tree->children.contents[i]);
}
array_delete(&tree->children);
} else if (tree->has_external_tokens) {
ts_external_token_state_delete(&tree->external_token_state);
}
ts_subtree_pool_free(pool, tree);
}
}
}
bool ts_subtree_eq(const Subtree *self, const Subtree *other) {
if (self) {
if (!other) return false;
} else {
return !other;
}
if (self->symbol != other->symbol) return false;
if (self->visible != other->visible) return false;
if (self->named != other->named) return false;
if (self->padding.bytes != other->padding.bytes) return false;
if (self->size.bytes != other->size.bytes) return false;
if (self->symbol == ts_builtin_sym_error) return self->lookahead_char == other->lookahead_char;
if (self->children.size != other->children.size) return false;
if (self->visible_child_count != other->visible_child_count) return false;
if (self->named_child_count != other->named_child_count) return false;
for (uint32_t i = 0; i < self->children.size; i++) {
if (!ts_subtree_eq(self->children.contents[i], other->children.contents[i])) {
return false;
}
}
return true;
}
int ts_subtree_compare(const Subtree *left, const Subtree *right) {
if (left->symbol < right->symbol)
return -1;
if (right->symbol < left->symbol)
return 1;
if (left->children.size < right->children.size)
return -1;
if (right->children.size < left->children.size)
return 1;
for (uint32_t i = 0; i < left->children.size; i++) {
Subtree *left_child = left->children.contents[i];
Subtree *right_child = right->children.contents[i];
switch (ts_subtree_compare(left_child, right_child)) {
case -1:
return -1;
case 1:
return 1;
default:
break;
}
}
return 0;
}
Subtree *ts_subtree_invalidate_lookahead(Subtree *self, uint32_t edit_byte_offset,
SubtreePool *pool) {
if (edit_byte_offset >= self->bytes_scanned) return self;
Subtree *result = ts_subtree_make_mut(pool, self);
result->has_changes = true;
if (result->children.size > 0) {
uint32_t child_start_byte = 0;
for (uint32_t i = 0; i < result->children.size; i++) {
Subtree **child = &result->children.contents[i];
if (child_start_byte > edit_byte_offset) break;
*child = ts_subtree_invalidate_lookahead(*child, edit_byte_offset - child_start_byte, pool);
child_start_byte += ts_subtree_total_bytes(*child);
}
}
return result;
}
Subtree *ts_subtree__edit(Subtree *self, Edit edit, SubtreePool *pool) {
Length new_end = length_add(edit.start, edit.added);
Length old_end = length_add(edit.start, edit.removed);
Subtree *result = ts_subtree_make_mut(pool, self);
result->has_changes = true;
if (old_end.bytes <= result->padding.bytes) {
result->padding = length_add(new_end, length_sub(result->padding, old_end));
} else if (edit.start.bytes < result->padding.bytes) {
result->size = length_sub(result->size, length_sub(old_end, result->padding));
result->padding = new_end;
} else if (edit.start.bytes == result->padding.bytes && edit.removed.bytes == 0) {
result->padding = length_add(result->padding, edit.added);
} else {
Length new_total_size = length_add(new_end, length_sub(ts_subtree_total_size(result), old_end));
result->size = length_sub(new_total_size, result->padding);
}
Length child_left, child_right = length_zero();
for (uint32_t i = 0; i < result->children.size; i++) {
Subtree **child = &result->children.contents[i];
Length child_size = ts_subtree_total_size(*child);
child_left = child_right;
child_right = length_add(child_left, child_size);
if (child_left.bytes > old_end.bytes ||
(child_left.bytes == old_end.bytes && child_size.bytes > 0 && i > 0)) break;
if (child_right.bytes > edit.start.bytes ||
(child_right.bytes == edit.start.bytes && edit.removed.bytes == 0)) {
Edit child_edit = {
.start = length_sub(edit.start, child_left),
.added = edit.added,
.removed = edit.removed,
};
if (edit.start.bytes < child_left.bytes) {
child_edit.start = length_zero();
}
if (old_end.bytes > child_right.bytes) {
child_edit.removed = length_sub(child_size, child_edit.start);
}
edit.added = length_zero();
edit.removed = length_sub(edit.removed, child_edit.removed);
*child = ts_subtree__edit(*child, child_edit, pool);
} else if (child_left.bytes <= edit.start.bytes) {
*child = ts_subtree_invalidate_lookahead(*child, edit.start.bytes - child_left.bytes, pool);
}
}
return result;
}
Subtree *ts_subtree_edit(Subtree *self, const TSInputEdit *edit, SubtreePool *pool) {
return ts_subtree__edit(self, (Edit) {
.start = {edit->start_byte, edit->start_point},
.added = {edit->bytes_added, edit->extent_added},
.removed = {edit->bytes_removed, edit->extent_removed},
}, pool);
}
Subtree *ts_subtree_last_external_token(Subtree *tree) {
if (!tree->has_external_tokens) return NULL;
while (tree->children.size > 0) {
for (uint32_t i = tree->children.size - 1; i + 1 > 0; i--) {
Subtree *child = tree->children.contents[i];
if (child->has_external_tokens) {
tree = child;
break;
}
}
}
return tree;
}
static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) {
if (c == 0)
return snprintf(s, n, "EOF");
if (c == -1)
return snprintf(s, n, "INVALID");
else if (c == '\n')
return snprintf(s, n, "'\\n'");
else if (c == '\t')
return snprintf(s, n, "'\\t'");
else if (c == '\r')
return snprintf(s, n, "'\\r'");
else if (0 < c && c < 128 && isprint(c))
return snprintf(s, n, "'%c'", c);
else
return snprintf(s, n, "%d", c);
}
static size_t ts_subtree__write_to_string(const Subtree *self, char *string, size_t limit,
const TSLanguage *language, bool is_root,
bool include_all, TSSymbol alias_symbol,
bool alias_is_named) {
if (!self) return snprintf(string, limit, "(NULL)");
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool visible =
include_all ||
is_root ||
self->is_missing ||
(self->visible && self->named) ||
alias_is_named;
if (visible && !is_root) {
cursor += snprintf(*writer, limit, " ");
}
if (visible) {
if (self->symbol == ts_builtin_sym_error && self->children.size == 0 && self->size.bytes > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor += ts_subtree__write_char_to_string(*writer, limit, self->lookahead_char);
} else if (self->is_missing) {
cursor += snprintf(*writer, limit, "(MISSING");
} else {
TSSymbol symbol = alias_symbol ? alias_symbol : self->symbol;
const char *symbol_name = ts_language_symbol_name(language, symbol);
cursor += snprintf(*writer, limit, "(%s", symbol_name);
}
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self->children.size; i++) {
Subtree *child = self->children.contents[i];
if (child->extra) {
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
0, false
);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
alias_symbol,
alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false
);
structural_child_index++;
}
}
if (visible) cursor += snprintf(*writer, limit, ")");
return cursor - string;
}
char *ts_subtree_string(const Subtree *self, const TSLanguage *language, bool include_all) {
char scratch_string[1];
size_t size = ts_subtree__write_to_string(
self, scratch_string, 0,
language, true,
include_all, 0, false
) + 1;
char *result = ts_malloc(size * sizeof(char));
ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false);
return result;
}
void ts_subtree__print_dot_graph(const Subtree *self, uint32_t byte_offset,
const TSLanguage *language, TSSymbol alias_symbol, FILE *f) {
TSSymbol symbol = alias_symbol ? alias_symbol : self->symbol;
fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, symbol));
if (self->children.size == 0)
fprintf(f, ", shape=plaintext");
if (self->extra)
fprintf(f, ", fontcolor=gray");
fprintf(f, ", tooltip=\""
"address:%p\n"
"range:%u - %u\n"
"state:%d\n"
"error-cost:%u\n"
"repeat-depth:%u\n"
"bytes-scanned:%u\"]\n",
self,
byte_offset, byte_offset + ts_subtree_total_bytes(self),
self->parse_state,
self->error_cost,
self->repeat_depth,
self->bytes_scanned
);
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self->children.size; i++) {
const Subtree *child = self->children.contents[i];
if (child->extra) {
ts_subtree__print_dot_graph(child, byte_offset, language, 0, f);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
ts_subtree__print_dot_graph(child, byte_offset, language, alias_symbol, f);
structural_child_index++;
}
fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i);
byte_offset += ts_subtree_total_bytes(child);
}
}
void ts_subtree_print_dot_graph(const Subtree *self, const TSLanguage *language, FILE *f) {
fprintf(f, "digraph tree {\n");
fprintf(f, "edge [arrowhead=none]\n");
ts_subtree__print_dot_graph(self, 0, language, 0, f);
fprintf(f, "}\n");
}
bool ts_subtree_external_token_state_eq(const Subtree *self, const Subtree *other) {
const TSExternalTokenState *state1 = &empty_state;
const TSExternalTokenState *state2 = &empty_state;
if (self && self->has_external_tokens) state1 = &self->external_token_state;
if (other && other->has_external_tokens) state2 = &other->external_token_state;
return ts_external_token_state_eq(state1, state2);
}