Merge pull request #780 from tree-sitter/fewer-node-allocs

Allocate parent node data alongside the array of children
This commit is contained in:
Max Brunsfeld 2020-10-25 09:41:05 -07:00 committed by GitHub
commit a7125feb49
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 354 additions and 169 deletions

View file

@ -38,7 +38,6 @@ script:
- (eval "$WASM_ENV" && script/generate-fixtures-wasm)
# Run the tests
- export TREE_SITTER_STATIC_ANALYSIS=1
- script/test
- script/test-wasm
- script/benchmark

View file

@ -74,6 +74,9 @@ extern "C" {
#define array_assign(self, other) \
array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))
#define array_swap(self, other) \
array__swap((VoidArray *)(self), (VoidArray *)(other))
// Search a sorted array for a given `needle` value, using the given `compare`
// callback to determine the order.
//
@ -139,7 +142,7 @@ static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t
if (self->contents) {
self->contents = ts_realloc(self->contents, new_capacity * element_size);
} else {
self->contents = ts_calloc(new_capacity, element_size);
self->contents = ts_malloc(new_capacity * element_size);
}
self->capacity = new_capacity;
}
@ -151,6 +154,12 @@ static inline void array__assign(VoidArray *self, const VoidArray *other, size_t
memcpy(self->contents, other->contents, self->size * element_size);
}
// Exchange the complete contents of two arrays by swapping the two
// `VoidArray` structs by value. This is the implementation behind the
// `array_swap` macro.
static inline void array__swap(VoidArray *self, VoidArray *other) {
  VoidArray previous_self = *self;
  *self = *other;
  *other = previous_self;
}
static inline void array__grow(VoidArray *self, size_t count, size_t element_size) {
size_t new_size = self->size + count;
if (new_size > self->capacity) {

View file

@ -205,7 +205,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) {
Length position = entry.position;
uint32_t structural_child_index = 0;
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
const Subtree *child = &entry.subtree->ptr->children[i];
const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
Length child_left = length_add(position, ts_subtree_padding(*child));
Length child_right = length_add(child_left, ts_subtree_size(*child));
@ -260,7 +260,7 @@ static void iterator_advance(Iterator *self) {
Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
uint32_t structural_child_index = entry.structural_child_index;
if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
const Subtree *next_child = &parent->ptr->children[child_index];
const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
array_push(&self->cursor.stack, ((TreeCursorEntry){
.subtree = next_child,

View file

@ -79,7 +79,7 @@ static inline bool ts_node_child_iterator_next(
TSNode *result
) {
if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
const Subtree *child = &self->parent.ptr->children[self->child_index];
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
TSSymbol alias_symbol = 0;
if (!ts_subtree_extra(*child)) {
if (self->alias_sequence) {
@ -178,7 +178,7 @@ static bool ts_subtree_has_trailing_empty_descendant(
Subtree other
) {
for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) {
Subtree child = self.ptr->children[i];
Subtree child = ts_subtree_children(self)[i];
if (ts_subtree_total_bytes(child) > 0) break;
if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) {
return true;

View file

@ -60,8 +60,9 @@ struct TSParser {
const TSLanguage *language;
ReduceActionSet reduce_actions;
Subtree finished_tree;
SubtreeHeapData scratch_tree_data;
MutableSubtree scratch_tree;
SubtreeArray trailing_extras;
SubtreeArray trailing_extras2;
SubtreeArray scratch_trees;
TokenCache token_cache;
ReusableNode reusable_node;
void *external_scanner_payload;
@ -155,7 +156,7 @@ static bool ts_parser__breakdown_top_of_stack(
Subtree parent = *array_front(&slice.subtrees);
for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
Subtree child = parent.ptr->children[j];
Subtree child = ts_subtree_children(parent)[j];
pending = ts_subtree_child_count(child) > 0;
if (ts_subtree_is_error(child)) {
@ -672,6 +673,10 @@ static Subtree ts_parser__reuse_node(
return NULL_SUBTREE;
}
// Determine if a given tree should be replaced by an alternative tree.
//
// The decision is based on the trees' error costs (if any), their dynamic precedence,
// and finally, as a default, by a recursive comparison of the trees' symbols.
static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
if (!left.ptr) return true;
if (!right.ptr) return false;
@ -717,6 +722,33 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right)
}
}
// Decide whether the children of `left` should be replaced by the alternative
// array `children`. The decision itself is delegated to
// `ts_parser__select_tree`, which compares `left` against a temporary node
// built from the alternative children.
static bool ts_parser__select_children(
  TSParser *self,
  Subtree left,
  const SubtreeArray *children
) {
  // Copy the candidate children into the parser's reusable scratch array so
  // that the caller's array is left untouched.
  array_assign(&self->scratch_trees, children);

  // Build a temporary parent node on top of the scratch array. Creating it
  // allocates nothing, except that the array may be grown to make room for
  // the node's own heap data. The temporary node is never explicitly
  // released, which lets the same scratch array be reused on later calls.
  MutableSubtree scratch_tree = ts_subtree_new_node(
    ts_subtree_symbol(left),
    &self->scratch_trees,
    0,
    self->language
  );
  Subtree candidate = ts_subtree_from_mut(scratch_tree);

  return ts_parser__select_tree(self, left, candidate);
}
static void ts_parser__shift(
TSParser *self,
StackVersion version,
@ -742,22 +774,6 @@ static void ts_parser__shift(
}
}
// Try to replace the children of `tree` with the given `children`.
//
// A candidate node is assembled in the parser's scratch tree and compared
// against `tree` using `ts_parser__select_tree`. Returns true, with the
// candidate's heap data copied over `*tree->ptr`, when the candidate is
// selected; returns false and leaves `tree` untouched otherwise.
static bool ts_parser__replace_children(
TSParser *self,
MutableSubtree *tree,
SubtreeArray *children
) {
// Start the candidate as a copy of the existing node's heap data.
*self->scratch_tree.ptr = *tree->ptr;
// Zero the child count before assigning the new children.
self->scratch_tree.ptr->child_count = 0;
ts_subtree_set_children(self->scratch_tree, children->contents, children->size, self->language);
if (ts_parser__select_tree(self, ts_subtree_from_mut(*tree), ts_subtree_from_mut(self->scratch_tree))) {
// The candidate was selected: overwrite the node's heap data with it.
*tree->ptr = *self->scratch_tree.ptr;
return true;
} else {
return false;
}
}
static StackVersion ts_parser__reduce(
TSParser *self,
StackVersion version,
@ -802,11 +818,9 @@ static StackVersion ts_parser__reduce(
// node. They will be re-pushed onto the stack after the parent node is
// created and pushed.
SubtreeArray children = slice.subtrees;
while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) {
children.size--;
}
ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras);
MutableSubtree parent = ts_subtree_new_node(&self->tree_pool,
MutableSubtree parent = ts_subtree_new_node(
symbol, &children, production_id, self->language
);
@ -820,21 +834,25 @@ static StackVersion ts_parser__reduce(
i++;
SubtreeArray children = next_slice.subtrees;
while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) {
children.size--;
}
ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras2);
if (ts_parser__replace_children(self, &parent, &children)) {
ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
slice = next_slice;
if (ts_parser__select_children(
self,
ts_subtree_from_mut(parent),
&children
)) {
ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras);
ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent));
array_swap(&self->trailing_extras, &self->trailing_extras2);
parent = ts_subtree_new_node(
symbol, &children, production_id, self->language
);
} else {
array_clear(&self->trailing_extras2);
ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
}
}
parent.ptr->dynamic_precedence += dynamic_precedence;
parent.ptr->production_id = production_id;
TSStateId state = ts_stack_state(self->stack, slice_version);
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
if (end_of_non_terminal_extra && next_state == state) {
@ -847,12 +865,13 @@ static StackVersion ts_parser__reduce(
} else {
parent.ptr->parse_state = state;
}
parent.ptr->dynamic_precedence += dynamic_precedence;
// Push the parent node onto the stack, along with any extra tokens that
// were previously on top of the stack.
ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
for (uint32_t j = parent.ptr->child_count; j < slice.subtrees.size; j++) {
ts_stack_push(self->stack, slice_version, slice.subtrees.contents[j], false, next_state);
for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
}
for (StackVersion j = 0; j < slice_version; j++) {
@ -884,22 +903,22 @@ static void ts_parser__accept(
Subtree root = NULL_SUBTREE;
for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
Subtree child = trees.contents[j];
if (!ts_subtree_extra(child)) {
assert(!child.data.is_inline);
uint32_t child_count = ts_subtree_child_count(child);
Subtree tree = trees.contents[j];
if (!ts_subtree_extra(tree)) {
assert(!tree.data.is_inline);
uint32_t child_count = ts_subtree_child_count(tree);
const Subtree *children = ts_subtree_children(tree);
for (uint32_t k = 0; k < child_count; k++) {
ts_subtree_retain(child.ptr->children[k]);
ts_subtree_retain(children[k]);
}
array_splice(&trees, j, 1, child_count, child.ptr->children);
array_splice(&trees, j, 1, child_count, children);
root = ts_subtree_from_mut(ts_subtree_new_node(
&self->tree_pool,
ts_subtree_symbol(child),
ts_subtree_symbol(tree),
&trees,
child.ptr->production_id,
tree.ptr->production_id,
self->language
));
ts_subtree_release(&self->tree_pool, child);
ts_subtree_release(&self->tree_pool, tree);
break;
}
}
@ -1125,7 +1144,7 @@ static bool ts_parser__recover_to_state(
Subtree error_tree = error_trees.contents[0];
uint32_t error_child_count = ts_subtree_child_count(error_tree);
if (error_child_count > 0) {
array_splice(&slice.subtrees, 0, 0, error_child_count, error_tree.ptr->children);
array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
for (unsigned j = 0; j < error_child_count; j++) {
ts_subtree_retain(slice.subtrees.contents[j]);
}
@ -1133,22 +1152,21 @@ static bool ts_parser__recover_to_state(
ts_subtree_array_delete(&self->tree_pool, &error_trees);
}
SubtreeArray trailing_extras = ts_subtree_array_remove_trailing_extras(&slice.subtrees);
ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras);
if (slice.subtrees.size > 0) {
Subtree error = ts_subtree_new_error_node(&self->tree_pool, &slice.subtrees, true, self->language);
Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
ts_stack_push(self->stack, slice.version, error, false, goal_state);
} else {
array_delete(&slice.subtrees);
}
for (unsigned j = 0; j < trailing_extras.size; j++) {
Subtree tree = trailing_extras.contents[j];
for (unsigned j = 0; j < self->trailing_extras.size; j++) {
Subtree tree = self->trailing_extras.contents[j];
ts_stack_push(self->stack, slice.version, tree, false, goal_state);
}
previous_version = slice.version;
array_delete(&trailing_extras);
}
return previous_version != STACK_VERSION_NONE;
@ -1245,7 +1263,7 @@ static void ts_parser__recover(
if (ts_subtree_is_eof(lookahead)) {
LOG("recover_eof");
SubtreeArray children = array_new();
Subtree parent = ts_subtree_new_error_node(&self->tree_pool, &children, false, self->language);
Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
ts_stack_push(self->stack, version, parent, false, 1);
ts_parser__accept(self, version, lookahead);
return;
@ -1278,7 +1296,6 @@ static void ts_parser__recover(
array_reserve(&children, 1);
array_push(&children, lookahead);
MutableSubtree error_repeat = ts_subtree_new_node(
&self->tree_pool,
ts_builtin_sym_error_repeat,
&children,
0,
@ -1307,7 +1324,6 @@ static void ts_parser__recover(
ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
error_repeat = ts_subtree_new_node(
&self->tree_pool,
ts_builtin_sym_error_repeat,
&pop.contents[0].subtrees,
0,
@ -1666,7 +1682,6 @@ TSParser *ts_parser_new(void) {
self->end_clock = clock_null();
self->operation_count = 0;
self->old_tree = NULL_SUBTREE;
self->scratch_tree.ptr = &self->scratch_tree_data;
self->included_range_differences = (TSRangeArray) array_new();
self->included_range_difference_index = 0;
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
@ -1692,6 +1707,9 @@ void ts_parser_delete(TSParser *self) {
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
ts_subtree_pool_delete(&self->tree_pool);
reusable_node_delete(&self->reusable_node);
array_delete(&self->trailing_extras);
array_delete(&self->trailing_extras2);
array_delete(&self->scratch_trees);
ts_free(self);
}

View file

@ -53,7 +53,7 @@ static inline void reusable_node_advance(ReusableNode *self) {
} while (ts_subtree_child_count(tree) <= next_index);
array_push(&self->stack, ((StackEntry) {
.tree = tree.ptr->children[next_index],
.tree = ts_subtree_children(tree)[next_index],
.child_index = next_index,
.byte_offset = byte_offset,
}));
@ -63,7 +63,7 @@ static inline bool reusable_node_descend(ReusableNode *self) {
StackEntry last_entry = *array_back(&self->stack);
if (ts_subtree_child_count(last_entry.tree) > 0) {
array_push(&self->stack, ((StackEntry) {
.tree = last_entry.tree.ptr->children[0],
.tree = ts_subtree_children(last_entry.tree)[0],
.child_index = 0,
.byte_offset = last_entry.byte_offset,
}));

View file

@ -288,7 +288,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version,
bool include_subtrees = false;
if (goal_subtree_count >= 0) {
include_subtrees = true;
array_reserve(&iterator.subtrees, goal_subtree_count);
array_reserve(&iterator.subtrees, ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree));
}
array_push(&self->iterators, iterator);
@ -304,8 +304,9 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version,
if (should_pop) {
SubtreeArray subtrees = iterator->subtrees;
if (!should_stop)
if (!should_stop) {
ts_subtree_array_copy(subtrees, &subtrees);
}
ts_subtree_array_reverse(&subtrees);
ts_stack__add_slice(
self,

View file

@ -80,26 +80,33 @@ void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
}
}
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
// Release every subtree currently stored in the array, then reset the array
// with `array_clear`.
void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) {
  for (uint32_t index = 0; index < self->size; index++) {
    Subtree element = self->contents[index];
    ts_subtree_release(pool, element);
  }
  array_clear(self);
}
// Release every subtree in the array via `ts_subtree_array_clear`, then
// dispose of the array itself with `array_delete`.
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
ts_subtree_array_clear(pool, self);
array_delete(self);
}
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
SubtreeArray result = array_new();
uint32_t i = self->size - 1;
for (; i + 1 > 0; i--) {
Subtree child = self->contents[i];
if (!ts_subtree_extra(child)) break;
array_push(&result, child);
void ts_subtree_array_remove_trailing_extras(
SubtreeArray *self,
SubtreeArray *destination
) {
array_clear(destination);
while (self->size > 0) {
Subtree last = self->contents[self->size - 1];
if (ts_subtree_extra(last)) {
self->size--;
array_push(destination, last);
} else {
break;
}
}
self->size = i + 1;
ts_subtree_array_reverse(&result);
return result;
ts_subtree_array_reverse(destination);
}
void ts_subtree_array_reverse(SubtreeArray *self) {
@ -247,28 +254,45 @@ Subtree ts_subtree_new_error(
return result;
}
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
if (self.data.is_inline) return (MutableSubtree) {self.data};
if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
SubtreeHeapData *result = ts_subtree_pool_allocate(pool);
memcpy(result, self.ptr, sizeof(SubtreeHeapData));
if (result->child_count > 0) {
result->children = ts_calloc(self.ptr->child_count, sizeof(Subtree));
memcpy(result->children, self.ptr->children, result->child_count * sizeof(Subtree));
for (uint32_t i = 0; i < result->child_count; i++) {
ts_subtree_retain(result->children[i]);
// Clone a subtree.
MutableSubtree ts_subtree_clone(Subtree self) {
size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count);
Subtree *new_children = ts_malloc(alloc_size);
Subtree *old_children = ts_subtree_children(self);
memcpy(new_children, old_children, alloc_size);
SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count];
if (self.ptr->child_count > 0) {
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
ts_subtree_retain(new_children[i]);
}
} else if (result->has_external_tokens) {
result->external_scanner_state = ts_external_scanner_state_copy(&self.ptr->external_scanner_state);
} else if (self.ptr->has_external_tokens) {
result->external_scanner_state = ts_external_scanner_state_copy(
&self.ptr->external_scanner_state
);
}
result->ref_count = 1;
ts_subtree_release(pool, self);
return (MutableSubtree) {.ptr = result};
}
static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language,
MutableSubtreeArray *stack) {
// Obtain a mutable version of a subtree, taking ownership of the caller's
// reference to it.
//
// An inline subtree is returned by value, and a heap subtree whose reference
// count is exactly one is converted directly. Any other subtree is cloned,
// and the caller's reference to the original is then released.
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
  if (self.data.is_inline) {
    return (MutableSubtree) {self.data};
  }

  if (self.ptr->ref_count != 1) {
    // Shared with other owners: work on a private copy instead.
    MutableSubtree copy = ts_subtree_clone(self);
    ts_subtree_release(pool, self);
    return copy;
  }

  return ts_subtree_to_mut_unsafe(self);
}
static void ts_subtree__compress(
MutableSubtree self,
unsigned count,
const TSLanguage *language,
MutableSubtreeArray *stack
) {
unsigned initial_stack_size = stack->size;
MutableSubtree tree = self;
@ -276,7 +300,7 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa
for (unsigned i = 0; i < count; i++) {
if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
if (
child.data.is_inline ||
child.ptr->child_count < 2 ||
@ -284,7 +308,7 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa
child.ptr->symbol != symbol
) break;
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]);
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]);
if (
grandchild.data.is_inline ||
grandchild.ptr->child_count < 2 ||
@ -292,20 +316,20 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa
grandchild.ptr->symbol != symbol
) break;
tree.ptr->children[0] = ts_subtree_from_mut(grandchild);
child.ptr->children[0] = grandchild.ptr->children[grandchild.ptr->child_count - 1];
grandchild.ptr->children[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild);
ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1];
ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
array_push(stack, tree);
tree = grandchild;
}
while (stack->size > initial_stack_size) {
tree = array_pop(stack);
MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[child.ptr->child_count - 1]);
ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language);
ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language);
ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language);
MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]);
ts_subtree_summarize_children(grandchild, language);
ts_subtree_summarize_children(child, language);
ts_subtree_summarize_children(tree, language);
}
}
@ -320,8 +344,8 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu
MutableSubtree tree = array_pop(&pool->tree_stack);
if (tree.ptr->repeat_depth > 0) {
Subtree child1 = tree.ptr->children[0];
Subtree child2 = tree.ptr->children[tree.ptr->child_count - 1];
Subtree child1 = ts_subtree_children(tree)[0];
Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1];
long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
if (repeat_delta > 0) {
unsigned n = repeat_delta;
@ -333,7 +357,7 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu
}
for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
Subtree child = tree.ptr->children[i];
Subtree child = ts_subtree_children(tree)[i];
if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
}
@ -341,17 +365,13 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu
}
}
void ts_subtree_set_children(
MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language
// Assign all of the node's properties that depend on its children.
void ts_subtree_summarize_children(
MutableSubtree self,
const TSLanguage *language
) {
assert(!self.data.is_inline);
if (self.ptr->child_count > 0 && children != self.ptr->children) {
ts_free(self.ptr->children);
}
self.ptr->child_count = child_count;
self.ptr->children = children;
self.ptr->named_child_count = 0;
self.ptr->visible_child_count = 0;
self.ptr->error_cost = 0;
@ -364,8 +384,9 @@ void ts_subtree_set_children(
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
uint32_t lookahead_end_byte = 0;
const Subtree *children = ts_subtree_children(self);
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
Subtree child = children[i];
if (i == 0) {
self.ptr->padding = ts_subtree_padding(child);
@ -384,6 +405,17 @@ void ts_subtree_set_children(
self.ptr->error_cost += ts_subtree_error_cost(child);
}
uint32_t grandchild_count = ts_subtree_child_count(child);
if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) {
if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) {
if (ts_subtree_visible(child)) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
} else if (grandchild_count > 0) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
}
}
}
self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
self.ptr->node_count += ts_subtree_node_count(child);
@ -395,7 +427,7 @@ void ts_subtree_set_children(
} else if (ts_subtree_visible(child)) {
self.ptr->visible_child_count++;
if (ts_subtree_named(child)) self.ptr->named_child_count++;
} else if (ts_subtree_child_count(child) > 0) {
} else if (grandchild_count > 0) {
self.ptr->visible_child_count += child.ptr->visible_child_count;
self.ptr->named_child_count += child.ptr->named_child_count;
}
@ -417,22 +449,11 @@ void ts_subtree_set_children(
ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
uint32_t grandchild_count = ts_subtree_child_count(child);
if (ts_subtree_extra(child)) continue;
if (ts_subtree_is_error(child) && grandchild_count == 0) continue;
if (ts_subtree_visible(child)) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
} else if (grandchild_count > 0) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
}
}
}
if (self.ptr->child_count > 0) {
Subtree first_child = self.ptr->children[0];
Subtree last_child = self.ptr->children[self.ptr->child_count - 1];
Subtree first_child = children[0];
Subtree last_child = children[self.ptr->child_count - 1];
self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
@ -455,15 +476,30 @@ void ts_subtree_set_children(
}
}
MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
SubtreeArray *children, unsigned production_id,
const TSLanguage *language) {
// Create a new parent node with the given children.
//
// This takes ownership of the children array.
MutableSubtree ts_subtree_new_node(
TSSymbol symbol,
SubtreeArray *children,
unsigned production_id,
const TSLanguage *language
) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
// Allocate the node's data at the end of the array of children.
size_t new_byte_size = ts_subtree_alloc_size(children->size);
if (children->capacity * sizeof(Subtree) < new_byte_size) {
children->contents = ts_realloc(children->contents, new_byte_size);
children->capacity = new_byte_size / sizeof(Subtree);
}
SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size];
*data = (SubtreeHeapData) {
.ref_count = 1,
.symbol = symbol,
.child_count = children->size,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
@ -477,32 +513,45 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
}}
};
MutableSubtree result = {.ptr = data};
ts_subtree_set_children(result, children->contents, children->size, language);
ts_subtree_summarize_children(result, language);
return result;
}
Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
bool extra, const TSLanguage *language) {
// Create a new error node containing the given children.
//
// This node is treated as 'extra'. Its children are prevented from
// having any effect on the parse state.
Subtree ts_subtree_new_error_node(
SubtreeArray *children,
bool extra,
const TSLanguage *language
) {
MutableSubtree result = ts_subtree_new_node(
pool, ts_builtin_sym_error, children, 0, language
ts_builtin_sym_error, children, 0, language
);
result.ptr->extra = extra;
return ts_subtree_from_mut(result);
}
Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
const TSLanguage *language) {
// Create a new 'missing leaf' node.
//
// The leaf is built by `ts_subtree_new_leaf` from the given symbol and
// padding, with `length_zero()` and zero/false for the remaining arguments.
// It is then flagged as missing on whichever representation (inline or heap)
// was returned.
Subtree ts_subtree_new_missing_leaf(
  SubtreePool *pool,
  TSSymbol symbol,
  Length padding,
  const TSLanguage *language
) {
  Subtree leaf = ts_subtree_new_leaf(
    pool, symbol, padding, length_zero(), 0,
    0, false, false, language
  );

  if (!leaf.data.is_inline) {
    // `Subtree` exposes its heap data as const; the leaf was just created
    // above, so the flag is set through a non-const view of the same pointer.
    SubtreeHeapData *heap_data = (SubtreeHeapData *)leaf.ptr;
    heap_data->is_missing = true;
    return leaf;
  }

  leaf.data.is_missing = true;
  return leaf;
}
@ -525,19 +574,22 @@ void ts_subtree_release(SubtreePool *pool, Subtree self) {
while (pool->tree_stack.size > 0) {
MutableSubtree tree = array_pop(&pool->tree_stack);
if (tree.ptr->child_count > 0) {
Subtree *children = ts_subtree_children(tree);
for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
Subtree child = tree.ptr->children[i];
Subtree child = children[i];
if (child.data.is_inline) continue;
assert(child.ptr->ref_count > 0);
if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
}
}
ts_free(tree.ptr->children);
} else if (tree.ptr->has_external_tokens) {
ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
ts_free(children);
} else {
if (tree.ptr->has_external_tokens) {
ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
}
ts_subtree_pool_free(pool, tree.ptr);
}
ts_subtree_pool_free(pool, tree.ptr);
}
}
@ -564,7 +616,7 @@ bool ts_subtree_eq(Subtree self, Subtree other) {
if (self.ptr->named_child_count != other.ptr->named_child_count) return false;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
if (!ts_subtree_eq(self.ptr->children[i], other.ptr->children[i])) {
if (!ts_subtree_eq(ts_subtree_children(self)[i], ts_subtree_children(other)[i])) {
return false;
}
}
@ -578,8 +630,8 @@ int ts_subtree_compare(Subtree left, Subtree right) {
if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1;
if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1;
for (uint32_t i = 0, n = ts_subtree_child_count(left); i < n; i++) {
Subtree left_child = left.ptr->children[i];
Subtree right_child = right.ptr->children[i];
Subtree left_child = ts_subtree_children(left)[i];
Subtree right_child = ts_subtree_children(right)[i];
switch (ts_subtree_compare(left_child, right_child)) {
case -1: return -1;
case 1: return 1;
@ -695,7 +747,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
Length child_left, child_right = length_zero();
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
Subtree *child = &result.ptr->children[i];
Subtree *child = &ts_subtree_children(*entry.tree)[i];
Length child_size = ts_subtree_total_size(*child);
child_left = child_right;
child_right = length_add(child_left, child_size);
@ -750,7 +802,7 @@ Subtree ts_subtree_last_external_token(Subtree tree) {
if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
while (tree.ptr->child_count > 0) {
for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) {
Subtree child = tree.ptr->children[i];
Subtree child = ts_subtree_children(tree)[i];
if (ts_subtree_has_external_tokens(child)) {
tree = child;
break;
@ -853,7 +905,7 @@ static size_t ts_subtree__write_to_string(
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
Subtree child = ts_subtree_children(self)[i];
if (ts_subtree_extra(child)) {
cursor += ts_subtree__write_to_string(
child, *writer, limit,
@ -950,7 +1002,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
language->max_alias_sequence_length *
ts_subtree_production_id(*self);
for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
const Subtree *child = &self->ptr->children[i];
const Subtree *child = &ts_subtree_children(*self)[i];
TSSymbol alias_symbol = 0;
if (!ts_subtree_extra(*child) && child_info_offset) {
alias_symbol = language->alias_sequences[child_info_offset];

View file

@ -14,12 +14,19 @@ extern "C" {
#include "tree_sitter/api.h"
#include "tree_sitter/parser.h"
static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
#define TS_TREE_STATE_NONE USHRT_MAX
#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
typedef union Subtree Subtree;
typedef union MutableSubtree MutableSubtree;
// The serialized state of an external scanner.
//
// Every time an external token subtree is created after a call to an
// external scanner, the scanner's `serialize` function is called to
// retrieve a serialized copy of its state. The bytes are then copied
// onto the subtree itself so that the scanner's state can later be
// restored using its `deserialize` function.
//
// Small byte arrays are stored inline, and long ones are allocated
// separately on the heap.
typedef struct {
union {
char *long_data;
@ -28,6 +35,10 @@ typedef struct {
uint32_t length;
} ExternalScannerState;
// A compact representation of a subtree.
//
// This representation is used for small leaf nodes that are not
// errors, and were not created by an external scanner.
typedef struct {
bool is_inline : 1;
bool visible : 1;
@ -45,6 +56,11 @@ typedef struct {
uint16_t parse_state;
} SubtreeInlineData;
// A heap-allocated representation of a subtree.
//
// This representation is used for parent nodes, external tokens,
// errors, and other leaf nodes whose data is too large to fit into
// the inline representation.
typedef struct {
volatile uint32_t ref_count;
Length padding;
@ -68,7 +84,6 @@ typedef struct {
union {
// Non-terminal subtrees (`child_count > 0`)
struct {
Subtree *children;
uint32_t visible_child_count;
uint32_t named_child_count;
uint32_t node_count;
@ -89,15 +104,17 @@ typedef struct {
};
} SubtreeHeapData;
union Subtree {
// The fundamental building block of a syntax tree.
typedef union {
SubtreeInlineData data;
const SubtreeHeapData *ptr;
};
} Subtree;
union MutableSubtree {
// Like Subtree, but mutable.
typedef union {
SubtreeInlineData data;
SubtreeHeapData *ptr;
};
} MutableSubtree;
typedef Array(Subtree) SubtreeArray;
typedef Array(MutableSubtree) MutableSubtreeArray;
@ -111,8 +128,9 @@ void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsign
const char *ts_external_scanner_state_data(const ExternalScannerState *);
void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *);
void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
void ts_subtree_array_reverse(SubtreeArray *);
SubtreePool ts_subtree_pool_new(uint32_t capacity);
@ -125,8 +143,8 @@ Subtree ts_subtree_new_leaf(
Subtree ts_subtree_new_error(
SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
);
MutableSubtree ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
Subtree ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, bool, const TSLanguage *);
MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
void ts_subtree_retain(Subtree);
@ -134,7 +152,8 @@ void ts_subtree_release(SubtreePool *, Subtree);
bool ts_subtree_eq(Subtree, Subtree);
int ts_subtree_compare(Subtree, Subtree);
void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const TSLanguage *);
void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
@ -156,6 +175,17 @@ static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE
#undef SUBTREE_GET
// Compute the number of bytes occupied by a heap-allocated subtree that has
// `child_count` children: room for that many `Subtree` entries plus one
// `SubtreeHeapData` record.
static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
  size_t children_size = child_count * sizeof(Subtree);
  return children_size + sizeof(SubtreeHeapData);
}
// Get a subtree's children, which are allocated immediately before the
// tree's own heap data.
//
// For an inline subtree (`data.is_inline` set) this yields NULL. Otherwise it
// yields the subtree's heap pointer, reinterpreted as a `Subtree *` and moved
// back by `child_count` elements, i.e. the address of the first child.
//
// This is a macro rather than a function, so `self` is expanded several
// times: do not pass an expression with side effects.
#define ts_subtree_children(self) \
((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
static inline void ts_subtree_set_extra(MutableSubtree *self) {
if (self->data.is_inline) {
self->data.extra = true;

View file

@ -38,7 +38,7 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
TreeCursorEntry *result,
bool *visible) {
if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
const Subtree *child = &self->parent.ptr->children[self->child_index];
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
*result = (TreeCursorEntry) {
.subtree = child,
.position = self->position,
@ -56,7 +56,7 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
self->child_index++;
if (self->child_index < self->parent.ptr->child_count) {
Subtree next_child = self->parent.ptr->children[self->child_index];
Subtree next_child = ts_subtree_children(self->parent)[self->child_index];
self->position = length_add(self->position, ts_subtree_padding(next_child));
}
@ -306,7 +306,7 @@ void ts_tree_cursor_current_status(
unsigned structural_child_index = entry->structural_child_index;
if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
Subtree sibling = parent_entry->subtree->ptr->children[j];
Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j];
TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
self->tree->language,
subtree_symbol(sibling, structural_child_index)

34
script/heap-profile Executable file
View file

@ -0,0 +1,34 @@
#!/usr/bin/env bash
#
# Usage:
#   script/heap-profile
#
# Parse an example source file and record memory usage
#
# Dependencies:
# * `pprof` executable: https://github.com/google/pprof
# * `gperftools` package: https://github.com/gperftools/gperftools

set -e

# Quoted so that a checkout path containing spaces does not get word-split.
GRAMMARS_DIR="$PWD/test/fixtures/grammars"

# Build the library
make

# The harness binary is written into `target/`; make sure it exists.
mkdir -p target

# Build the heap-profiling harness
clang++ \
  -I lib/include \
  -I "$GRAMMARS_DIR" \
  -D "GRAMMARS_DIR=\"${GRAMMARS_DIR}/\"" \
  -l tcmalloc \
  ./libtree-sitter.a \
  test/profile/heap.cc \
  -o target/heap-profile

# Run the harness with heap profiling enabled.
# Any arguments given to this script are forwarded unchanged.
export HEAPPROFILE="$PWD/profile"
target/heap-profile "$@"

# Extract statistics using pprof.
pprof -top -cum profile.0001.heap

42
test/profile/heap.cc Normal file
View file

@ -0,0 +1,42 @@
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <string>
#include <tree_sitter/api.h>

extern "C" {
#include "javascript/src/parser.c"
#include "javascript/src/scanner.c"
}

#define LANGUAGE tree_sitter_javascript
#define SOURCE_PATH "javascript/examples/jquery.js"

// Heap-profiling harness.
//
// Creates a parser for LANGUAGE, reads the file at GRAMMARS_DIR SOURCE_PATH
// fully into memory, and parses it once. GRAMMARS_DIR must be supplied on the
// compiler command line as a string literal. Exits with status 1 if the
// language is rejected or the source file cannot be opened.
int main() {
  TSParser *parser = ts_parser_new();
  if (!ts_parser_set_language(parser, LANGUAGE())) {
    fprintf(stderr, "Invalid language\n");
    exit(1);
  }

  const char *source_path = GRAMMARS_DIR SOURCE_PATH;
  printf("Parsing %s\n", source_path);

  std::ifstream source_file(source_path);
  if (!source_file.good()) {
    fprintf(stderr, "Invalid source path %s\n", source_path);
    exit(1);
  }

  // Slurp the whole file into a string.
  std::string source_code(
    (std::istreambuf_iterator<char>(source_file)),
    std::istreambuf_iterator<char>()
  );

  // The parse API takes a 32-bit length; make the narrowing explicit.
  TSTree *tree = ts_parser_parse_string(
    parser,
    NULL,
    source_code.c_str(),
    static_cast<uint32_t>(source_code.size())
  );

  // NOTE(review): the tree and parser are not released before exit.
  // Presumably this is deliberate, so that their allocations are still live
  // when the heap profile is written -- confirm before adding cleanup.
  (void)tree;
}