From d533d1f07654b7011c6c914537df3d1e498b8193 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 22 Oct 2020 16:21:47 -0700 Subject: [PATCH 1/5] Allocate parent nodes together with their child array --- lib/src/array.h | 11 +- lib/src/get_changed_ranges.c | 4 +- lib/src/node.c | 4 +- lib/src/parser.c | 110 ++++++++-------- lib/src/reusable_node.h | 4 +- lib/src/stack.c | 5 +- lib/src/subtree.c | 234 ++++++++++++++++++++++------------- lib/src/subtree.h | 19 ++- lib/src/tree_cursor.c | 6 +- 9 files changed, 240 insertions(+), 157 deletions(-) diff --git a/lib/src/array.h b/lib/src/array.h index de8c8cb3..13117194 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -74,6 +74,9 @@ extern "C" { #define array_assign(self, other) \ array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self)) +#define array_swap(self, other) \ + array__swap((VoidArray *)(self), (VoidArray *)(other)) + // Search a sorted array for a given `needle` value, using the given `compare` // callback to determine the order. 
// @@ -139,7 +142,7 @@ static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t if (self->contents) { self->contents = ts_realloc(self->contents, new_capacity * element_size); } else { - self->contents = ts_calloc(new_capacity, element_size); + self->contents = ts_malloc(new_capacity * element_size); } self->capacity = new_capacity; } @@ -151,6 +154,12 @@ static inline void array__assign(VoidArray *self, const VoidArray *other, size_t memcpy(self->contents, other->contents, self->size * element_size); } +static inline void array__swap(VoidArray *self, VoidArray *other) { + VoidArray swap = *other; + *other = *self; + *self = swap; +} + static inline void array__grow(VoidArray *self, size_t count, size_t element_size) { size_t new_size = self->size + count; if (new_size > self->capacity) { diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index b24f3149..b8915544 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -205,7 +205,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { Length position = entry.position; uint32_t structural_child_index = 0; for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { - const Subtree *child = &entry.subtree->ptr->children[i]; + const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; Length child_left = length_add(position, ts_subtree_padding(*child)); Length child_right = length_add(child_left, ts_subtree_size(*child)); @@ -260,7 +260,7 @@ static void iterator_advance(Iterator *self) { Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); uint32_t structural_child_index = entry.structural_child_index; if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; - const Subtree *next_child = &parent->ptr->children[child_index]; + const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; array_push(&self->cursor.stack, ((TreeCursorEntry){ .subtree = 
next_child, diff --git a/lib/src/node.c b/lib/src/node.c index 576f3ef3..9ce0f0b3 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -79,7 +79,7 @@ static inline bool ts_node_child_iterator_next( TSNode *result ) { if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; - const Subtree *child = &self->parent.ptr->children[self->child_index]; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; TSSymbol alias_symbol = 0; if (!ts_subtree_extra(*child)) { if (self->alias_sequence) { @@ -178,7 +178,7 @@ static bool ts_subtree_has_trailing_empty_descendant( Subtree other ) { for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { - Subtree child = self.ptr->children[i]; + Subtree child = ts_subtree_children(self)[i]; if (ts_subtree_total_bytes(child) > 0) break; if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) { return true; diff --git a/lib/src/parser.c b/lib/src/parser.c index b88f84e4..3984d002 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -60,8 +60,9 @@ struct TSParser { const TSLanguage *language; ReduceActionSet reduce_actions; Subtree finished_tree; - SubtreeHeapData scratch_tree_data; - MutableSubtree scratch_tree; + SubtreeArray trailing_extras; + SubtreeArray trailing_extras2; + SubtreeArray scratch_trees; TokenCache token_cache; ReusableNode reusable_node; void *external_scanner_payload; @@ -155,7 +156,7 @@ static bool ts_parser__breakdown_top_of_stack( Subtree parent = *array_front(&slice.subtrees); for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { - Subtree child = parent.ptr->children[j]; + Subtree child = ts_subtree_children(parent)[j]; pending = ts_subtree_child_count(child) > 0; if (ts_subtree_is_error(child)) { @@ -717,6 +718,25 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) } } +static bool ts_parser__select_children( + TSParser *self, + Subtree left, + const SubtreeArray *children +) { + 
array_assign(&self->scratch_trees, children); + MutableSubtree scratch_tree = ts_subtree_new_node( + ts_subtree_symbol(left), + &self->scratch_trees, + 0, + self->language + ); + return ts_parser__select_tree( + self, + left, + ts_subtree_from_mut(scratch_tree) + ); +} + static void ts_parser__shift( TSParser *self, StackVersion version, @@ -742,22 +762,6 @@ static void ts_parser__shift( } } -static bool ts_parser__replace_children( - TSParser *self, - MutableSubtree *tree, - SubtreeArray *children -) { - *self->scratch_tree.ptr = *tree->ptr; - self->scratch_tree.ptr->child_count = 0; - ts_subtree_set_children(self->scratch_tree, children->contents, children->size, self->language); - if (ts_parser__select_tree(self, ts_subtree_from_mut(*tree), ts_subtree_from_mut(self->scratch_tree))) { - *tree->ptr = *self->scratch_tree.ptr; - return true; - } else { - return false; - } -} - static StackVersion ts_parser__reduce( TSParser *self, StackVersion version, @@ -802,11 +806,9 @@ static StackVersion ts_parser__reduce( // node. They will be re-pushed onto the stack after the parent node is // created and pushed. 
SubtreeArray children = slice.subtrees; - while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) { - children.size--; - } + ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras); - MutableSubtree parent = ts_subtree_new_node(&self->tree_pool, + MutableSubtree parent = ts_subtree_new_node( symbol, &children, production_id, self->language ); @@ -820,14 +822,21 @@ static StackVersion ts_parser__reduce( i++; SubtreeArray children = next_slice.subtrees; - while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) { - children.size--; - } + ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras2); - if (ts_parser__replace_children(self, &parent, &children)) { - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - slice = next_slice; + if (ts_parser__select_children( + self, + ts_subtree_from_mut(parent), + &children + )) { + ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); + ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); + array_swap(&self->trailing_extras, &self->trailing_extras2); + parent = ts_subtree_new_node( + symbol, &children, production_id, self->language + ); } else { + array_clear(&self->trailing_extras2); ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); } } @@ -851,8 +860,8 @@ static StackVersion ts_parser__reduce( // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. 
ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); - for (uint32_t j = parent.ptr->child_count; j < slice.subtrees.size; j++) { - ts_stack_push(self->stack, slice_version, slice.subtrees.contents[j], false, next_state); + for (uint32_t j = 0; j < self->trailing_extras.size; j++) { + ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); } for (StackVersion j = 0; j < slice_version; j++) { @@ -884,22 +893,22 @@ static void ts_parser__accept( Subtree root = NULL_SUBTREE; for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { - Subtree child = trees.contents[j]; - if (!ts_subtree_extra(child)) { - assert(!child.data.is_inline); - uint32_t child_count = ts_subtree_child_count(child); + Subtree tree = trees.contents[j]; + if (!ts_subtree_extra(tree)) { + assert(!tree.data.is_inline); + uint32_t child_count = ts_subtree_child_count(tree); + const Subtree *children = ts_subtree_children(tree); for (uint32_t k = 0; k < child_count; k++) { - ts_subtree_retain(child.ptr->children[k]); + ts_subtree_retain(children[k]); } - array_splice(&trees, j, 1, child_count, child.ptr->children); + array_splice(&trees, j, 1, child_count, children); root = ts_subtree_from_mut(ts_subtree_new_node( - &self->tree_pool, - ts_subtree_symbol(child), + ts_subtree_symbol(tree), &trees, - child.ptr->production_id, + tree.ptr->production_id, self->language )); - ts_subtree_release(&self->tree_pool, child); + ts_subtree_release(&self->tree_pool, tree); break; } } @@ -1125,7 +1134,7 @@ static bool ts_parser__recover_to_state( Subtree error_tree = error_trees.contents[0]; uint32_t error_child_count = ts_subtree_child_count(error_tree); if (error_child_count > 0) { - array_splice(&slice.subtrees, 0, 0, error_child_count, error_tree.ptr->children); + array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree)); for (unsigned j = 0; j < error_child_count; j++) { 
ts_subtree_retain(slice.subtrees.contents[j]); } @@ -1133,22 +1142,21 @@ static bool ts_parser__recover_to_state( ts_subtree_array_delete(&self->tree_pool, &error_trees); } - SubtreeArray trailing_extras = ts_subtree_array_remove_trailing_extras(&slice.subtrees); + ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); if (slice.subtrees.size > 0) { - Subtree error = ts_subtree_new_error_node(&self->tree_pool, &slice.subtrees, true, self->language); + Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); ts_stack_push(self->stack, slice.version, error, false, goal_state); } else { array_delete(&slice.subtrees); } - for (unsigned j = 0; j < trailing_extras.size; j++) { - Subtree tree = trailing_extras.contents[j]; + for (unsigned j = 0; j < self->trailing_extras.size; j++) { + Subtree tree = self->trailing_extras.contents[j]; ts_stack_push(self->stack, slice.version, tree, false, goal_state); } previous_version = slice.version; - array_delete(&trailing_extras); } return previous_version != STACK_VERSION_NONE; @@ -1245,7 +1253,7 @@ static void ts_parser__recover( if (ts_subtree_is_eof(lookahead)) { LOG("recover_eof"); SubtreeArray children = array_new(); - Subtree parent = ts_subtree_new_error_node(&self->tree_pool, &children, false, self->language); + Subtree parent = ts_subtree_new_error_node(&children, false, self->language); ts_stack_push(self->stack, version, parent, false, 1); ts_parser__accept(self, version, lookahead); return; @@ -1278,7 +1286,6 @@ static void ts_parser__recover( array_reserve(&children, 1); array_push(&children, lookahead); MutableSubtree error_repeat = ts_subtree_new_node( - &self->tree_pool, ts_builtin_sym_error_repeat, &children, 0, @@ -1307,7 +1314,6 @@ static void ts_parser__recover( ts_stack_renumber_version(self->stack, pop.contents[0].version, version); array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat)); error_repeat = ts_subtree_new_node( - 
&self->tree_pool, ts_builtin_sym_error_repeat, &pop.contents[0].subtrees, 0, @@ -1666,7 +1672,6 @@ TSParser *ts_parser_new(void) { self->end_clock = clock_null(); self->operation_count = 0; self->old_tree = NULL_SUBTREE; - self->scratch_tree.ptr = &self->scratch_tree_data; self->included_range_differences = (TSRangeArray) array_new(); self->included_range_difference_index = 0; ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); @@ -1692,6 +1697,9 @@ void ts_parser_delete(TSParser *self) { ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); ts_subtree_pool_delete(&self->tree_pool); reusable_node_delete(&self->reusable_node); + array_delete(&self->trailing_extras); + array_delete(&self->trailing_extras2); + array_delete(&self->scratch_trees); ts_free(self); } diff --git a/lib/src/reusable_node.h b/lib/src/reusable_node.h index e5ccaa2a..63fe3c1a 100644 --- a/lib/src/reusable_node.h +++ b/lib/src/reusable_node.h @@ -53,7 +53,7 @@ static inline void reusable_node_advance(ReusableNode *self) { } while (ts_subtree_child_count(tree) <= next_index); array_push(&self->stack, ((StackEntry) { - .tree = tree.ptr->children[next_index], + .tree = ts_subtree_children(tree)[next_index], .child_index = next_index, .byte_offset = byte_offset, })); @@ -63,7 +63,7 @@ static inline bool reusable_node_descend(ReusableNode *self) { StackEntry last_entry = *array_back(&self->stack); if (ts_subtree_child_count(last_entry.tree) > 0) { array_push(&self->stack, ((StackEntry) { - .tree = last_entry.tree.ptr->children[0], + .tree = ts_subtree_children(last_entry.tree)[0], .child_index = 0, .byte_offset = last_entry.byte_offset, })); diff --git a/lib/src/stack.c b/lib/src/stack.c index 6a8d897c..cc728b05 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -288,7 +288,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, bool include_subtrees = false; if (goal_subtree_count >= 0) { include_subtrees = true; - array_reserve(&iterator.subtrees, 
goal_subtree_count); + array_reserve(&iterator.subtrees, ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); } array_push(&self->iterators, iterator); @@ -304,8 +304,9 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, if (should_pop) { SubtreeArray subtrees = iterator->subtrees; - if (!should_stop) + if (!should_stop) { ts_subtree_array_copy(subtrees, &subtrees); + } ts_subtree_array_reverse(&subtrees); ts_stack__add_slice( self, diff --git a/lib/src/subtree.c b/lib/src/subtree.c index a72d2ec2..4d3986ec 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -80,26 +80,33 @@ void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { } } -void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { +void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { for (uint32_t i = 0; i < self->size; i++) { ts_subtree_release(pool, self->contents[i]); } + array_clear(self); +} + +void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { + ts_subtree_array_clear(pool, self); array_delete(self); } -SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) { - SubtreeArray result = array_new(); - - uint32_t i = self->size - 1; - for (; i + 1 > 0; i--) { - Subtree child = self->contents[i]; - if (!ts_subtree_extra(child)) break; - array_push(&result, child); +void ts_subtree_array_remove_trailing_extras( + SubtreeArray *self, + SubtreeArray *destination +) { + array_clear(destination); + while (self->size > 0) { + Subtree last = self->contents[self->size - 1]; + if (ts_subtree_extra(last)) { + self->size--; + array_push(destination, last); + } else { + break; + } } - - self->size = i + 1; - ts_subtree_array_reverse(&result); - return result; + ts_subtree_array_reverse(destination); } void ts_subtree_array_reverse(SubtreeArray *self) { @@ -247,28 +254,49 @@ Subtree ts_subtree_new_error( return result; } -MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { - if 
(self.data.is_inline) return (MutableSubtree) {self.data}; - if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); - - SubtreeHeapData *result = ts_subtree_pool_allocate(pool); - memcpy(result, self.ptr, sizeof(SubtreeHeapData)); - if (result->child_count > 0) { - result->children = ts_calloc(self.ptr->child_count, sizeof(Subtree)); - memcpy(result->children, self.ptr->children, result->child_count * sizeof(Subtree)); - for (uint32_t i = 0; i < result->child_count; i++) { - ts_subtree_retain(result->children[i]); +// Get the size needed to store a heap-allocated subtree with +// the given number of children. +// Clone a subtree. +// +// This will reuse the given allocated buffer if it is present. If the +// buffer is NULL, a new allocation will be created. +MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse) { + size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); + Subtree *children = ts_realloc(buffer_to_reuse, alloc_size); + memcpy(children, ts_subtree_children(self), alloc_size); + SubtreeHeapData *result = (SubtreeHeapData *)&children[self.ptr->child_count]; + if (self.ptr->child_count > 0) { + for (uint32_t i = 0; i < self.ptr->child_count; i++) { + ts_subtree_retain(children[i]); } - } else if (result->has_external_tokens) { - result->external_scanner_state = ts_external_scanner_state_copy(&self.ptr->external_scanner_state); + } else if (self.ptr->has_external_tokens) { + result->external_scanner_state = ts_external_scanner_state_copy( + &self.ptr->external_scanner_state + ); } result->ref_count = 1; - ts_subtree_release(pool, self); return (MutableSubtree) {.ptr = result}; } -static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language, - MutableSubtreeArray *stack) { +// Get mutable version of a subtree. +// +// This takes ownership of the subtree. If the subtree has only one owner, +// this will directly convert it into a mutable version. 
Otherwise, it will +// perform a copy. +MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { + if (self.data.is_inline) return (MutableSubtree) {self.data}; + if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); + MutableSubtree result = ts_subtree_clone(self, NULL); + ts_subtree_release(pool, self); + return result; +} + +static void ts_subtree__compress( + MutableSubtree self, + unsigned count, + const TSLanguage *language, + MutableSubtreeArray *stack +) { unsigned initial_stack_size = stack->size; MutableSubtree tree = self; @@ -276,7 +304,7 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa for (unsigned i = 0; i < count; i++) { if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; - MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]); + MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); if ( child.data.is_inline || child.ptr->child_count < 2 || @@ -284,7 +312,7 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa child.ptr->symbol != symbol ) break; - MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]); + MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); if ( grandchild.data.is_inline || grandchild.ptr->child_count < 2 || @@ -292,20 +320,20 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa grandchild.ptr->symbol != symbol ) break; - tree.ptr->children[0] = ts_subtree_from_mut(grandchild); - child.ptr->children[0] = grandchild.ptr->children[grandchild.ptr->child_count - 1]; - grandchild.ptr->children[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); + ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); + ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; + ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = 
ts_subtree_from_mut(child); array_push(stack, tree); tree = grandchild; } while (stack->size > initial_stack_size) { tree = array_pop(stack); - MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]); - MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[child.ptr->child_count - 1]); - ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language); - ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language); - ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language); + MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); + MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]); + ts_subtree_summarize_children(grandchild, language); + ts_subtree_summarize_children(child, language); + ts_subtree_summarize_children(tree, language); } } @@ -320,8 +348,8 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu MutableSubtree tree = array_pop(&pool->tree_stack); if (tree.ptr->repeat_depth > 0) { - Subtree child1 = tree.ptr->children[0]; - Subtree child2 = tree.ptr->children[tree.ptr->child_count - 1]; + Subtree child1 = ts_subtree_children(tree)[0]; + Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); if (repeat_delta > 0) { unsigned n = repeat_delta; @@ -333,7 +361,7 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu } for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - Subtree child = tree.ptr->children[i]; + Subtree child = ts_subtree_children(tree)[i]; if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) { array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); } @@ -341,17 +369,13 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage 
*langu } } -void ts_subtree_set_children( - MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language +// Assign all of the node's properties that depend on its children. +void ts_subtree_summarize_children( + MutableSubtree self, + const TSLanguage *language ) { assert(!self.data.is_inline); - if (self.ptr->child_count > 0 && children != self.ptr->children) { - ts_free(self.ptr->children); - } - - self.ptr->child_count = child_count; - self.ptr->children = children; self.ptr->named_child_count = 0; self.ptr->visible_child_count = 0; self.ptr->error_cost = 0; @@ -364,8 +388,9 @@ void ts_subtree_set_children( const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); uint32_t lookahead_end_byte = 0; + const Subtree *children = ts_subtree_children(self); for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = self.ptr->children[i]; + Subtree child = children[i]; if (i == 0) { self.ptr->padding = ts_subtree_padding(child); @@ -384,6 +409,17 @@ void ts_subtree_set_children( self.ptr->error_cost += ts_subtree_error_cost(child); } + uint32_t grandchild_count = ts_subtree_child_count(child); + if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) { + if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { + if (ts_subtree_visible(child)) { + self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; + } else if (grandchild_count > 0) { + self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; + } + } + } + self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); self.ptr->node_count += ts_subtree_node_count(child); @@ -395,7 +431,7 @@ void ts_subtree_set_children( } else if (ts_subtree_visible(child)) { self.ptr->visible_child_count++; if (ts_subtree_named(child)) self.ptr->named_child_count++; - } else if (ts_subtree_child_count(child) > 0) { + } else if (grandchild_count > 
0) { self.ptr->visible_child_count += child.ptr->visible_child_count; self.ptr->named_child_count += child.ptr->named_child_count; } @@ -417,22 +453,11 @@ void ts_subtree_set_children( ERROR_COST_PER_RECOVERY + ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = self.ptr->children[i]; - uint32_t grandchild_count = ts_subtree_child_count(child); - if (ts_subtree_extra(child)) continue; - if (ts_subtree_is_error(child) && grandchild_count == 0) continue; - if (ts_subtree_visible(child)) { - self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; - } else if (grandchild_count > 0) { - self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; - } - } } if (self.ptr->child_count > 0) { - Subtree first_child = self.ptr->children[0]; - Subtree last_child = self.ptr->children[self.ptr->child_count - 1]; + Subtree first_child = children[0]; + Subtree last_child = children[self.ptr->child_count - 1]; self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child); @@ -455,15 +480,30 @@ void ts_subtree_set_children( } } -MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, - SubtreeArray *children, unsigned production_id, - const TSLanguage *language) { +// Create a new parent node with the given children. +// +// This takes ownership of the children array. +MutableSubtree ts_subtree_new_node( + TSSymbol symbol, + SubtreeArray *children, + unsigned production_id, + const TSLanguage *language +) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + + // Allocate the node's data at the end of the array of children. 
+ size_t new_byte_size = ts_subtree_alloc_size(children->size); + if (children->capacity * sizeof(Subtree) < new_byte_size) { + children->contents = ts_realloc(children->contents, new_byte_size); + children->capacity = new_byte_size / sizeof(Subtree); + } + SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; + *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, + .child_count = children->size, .visible = metadata.visible, .named = metadata.named, .has_changes = false, @@ -477,32 +517,45 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, }} }; MutableSubtree result = {.ptr = data}; - ts_subtree_set_children(result, children->contents, children->size, language); + ts_subtree_summarize_children(result, language); return result; } -Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children, - bool extra, const TSLanguage *language) { +// Create a new error node containing the given children. +// +// This node is treated as 'extra'. Its children are prevented from having +// any effect on the parse state. +Subtree ts_subtree_new_error_node( + SubtreeArray *children, + bool extra, + const TSLanguage *language +) { MutableSubtree result = ts_subtree_new_node( - pool, ts_builtin_sym_error, children, 0, language + ts_builtin_sym_error, children, 0, language ); result.ptr->extra = extra; return ts_subtree_from_mut(result); } -Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, - const TSLanguage *language) { +// Create a new 'missing leaf' node. +// +// This node is treated as 'extra'. Its children are prevented from having +// any effect on the parse state. 
+Subtree ts_subtree_new_missing_leaf( + SubtreePool *pool, + TSSymbol symbol, + Length padding, + const TSLanguage *language +) { Subtree result = ts_subtree_new_leaf( pool, symbol, padding, length_zero(), 0, 0, false, false, language ); - if (result.data.is_inline) { result.data.is_missing = true; } else { ((SubtreeHeapData *)result.ptr)->is_missing = true; } - return result; } @@ -525,19 +578,22 @@ void ts_subtree_release(SubtreePool *pool, Subtree self) { while (pool->tree_stack.size > 0) { MutableSubtree tree = array_pop(&pool->tree_stack); if (tree.ptr->child_count > 0) { + Subtree *children = ts_subtree_children(tree); for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - Subtree child = tree.ptr->children[i]; + Subtree child = children[i]; if (child.data.is_inline) continue; assert(child.ptr->ref_count > 0); if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) { array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); } } - ts_free(tree.ptr->children); - } else if (tree.ptr->has_external_tokens) { - ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); + ts_free(children); + } else { + if (tree.ptr->has_external_tokens) { + ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); + } + ts_subtree_pool_free(pool, tree.ptr); } - ts_subtree_pool_free(pool, tree.ptr); } } @@ -564,7 +620,7 @@ bool ts_subtree_eq(Subtree self, Subtree other) { if (self.ptr->named_child_count != other.ptr->named_child_count) return false; for (uint32_t i = 0; i < self.ptr->child_count; i++) { - if (!ts_subtree_eq(self.ptr->children[i], other.ptr->children[i])) { + if (!ts_subtree_eq(ts_subtree_children(self)[i], ts_subtree_children(other)[i])) { return false; } } @@ -578,8 +634,8 @@ int ts_subtree_compare(Subtree left, Subtree right) { if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1; if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1; for (uint32_t i = 0, n = 
ts_subtree_child_count(left); i < n; i++) { - Subtree left_child = left.ptr->children[i]; - Subtree right_child = right.ptr->children[i]; + Subtree left_child = ts_subtree_children(left)[i]; + Subtree right_child = ts_subtree_children(right)[i]; switch (ts_subtree_compare(left_child, right_child)) { case -1: return -1; case 1: return 1; @@ -695,7 +751,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool Length child_left, child_right = length_zero(); for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { - Subtree *child = &result.ptr->children[i]; + Subtree *child = &ts_subtree_children(*entry.tree)[i]; Length child_size = ts_subtree_total_size(*child); child_left = child_right; child_right = length_add(child_left, child_size); @@ -750,7 +806,7 @@ Subtree ts_subtree_last_external_token(Subtree tree) { if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE; while (tree.ptr->child_count > 0) { for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) { - Subtree child = tree.ptr->children[i]; + Subtree child = ts_subtree_children(tree)[i]; if (ts_subtree_has_external_tokens(child)) { tree = child; break; @@ -853,7 +909,7 @@ static size_t ts_subtree__write_to_string( uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = self.ptr->children[i]; + Subtree child = ts_subtree_children(self)[i]; if (ts_subtree_extra(child)) { cursor += ts_subtree__write_to_string( child, *writer, limit, @@ -950,7 +1006,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, language->max_alias_sequence_length * ts_subtree_production_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { - const Subtree *child = &self->ptr->children[i]; + const Subtree *child = &ts_subtree_children(*self)[i]; TSSymbol alias_symbol = 0; if (!ts_subtree_extra(*child) && child_info_offset) { alias_symbol = 
language->alias_sequences[child_info_offset]; diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 18c48dcb..7df8b09a 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -68,7 +68,6 @@ typedef struct { union { // Non-terminal subtrees (`child_count > 0`) struct { - Subtree *children; uint32_t visible_child_count; uint32_t named_child_count; uint32_t node_count; @@ -111,8 +110,9 @@ void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsign const char *ts_external_scanner_state_data(const ExternalScannerState *); void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); +void ts_subtree_array_clear(SubtreePool *, SubtreeArray *); void ts_subtree_array_delete(SubtreePool *, SubtreeArray *); -SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *); +void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *); void ts_subtree_array_reverse(SubtreeArray *); SubtreePool ts_subtree_pool_new(uint32_t capacity); @@ -125,8 +125,8 @@ Subtree ts_subtree_new_leaf( Subtree ts_subtree_new_error( SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage * ); -MutableSubtree ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *); -Subtree ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, bool, const TSLanguage *); +MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *); +Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *); MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); void ts_subtree_retain(Subtree); @@ -134,13 +134,15 @@ void ts_subtree_release(SubtreePool *, Subtree); bool ts_subtree_eq(Subtree, Subtree); int ts_subtree_compare(Subtree, Subtree); void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); -void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const 
TSLanguage *); +void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *); +void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all); void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); Subtree ts_subtree_last_external_token(Subtree); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); +MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse); #define SUBTREE_GET(self, name) (self.data.is_inline ? self.data.name : self.ptr->name) @@ -156,6 +158,10 @@ static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE #undef SUBTREE_GET +static inline size_t ts_subtree_alloc_size(uint32_t child_count) { + return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); +} + static inline void ts_subtree_set_extra(MutableSubtree *self) { if (self->data.is_inline) { self->data.extra = true; @@ -202,6 +208,9 @@ static inline uint32_t ts_subtree_total_bytes(Subtree self) { return ts_subtree_total_size(self).bytes; } +#define ts_subtree_children(self) \ + ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) + static inline uint32_t ts_subtree_child_count(Subtree self) { return self.data.is_inline ? 
0 : self.ptr->child_count; } diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index f109524e..8af44a34 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -38,7 +38,7 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, TreeCursorEntry *result, bool *visible) { if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; - const Subtree *child = &self->parent.ptr->children[self->child_index]; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; *result = (TreeCursorEntry) { .subtree = child, .position = self->position, @@ -56,7 +56,7 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, self->child_index++; if (self->child_index < self->parent.ptr->child_count) { - Subtree next_child = self->parent.ptr->children[self->child_index]; + Subtree next_child = ts_subtree_children(self->parent)[self->child_index]; self->position = length_add(self->position, ts_subtree_padding(next_child)); } @@ -306,7 +306,7 @@ void ts_tree_cursor_current_status( unsigned structural_child_index = entry->structural_child_index; if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { - Subtree sibling = parent_entry->subtree->ptr->children[j]; + Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( self->tree->language, subtree_symbol(sibling, structural_child_index) From 908b102786f4d5a6c40e63233b59ca5be3e705ba Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Oct 2020 11:58:27 -0700 Subject: [PATCH 2/5] Add more doc comments in the C lib --- lib/src/parser.c | 16 +++++++++++++--- lib/src/subtree.c | 11 +++-------- lib/src/subtree.h | 45 +++++++++++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 23 deletions(-) diff --git a/lib/src/parser.c b/lib/src/parser.c index 
3984d002..0c711b0c 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -673,6 +673,10 @@ static Subtree ts_parser__reuse_node( return NULL_SUBTREE; } +// Determine if a given tree should be replaced by an alternative tree. +// +// The decision is based on the trees' error costs (if any), their dynamic precedence, +// and finally, as a default, by a recursive comparison of the trees' symbols. static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) { if (!left.ptr) return true; if (!right.ptr) return false; @@ -718,18 +722,26 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) } } +// Determine if a given tree's children should be replaced by an alternative +// array of children. static bool ts_parser__select_children( TSParser *self, Subtree left, const SubtreeArray *children ) { array_assign(&self->scratch_trees, children); + + // Create a temporary subtree using the scratch trees array. This node does + // not perform any allocation except for possibly growing the array to make + // room for its own heap data. The scratch tree is never explicitly released, + // so the same 'scratch trees' array can be reused again later. MutableSubtree scratch_tree = ts_subtree_new_node( ts_subtree_symbol(left), &self->scratch_trees, 0, self->language ); + return ts_parser__select_tree( self, left, @@ -841,9 +853,6 @@ static StackVersion ts_parser__reduce( } } - parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->production_id = production_id; - TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); if (end_of_non_terminal_extra && next_state == state) { @@ -856,6 +865,7 @@ static StackVersion ts_parser__reduce( } else { parent.ptr->parse_state = state; } + parent.ptr->dynamic_precedence += dynamic_precedence; // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. 
diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 4d3986ec..fc1db617 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -254,15 +254,10 @@ Subtree ts_subtree_new_error( return result; } -// Get the size needed to store a heap-allocated subtree with -// the given number o children. // Clone a subtree. -// -// This will reuse the given allocated buffer if it is present. If the -// buffer is NULL, a new allocation will be created. -MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse) { +MutableSubtree ts_subtree_clone(Subtree self) { size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - Subtree *children = ts_realloc(buffer_to_reuse, alloc_size); + Subtree *children = ts_malloc(alloc_size); memcpy(children, ts_subtree_children(self), alloc_size); SubtreeHeapData *result = (SubtreeHeapData *)&children[self.ptr->child_count]; if (self.ptr->child_count > 0) { @@ -286,7 +281,7 @@ MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse) { MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { if (self.data.is_inline) return (MutableSubtree) {self.data}; if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); - MutableSubtree result = ts_subtree_clone(self, NULL); + MutableSubtree result = ts_subtree_clone(self); ts_subtree_release(pool, self); return result; } diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 7df8b09a..b020deb6 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -14,12 +14,19 @@ extern "C" { #include "tree_sitter/api.h" #include "tree_sitter/parser.h" -static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX; +#define TS_TREE_STATE_NONE USHRT_MAX #define NULL_SUBTREE ((Subtree) {.ptr = NULL}) -typedef union Subtree Subtree; -typedef union MutableSubtree MutableSubtree; - +// The serialized state of an external scanner. 
+// +// Every time an external token subtree is created after a call to an +// external scanner, the scanner's `serialize` function is called to +// retrieve a serialized copy of its state. The bytes are then copied +// onto the subtree itself so that the scanner's state can later be +// restored using its `deserialize` function. +// +// Small byte arrays are stored inline, and long ones are allocated +// separately on the heap. typedef struct { union { char *long_data; @@ -28,6 +35,10 @@ typedef struct { uint32_t length; } ExternalScannerState; +// A compact representation of a subtree. +// +// This representation is used for small leaf nodes that are not +// errors, and were not created by an external scanner. typedef struct { bool is_inline : 1; bool visible : 1; @@ -45,6 +56,11 @@ typedef struct { uint16_t parse_state; } SubtreeInlineData; +// A heap-allocated representation of a subtree. +// +// This representation is used for parent nodes, external tokens, +// errors, and other leaf nodes whose data is too large to fit into +// the inline representation. typedef struct { volatile uint32_t ref_count; Length padding; @@ -88,15 +104,17 @@ typedef struct { }; } SubtreeHeapData; -union Subtree { +// The fundamental building block of a syntax tree. +typedef union { SubtreeInlineData data; const SubtreeHeapData *ptr; -}; +} Subtree; -union MutableSubtree { +// Like Subtree, but mutable. +typedef union { SubtreeInlineData data; SubtreeHeapData *ptr; -}; +} MutableSubtree; typedef Array(Subtree) SubtreeArray; typedef Array(MutableSubtree) MutableSubtreeArray; @@ -142,7 +160,6 @@ char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all); void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); Subtree ts_subtree_last_external_token(Subtree); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); -MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse); #define SUBTREE_GET(self, name) (self.data.is_inline ? 
self.data.name : self.ptr->name) @@ -158,10 +175,17 @@ static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE #undef SUBTREE_GET +// Get the size needed to store a heap-allocated subtree with the given +// number of children. static inline size_t ts_subtree_alloc_size(uint32_t child_count) { return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); } +// Get a subtree's children, which are allocated immediately before the +// tree's own heap data. +#define ts_subtree_children(self) \ + ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) + static inline void ts_subtree_set_extra(MutableSubtree *self) { if (self->data.is_inline) { self->data.extra = true; @@ -208,9 +232,6 @@ static inline uint32_t ts_subtree_total_bytes(Subtree self) { return ts_subtree_total_size(self).bytes; } -#define ts_subtree_children(self) \ - ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) - static inline uint32_t ts_subtree_child_count(Subtree self) { return self.data.is_inline ? 
0 : self.ptr->child_count; } From 533aaa462b301187aeff4fb7ad096e68c73c6545 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Oct 2020 13:20:57 -0700 Subject: [PATCH 3/5] Add heap-profiling script --- script/heap-profile | 34 ++++++++++++++++++++++++++++++++++ test/profile/heap.cc | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100755 script/heap-profile create mode 100644 test/profile/heap.cc diff --git a/script/heap-profile b/script/heap-profile new file mode 100755 index 00000000..012d86c7 --- /dev/null +++ b/script/heap-profile @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# +# Usage: +# script/heap-profile +# +# Parse an example source file and record memory usage +# +# Dependencies: +# * `pprof` executable: https://github.com/google/pprof +# * `gperftools` package: https://github.com/gperftools/gperftools + +set -e + +GRAMMARS_DIR=$PWD/test/fixtures/grammars + +# Build the library +make + +# Build the heap-profiling harness +clang++ \ + -I lib/include \ + -I $GRAMMARS_DIR \ + -D GRAMMARS_DIR=\"${GRAMMARS_DIR}/\" \ + -l tcmalloc \ + ./libtree-sitter.a \ + test/profile/heap.cc \ + -o target/heap-profile + +# Run the harness with heap profiling enabled. +export HEAPPROFILE=$PWD/profile +target/heap-profile $@ + +# Extract statistics using pprof. 
+pprof -top -cum profile.0001.heap diff --git a/test/profile/heap.cc b/test/profile/heap.cc new file mode 100644 index 00000000..6c0027e8 --- /dev/null +++ b/test/profile/heap.cc @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +extern "C" { +#include "javascript/src/parser.c" +#include "javascript/src/scanner.c" +} + +#define LANGUAGE tree_sitter_javascript +#define SOURCE_PATH "javascript/examples/jquery.js" + +int main() { + TSParser *parser = ts_parser_new(); + if (!ts_parser_set_language(parser, LANGUAGE())) { + fprintf(stderr, "Invalid language\n"); + exit(1); + } + + const char *source_path = GRAMMARS_DIR SOURCE_PATH; + + printf("Parsing %s\n", source_path); + + std::ifstream source_file(source_path); + if (!source_file.good()) { + fprintf(stderr, "Invalid source path %s\n", source_path); + exit(1); + } + + std::string source_code( + (std::istreambuf_iterator(source_file)), + std::istreambuf_iterator() + ); + + TSTree *tree = ts_parser_parse_string( + parser, + NULL, + source_code.c_str(), + source_code.size() + ); +} From 9774f760c37e39aa54a58f861c606c150348559c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Oct 2020 18:44:58 -0700 Subject: [PATCH 4/5] Disable clang static analyzer on CI All of its output seems to be false positives. 
--- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ab9a6866..7205ae03 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,6 @@ script: - (eval "$WASM_ENV" && script/generate-fixtures-wasm) # Run the tests - - export TREE_SITTER_STATIC_ANALYSIS=1 - script/test - script/test-wasm - script/benchmark From 2465207fee94bf07b0b5e950c2958d2b558b21dd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 25 Oct 2020 09:13:12 -0700 Subject: [PATCH 5/5] Suppress false non-null error in subtree_clone --- lib/src/subtree.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/src/subtree.c b/lib/src/subtree.c index fc1db617..e90dc9d7 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -257,12 +257,13 @@ Subtree ts_subtree_new_error( // Clone a subtree. MutableSubtree ts_subtree_clone(Subtree self) { size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - Subtree *children = ts_malloc(alloc_size); - memcpy(children, ts_subtree_children(self), alloc_size); - SubtreeHeapData *result = (SubtreeHeapData *)&children[self.ptr->child_count]; + Subtree *new_children = ts_malloc(alloc_size); + Subtree *old_children = ts_subtree_children(self); + memcpy(new_children, old_children, alloc_size); + SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count]; if (self.ptr->child_count > 0) { for (uint32_t i = 0; i < self.ptr->child_count; i++) { - ts_subtree_retain(children[i]); + ts_subtree_retain(new_children[i]); } } else if (self.ptr->has_external_tokens) { result->external_scanner_state = ts_external_scanner_state_copy(