From c7306722dd823c6a13004647f3ff648cf7aeeee1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Sep 2018 16:13:49 -0700 Subject: [PATCH] Use smaller allocations for normal leaf nodes --- src/runtime/parser.c | 4 +- src/runtime/subtree.c | 157 +++++++++++++++++++++-------------- src/runtime/subtree.h | 8 +- test/runtime/stack_test.cc | 2 +- test/runtime/subtree_test.cc | 43 +++++----- 5 files changed, 127 insertions(+), 87 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 46993781..0af7b8f6 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -422,7 +422,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta } } - result = ts_subtree_new_leaf(&self->tree_pool, symbol, padding, size, self->language); + result = ts_subtree_new_leaf(&self->tree_pool, symbol, padding, size, !found_external_token, self->language); result->is_keyword = is_keyword; if (found_external_token) { @@ -937,7 +937,7 @@ static void ts_parser__halt_parse(TSParser *self) { Subtree *root_error = ts_subtree_new_error_node(&self->tree_pool, &children, self->language); ts_stack_push(self->stack, 0, root_error, false, 0); - Subtree *eof = ts_subtree_new_leaf(&self->tree_pool, ts_builtin_sym_end, length_zero(), length_zero(), self->language); + Subtree *eof = ts_subtree_new_leaf(&self->tree_pool, ts_builtin_sym_end, length_zero(), length_zero(), true, self->language); ts_parser__accept(self, 0, eof); } diff --git a/src/runtime/subtree.c b/src/runtime/subtree.c index ff9b37ef..08d4aca7 100644 --- a/src/runtime/subtree.c +++ b/src/runtime/subtree.c @@ -10,6 +10,7 @@ #include "runtime/length.h" #include "runtime/language.h" #include "runtime/error_costs.h" +#include <stddef.h> typedef struct { Length start; @@ -19,7 +20,9 @@ typedef struct { TSStateId TS_TREE_STATE_NONE = USHRT_MAX; -static const uint32_t MAX_TREE_POOL_SIZE = 1024; +static const uint32_t MAX_TREE_POOL_SIZE = 0; +static const uint32_t SMALL_TREE_SIZE = offsetof(Subtree, 
children); +static const uint32_t LARGE_TREE_SIZE = sizeof(Subtree); static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}}; @@ -114,8 +117,9 @@ void ts_subtree_array_reverse(SubtreeArray *self) { // SubtreePool SubtreePool ts_subtree_pool_new(uint32_t capacity) { - SubtreePool self = {array_new(), array_new()}; + SubtreePool self = {array_new(), array_new(), array_new()}; array_reserve(&self.free_trees, capacity); + array_reserve(&self.free_small_trees, capacity); return self; } @@ -126,20 +130,31 @@ void ts_subtree_pool_delete(SubtreePool *self) { } array_delete(&self->free_trees); } + if (self->free_small_trees.contents) { + for (unsigned i = 0; i < self->free_small_trees.size; i++) { + ts_free(self->free_small_trees.contents[i]); + } + array_delete(&self->free_small_trees); + } if (self->tree_stack.contents) array_delete(&self->tree_stack); } -Subtree *ts_subtree_pool_allocate(SubtreePool *self) { - if (self->free_trees.size > 0) { - return array_pop(&self->free_trees); +Subtree *ts_subtree_pool_allocate(SubtreePool *self, bool is_small) { + MutableSubtreeArray *array = is_small ? &self->free_small_trees : &self->free_trees; + Subtree *result; + if (array->size > 0) { + result = array_pop(array); } else { - return ts_malloc(sizeof(Subtree)); + result = ts_malloc(is_small ? SMALL_TREE_SIZE : LARGE_TREE_SIZE); } + result->is_small = is_small; + return result; } void ts_subtree_pool_free(SubtreePool *self, Subtree *tree) { - if (self->free_trees.capacity > 0 && self->free_trees.size < MAX_TREE_POOL_SIZE) { - array_push(&self->free_trees, tree); + MutableSubtreeArray *array = tree->is_small ? 
&self->free_small_trees : &self->free_trees; + if (array->capacity > 0 && array->size < MAX_TREE_POOL_SIZE) { + array_push(array, tree); } else { ts_free(tree); } @@ -148,35 +163,37 @@ void ts_subtree_pool_free(SubtreePool *self, Subtree *tree) { // Subtree Subtree *ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size, - const TSLanguage *language) { + bool is_small, const TSLanguage *language) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - Subtree *result = ts_subtree_pool_allocate(pool); - *result = (Subtree){ - .ref_count = 1, - .symbol = symbol, - .size = size, - .visible_child_count = 0, - .named_child_count = 0, - .alias_sequence_id = 0, - .padding = padding, - .visible = metadata.visible, - .named = metadata.named, - .node_count = 1, - .has_changes = false, - .is_keyword = false, - .first_leaf = { - .symbol = symbol, - .lex_mode = {0, 0}, - }, - .has_external_tokens = false, - }; - if (symbol == ts_builtin_sym_end) result->extra = true; + Subtree *result = ts_subtree_pool_allocate(pool, is_small); + result->padding = padding; + result->size = size; + result->ref_count = 1; + result->bytes_scanned = 0; + result->error_cost = 0; + result->node_count = 0; + result->dynamic_precedence = 0; + result->child_count = 0; + result->is_small = is_small; + result->visible = metadata.visible; + result->named = metadata.named; + result->extra = symbol == ts_builtin_sym_end; + result->fragile_left = false; + result->fragile_right = false; + result->has_changes = false; + result->has_external_tokens = false; + result->is_missing = false; + result->is_keyword = false; + result->symbol = symbol; + result->parse_state = 0; + result->first_leaf.symbol = symbol; + result->first_leaf.lex_mode = (TSLexMode) {0, 0}; return result; } Subtree *ts_subtree_new_error(SubtreePool *pool, Length size, Length padding, int32_t lookahead_char, const TSLanguage *language) { - Subtree *result = ts_subtree_new_leaf(pool, 
ts_builtin_sym_error, padding, size, language); + Subtree *result = ts_subtree_new_leaf(pool, ts_builtin_sym_error, padding, size, false, language); result->fragile_left = true; result->fragile_right = true; result->lookahead_char = lookahead_char; @@ -184,8 +201,8 @@ Subtree *ts_subtree_new_error(SubtreePool *pool, Length size, Length padding, } Subtree *ts_subtree_new_copy(SubtreePool *pool, const Subtree *self) { - Subtree *result = ts_subtree_pool_allocate(pool); - *result = *self; + Subtree *result = ts_subtree_pool_allocate(pool, self->is_small); + memcpy(result, self, self->is_small ? SMALL_TREE_SIZE : LARGE_TREE_SIZE); if (result->child_count > 0) { result->children = ts_calloc(self->child_count, sizeof(const Subtree *)); memcpy(result->children, self->children, self->child_count * sizeof(const Subtree *)); @@ -260,12 +277,14 @@ void ts_subtree_balance(const Subtree *self, SubtreePool *pool, const TSLanguage Subtree *tree = array_pop(&pool->tree_stack); assert(tree); - if (tree->repeat_depth > 0 && - tree->children[0]->repeat_depth > tree->children[1]->repeat_depth) { - unsigned n = ( - tree->children[0]->repeat_depth - - tree->children[1]->repeat_depth - ); + if ( + tree->child_count > 0 && + tree->repeat_depth > 0 && + tree->children[0]->child_count > 0 && + tree->children[1]->child_count > 0 && + tree->children[0]->repeat_depth > tree->children[1]->repeat_depth + ) { + unsigned n = tree->children[0]->repeat_depth - tree->children[1]->repeat_depth; for (unsigned i = n / 2; i > 0; i /= 2) { ts_subtree__compress(tree, i, language, &pool->tree_stack); n -= i; @@ -286,6 +305,8 @@ void ts_subtree_set_children(Subtree *self, const Subtree **children, uint32_t c ts_free(self->children); } + assert(!self->is_small); + self->child_count = child_count; self->children = children; self->named_child_count = 0; @@ -380,8 +401,18 @@ void ts_subtree_set_children(Subtree *self, const Subtree **children, uint32_t c Subtree *ts_subtree_new_node(SubtreePool *pool, TSSymbol 
symbol, SubtreeArray *children, unsigned alias_sequence_id, const TSLanguage *language) { - Subtree *result = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language); - result->alias_sequence_id = alias_sequence_id; + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + Subtree *result = ts_subtree_pool_allocate(pool, false); + *result = (Subtree){ + .ref_count = 1, + .symbol = symbol, + .alias_sequence_id = alias_sequence_id, + .visible = metadata.visible, + .named = metadata.named, + .has_changes = false, + .is_keyword = false, + .node_count = 0, + }; if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { result->fragile_left = true; result->fragile_right = true; @@ -397,7 +428,7 @@ Subtree *ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children, Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, const TSLanguage *language) { - Subtree *result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), language); + Subtree *result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), true, language); result->is_missing = true; result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; return result; @@ -655,25 +686,27 @@ static size_t ts_subtree__write_to_string(const Subtree *self, char *string, siz } } - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id); - uint32_t structural_child_index = 0; - for (uint32_t i = 0; i < self->child_count; i++) { - const Subtree *child = self->children[i]; - if (child->extra) { - cursor += ts_subtree__write_to_string( - child, *writer, limit, - language, false, include_all, - 0, false - ); - } else { - TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; - cursor += ts_subtree__write_to_string( - child, *writer, limit, - language, false, include_all, - alias_symbol, - alias_symbol ? 
ts_language_symbol_metadata(language, alias_symbol).named : false - ); - structural_child_index++; + if (self->child_count) { + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id); + uint32_t structural_child_index = 0; + for (uint32_t i = 0; i < self->child_count; i++) { + const Subtree *child = self->children[i]; + if (child->extra) { + cursor += ts_subtree__write_to_string( + child, *writer, limit, + language, false, include_all, + 0, false + ); + } else { + TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; + cursor += ts_subtree__write_to_string( + child, *writer, limit, + language, false, include_all, + alias_symbol, + alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false + ); + structural_child_index++; + } } } @@ -715,7 +748,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t byte_offset, byte_offset, byte_offset + ts_subtree_total_bytes(self), self->parse_state, self->error_cost, - self->repeat_depth, + self->child_count > 0 ? 
self->repeat_depth : 0, self->bytes_scanned ); diff --git a/src/runtime/subtree.h b/src/runtime/subtree.h index bd8dd0c5..1a766f4f 100644 --- a/src/runtime/subtree.h +++ b/src/runtime/subtree.h @@ -31,10 +31,10 @@ struct Subtree { uint32_t bytes_scanned; uint32_t error_cost; uint32_t node_count; - uint32_t repeat_depth; int32_t dynamic_precedence; uint32_t child_count; + bool is_small : 1; bool visible : 1; bool named : 1; bool extra : 1; @@ -57,6 +57,7 @@ struct Subtree { const Subtree **children; uint32_t visible_child_count; uint32_t named_child_count; + uint32_t repeat_depth; uint16_t alias_sequence_id; }; @@ -73,6 +74,7 @@ typedef Array(Subtree *) MutableSubtreeArray; typedef struct { MutableSubtreeArray free_trees; + MutableSubtreeArray free_small_trees; MutableSubtreeArray tree_stack; } SubtreePool; @@ -86,10 +88,10 @@ void ts_subtree_array_reverse(SubtreeArray *); SubtreePool ts_subtree_pool_new(uint32_t capacity); void ts_subtree_pool_delete(SubtreePool *); -Subtree *ts_subtree_pool_allocate(SubtreePool *); +Subtree *ts_subtree_pool_allocate(SubtreePool *, bool); void ts_subtree_pool_free(SubtreePool *, Subtree *); -Subtree *ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, const TSLanguage *); +Subtree *ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, bool, const TSLanguage *); Subtree *ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *); Subtree *ts_subtree_new_copy(SubtreePool *, const Subtree *); Subtree *ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, const TSLanguage *); diff --git a/test/runtime/stack_test.cc b/test/runtime/stack_test.cc index 68104935..9cadf347 100644 --- a/test/runtime/stack_test.cc +++ b/test/runtime/stack_test.cc @@ -88,7 +88,7 @@ describe("Stack", [&]() { dummy_language.symbol_metadata = symbol_metadata; for (size_t i = 0; i < subtree_count; i++) { - subtrees[i] = ts_subtree_new_leaf(&pool, i + 1, length_zero(), tree_len, &dummy_language); + subtrees[i] = 
ts_subtree_new_leaf(&pool, i + 1, length_zero(), tree_len, false, &dummy_language); } }); diff --git a/test/runtime/subtree_test.cc b/test/runtime/subtree_test.cc index 7001980d..54cd88b6 100644 --- a/test/runtime/subtree_test.cc +++ b/test/runtime/subtree_test.cc @@ -50,7 +50,7 @@ describe("Subtree", []() { describe("make_leaf", [&]() { it("does not mark the tree as fragile", [&]() { - const Subtree *tree = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); + const Subtree *tree = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, false, &language); AssertThat(tree->fragile_left, IsFalse()); AssertThat(tree->fragile_right, IsFalse()); @@ -79,8 +79,8 @@ describe("Subtree", []() { const Subtree *tree1, *tree2, *parent1; before_each([&]() { - tree1 = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language); - tree2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language); + tree1 = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, false, &language); + tree2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, false, &language); ts_subtree_retain(tree1); ts_subtree_retain(tree2); @@ -186,9 +186,9 @@ describe("Subtree", []() { before_each([&]() { tree = ts_subtree_new_node(&pool, symbol1, tree_array({ - ts_subtree_new_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, &language), - ts_subtree_new_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, &language), - ts_subtree_new_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, &language), + ts_subtree_new_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, false, &language), + ts_subtree_new_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, false, &language), + ts_subtree_new_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, false, &language), }), 0, &language); AssertThat(tree->padding, Equals<Length>({2, {0, 2}})); @@ -421,7 +421,7 @@ describe("Subtree", []() { const Subtree *leaf; before_each([&]() { - leaf = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, 
{5, {0, 4}}, &language); + leaf = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, false, &language); }); after_each([&]() { @@ -429,7 +429,7 @@ describe("Subtree", []() { }); it("returns true for identical trees", [&]() { - const Subtree *leaf_copy = ts_subtree_new_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, &language); + const Subtree *leaf_copy = ts_subtree_new_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, false, &language); AssertThat(ts_subtree_eq(leaf, leaf_copy), IsTrue()); const Subtree *parent = ts_subtree_new_node(&pool, symbol2, tree_array({ @@ -459,6 +459,7 @@ describe("Subtree", []() { leaf->symbol + 1, leaf->padding, leaf->size, + false, &language ); @@ -468,7 +469,7 @@ describe("Subtree", []() { it("returns false for trees with different options", [&]() { const Subtree *different_leaf = ts_subtree_new_leaf( - &pool, leaf->symbol, leaf->padding, leaf->size, &language + &pool, leaf->symbol, leaf->padding, leaf->size, false, &language ); ((Subtree *)different_leaf)->visible = !leaf->visible; AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse()); @@ -476,17 +477,19 @@ describe("Subtree", []() { }); it("returns false for trees with different paddings or sizes", [&]() { - const Subtree *different_leaf = ts_subtree_new_leaf(&pool, leaf->symbol, {}, leaf->size, &language); + const Subtree *different_leaf = ts_subtree_new_leaf( + &pool, leaf->symbol, {}, leaf->size, false, &language + ); AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse()); ts_subtree_release(&pool, different_leaf); - different_leaf = ts_subtree_new_leaf(&pool, symbol1, leaf->padding, {}, &language); + different_leaf = ts_subtree_new_leaf(&pool, symbol1, leaf->padding, {}, false, &language); AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse()); ts_subtree_release(&pool, different_leaf); }); it("returns false for trees with different children", [&]() { - const Subtree *leaf2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language); + const 
Subtree *leaf2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, false, &language); const Subtree *parent = ts_subtree_new_node(&pool, symbol2, tree_array({ leaf, @@ -515,8 +518,10 @@ describe("Subtree", []() { Length padding = {1, {0, 1}}; Length size = {2, {0, 2}}; - auto make_external = [](const Subtree *tree) { - ((Subtree *)tree)->has_external_tokens = true; + auto make_external = [](const Subtree *_tree) { + Subtree *tree = (Subtree *)_tree; + tree->has_external_tokens = true; + ts_external_scanner_state_init(&tree->external_scanner_state, NULL, 0); return tree; }; @@ -525,15 +530,15 @@ describe("Subtree", []() { tree1 = ts_subtree_new_node(&pool, symbol1, tree_array({ (tree2 = ts_subtree_new_node(&pool, symbol2, tree_array({ - (tree3 = make_external(ts_subtree_new_leaf(&pool, symbol3, padding, size, &language))), - (tree4 = ts_subtree_new_leaf(&pool, symbol4, padding, size, &language)), - (tree5 = ts_subtree_new_leaf(&pool, symbol5, padding, size, &language)), + (tree3 = make_external(ts_subtree_new_leaf(&pool, symbol3, padding, size, false, &language))), + (tree4 = ts_subtree_new_leaf(&pool, symbol4, padding, size, false, &language)), + (tree5 = ts_subtree_new_leaf(&pool, symbol5, padding, size, false, &language)), }), 0, &language)), (tree6 = ts_subtree_new_node(&pool, symbol6, tree_array({ (tree7 = ts_subtree_new_node(&pool, symbol7, tree_array({ - (tree8 = ts_subtree_new_leaf(&pool, symbol8, padding, size, &language)), + (tree8 = ts_subtree_new_leaf(&pool, symbol8, padding, size, false, &language)), }), 0, &language)), - (tree9 = ts_subtree_new_leaf(&pool, symbol9, padding, size, &language)), + (tree9 = ts_subtree_new_leaf(&pool, symbol9, padding, size, false, &language)), }), 0, &language)), }), 0, &language);