Use smaller allocations for normal leaf nodes

This commit is contained in:
Max Brunsfeld 2018-09-13 16:13:49 -07:00
parent 25797cad5f
commit c7306722dd
5 changed files with 127 additions and 87 deletions

View file

@ -422,7 +422,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta
}
}
result = ts_subtree_new_leaf(&self->tree_pool, symbol, padding, size, self->language);
result = ts_subtree_new_leaf(&self->tree_pool, symbol, padding, size, !found_external_token, self->language);
result->is_keyword = is_keyword;
if (found_external_token) {
@ -937,7 +937,7 @@ static void ts_parser__halt_parse(TSParser *self) {
Subtree *root_error = ts_subtree_new_error_node(&self->tree_pool, &children, self->language);
ts_stack_push(self->stack, 0, root_error, false, 0);
Subtree *eof = ts_subtree_new_leaf(&self->tree_pool, ts_builtin_sym_end, length_zero(), length_zero(), self->language);
Subtree *eof = ts_subtree_new_leaf(&self->tree_pool, ts_builtin_sym_end, length_zero(), length_zero(), true, self->language);
ts_parser__accept(self, 0, eof);
}

View file

@ -10,6 +10,7 @@
#include "runtime/length.h"
#include "runtime/language.h"
#include "runtime/error_costs.h"
#include <stddef.h>
typedef struct {
Length start;
@ -19,7 +20,9 @@ typedef struct {
// Presumably the "no parse state" marker; defined as the largest unsigned short.
TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
// Limit on how many freed trees a free list may hold; ts_subtree_pool_free
// compares the list's size against it before pushing.
// NOTE(review): two definitions appear because this text is a diff whose +/-
// markers were stripped; 1024 looks like the removed value and 0 the added one
// -- confirm. With a limit of 0, and assuming the array size is unsigned, the
// `size < MAX_TREE_POOL_SIZE` test can never pass, so every freed tree would
// go straight to ts_free and the free lists would never be refilled.
static const uint32_t MAX_TREE_POOL_SIZE = 1024;
static const uint32_t MAX_TREE_POOL_SIZE = 0;
// Byte size of a small tree: only the fields laid out before `children`.
static const uint32_t SMALL_TREE_SIZE = offsetof(Subtree, children);
// Byte size of a large tree: the complete Subtree struct.
static const uint32_t LARGE_TREE_SIZE = sizeof(Subtree);
// External scanner state with zero length and zeroed short_data.
static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}};
@ -114,8 +117,9 @@ void ts_subtree_array_reverse(SubtreeArray *self) {
// SubtreePool
// Builds a SubtreePool by value, starting every member array from array_new()
// and calling array_reserve with `capacity` on both free lists (free_trees and
// free_small_trees).
// NOTE(review): the two `self` initializers below are the pre-change and
// post-change lines of a diff shown without +/- markers; the three-element
// form is the one that supplies a value for free_small_trees as well.
SubtreePool ts_subtree_pool_new(uint32_t capacity) {
SubtreePool self = {array_new(), array_new()};
SubtreePool self = {array_new(), array_new(), array_new()};
array_reserve(&self.free_trees, capacity);
array_reserve(&self.free_small_trees, capacity);
return self;
}
@ -126,20 +130,31 @@ void ts_subtree_pool_delete(SubtreePool *self) {
}
array_delete(&self->free_trees);
}
if (self->free_small_trees.contents) {
for (unsigned i = 0; i < self->free_small_trees.size; i++) {
ts_free(self->free_small_trees.contents[i]);
}
array_delete(&self->free_small_trees);
}
if (self->tree_stack.contents) array_delete(&self->tree_stack);
}
// Hands out storage for one Subtree, reusing an entry from a free list when
// one is available and otherwise calling ts_malloc.
// NOTE(review): this span interleaves the pre-change and post-change versions
// of the function (diff markers were stripped), which is why two signatures
// and both `return ts_malloc(...)` / `result = ts_malloc(...)` lines appear.
//
// One-argument version: pops from free_trees or allocates sizeof(Subtree).
Subtree *ts_subtree_pool_allocate(SubtreePool *self) {
if (self->free_trees.size > 0) {
return array_pop(&self->free_trees);
// Two-argument version: `is_small` selects both the free list to pop from and
// the number of bytes requested from ts_malloc.
Subtree *ts_subtree_pool_allocate(SubtreePool *self, bool is_small) {
MutableSubtreeArray *array = is_small ? &self->free_small_trees : &self->free_trees;
Subtree *result;
if (array->size > 0) {
result = array_pop(array);
} else {
return ts_malloc(sizeof(Subtree));
// A small tree is only SMALL_TREE_SIZE bytes long, so fields at or beyond
// `children` are outside its allocation.
result = ts_malloc(is_small ? SMALL_TREE_SIZE : LARGE_TREE_SIZE);
}
// Record the size class on the tree itself; ts_subtree_pool_free reads
// tree->is_small to choose which free list the tree goes back to.
result->is_small = is_small;
return result;
}
void ts_subtree_pool_free(SubtreePool *self, Subtree *tree) {
if (self->free_trees.capacity > 0 && self->free_trees.size < MAX_TREE_POOL_SIZE) {
array_push(&self->free_trees, tree);
MutableSubtreeArray *array = tree->is_small ? &self->free_small_trees : &self->free_trees;
if (array->capacity > 0 && array->size < MAX_TREE_POOL_SIZE) {
array_push(array, tree);
} else {
ts_free(tree);
}
@ -148,35 +163,37 @@ void ts_subtree_pool_free(SubtreePool *self, Subtree *tree) {
// Subtree
// Creates a leaf Subtree for `symbol` with the given padding and size, taking
// `visible` and `named` from the language's symbol metadata. The returned tree
// has ref_count 1.
// NOTE(review): this span interleaves the pre-change and post-change bodies of
// the function (diff markers were stripped). The first body fills the tree
// with one compound-literal assignment; the second assigns field by field.
Subtree *ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
const TSLanguage *language) {
bool is_small, const TSLanguage *language) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
// Compound-literal version: every member not named in the initializer is
// zeroed by the assignment.
Subtree *result = ts_subtree_pool_allocate(pool);
*result = (Subtree){
.ref_count = 1,
.symbol = symbol,
.size = size,
.visible_child_count = 0,
.named_child_count = 0,
.alias_sequence_id = 0,
.padding = padding,
.visible = metadata.visible,
.named = metadata.named,
.node_count = 1,
.has_changes = false,
.is_keyword = false,
.first_leaf = {
.symbol = symbol,
.lex_mode = {0, 0},
},
.has_external_tokens = false,
};
if (symbol == ts_builtin_sym_end) result->extra = true;
// Field-by-field version: no assignment below touches `children` or any
// member declared after it. For a small tree those members are outside the
// allocation; for a large tree (is_small == false) they are left with
// whatever the allocation contained.
// NOTE(review): node_count is set to 0 here but was 1 in the compound-literal
// version -- confirm the change is intended.
Subtree *result = ts_subtree_pool_allocate(pool, is_small);
result->padding = padding;
result->size = size;
result->ref_count = 1;
result->bytes_scanned = 0;
result->error_cost = 0;
result->node_count = 0;
result->dynamic_precedence = 0;
result->child_count = 0;
// Repeats the assignment already made inside ts_subtree_pool_allocate.
result->is_small = is_small;
result->visible = metadata.visible;
result->named = metadata.named;
// Only the end-of-input symbol is created with `extra` set.
result->extra = symbol == ts_builtin_sym_end;
result->fragile_left = false;
result->fragile_right = false;
result->has_changes = false;
result->has_external_tokens = false;
result->is_missing = false;
result->is_keyword = false;
result->symbol = symbol;
result->parse_state = 0;
result->first_leaf.symbol = symbol;
result->first_leaf.lex_mode = (TSLexMode) {0, 0};
return result;
}
Subtree *ts_subtree_new_error(SubtreePool *pool, Length size, Length padding,
int32_t lookahead_char, const TSLanguage *language) {
Subtree *result = ts_subtree_new_leaf(pool, ts_builtin_sym_error, padding, size, language);
Subtree *result = ts_subtree_new_leaf(pool, ts_builtin_sym_error, padding, size, false, language);
result->fragile_left = true;
result->fragile_right = true;
result->lookahead_char = lookahead_char;
@ -184,8 +201,8 @@ Subtree *ts_subtree_new_error(SubtreePool *pool, Length size, Length padding,
}
Subtree *ts_subtree_new_copy(SubtreePool *pool, const Subtree *self) {
Subtree *result = ts_subtree_pool_allocate(pool);
*result = *self;
Subtree *result = ts_subtree_pool_allocate(pool, self->is_small);
memcpy(result, self, self->is_small ? SMALL_TREE_SIZE : LARGE_TREE_SIZE);
if (result->child_count > 0) {
result->children = ts_calloc(self->child_count, sizeof(const Subtree *));
memcpy(result->children, self->children, self->child_count * sizeof(const Subtree *));
@ -260,12 +277,14 @@ void ts_subtree_balance(const Subtree *self, SubtreePool *pool, const TSLanguage
Subtree *tree = array_pop(&pool->tree_stack);
assert(tree);
if (tree->repeat_depth > 0 &&
tree->children[0]->repeat_depth > tree->children[1]->repeat_depth) {
unsigned n = (
tree->children[0]->repeat_depth -
tree->children[1]->repeat_depth
);
if (
tree->child_count > 0 &&
tree->repeat_depth > 0 &&
tree->children[0]->child_count > 0 &&
tree->children[1]->child_count > 0 &&
tree->children[0]->repeat_depth > tree->children[1]->repeat_depth
) {
unsigned n = tree->children[0]->repeat_depth - tree->children[1]->repeat_depth;
for (unsigned i = n / 2; i > 0; i /= 2) {
ts_subtree__compress(tree, i, language, &pool->tree_stack);
n -= i;
@ -286,6 +305,8 @@ void ts_subtree_set_children(Subtree *self, const Subtree **children, uint32_t c
ts_free(self->children);
}
assert(!self->is_small);
self->child_count = child_count;
self->children = children;
self->named_child_count = 0;
@ -380,8 +401,18 @@ void ts_subtree_set_children(Subtree *self, const Subtree **children, uint32_t c
Subtree *ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, SubtreeArray *children,
unsigned alias_sequence_id, const TSLanguage *language) {
Subtree *result = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language);
result->alias_sequence_id = alias_sequence_id;
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
Subtree *result = ts_subtree_pool_allocate(pool, false);
*result = (Subtree){
.ref_count = 1,
.symbol = symbol,
.alias_sequence_id = alias_sequence_id,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
.is_keyword = false,
.node_count = 0,
};
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
result->fragile_left = true;
result->fragile_right = true;
@ -397,7 +428,7 @@ Subtree *ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
const TSLanguage *language) {
Subtree *result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), language);
Subtree *result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), true, language);
result->is_missing = true;
result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
return result;
@ -655,25 +686,27 @@ static size_t ts_subtree__write_to_string(const Subtree *self, char *string, siz
}
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self->child_count; i++) {
const Subtree *child = self->children[i];
if (child->extra) {
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
0, false
);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
alias_symbol,
alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false
);
structural_child_index++;
if (self->child_count) {
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self->child_count; i++) {
const Subtree *child = self->children[i];
if (child->extra) {
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
0, false
);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
alias_symbol,
alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false
);
structural_child_index++;
}
}
}
@ -715,7 +748,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t byte_offset,
byte_offset, byte_offset + ts_subtree_total_bytes(self),
self->parse_state,
self->error_cost,
self->repeat_depth,
self->child_count > 0 ? self->repeat_depth : 0,
self->bytes_scanned
);

View file

@ -31,10 +31,10 @@ struct Subtree {
uint32_t bytes_scanned;
uint32_t error_cost;
uint32_t node_count;
uint32_t repeat_depth;
int32_t dynamic_precedence;
uint32_t child_count;
bool is_small : 1;
bool visible : 1;
bool named : 1;
bool extra : 1;
@ -57,6 +57,7 @@ struct Subtree {
const Subtree **children;
uint32_t visible_child_count;
uint32_t named_child_count;
uint32_t repeat_depth;
uint16_t alias_sequence_id;
};
@ -73,6 +74,7 @@ typedef Array(Subtree *) MutableSubtreeArray;
// Holds recycled Subtree allocations plus a scratch stack.
// Member order matters: ts_subtree_pool_new fills this struct with a
// positional initializer.
typedef struct {
// Freed trees that were allocated at the full (large) size.
MutableSubtreeArray free_trees;
// Freed trees that were allocated at the small size.
MutableSubtreeArray free_small_trees;
// Work stack of trees; ts_subtree_balance pops from it.
MutableSubtreeArray tree_stack;
} SubtreePool;
@ -86,10 +88,10 @@ void ts_subtree_array_reverse(SubtreeArray *);
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *);
Subtree *ts_subtree_pool_allocate(SubtreePool *);
Subtree *ts_subtree_pool_allocate(SubtreePool *, bool);
void ts_subtree_pool_free(SubtreePool *, Subtree *);
Subtree *ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, const TSLanguage *);
Subtree *ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, bool, const TSLanguage *);
Subtree *ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
Subtree *ts_subtree_new_copy(SubtreePool *, const Subtree *);
Subtree *ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, const TSLanguage *);

View file

@ -88,7 +88,7 @@ describe("Stack", [&]() {
dummy_language.symbol_metadata = symbol_metadata;
for (size_t i = 0; i < subtree_count; i++) {
subtrees[i] = ts_subtree_new_leaf(&pool, i + 1, length_zero(), tree_len, &dummy_language);
subtrees[i] = ts_subtree_new_leaf(&pool, i + 1, length_zero(), tree_len, false, &dummy_language);
}
});

View file

@ -50,7 +50,7 @@ describe("Subtree", []() {
describe("make_leaf", [&]() {
it("does not mark the tree as fragile", [&]() {
const Subtree *tree = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language);
const Subtree *tree = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, false, &language);
AssertThat(tree->fragile_left, IsFalse());
AssertThat(tree->fragile_right, IsFalse());
@ -79,8 +79,8 @@ describe("Subtree", []() {
const Subtree *tree1, *tree2, *parent1;
before_each([&]() {
tree1 = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language);
tree2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language);
tree1 = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, false, &language);
tree2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, false, &language);
ts_subtree_retain(tree1);
ts_subtree_retain(tree2);
@ -186,9 +186,9 @@ describe("Subtree", []() {
before_each([&]() {
tree = ts_subtree_new_node(&pool, symbol1, tree_array({
ts_subtree_new_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, &language),
ts_subtree_new_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, &language),
ts_subtree_new_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, &language),
ts_subtree_new_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, false, &language),
ts_subtree_new_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, false, &language),
ts_subtree_new_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, false, &language),
}), 0, &language);
AssertThat(tree->padding, Equals<Length>({2, {0, 2}}));
@ -421,7 +421,7 @@ describe("Subtree", []() {
const Subtree *leaf;
before_each([&]() {
leaf = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language);
leaf = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, false, &language);
});
after_each([&]() {
@ -429,7 +429,7 @@ describe("Subtree", []() {
});
it("returns true for identical trees", [&]() {
const Subtree *leaf_copy = ts_subtree_new_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, &language);
const Subtree *leaf_copy = ts_subtree_new_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, false, &language);
AssertThat(ts_subtree_eq(leaf, leaf_copy), IsTrue());
const Subtree *parent = ts_subtree_new_node(&pool, symbol2, tree_array({
@ -459,6 +459,7 @@ describe("Subtree", []() {
leaf->symbol + 1,
leaf->padding,
leaf->size,
false,
&language
);
@ -468,7 +469,7 @@ describe("Subtree", []() {
it("returns false for trees with different options", [&]() {
const Subtree *different_leaf = ts_subtree_new_leaf(
&pool, leaf->symbol, leaf->padding, leaf->size, &language
&pool, leaf->symbol, leaf->padding, leaf->size, false, &language
);
((Subtree *)different_leaf)->visible = !leaf->visible;
AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse());
@ -476,17 +477,19 @@ describe("Subtree", []() {
});
it("returns false for trees with different paddings or sizes", [&]() {
const Subtree *different_leaf = ts_subtree_new_leaf(&pool, leaf->symbol, {}, leaf->size, &language);
const Subtree *different_leaf = ts_subtree_new_leaf(
&pool, leaf->symbol, {}, leaf->size, false, &language
);
AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse());
ts_subtree_release(&pool, different_leaf);
different_leaf = ts_subtree_new_leaf(&pool, symbol1, leaf->padding, {}, &language);
different_leaf = ts_subtree_new_leaf(&pool, symbol1, leaf->padding, {}, false, &language);
AssertThat(ts_subtree_eq(leaf, different_leaf), IsFalse());
ts_subtree_release(&pool, different_leaf);
});
it("returns false for trees with different children", [&]() {
const Subtree *leaf2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language);
const Subtree *leaf2 = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, false, &language);
const Subtree *parent = ts_subtree_new_node(&pool, symbol2, tree_array({
leaf,
@ -515,8 +518,10 @@ describe("Subtree", []() {
Length padding = {1, {0, 1}};
Length size = {2, {0, 2}};
auto make_external = [](const Subtree *tree) {
((Subtree *)tree)->has_external_tokens = true;
auto make_external = [](const Subtree *_tree) {
Subtree *tree = (Subtree *)_tree;
tree->has_external_tokens = true;
ts_external_scanner_state_init(&tree->external_scanner_state, NULL, 0);
return tree;
};
@ -525,15 +530,15 @@ describe("Subtree", []() {
tree1 = ts_subtree_new_node(&pool, symbol1, tree_array({
(tree2 = ts_subtree_new_node(&pool, symbol2, tree_array({
(tree3 = make_external(ts_subtree_new_leaf(&pool, symbol3, padding, size, &language))),
(tree4 = ts_subtree_new_leaf(&pool, symbol4, padding, size, &language)),
(tree5 = ts_subtree_new_leaf(&pool, symbol5, padding, size, &language)),
(tree3 = make_external(ts_subtree_new_leaf(&pool, symbol3, padding, size, false, &language))),
(tree4 = ts_subtree_new_leaf(&pool, symbol4, padding, size, false, &language)),
(tree5 = ts_subtree_new_leaf(&pool, symbol5, padding, size, false, &language)),
}), 0, &language)),
(tree6 = ts_subtree_new_node(&pool, symbol6, tree_array({
(tree7 = ts_subtree_new_node(&pool, symbol7, tree_array({
(tree8 = ts_subtree_new_leaf(&pool, symbol8, padding, size, &language)),
(tree8 = ts_subtree_new_leaf(&pool, symbol8, padding, size, false, &language)),
}), 0, &language)),
(tree9 = ts_subtree_new_leaf(&pool, symbol9, padding, size, &language)),
(tree9 = ts_subtree_new_leaf(&pool, symbol9, padding, size, false, &language)),
}), 0, &language)),
}), 0, &language);