Use an object pool for stack nodes to reduce allocations

Also fix some memory leaks that occurred when an allocation failed during parsing
This commit is contained in:
Max Brunsfeld 2016-02-04 11:15:46 -08:00
parent a302ee822a
commit c96c4a08e6
8 changed files with 196 additions and 103 deletions

View file

@ -24,7 +24,8 @@ describe("Parser", [&]() {
});
after_each([&]() {
ts_document_free(doc);
if (doc)
ts_document_free(doc);
if (input)
delete input;
@ -440,15 +441,9 @@ describe("Parser", [&]() {
});
describe("handling allocation failures", [&]() {
before_each([&]() {
record_alloc::start();
});
after_each([&]() {
record_alloc::stop();
});
it("handles failures when allocating documents", [&]() {
record_alloc::start();
TSDocument *document = ts_document_make();
ts_document_free(document);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
@ -462,43 +457,61 @@ describe("Parser", [&]() {
AssertThat(ts_document_make(), Equals<TSDocument *>(nullptr));
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
}
record_alloc::stop();
});
it("handles allocation failures during parsing", [&]() {
ts_document_set_language(doc, get_test_language("cpp"));
const TSLanguage *language = get_test_language("cpp");
const char *input_string = "int main() { return vector<int *>().size(); }";
string expected_node_string =
"(translation_unit (function_definition "
"(identifier) "
"(function_declarator (identifier)) "
"(compound_statement "
"(return_statement (call_expression (field_expression "
"(call_expression (template_call "
"(identifier) "
"(type_name (identifier) (abstract_pointer_declarator)))) "
"(identifier)))))))";
set_text("int main() { return vector<int *>().size(); }");
record_alloc::start();
ts_document_set_language(doc, language);
ts_document_set_input_string(doc, input_string);
AssertThat(ts_document_parse(doc), Equals(0));
size_t allocation_count = record_alloc::allocation_count();
AssertThat(allocation_count, IsGreaterThan<size_t>(1));
char *node_string = ts_node_string(root, doc);
AssertThat(node_string, Equals("(translation_unit (function_definition "
"(identifier) "
"(function_declarator (identifier)) "
"(compound_statement "
"(return_statement (call_expression (field_expression "
"(call_expression (template_call "
"(identifier) "
"(type_name (identifier) (abstract_pointer_declarator)))) "
"(identifier)))))))"));
assert_root_node(expected_node_string);
ts_document_free(doc);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
for (size_t i = 0; i < allocation_count; i++) {
record_alloc::stop();
doc = ts_document_make();
record_alloc::start();
record_alloc::fail_at_allocation_index(i);
ts_document_invalidate(doc);
ts_document_set_language(doc, language);
ts_document_set_input_string(doc, input_string);
AssertThat(ts_document_parse(doc), Equals(-1));
AssertThat(ts_document_root_node(doc).data, Equals<void *>(nullptr));
ts_document_free(doc);
doc = nullptr;
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
}
record_alloc::stop();
doc = ts_document_make();
record_alloc::start();
record_alloc::fail_at_allocation_index(allocation_count + 1);
ts_document_invalidate(doc);
ts_document_set_language(doc, language);
ts_document_set_input_string(doc, input_string);
AssertThat(ts_document_parse(doc), Equals(0));
char *node_string2 = ts_node_string(ts_document_root_node(doc), doc);
AssertThat(string(node_string2), Equals(node_string));
ts_free(node_string2);
ts_free(node_string);
assert_root_node(expected_node_string);
});
});
});

View file

@ -9,14 +9,17 @@
TSDocument *ts_document_make() {
TSDocument *self = ts_calloc(1, sizeof(TSDocument));
if (!self)
return NULL;
goto error;
if (!ts_parser_init(&self->parser)) {
ts_free(self);
return NULL;
}
if (!ts_parser_init(&self->parser))
goto error;
return self;
error:
if (self)
ts_free(self);
return NULL;
}
void ts_document_free(TSDocument *self) {
@ -61,8 +64,11 @@ void ts_document_set_input(TSDocument *self, TSInput input) {
void ts_document_set_input_string(TSDocument *self, const char *text) {
ts_document_invalidate(self);
ts_document_set_input(self, ts_string_input_make(text));
self->owns_input = true;
TSInput input = ts_string_input_make(text);
ts_document_set_input(self, input);
if (input.payload) {
self->owns_input = true;
}
}
void ts_document_edit(TSDocument *self, TSInputEdit edit) {
@ -80,7 +86,7 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) {
int ts_document_parse(TSDocument *self) {
if (!self->input.read_fn || !self->parser.language)
return 0;
return -1;
TSTree *reusable_tree = self->valid ? self->tree : NULL;
if (reusable_tree && !reusable_tree->has_changes)

View file

@ -122,8 +122,8 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol,
* can call them without needing to be linked against this library.
*/
TSLexer ts_lexer_make() {
TSLexer result = (TSLexer){
void ts_lexer_init(TSLexer *self) {
*self = (TSLexer){
.start_fn = ts_lexer__start,
.start_token_fn = ts_lexer__start_token,
.advance_fn = ts_lexer__advance,
@ -132,8 +132,7 @@ TSLexer ts_lexer_make() {
.chunk_start = 0,
.debugger = ts_debugger_null(),
};
ts_lexer_reset(&result, ts_length_zero());
return result;
ts_lexer_reset(self, ts_length_zero());
}
static inline void ts_lexer__reset(TSLexer *self, TSLength position) {

View file

@ -7,7 +7,7 @@ extern "C" {
#include "tree_sitter/parser.h"
TSLexer ts_lexer_make();
void ts_lexer_init(TSLexer *);
void ts_lexer_set_input(TSLexer *, TSInput);
void ts_lexer_reset(TSLexer *, TSLength);

View file

@ -356,8 +356,12 @@ static ParseActionResult ts_parser__reduce(TSParser *self, int head,
size_t child_count = pop_result->tree_count - trailing_extra_count;
parent =
ts_tree_make_node(symbol, child_count, pop_result->trees, metadata);
if (!parent)
if (!parent) {
for (size_t i = 0; i < pop_result->tree_count; i++)
ts_tree_release(pop_result->trees[i]);
ts_free(pop_result->trees);
goto error;
}
}
if (!vector_push(&self->reduce_parents, &parent))
goto error;
@ -424,6 +428,7 @@ static ParseActionResult ts_parser__reduce(TSParser *self, int head,
*/
switch (ts_stack_push(self->stack, new_head, state, parent)) {
case StackPushResultFailed:
ts_tree_release(parent);
goto error;
case StackPushResultMerged:
LOG("merge_during_reduce head:%d", new_head);
@ -590,7 +595,7 @@ static ParseActionResult ts_parser__start(TSParser *self, TSInput input,
static ParseActionResult ts_parser__accept(TSParser *self, int head) {
Vector pop_results = ts_stack_pop(self->stack, head, -1, true);
if (!pop_results.size)
return FailedToUpdateStackHead;
goto error;
for (size_t j = 0; j < pop_results.size; j++) {
StackPopResult *pop_result = vector_get(&pop_results, j);
@ -604,7 +609,7 @@ static ParseActionResult ts_parser__accept(TSParser *self, int head) {
root->child_count + leading_extra_count + trailing_extra_count,
sizeof(TSTree *));
if (!new_children)
return FailedToUpdateStackHead;
goto error;
if (leading_extra_count > 0)
memcpy(new_children, pop_result->trees,
@ -629,6 +634,15 @@ static ParseActionResult ts_parser__accept(TSParser *self, int head) {
}
return RemovedStackHead;
error:
if (pop_results.size) {
StackPopResult *pop_result = vector_get(&pop_results, 0);
for (size_t i = 0; i < pop_result->tree_count; i++)
ts_tree_release(pop_result->trees[i]);
ts_free(pop_result->trees);
}
return FailedToUpdateStackHead;
}
/*
@ -743,34 +757,43 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self, int head,
*/
bool ts_parser_init(TSParser *self) {
ts_lexer_init(&self->lexer);
self->finished_tree = NULL;
self->lexer = ts_lexer_make();
self->stack = NULL;
self->lookahead_states = vector_new(sizeof(LookaheadState));
self->reduce_parents = vector_new(sizeof(TSTree *));
self->stack = ts_stack_new();
if (!self->stack) {
return false;
}
if (!self->stack)
goto error;
self->lookahead_states = vector_new(sizeof(LookaheadState), 4);
if (!self->lookahead_states.contents) {
ts_stack_delete(self->stack);
return false;
}
if (!vector_grow(&self->lookahead_states, 4))
goto error;
self->reduce_parents = vector_new(sizeof(TSTree *), 4);
if (!self->reduce_parents.contents) {
ts_stack_delete(self->stack);
vector_delete(&self->lookahead_states);
return false;
}
if (!vector_grow(&self->reduce_parents, 4))
goto error;
return true;
error:
if (self->stack) {
ts_stack_delete(self->stack);
self->stack = NULL;
}
if (self->lookahead_states.contents)
vector_delete(&self->lookahead_states);
if (self->reduce_parents.contents)
vector_delete(&self->reduce_parents);
return false;
}
void ts_parser_destroy(TSParser *self) {
ts_stack_delete(self->stack);
vector_delete(&self->lookahead_states);
vector_delete(&self->reduce_parents);
if (self->stack)
ts_stack_delete(self->stack);
if (self->lookahead_states.contents)
vector_delete(&self->lookahead_states);
if (self->reduce_parents.contents)
vector_delete(&self->reduce_parents);
}
TSDebugger ts_parser_debugger(const TSParser *self) {
@ -826,6 +849,7 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
switch (ts_parser__consume_lookahead(self, head, lookahead)) {
case FailedToUpdateStackHead:
ts_tree_release(lookahead);
goto error;
case RemovedStackHead:
removed = true;

View file

@ -10,6 +10,7 @@
#define MAX_SUCCESSOR_COUNT 8
#define INITIAL_HEAD_CAPACITY 3
#define STARTING_TREE_CAPACITY 10
#define MAX_NODE_POOL_SIZE 50
typedef struct StackNode {
StackEntry entry;
@ -24,6 +25,7 @@ struct Stack {
int head_capacity;
Vector pop_results;
Vector pop_paths;
Vector node_pool;
void *tree_selection_payload;
TreeSelectionFunction tree_selection_function;
};
@ -50,20 +52,26 @@ Stack *ts_stack_new() {
self->head_count = 1;
self->head_capacity = INITIAL_HEAD_CAPACITY;
self->heads = NULL;
self->pop_results = vector_new(sizeof(StackPopResult));
self->pop_paths = vector_new(sizeof(PopPath));
self->node_pool = vector_new(sizeof(StackNode *));
self->tree_selection_payload = NULL;
self->tree_selection_function = ts_stack__default_tree_selection;
self->heads = ts_calloc(INITIAL_HEAD_CAPACITY, sizeof(StackNode *));
if (!self->heads)
goto error;
self->pop_results = vector_new(sizeof(StackPopResult), 4);
if (!vector_valid(&self->pop_results))
if (!vector_grow(&self->pop_results, 4))
goto error;
self->pop_paths = vector_new(sizeof(PopPath), 4);
if (!vector_valid(&self->pop_paths))
if (!vector_grow(&self->pop_paths, 4))
goto error;
if (!vector_grow(&self->node_pool, 20))
goto error;
self->tree_selection_payload = NULL;
self->tree_selection_function = ts_stack__default_tree_selection;
return self;
error:
@ -74,6 +82,8 @@ error:
vector_delete(&self->pop_results);
if (self->pop_paths.contents)
vector_delete(&self->pop_paths);
if (self->node_pool.contents)
vector_delete(&self->node_pool);
ts_free(self);
}
return NULL;
@ -127,40 +137,50 @@ static void stack_node_retain(StackNode *self) {
self->ref_count++;
}
static bool stack_node_release(StackNode *self) {
if (!self)
static bool stack_node_release(Stack *self, StackNode *node) {
if (!node)
return false;
assert(self->ref_count != 0);
self->ref_count--;
if (self->ref_count == 0) {
for (int i = 0; i < self->successor_count; i++)
stack_node_release(self->successors[i]);
ts_tree_release(self->entry.tree);
ts_free(self);
assert(node->ref_count != 0);
node->ref_count--;
if (node->ref_count == 0) {
for (int i = 0; i < node->successor_count; i++)
stack_node_release(self, node->successors[i]);
ts_tree_release(node->entry.tree);
if (self->node_pool.size >= MAX_NODE_POOL_SIZE)
ts_free(node);
else
vector_push(&self->node_pool, &node);
return true;
} else {
return false;
}
}
static StackNode *stack_node_new(StackNode *next, TSStateId state, TSTree *tree) {
static StackNode *stack_node_new(Stack *self, StackNode *next, TSStateId state, TSTree *tree) {
assert(tree->ref_count > 0);
StackNode *self = ts_malloc(sizeof(StackNode));
if (!self)
return NULL;
StackNode *node;
if (self->node_pool.size == 0) {
node = ts_malloc(sizeof(StackNode));
if (!node)
return NULL;
} else {
node = *(StackNode **)vector_pop(&self->node_pool);
}
ts_tree_retain(tree);
stack_node_retain(next);
TSLength position = ts_tree_total_size(tree);
if (next)
position = ts_length_add(next->entry.position, position);
*self = (StackNode){
*node = (StackNode){
.ref_count = 1,
.successor_count = 1,
.successors = { next, NULL, NULL },
.entry = {.state = state, .tree = tree, .position = position },
};
return self;
return node;
}
static void ts_stack__add_alternative_tree(Stack *self, StackNode *node,
@ -225,7 +245,7 @@ static int ts_stack__find_head(Stack *self, StackNode *node) {
}
void ts_stack_remove_head(Stack *self, int head_index) {
stack_node_release(self->heads[head_index]);
stack_node_release(self, self->heads[head_index]);
for (int i = head_index; i < self->head_count - 1; i++)
self->heads[i] = self->heads[i + 1];
self->head_count--;
@ -262,11 +282,11 @@ StackPushResult ts_stack_push(Stack *self, int head_index, TSStateId state,
if (ts_stack__merge_head(self, head_index, state, tree, position))
return StackPushResultMerged;
StackNode *new_head = stack_node_new(self->heads[head_index], state, tree);
StackNode *new_head = stack_node_new(self, self->heads[head_index], state, tree);
if (!new_head)
return StackPushResultFailed;
stack_node_release(self->heads[head_index]);
stack_node_release(self, self->heads[head_index]);
self->heads[head_index] = new_head;
return StackPushResultContinued;
}
@ -292,11 +312,11 @@ Vector ts_stack_pop(Stack *self, int head_index, int child_count,
PopPath initial_path = {
.goal_tree_count = child_count,
.node = previous_head,
.trees = vector_new(sizeof(TSTree *), capacity),
.trees = vector_new(sizeof(TSTree *)),
.is_shared = false,
};
if (!vector_valid(&initial_path.trees))
if (!vector_grow(&initial_path.trees, capacity))
goto error;
if (!vector_push(&self->pop_paths, &initial_path))
@ -382,11 +402,12 @@ Vector ts_stack_pop(Stack *self, int head_index, int child_count,
goto error;
}
stack_node_release(previous_head);
stack_node_release(self, previous_head);
return self->pop_results;
error:
return vector_new(0, 0);
vector_delete(&initial_path.trees);
return vector_new(0);
}
void ts_stack_shrink(Stack *self, int head_index, int count) {
@ -398,13 +419,13 @@ void ts_stack_shrink(Stack *self, int head_index, int count) {
new_head = new_head->successors[0];
}
stack_node_retain(new_head);
stack_node_release(head);
stack_node_release(self, head);
self->heads[head_index] = new_head;
}
void ts_stack_clear(Stack *self) {
for (int i = 0; i < self->head_count; i++)
stack_node_release(self->heads[i]);
stack_node_release(self, self->heads[i]);
self->head_count = 1;
self->heads[0] = NULL;
}
@ -416,9 +437,17 @@ void ts_stack_set_tree_selection_callback(Stack *self, void *payload,
}
/*
 * Destroy a stack: free the scratch vectors, release every head, free the
 * nodes parked in the node pool, then free the head array and the stack.
 */
void ts_stack_delete(Stack *self) {
  /*
   * vector_delete does nothing when `contents` is NULL, so no guard is
   * needed here. (The previous guard on pop_results tested pop_paths.)
   */
  vector_delete(&self->pop_results);
  vector_delete(&self->pop_paths);

  /*
   * Release the heads before draining the pool: releasing a node may push it
   * into node_pool, and those nodes must be freed by the loop below.
   */
  ts_stack_clear(self);

  for (size_t i = 0; i < self->node_pool.size; i++) {
    StackNode **pooled = vector_get(&self->node_pool, i);
    ts_free(*pooled);
  }
  vector_delete(&self->node_pool);

  ts_free(self->heads);
  ts_free(self);
}

View file

@ -28,6 +28,9 @@ int ts_string_input_seek(void *payload, size_t character, size_t byte) {
TSInput ts_string_input_make(const char *string) {
TSStringInput *input = ts_malloc(sizeof(TSStringInput));
if (!input)
goto error;
input->string = string;
input->position = 0;
input->length = strlen(string);
@ -36,4 +39,7 @@ TSInput ts_string_input_make(const char *string) {
.read_fn = ts_string_input_read,
.seek_fn = ts_string_input_seek,
};
error:
return (TSInput){NULL, NULL, NULL};
}

View file

@ -18,21 +18,29 @@ typedef struct {
size_t element_size;
} Vector;
static inline Vector vector_new(size_t element_size, size_t capacity) {
static inline Vector vector_new(size_t element_size) {
Vector result;
result.contents = NULL;
result.size = 0;
result.capacity = capacity;
result.capacity = 0;
result.element_size = element_size;
if (capacity > 0) {
result.contents = ts_calloc(capacity, element_size);
if (!result.contents)
result.element_size = 0;
}
return result;
}
/*
 * Resize the vector's storage to hold `capacity` elements.
 *
 * Storage is obtained with ts_calloc when the vector has no contents yet and
 * with ts_realloc otherwise. Returns true on success. On failure returns
 * false without modifying any field of the vector.
 */
static inline bool vector_grow(Vector *self, size_t capacity) {
  /*
   * Refuse requests whose size in bytes does not fit in size_t; otherwise
   * the product passed to ts_realloc would wrap and a too-small buffer
   * would be recorded with the full capacity.
   */
  if (self->element_size != 0 && capacity > (size_t)-1 / self->element_size)
    return false;

  void *new_contents;
  if (self->contents)
    new_contents = ts_realloc(self->contents, capacity * self->element_size);
  else
    new_contents = ts_calloc(capacity, self->element_size);
  if (!new_contents)
    return false;

  self->capacity = capacity;
  self->contents = new_contents;
  return true;
}
/* Returns true when the vector's element_size is non-zero. */
static inline bool vector_valid(Vector *self) {
return self->element_size > 0;
}
@ -41,6 +49,8 @@ static inline void vector_delete(Vector *self) {
if (self->contents) {
ts_free(self->contents);
self->contents = NULL;
self->size = 0;
self->capacity = 0;
}
}
@ -54,6 +64,12 @@ static inline void *vector_back(Vector *self) {
return vector_get(self, self->size - 1);
}
/*
 * Remove the last element and return the pointer that vector_back yielded
 * for it. The element is looked up before the size is decremented. This
 * function does not check for an empty vector.
 */
static inline void *vector_pop(Vector *self) {
  void *last = vector_back(self);
  self->size -= 1;
  return last;
}
/* Set the element count to zero; contents and capacity are left as they are. */
static inline void vector_clear(Vector *self) {
self->size = 0;
}