diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 59e73cd4..18f55ed4 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -58,7 +58,7 @@ size_t ts_node_end_byte(TSNode); TSPoint ts_node_end_point(TSNode); TSSymbol ts_node_symbol(TSNode); const char *ts_node_name(TSNode, const TSDocument *); -const char *ts_node_string(TSNode, const TSDocument *); +char *ts_node_string(TSNode, const TSDocument *); bool ts_node_eq(TSNode, TSNode); bool ts_node_is_named(TSNode); bool ts_node_has_changes(TSNode); @@ -84,7 +84,7 @@ void ts_document_set_input_string(TSDocument *, const char *); TSDebugger ts_document_debugger(const TSDocument *); void ts_document_set_debugger(TSDocument *, TSDebugger); void ts_document_edit(TSDocument *, TSInputEdit); -void ts_document_parse(TSDocument *); +int ts_document_parse(TSDocument *); void ts_document_invalidate(TSDocument *); TSNode ts_document_root_node(const TSDocument *); size_t ts_document_parse_count(const TSDocument *); diff --git a/project.gyp b/project.gyp index f3624564..79e9aa27 100644 --- a/project.gyp +++ b/project.gyp @@ -142,6 +142,15 @@ 'GCC_OPTIMIZATION_LEVEL': '0', }, }, + 'Test': { + 'defines': ['TREE_SITTER_WRAP_MALLOC=true'], + 'cflags': [ '-g' ], + 'ldflags': [ '-g' ], + 'xcode_settings': { + 'OTHER_LDFLAGS': ['-g'], + 'GCC_OPTIMIZATION_LEVEL': '0', + }, + }, 'Release': { 'cflags': [ '-O2', '-fno-strict-aliasing' ], 'cflags!': [ '-O3', '-fstrict-aliasing' ], diff --git a/script/check-mallocs b/script/check-mallocs new file mode 100755 index 00000000..0bd064d0 --- /dev/null +++ b/script/check-mallocs @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +src_dir="src/runtime" + +allocation_functions=( + malloc + calloc + realloc + free +) + +for function in ${allocation_functions[@]}; do + usages=$(grep --line-number -E "\b${function}\(" -r "${src_dir}" --exclude alloc.h ) + + if [[ ! 
-z $usages ]]; then + echo "The ${function} function should not be called directly, but is called here:" + echo "$usages" + exit 1 + fi +done diff --git a/script/ci b/script/ci index 2aefb67a..70035525 100755 --- a/script/ci +++ b/script/ci @@ -3,4 +3,5 @@ set -e script/fetch-fixtures +script/check-mallocs script/test diff --git a/script/test b/script/test index 19b74f49..0655bbfb 100755 --- a/script/test +++ b/script/test @@ -27,7 +27,8 @@ profile= mode=normal args=() target=tests -cmd="out/Debug/${target}" +export BUILDTYPE=Test +cmd="out/${BUILDTYPE}/${target}" while getopts "df:s:ghpv" option; do case ${option} in @@ -56,7 +57,7 @@ while getopts "df:s:ghpv" option; do esac done -BUILDTYPE=Debug make $target +make $target args=${args:-""} if [[ -n $profile ]]; then diff --git a/spec/helpers/load_language.cc b/spec/helpers/load_language.cc index c58af0d9..6d623b03 100644 --- a/spec/helpers/load_language.cc +++ b/spec/helpers/load_language.cc @@ -78,6 +78,7 @@ const TSLanguage *load_language(const string &name, const string &code) { compiler_name, "-x", "c", "-fPIC", + "-g", "-I", header_dir.c_str(), "-c", source_filename.c_str(), "-o", obj_filename.c_str(), diff --git a/spec/helpers/record_alloc.cc b/spec/helpers/record_alloc.cc new file mode 100644 index 00000000..3e5a2961 --- /dev/null +++ b/spec/helpers/record_alloc.cc @@ -0,0 +1,95 @@ +#include <stdlib.h> +#include <map> +#include <set> +#include "bandit/bandit.h" + +using std::map; +using std::set; + +bool _enabled = false; +static size_t _allocation_count = 0; +static map<void *, size_t> _outstanding_allocations; +static size_t _allocation_failure_index = -1; + +namespace record_alloc { + +void start() { + _enabled = true; + _allocation_count = 0; + _outstanding_allocations.clear(); + _allocation_failure_index = -1; +} + +void stop() { + _enabled = false; +} + +void fail_at_allocation_index(size_t failure_index) { + _allocation_failure_index = failure_index; +} + +set<size_t> outstanding_allocation_indices() { + set<size_t> result; + for (const auto &entry : 
_outstanding_allocations) { + result.insert(entry.second); + } + return result; +} + +size_t allocation_count() { + return _allocation_count; +} + +} // namespace record_alloc + +static void *record_allocation(void *result) { + if (!_enabled) + return result; + + if (_allocation_count > _allocation_failure_index) { + free(result); + Assert::Failure("Allocated after a previous allocation failed!"); + } + + if (_allocation_count == _allocation_failure_index) { + _allocation_count++; + free(result); + return nullptr; + } + + _outstanding_allocations[result] = _allocation_count; + _allocation_count++; + return result; +} + +static void record_deallocation(void *pointer) { + if (!_enabled) + return; + + auto entry = _outstanding_allocations.find(pointer); + if (entry != _outstanding_allocations.end()) { + _outstanding_allocations.erase(entry); + } +} + +extern "C" { + +void *ts_record_malloc(size_t size) { + return record_allocation(malloc(size)); +} + +void *ts_record_realloc(void *pointer, size_t size) { + record_deallocation(pointer); + return record_allocation(realloc(pointer, size)); +} + +void *ts_record_calloc(size_t count, size_t size) { + return record_allocation(calloc(count, size)); +} + +void ts_record_free(void *pointer) { + free(pointer); + record_deallocation(pointer); +} + +} diff --git a/spec/helpers/record_alloc.h b/spec/helpers/record_alloc.h new file mode 100644 index 00000000..50cd62ad --- /dev/null +++ b/spec/helpers/record_alloc.h @@ -0,0 +1,16 @@ +#ifndef HELPERS_RECORD_ALLOC_H_ +#define HELPERS_RECORD_ALLOC_H_ + +#include <set> + +namespace record_alloc { + +void start(); +void stop(); +void fail_at_allocation_index(size_t failure_index); +std::set<size_t> outstanding_allocation_indices(); +size_t allocation_count(); + +} // namespace record_alloc + +#endif // HELPERS_RECORD_ALLOC_H_ diff --git a/spec/helpers/test_languages.cc b/spec/helpers/test_languages.cc index f8ac6010..86c04744 100644 --- a/spec/helpers/test_languages.cc +++ 
b/spec/helpers/test_languages.cc @@ -16,9 +16,9 @@ int libcompiler_mtime = -1; const char libcompiler_path[] = #if defined(__linux) - "out/Debug/obj.target/libcompiler.a" + "out/Test/obj.target/libcompiler.a" #else - "out/Debug/libcompiler.a" + "out/Test/libcompiler.a" #endif ; diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 7da3484e..7b325dd9 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -2,6 +2,7 @@ #include "helpers/spy_input.h" #include "helpers/test_languages.h" #include "helpers/log_debugger.h" +#include "helpers/record_alloc.h" START_TEST @@ -14,11 +15,14 @@ describe("Parser", [&]() { before_each([&]() { chunk_size = 3; input = nullptr; + doc = ts_document_make(); }); after_each([&]() { - ts_document_free(doc); + if (doc) + ts_document_free(doc); + if (input) delete input; }); @@ -26,7 +30,7 @@ describe("Parser", [&]() { auto set_text = [&](const char *text) { input = new SpyInput(text, chunk_size); ts_document_set_input(doc, input->input()); - ts_document_parse(doc); + AssertThat(ts_document_parse(doc), Equals(0)); root = ts_document_root_node(doc); AssertThat(ts_node_end_byte(root), Equals(strlen(text))); @@ -421,6 +425,69 @@ describe("Parser", [&]() { AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';"))); }); }); + + describe("handling allocation failures", [&]() { + before_each([&]() { + record_alloc::start(); + }); + + after_each([&]() { + record_alloc::stop(); + }); + + it("handles failures when allocating documents", [&]() { + TSDocument *document = ts_document_make(); + ts_document_free(document); + AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); + + size_t allocation_count = record_alloc::allocation_count(); + AssertThat(allocation_count, IsGreaterThan(1)); + + for (size_t i = 0; i < allocation_count; i++) { + record_alloc::start(); + record_alloc::fail_at_allocation_index(i); + AssertThat(ts_document_make(), 
Equals(nullptr)); + AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); + } + }); + + it("handles allocation failures during parsing", [&]() { + ts_document_set_language(doc, get_test_language("cpp")); + + set_text("int main() { return vector<int *>().size(); }"); + + size_t allocation_count = record_alloc::allocation_count(); + AssertThat(allocation_count, IsGreaterThan(1)); + + char *node_string = ts_node_string(root, doc); + AssertThat(node_string, Equals("(translation_unit (function_definition " + "(identifier) " + "(function_declarator (identifier)) " + "(compound_statement " + "(return_statement (call_expression (field_expression " + "(call_expression (template_call " + "(identifier) " + "(type_name (identifier) (abstract_pointer_declarator)))) " + "(identifier)))))))")); + + for (size_t i = 0; i < allocation_count; i++) { + record_alloc::start(); + record_alloc::fail_at_allocation_index(i); + ts_document_invalidate(doc); + AssertThat(ts_document_parse(doc), Equals(-1)); + } + + record_alloc::start(); + record_alloc::fail_at_allocation_index(allocation_count + 1); + ts_document_invalidate(doc); + AssertThat(ts_document_parse(doc), Equals(0)); + + char *node_string2 = ts_node_string(ts_document_root_node(doc), doc); + AssertThat(string(node_string2), Equals(node_string)); + free(node_string2); + free(node_string); + }); + }); }); END_TEST diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 7d96facd..3ed40bc6 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -214,10 +214,8 @@ describe("Stack", [&]() { * A0__B1__C2__D3__G6. 
* \__E4__F5__/ */ - bool merged = ts_stack_push(stack, 0, stateG, trees[6]); - AssertThat(merged, IsFalse()); - merged = ts_stack_push(stack, 1, stateG, trees[6]); - AssertThat(merged, IsTrue()); + AssertThat(ts_stack_push(stack, 0, stateG, trees[6]), Equals(StackPushResultContinued)); + AssertThat(ts_stack_push(stack, 1, stateG, trees[6]), Equals(StackPushResultMerged)); AssertThat(ts_stack_head_count(stack), Equals(1)); const StackEntry *entry1 = ts_stack_head(stack, 0); @@ -239,10 +237,8 @@ describe("Stack", [&]() { * A0__B1__C2__D3__G(6|7) * \__E4__F5____/ */ - bool merged = ts_stack_push(stack, 0, stateG, trees[6]); - AssertThat(merged, IsFalse()); - merged = ts_stack_push(stack, 1, stateG, trees[7]); - AssertThat(merged, IsTrue()); + AssertThat(ts_stack_push(stack, 0, stateG, trees[6]), Equals(StackPushResultContinued)); + AssertThat(ts_stack_push(stack, 1, stateG, trees[7]), Equals(StackPushResultMerged)); AssertThat(ts_stack_head_count(stack), Equals(1)); AssertThat(tree_selection_spy.call_count, Equals(1)); @@ -258,19 +254,15 @@ describe("Stack", [&]() { * A0__B1__C2__D3__G6__H7. * \__E4__F5__G6. */ - bool merged = ts_stack_push(stack, 0, stateG, trees[6]); - AssertThat(merged, IsFalse()); - merged = ts_stack_push(stack, 0, stateH, trees[7]); - AssertThat(merged, IsFalse()); - merged = ts_stack_push(stack, 1, stateG, trees[6]); - AssertThat(merged, IsFalse()); + AssertThat(ts_stack_push(stack, 0, stateG, trees[6]), Equals(StackPushResultContinued)); + AssertThat(ts_stack_push(stack, 0, stateH, trees[7]), Equals(StackPushResultContinued)); + AssertThat(ts_stack_push(stack, 1, stateG, trees[6]), Equals(StackPushResultContinued)); /* * A0__B1__C2__D3__G6__H7. 
* \__E4__F5_/ */ - merged = ts_stack_push(stack, 1, stateH, trees[7]); - AssertThat(merged, IsTrue()); + AssertThat(ts_stack_push(stack, 1, stateH, trees[7]), Equals(StackPushResultMerged)); AssertThat(ts_stack_head_count(stack), Equals(1)); StackEntry *head = ts_stack_head(stack, 0); @@ -298,10 +290,8 @@ describe("Stack", [&]() { tree_selection_spy.tree_to_return = parent; tree_selection_spy.call_count = 0; - bool merged = ts_stack_push(stack, 1, stateB, trees[2]); - AssertThat(merged, IsFalse()); - merged = ts_stack_push(stack, 1, stateC, trees[3]); - AssertThat(merged, IsTrue()); + AssertThat(ts_stack_push(stack, 1, stateB, trees[2]), Equals(StackPushResultContinued)); + AssertThat(ts_stack_push(stack, 1, stateC, trees[3]), Equals(StackPushResultMerged)); AssertThat(tree_selection_spy.call_count, Equals(1)); AssertThat(ts_stack_head_count(stack), Equals(1)); @@ -368,8 +358,7 @@ describe("Stack", [&]() { * A0__B1__C2__D3__G6__H7. * \__E4__F5__/ */ - bool merged = ts_stack_push(stack, 0, stateH, trees[7]); - AssertThat(merged, IsFalse()); + AssertThat(ts_stack_push(stack, 0, stateH, trees[7]), Equals(StackPushResultContinued)); AssertThat(ts_stack_head_count(stack), Equals(1)); /* diff --git a/src/runtime/alloc.h b/src/runtime/alloc.h new file mode 100644 index 00000000..6aee72a1 --- /dev/null +++ b/src/runtime/alloc.h @@ -0,0 +1,57 @@ +#ifndef RUNTIME_ALLOC_H_ +#define RUNTIME_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(TREE_SITTER_WRAP_MALLOC) + +void *ts_record_malloc(size_t); +void *ts_record_calloc(size_t, size_t); +void *ts_record_realloc(void *, size_t); +void ts_record_free(void *); + +static inline void *ts_malloc(size_t size) { + return ts_record_malloc(size); +} + +static inline void *ts_calloc(size_t count, size_t size) { + return ts_record_calloc(count, size); +} + +static inline void *ts_realloc(void *buffer, size_t size) { + return ts_record_realloc(buffer, size); +} + +static inline void ts_free(void *buffer) { + return 
ts_record_free(buffer); +} + +#else + +#include <stdlib.h> + +static inline void *ts_malloc(size_t size) { + return malloc(size); +} + +static inline void *ts_calloc(size_t count, size_t size) { + return calloc(count, size); +} + +static inline void *ts_realloc(void *buffer, size_t size) { + return realloc(buffer, size); +} + +static inline void ts_free(void *buffer) { + return free(buffer); +} + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_ALLOC_H_ diff --git a/src/runtime/document.c b/src/runtime/document.c index 40b509ec..b5416e56 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -1,4 +1,5 @@ #include "tree_sitter/parser.h" +#include "runtime/alloc.h" #include "runtime/node.h" #include "runtime/tree.h" #include "runtime/parser.h" @@ -6,16 +7,23 @@ #include "runtime/document.h" TSDocument *ts_document_make() { - TSDocument *document = calloc(sizeof(TSDocument), 1); - document->parser = ts_parser_make(); - return document; + TSDocument *self = ts_calloc(1, sizeof(TSDocument)); + if (!self) + return NULL; + + if (!ts_parser_init(&self->parser)) { + ts_free(self); + return NULL; + } + + return self; } void ts_document_free(TSDocument *self) { ts_parser_destroy(&self->parser); if (self->tree) ts_tree_release(self->tree); - free(self); + ts_free(self); } const TSLanguage *ts_document_language(TSDocument *self) { @@ -62,21 +70,25 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { ts_tree_edit(self->tree, edit); } -void ts_document_parse(TSDocument *self) { +int ts_document_parse(TSDocument *self) { if (!self->input.read_fn || !self->parser.language) - return; + return 0; TSTree *reusable_tree = self->valid ? 
self->tree : NULL; if (reusable_tree && !reusable_tree->has_changes) - return; + return 0; TSTree *tree = ts_parser_parse(&self->parser, self->input, reusable_tree); + if (!tree) + return -1; + ts_tree_retain(tree); if (self->tree) ts_tree_release(self->tree); self->tree = tree; self->parse_count++; self->valid = true; + return 0; } void ts_document_invalidate(TSDocument *self) { diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 1e8d6f3e..6dacfe0b 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -108,6 +108,9 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol, result = ts_tree_make_leaf(symbol, padding, size, metadata); } + if (!result) + return NULL; + if (fragile) result->lex_state = self->starting_state; diff --git a/src/runtime/node.c b/src/runtime/node.c index 1597cccd..519910a0 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -206,7 +206,7 @@ const char *ts_node_name(TSNode self, const TSDocument *document) { return document->parser.language->symbol_names[ts_node__tree(self)->symbol]; } -const char *ts_node_string(TSNode self, const TSDocument *document) { +char *ts_node_string(TSNode self, const TSDocument *document) { return ts_tree_string(ts_node__tree(self), document->parser.language->symbol_names, false); } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 7ff35f25..e144837e 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -9,6 +9,7 @@ #include "runtime/length.h" #include "runtime/vector.h" #include "runtime/language.h" +#include "runtime/alloc.h" /* * Debugging @@ -31,21 +32,31 @@ typedef struct { bool is_verifying; } LookaheadState; +typedef enum { + UpdatedStackHead, + RemovedStackHead, + FailedToUpdateStackHead, +} ParseActionResult; + /* * Private */ -static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) { +static ParseActionResult ts_parser__breakdown_top_of_stack(TSParser *self, int head) { TSTree *last_child = NULL; do { Vector pop_results = 
ts_stack_pop(self->stack, head, 1, false); + if (!vector_valid(&pop_results)) + return FailedToUpdateStackHead; + assert(pop_results.size > 0); /* * Since only one entry (not counting extra trees) is being popped from the * stack, there should only be one possible array of removed trees. */ StackPopResult *first_result = vector_get(&pop_results, 0); + assert(first_result->tree_count > 0); TSTree **removed_trees = first_result->trees; TSTree *parent = removed_trees[0]; LOG("breakdown_pop sym:%s, size:%lu", SYM_NAME(parent->symbol), @@ -56,7 +67,7 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) { assert(pop_result->trees == removed_trees); int head_index = pop_result->head_index; - bool merged = true; + StackPushResult last_push = StackPushResultContinued; TSStateId state = ts_stack_top_state(self->stack, head_index); for (size_t j = 0; j < parent->child_count; j++) { last_child = parent->children[j]; @@ -69,19 +80,32 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) { LOG("breakdown_push sym:%s, size:%lu", SYM_NAME(last_child->symbol), ts_tree_total_size(last_child).chars); - merged = - ts_stack_push(self->stack, pop_result->head_index, state, last_child); + + last_push = ts_stack_push(self->stack, head_index, state, last_child); + if (last_push == StackPushResultFailed) + goto error; } - for (size_t j = 1, count = pop_result->tree_count; j < count; j++) - merged = ts_stack_push(self->stack, pop_result->head_index, state, - pop_result->trees[j]); + for (size_t j = 1, count = pop_result->tree_count; j < count; j++) { + TSTree *tree = pop_result->trees[j]; + last_push = ts_stack_push(self->stack, head_index, state, tree); + if (last_push == StackPushResultFailed) + goto error; + } - assert((i == 0) ^ merged); + if (i == 0) + assert(last_push != StackPushResultMerged); + else + assert(last_push == StackPushResultMerged); } - free(removed_trees); + ts_free(removed_trees); } while (last_child && last_child->child_count 
> 0); + + return UpdatedStackHead; + +error: + return FailedToUpdateStackHead; } static void ts_parser__pop_reusable_subtree(LookaheadState *state); @@ -243,33 +267,43 @@ static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { * Parse Actions */ -static bool ts_parser__shift(TSParser *self, int head, TSStateId parse_state, - TSTree *lookahead) { - if (ts_stack_push(self->stack, head, parse_state, lookahead)) { - LOG("merge head:%d", head); - vector_erase(&self->lookahead_states, head); - return false; - } else { - return true; +static ParseActionResult ts_parser__shift(TSParser *self, int head, + TSStateId parse_state, + TSTree *lookahead) { + switch (ts_stack_push(self->stack, head, parse_state, lookahead)) { + case StackPushResultFailed: + return FailedToUpdateStackHead; + case StackPushResultMerged: + LOG("merge head:%d", head); + vector_erase(&self->lookahead_states, head); + return RemovedStackHead; + case StackPushResultContinued: + return UpdatedStackHead; } } -static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state, +static ParseActionResult ts_parser__shift_extra(TSParser *self, int head, TSStateId state, TSTree *lookahead) { TSSymbolMetadata metadata = self->language->symbol_metadata[lookahead->symbol]; - if (metadata.structural && ts_stack_head_count(self->stack) > 1) + if (metadata.structural && ts_stack_head_count(self->stack) > 1) { lookahead = ts_tree_make_copy(lookahead); + if (!lookahead) + return FailedToUpdateStackHead; + } + lookahead->extra = true; return ts_parser__shift(self, head, state, lookahead); } -static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, +static ParseActionResult ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, int child_count, bool extra, bool fragile, bool count_extra) { vector_clear(&self->reduce_parents); const TSSymbolMetadata *all_metadata = self->language->symbol_metadata; TSSymbolMetadata metadata = all_metadata[symbol]; Vector pop_results 
= ts_stack_pop(self->stack, head, child_count, count_extra); + if (!pop_results.element_size) + return FailedToUpdateStackHead; int last_head_index = -1; size_t removed_heads = 0; @@ -305,11 +339,13 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, break; } - parent = - ts_tree_make_node(symbol, pop_result->tree_count - trailing_extra_count, - pop_result->trees, metadata); + size_t child_count = pop_result->tree_count - trailing_extra_count; + parent = ts_tree_make_node(symbol, child_count, pop_result->trees, metadata); + if (!parent) + return FailedToUpdateStackHead; } - vector_push(&self->reduce_parents, &parent); + if (!vector_push(&self->reduce_parents, &parent)) + return FailedToUpdateStackHead; /* * If another path led to the same stack head, add this new parent tree @@ -335,8 +371,10 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, } LOG("split_during_reduce new_head:%d", new_head); - LookaheadState lookahead_state = *(LookaheadState *)vector_get(&self->lookahead_states, head); - vector_push(&self->lookahead_states, &lookahead_state); + LookaheadState lookahead_state = + *(LookaheadState *)vector_get(&self->lookahead_states, head); + if (!vector_push(&self->lookahead_states, &lookahead_state)) + return FailedToUpdateStackHead; } /* @@ -368,21 +406,31 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, * If the given state already existed at a different head of the stack, * then remove the lookahead state for the head. 
*/ - if (ts_stack_push(self->stack, new_head, state, parent)) { - LOG("merge_during_reduce head:%d", new_head); - vector_erase(&self->lookahead_states, new_head); - removed_heads++; - continue; + switch (ts_stack_push(self->stack, new_head, state, parent)) { + case StackPushResultFailed: + return FailedToUpdateStackHead; + case StackPushResultMerged: + LOG("merge_during_reduce head:%d", new_head); + vector_erase(&self->lookahead_states, new_head); + removed_heads++; + continue; + case StackPushResultContinued: + break; } if (trailing_extra_count > 0) { for (size_t j = 0; j < trailing_extra_count; j++) { size_t index = pop_result->tree_count - trailing_extra_count + j; - if (ts_stack_push(self->stack, new_head, state, - pop_result->trees[index])) { - vector_erase(&self->lookahead_states, new_head); - removed_heads++; - continue; + TSTree *tree = pop_result->trees[index]; + switch (ts_stack_push(self->stack, new_head, state, tree)) { + case StackPushResultFailed: + return FailedToUpdateStackHead; + case StackPushResultMerged: + vector_erase(&self->lookahead_states, new_head); + removed_heads++; + continue; + case StackPushResultContinued: + break; } } } @@ -404,25 +452,34 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, } } - return removed_heads < revealed_heads; + if (removed_heads < revealed_heads) + return UpdatedStackHead; + else + return RemovedStackHead; } -static void ts_parser__reduce_error(TSParser *self, int head, +static ParseActionResult ts_parser__reduce_error(TSParser *self, int head, size_t child_count, TSTree *lookahead) { - bool result = ts_parser__reduce(self, head, ts_builtin_sym_error, child_count, - false, false, true); - if (result) { - TSTree **parent = vector_back(&self->reduce_parents); - StackEntry *stack_entry = ts_stack_head(self->stack, head); - stack_entry->position = - ts_length_add(stack_entry->position, lookahead->padding); - (*parent)->size = ts_length_add((*parent)->size, lookahead->padding); - 
(*parent)->fragile_left = (*parent)->fragile_right = true; - lookahead->padding = ts_length_zero(); + switch(ts_parser__reduce(self, head, ts_builtin_sym_error, child_count, + false, false, true)) { + case FailedToUpdateStackHead: + return FailedToUpdateStackHead; + case RemovedStackHead: + return RemovedStackHead; + case UpdatedStackHead: { + TSTree **parent = vector_back(&self->reduce_parents); + StackEntry *stack_entry = ts_stack_head(self->stack, head); + stack_entry->position = + ts_length_add(stack_entry->position, lookahead->padding); + (*parent)->size = ts_length_add((*parent)->size, lookahead->padding); + (*parent)->fragile_left = (*parent)->fragile_right = true; + lookahead->padding = ts_length_zero(); + return UpdatedStackHead; + } } } -static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) { +static ParseActionResult ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) { size_t error_token_count = 1; StackEntry *entry_before_error = ts_stack_head(self->stack, head); @@ -448,7 +505,7 @@ static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) LOG("recover state:%u, count:%lu", state_after_error, error_token_count + i); ts_parser__reduce_error(self, head, error_token_count + i, lookahead); - return true; + return UpdatedStackHead; } } @@ -461,9 +518,12 @@ static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) * current lookahead token, advance to the next token. 
*/ LOG("skip token:%s", SYM_NAME(lookahead->symbol)); - ts_parser__shift(self, head, ts_stack_top_state(self->stack, head), - lookahead); + TSStateId state = ts_stack_top_state(self->stack, head); + if (ts_parser__shift(self, head, state, lookahead) == FailedToUpdateStackHead) + return FailedToUpdateStackHead; lookahead = self->language->lex_fn(&self->lexer, 0, true); + if (!lookahead) + return FailedToUpdateStackHead; error_token_count++; /* @@ -472,12 +532,12 @@ static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) if (lookahead->symbol == ts_builtin_sym_end) { LOG("fail_to_recover"); ts_parser__reduce_error(self, head, -1, lookahead); - return false; + return RemovedStackHead; } } } -static void ts_parser__start(TSParser *self, TSInput input, +static ParseActionResult ts_parser__start(TSParser *self, TSInput input, TSTree *previous_tree) { if (previous_tree) { LOG("parse_after_edit"); @@ -498,10 +558,13 @@ static void ts_parser__start(TSParser *self, TSInput input, vector_clear(&self->lookahead_states); vector_push(&self->lookahead_states, &lookahead_state); self->finished_tree = NULL; + return UpdatedStackHead; } -static void ts_parser__accept(TSParser *self, int head) { +static ParseActionResult ts_parser__accept(TSParser *self, int head) { Vector pop_results = ts_stack_pop(self->stack, head, -1, true); + if (!pop_results.size) + return FailedToUpdateStackHead; for (size_t j = 0; j < pop_results.size; j++) { StackPopResult *pop_result = vector_get(&pop_results, j); @@ -511,9 +574,12 @@ static void ts_parser__accept(TSParser *self, int head) { TSTree *root = pop_result->trees[i]; size_t leading_extra_count = i; size_t trailing_extra_count = pop_result->tree_count - 1 - i; - TSTree **new_children = malloc( - (root->child_count + leading_extra_count + trailing_extra_count) * + TSTree **new_children = ts_calloc( + root->child_count + leading_extra_count + trailing_extra_count, sizeof(TSTree *)); + if (!new_children) + return 
FailedToUpdateStackHead; + memcpy(new_children, pop_result->trees, leading_extra_count * sizeof(TSTree *)); memcpy(new_children + leading_extra_count, root->children, @@ -532,13 +598,16 @@ static void ts_parser__accept(TSParser *self, int head) { } } } + + return RemovedStackHead; } /* * Continue performing parse actions for the given head until the current * lookahead symbol is consumed. */ -static bool ts_parser__consume_lookahead(TSParser *self, int head, + +static ParseActionResult ts_parser__consume_lookahead(TSParser *self, int head, TSTree *lookahead) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, head); @@ -577,20 +646,22 @@ static bool ts_parser__consume_lookahead(TSParser *self, int head, if (lookahead_state->is_verifying) { ts_parser__breakdown_top_of_stack(self, current_head); lookahead_state->is_verifying = false; - return false; + return RemovedStackHead; } if (ts_stack_head_count(self->stack) == 1) { - if (ts_parser__handle_error(self, current_head, lookahead)) { - return true; - } else { - ts_parser__accept(self, current_head); - return false; + switch (ts_parser__handle_error(self, current_head, lookahead)) { + case FailedToUpdateStackHead: + return FailedToUpdateStackHead; + case UpdatedStackHead: + return UpdatedStackHead; + case RemovedStackHead: + return ts_parser__accept(self, current_head); } } else { LOG("bail current_head:%d", current_head); ts_parser__remove_head(self, current_head); - return false; + return RemovedStackHead; } case TSParseActionTypeShift: @@ -600,8 +671,8 @@ static bool ts_parser__consume_lookahead(TSParser *self, int head, } else { LOG("shift state:%u", action.data.to_state); lookahead_state->is_verifying = (lookahead->child_count > 0); - return ts_parser__shift(self, current_head, action.data.to_state, - lookahead); + TSStateId state = action.data.to_state; + return ts_parser__shift(self, current_head, state, lookahead); } case TSParseActionTypeReduce: @@ -615,18 +686,24 @@ static bool 
ts_parser__consume_lookahead(TSParser *self, int head, LOG("reduce sym:%s, child_count:%u, fragile:%s", SYM_NAME(action.data.symbol), action.data.child_count, BOOL_STRING(action.fragile)); - if (!ts_parser__reduce(self, current_head, action.data.symbol, + switch (ts_parser__reduce(self, current_head, action.data.symbol, action.data.child_count, false, - action.fragile, false)) - if (current_head == head) - return false; + action.fragile, false)) { + case FailedToUpdateStackHead: + return FailedToUpdateStackHead; + case RemovedStackHead: + if (current_head == head) + return RemovedStackHead; + break; + case UpdatedStackHead: + break; + } } break; case TSParseActionTypeAccept: LOG("accept"); - ts_parser__accept(self, current_head); - return false; + return ts_parser__accept(self, current_head); } } } @@ -636,18 +713,35 @@ static bool ts_parser__consume_lookahead(TSParser *self, int head, * Public */ -TSParser ts_parser_make() { - return (TSParser){ - .lexer = ts_lexer_make(), - .stack = ts_stack_new(), - .lookahead_states = vector_new(sizeof(LookaheadState), 4), - .reduce_parents = vector_new(sizeof(TSTree *), 4), - .finished_tree = NULL, - }; +bool ts_parser_init(TSParser *self) { + self->finished_tree = NULL; + self->lexer = ts_lexer_make(); + + self->stack = ts_stack_new(); + if (!self->stack) { + return false; + } + + self->lookahead_states = vector_new(sizeof(LookaheadState), 4); + if (!self->lookahead_states.contents) { + ts_stack_delete(self->stack); + return false; + } + + self->reduce_parents = vector_new(sizeof(TSTree *), 4); + if (!self->reduce_parents.contents) { + ts_stack_delete(self->stack); + vector_delete(&self->lookahead_states); + return false; + } + + return true; } void ts_parser_destroy(TSParser *self) { ts_stack_delete(self->stack); + vector_delete(&self->lookahead_states); + vector_delete(&self->reduce_parents); } TSDebugger ts_parser_debugger(const TSParser *self) { @@ -669,7 +763,7 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, 
TSTree *previous_tree) { self->is_split = ts_stack_head_count(self->stack) > 1; for (int head = 0; head < ts_stack_head_count(self->stack);) { - for (;;) { + for (bool removed = false; !removed;) { last_position = position; position = ts_stack_top_position(self->stack, head); @@ -689,14 +783,24 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { ts_stack_top_state(self->stack, head), position.chars); if (position.chars != last_position.chars || - !ts_parser__can_reuse(self, head, lookahead)) + !ts_parser__can_reuse(self, head, lookahead)) { lookahead = ts_parser__get_next_lookahead(self, head); + if (!lookahead) + return NULL; + } LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), ts_tree_total_chars(lookahead)); - if (!ts_parser__consume_lookahead(self, head, lookahead)) - break; + switch (ts_parser__consume_lookahead(self, head, lookahead)) { + case FailedToUpdateStackHead: + return NULL; + case RemovedStackHead: + removed = true; + break; + case UpdatedStackHead: + break; + } } } diff --git a/src/runtime/parser.h b/src/runtime/parser.h index cc537090..6fc83acd 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -18,7 +18,7 @@ typedef struct { bool is_split; } TSParser; -TSParser ts_parser_make(); +bool ts_parser_init(TSParser *); void ts_parser_destroy(TSParser *); TSDebugger ts_parser_debugger(const TSParser *); void ts_parser_set_debugger(TSParser *, TSDebugger); diff --git a/src/runtime/stack.c b/src/runtime/stack.c index ce9c0794..a1b28244 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,4 +1,5 @@ #include "tree_sitter/parser.h" +#include "runtime/alloc.h" #include "runtime/tree.h" #include "runtime/vector.h" #include "runtime/stack.h" @@ -42,24 +43,46 @@ static TSTree *ts_stack__default_tree_selection(void *p, TSTree *t1, TSTree *t2) } Stack *ts_stack_new() { - Stack *self = malloc(sizeof(Stack)); - *self = (Stack){ - .heads = calloc(INITIAL_HEAD_CAPACITY, sizeof(StackNode *)), - 
.head_count = 1, - .head_capacity = INITIAL_HEAD_CAPACITY, - .tree_selection_payload = NULL, - .tree_selection_function = ts_stack__default_tree_selection, - .pop_results = vector_new(sizeof(StackPopResult), 4), - .pop_paths = vector_new(sizeof(PopPath), 4), - }; + Stack *self = ts_calloc(1, sizeof(Stack)); + if (!self) + goto error; + + self->head_count = 1; + self->head_capacity = INITIAL_HEAD_CAPACITY; + self->heads = ts_calloc(INITIAL_HEAD_CAPACITY, sizeof(StackNode *)); + if (!self->heads) + goto error; + + self->pop_results = vector_new(sizeof(StackPopResult), 4); + if (!vector_valid(&self->pop_results)) + goto error; + + self->pop_paths = vector_new(sizeof(PopPath), 4); + if (!vector_valid(&self->pop_paths)) + goto error; + + self->tree_selection_payload = NULL; + self->tree_selection_function = ts_stack__default_tree_selection; return self; + +error: + if (self) { + if (self->heads) + ts_free(self->heads); + if (self->pop_results.contents) + vector_delete(&self->pop_results); + if (self->pop_paths.contents) + vector_delete(&self->pop_paths); + ts_free(self); + } + return NULL; } void ts_stack_delete(Stack *self) { vector_delete(&self->pop_results); vector_delete(&self->pop_paths); - free(self->heads); - free(self); + ts_free(self->heads); + ts_free(self); } /* @@ -119,7 +142,7 @@ static bool stack_node_release(StackNode *self) { for (int i = 0; i < self->successor_count; i++) stack_node_release(self->successors[i]); ts_tree_release(self->entry.tree); - free(self); + ts_free(self); return true; } else { return false; @@ -127,8 +150,11 @@ static bool stack_node_release(StackNode *self) { } static StackNode *stack_node_new(StackNode *next, TSStateId state, TSTree *tree) { - StackNode *self = malloc(sizeof(StackNode)); assert(tree->ref_count > 0); + StackNode *self = ts_malloc(sizeof(StackNode)); + if (!self) + return NULL; + ts_tree_retain(tree); stack_node_retain(next); TSLength position = ts_tree_total_size(tree); @@ -179,7 +205,7 @@ static int 
ts_stack__add_head(Stack *self, StackNode *node) { if (self->head_count == self->head_capacity) { self->head_capacity += 3; self->heads = - realloc(self->heads, self->head_capacity * sizeof(StackNode *)); + ts_realloc(self->heads, self->head_capacity * sizeof(StackNode *)); } int new_index = self->head_count++; self->heads[new_index] = node; @@ -225,15 +251,24 @@ static bool ts_stack__merge_head(Stack *self, int head_index, TSStateId state, * Section: Mutating the stack (Public) */ -bool ts_stack_push(Stack *self, int head_index, TSStateId state, TSTree *tree) { +StackPushResult ts_stack_push(Stack *self, int head_index, TSStateId state, + TSTree *tree) { assert(head_index < self->head_count); + assert(tree); + TSLength position = ts_tree_total_size(tree); if (self->heads[head_index]) position = ts_length_add(self->heads[head_index]->entry.position, position); + if (ts_stack__merge_head(self, head_index, state, tree, position)) - return true; - self->heads[head_index] = stack_node_new(self->heads[head_index], state, tree); - return false; + return StackPushResultMerged; + + StackNode *new_head = stack_node_new(self->heads[head_index], state, tree); + if (!new_head) + return StackPushResultFailed; + + self->heads[head_index] = new_head; + return StackPushResultContinued; } void ts_stack_add_alternative(Stack *self, int head_index, TSTree *tree) { @@ -250,6 +285,9 @@ int ts_stack_split(Stack *self, int head_index) { Vector ts_stack_pop(Stack *self, int head_index, int child_count, bool count_extra) { + vector_clear(&self->pop_results); + vector_clear(&self->pop_paths); + StackNode *previous_head = self->heads[head_index]; int capacity = (child_count == -1) ? 
STARTING_TREE_CAPACITY : child_count; PopPath initial_path = { @@ -259,9 +297,11 @@ Vector ts_stack_pop(Stack *self, int head_index, int child_count, .is_shared = false, }; - vector_clear(&self->pop_results); - vector_clear(&self->pop_paths); - vector_push(&self->pop_paths, &initial_path); + if (!vector_valid(&initial_path.trees)) + goto error; + + if (!vector_push(&self->pop_paths, &initial_path)) + goto error; /* * Reduce along every possible path in parallel. Stop when the given number @@ -296,12 +336,15 @@ Vector ts_stack_pop(Stack *self, int head_index, int child_count, } ts_tree_retain(node->entry.tree); - vector_push(&path->trees, &node->entry.tree); + if (!vector_push(&path->trees, &node->entry.tree)) + goto error; path->node = path->node->successors[0]; PopPath path_copy = *path; for (int j = 1; j < node->successor_count; j++) { - vector_push(&self->pop_paths, &path_copy); + if (!vector_push(&self->pop_paths, &path_copy)) + goto error; + PopPath *next_path = vector_back(&self->pop_paths); next_path->node = node->successors[j]; next_path->is_shared = true; @@ -331,11 +374,15 @@ Vector ts_stack_pop(Stack *self, int head_index, int child_count, result.head_index = ts_stack__add_head(self, path->node); } - vector_push(&self->pop_results, &result); + if (!vector_push(&self->pop_results, &result)) + goto error; } stack_node_release(previous_head); return self->pop_results; + +error: + return vector_new(0, 0); } void ts_stack_shrink(Stack *self, int head_index, int count) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 753916db..bffaacb6 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -22,6 +22,12 @@ typedef struct { int head_index; } StackPopResult; +typedef enum { + StackPushResultFailed, + StackPushResultMerged, + StackPushResultContinued, +} StackPushResult; + typedef TSTree *(*TreeSelectionFunction)(void *, TSTree *, TSTree *); /* @@ -73,11 +79,10 @@ int ts_stack_entry_next_count(const StackEntry *); StackEntry 
*ts_stack_entry_next(const StackEntry *, int); /* - * Push a (tree, state) pair onto the given head of the stack. Returns - * a boolean indicating whether the stack head was merged with an - * existing head. + * Push a (tree, state) pair onto the given head of the stack. This could cause + * the head to merge with an existing head. */ -bool ts_stack_push(Stack *, int head, TSStateId, TSTree *); +StackPushResult ts_stack_push(Stack *, int head, TSStateId, TSTree *); /* * Add an alternative tree for the given head of the stack. diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index 5f91e759..2ebd9c00 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -1,4 +1,5 @@ #include "runtime/string_input.h" +#include "runtime/alloc.h" #include typedef struct { @@ -26,7 +27,7 @@ int ts_string_input_seek(void *payload, size_t character, size_t byte) { } TSInput ts_string_input_make(const char *string) { - TSStringInput *input = malloc(sizeof(TSStringInput)); + TSStringInput *input = ts_malloc(sizeof(TSStringInput)); input->string = string; input->position = 0; input->length = strlen(string); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 1442c28f..52c022ed 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -4,6 +4,7 @@ #include #include #include "tree_sitter/parser.h" +#include "runtime/alloc.h" #include "runtime/tree.h" #include "runtime/length.h" @@ -12,7 +13,10 @@ TSStateId TS_TREE_STATE_ERROR = USHRT_MAX - 1; TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, TSSymbolMetadata metadata) { - TSTree *result = malloc(sizeof(TSTree)); + TSTree *result = ts_malloc(sizeof(TSTree)); + if (!result) + return NULL; + *result = (TSTree){ .ref_count = 1, .symbol = sym, @@ -41,12 +45,18 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char) (TSSymbolMetadata){ .visible = true, .named = true, }); + if (!result) + return NULL; + result->lookahead_char = lookahead_char; return 
result; } TSTree *ts_tree_make_copy(TSTree *self) { - TSTree *result = malloc(sizeof(TSTree)); + TSTree *result = ts_malloc(sizeof(TSTree)); + if (!result) + return NULL; + *result = *self; return result; } @@ -109,6 +119,9 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **children, TSSymbolMetadata metadata) { TSTree *result = ts_tree_make_leaf(symbol, ts_length_zero(), ts_length_zero(), metadata); + if (!result) + return NULL; + ts_tree_set_children(result, child_count, children); return result; } @@ -125,8 +138,8 @@ void ts_tree_release(TSTree *self) { for (size_t i = 0; i < self->child_count; i++) ts_tree_release(self->children[i]); if (self->child_count > 0) - free(self->children); - free(self); + ts_free(self->children); + ts_free(self); } } @@ -252,7 +265,7 @@ char *ts_tree_string(const TSTree *self, const char **symbol_names, static char SCRATCH[1]; size_t size = 1 + ts_tree__write_to_string(self, symbol_names, SCRATCH, 0, true, include_anonymous); - char *result = malloc(size * sizeof(char)); + char *result = ts_malloc(size * sizeof(char)); ts_tree__write_to_string(self, symbol_names, result, size, true, include_anonymous); return result; diff --git a/src/runtime/vector.h b/src/runtime/vector.h index 00019ca0..df187479 100644 --- a/src/runtime/vector.h +++ b/src/runtime/vector.h @@ -8,6 +8,7 @@ extern "C" { #include #include #include +#include "runtime/alloc.h" typedef struct { void *contents; @@ -18,15 +19,25 @@ typedef struct { static inline Vector vector_new(size_t element_size, size_t capacity) { Vector result; - result.contents = malloc(capacity * element_size); result.size = 0; result.capacity = capacity; result.element_size = element_size; + + if (capacity > 0) { + result.contents = ts_calloc(capacity, element_size); + if (!result.contents) + result.element_size = 0; + } + return result; } +static inline bool vector_valid(Vector *self) { + return self->element_size > 0; +} + static inline void vector_delete(Vector *self) { - 
free(self->contents); + ts_free(self->contents); } static inline void *vector_get(Vector *self, size_t index) { @@ -52,17 +63,20 @@ static inline void vector_erase(Vector *self, size_t index) { self->size--; } -static inline void vector_push(Vector *self, void *entry) { +static inline bool vector_push(Vector *self, void *entry) { if (self->size == self->capacity) { self->capacity += 4; - self->contents = - realloc(self->contents, self->capacity * self->element_size); + void *new_contents = ts_realloc(self->contents, self->capacity * self->element_size); + if (!new_contents) + return false; + self->contents = new_contents; } char *contents = (char *)self->contents; memcpy(contents + (self->size * self->element_size), (char *)entry, self->element_size); self->size++; + return true; } static inline void vector_reverse(Vector *self) { @@ -80,7 +94,7 @@ static inline void vector_reverse(Vector *self) { static inline Vector vector_copy(Vector *self) { Vector copy = *self; - copy.contents = memcpy(malloc(self->capacity * self->element_size), + copy.contents = memcpy(ts_calloc(self->capacity, self->element_size), self->contents, self->size * self->element_size); return copy; } diff --git a/tests.gyp b/tests.gyp index 31dfe462..4bfb8113 100644 --- a/tests.gyp +++ b/tests.gyp @@ -23,8 +23,8 @@ 'libraries': [ '-ldl' ], - 'default_configuration': 'Debug', - 'configurations': {'Debug': {}, 'Release': {}}, + 'default_configuration': 'Test', + 'configurations': {'Test': {}, 'Release': {}}, 'cflags': [ '-g', '-O0',