diff --git a/.clang_complete b/.clang_complete index 12c483f7..91ab8360 100644 --- a/.clang_complete +++ b/.clang_complete @@ -5,3 +5,4 @@ -Iexternals/utf8proc -Iexternals/json-parser -Iexternals/bandit +-Iexternals/crypto-algorithms diff --git a/.gitmodules b/.gitmodules index fa884d94..72fbdb67 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "externals/json-parser"] path = externals/json-parser url = https://github.com/udp/json-parser.git +[submodule "externals/crypto-algorithms"] + path = externals/crypto-algorithms + url = https://github.com/B-Con/crypto-algorithms.git diff --git a/externals/crypto-algorithms b/externals/crypto-algorithms new file mode 160000 index 00000000..cfbde484 --- /dev/null +++ b/externals/crypto-algorithms @@ -0,0 +1 @@ +Subproject commit cfbde48414baacf51fc7c74f275190881f037d32 diff --git a/src/runtime/error_costs.c b/src/runtime/error_costs.c index ac055f45..b287cecd 100644 --- a/src/runtime/error_costs.c +++ b/src/runtime/error_costs.c @@ -2,6 +2,7 @@ static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; static const unsigned MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE = 24; +static const unsigned MAX_DEPTH_TO_ALLOW_MULTIPLE = 12; ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_mergeable) { if (a.count < b.count) { @@ -42,5 +43,14 @@ ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_merg } } + if (a.count > 0) { + if (a.depth > MAX_DEPTH_TO_ALLOW_MULTIPLE || + b.depth > MAX_DEPTH_TO_ALLOW_MULTIPLE) { + return a.depth <= b.depth ? + ErrorComparisonTakeLeft : + ErrorComparisonTakeRight; + } + } + return ErrorComparisonNone; } diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index 968c2422..f65b9c93 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -16,6 +16,7 @@ typedef struct { unsigned count; unsigned cost; unsigned push_count; + unsigned depth; } ErrorStatus; typedef enum { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index b77ec2b3..047fca9a 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -40,7 +40,9 @@ #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) -static const uint32_t MAX_STACK_VERSION_COUNT = 16; +static const uint32_t SOFT_MAX_VERSION_COUNT = 10; +static const uint32_t HARD_MAX_VERSION_COUNT = 18; +static const uint32_t MAX_PRECEDING_TREES_TO_SKIP = 32; typedef struct { Parser *parser; @@ -214,8 +216,8 @@ static CondenseResult parser__condense_stack(Parser *self) { } } - while (ts_stack_version_count(self->stack) > MAX_STACK_VERSION_COUNT) { - ts_stack_remove_version(self->stack, MAX_STACK_VERSION_COUNT); + while (ts_stack_version_count(self->stack) > SOFT_MAX_VERSION_COUNT) { + ts_stack_remove_version(self->stack, SOFT_MAX_VERSION_COUNT); result |= CondenseResultMadeChange; } @@ -476,8 +478,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, return true; for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { - if (i == version || ts_stack_is_halted(self->stack, i)) - continue; + if (i == version || ts_stack_is_halted(self->stack, i)) continue; switch (error_status_compare(my_error_status, ts_stack_error_status(self->stack, i), @@ -487,7 +488,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version, ts_stack_halt(self->stack, i); break; case ErrorComparisonTakeRight: - return true; + if (i < version) return true; default: break; } @@ -570,8 +571,7 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version, // delete the rest of the tree arrays. while (i + 1 < pop.slices.size) { StackSlice next_slice = pop.slices.contents[i + 1]; - if (next_slice.version != slice.version) - break; + if (next_slice.version != slice.version) break; i++; uint32_t child_count = next_slice.trees.size; @@ -609,8 +609,9 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version, } ErrorStatus error_status = ts_stack_error_status(self->stack, other_version); - if (parser__better_version_exists(self, version, error_status)) + if (parser__better_version_exists(self, version, error_status)) { ts_stack_remove_version(self->stack, other_version); + } } // Push the parent node onto the stack, along with any extra tokens that @@ -962,6 +963,7 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version) static StackIterateAction parser__skip_preceding_trees_callback( void *payload, TSStateId state, const TreeArray *trees, uint32_t tree_count) { + if (trees->size > MAX_PRECEDING_TREES_TO_SKIP) return StackIterateStop; if (tree_count > 0 && state != ERROR_STATE) { uint32_t bytes_skipped = 0; for (uint32_t i = 0; i < trees->size; i++) { @@ -1085,9 +1087,18 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state, } LOG("recover state:%u", state); - StackVersion new_version = ts_stack_copy_version(self->stack, version); - bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra; - parser__shift(self, new_version, ERROR_STATE, lookahead, can_be_extra); + + if (ts_stack_version_count(self->stack) < HARD_MAX_VERSION_COUNT) { + StackVersion new_version = ts_stack_copy_version(self->stack, version); + bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra; + parser__shift(self, new_version, ERROR_STATE, lookahead, can_be_extra); + + ErrorStatus error_status = ts_stack_error_status(self->stack, new_version); + if (parser__better_version_exists(self, version, error_status)) { + ts_stack_remove_version(self->stack, new_version); + } + } + parser__shift(self, version, state, lookahead, false); } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index d244fab0..6bb59095 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -81,22 +81,31 @@ static void stack_node_retain(StackNode *self) { } static void stack_node_release(StackNode *self, StackNodeArray *pool) { - if (!self) return; +recur: assert(self->ref_count != 0); self->ref_count--; - if (self->ref_count == 0) { - for (int i = 0; i < self->link_count; i++) { - if (self->links[i].tree) { - ts_tree_release(self->links[i].tree); - } + if (self->ref_count > 0) return; + + StackNode *last_predecessor = NULL; + if (self->link_count > 0) { + unsigned i = 0; + for (; i < self->link_count - 1; i++) { + if (self->links[i].tree) ts_tree_release(self->links[i].tree); stack_node_release(self->links[i].node, pool); } + if (self->links[i].tree) ts_tree_release(self->links[i].tree); + last_predecessor = self->links[i].node; + } - if (pool->size < MAX_NODE_POOL_SIZE) { - array_push(pool, self); - } else { - ts_free(self); - } + if (pool->size < MAX_NODE_POOL_SIZE) { + array_push(pool, self); + } else { + ts_free(self); + } + + if (last_predecessor) { + self = last_predecessor; + goto recur; } } @@ -294,8 +303,9 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version, if (!link.tree->extra) { next_iterator->tree_count++; next_iterator->depth--; - if (!link.is_pending) + if (!link.is_pending) { next_iterator->is_pending = false; + } } array_push(&next_iterator->trees, link.tree); ts_tree_retain(link.tree); @@ -384,6 +394,7 @@ ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) { .cost = head->node->error_cost, .count = head->node->error_count, .push_count = head->push_count, + .depth = head->depth, }; } @@ -551,9 +562,8 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi for (uint32_t i = 0; i < head2->node->link_count; i++) { stack_node_add_link(head1->node, head2->node->links[i]); } - if (head2->push_count > head1->push_count) { - head1->push_count = head2->push_count; - } + if (head2->push_count > head1->push_count) head1->push_count = head2->push_count; + if (head2->depth > head1->depth) head1->depth = head2->depth; ts_stack_remove_version(self, version2); } diff --git a/test/helpers/tree_helpers.cc b/test/helpers/tree_helpers.cc index 2d8efb1b..8b6b6623 100644 --- a/test/helpers/tree_helpers.cc +++ b/test/helpers/tree_helpers.cc @@ -1,4 +1,6 @@ +#include "bandit/bandit.h" #include "helpers/tree_helpers.h" +#include "helpers/point_helpers.h" #include "runtime/document.h" #include "runtime/node.h" #include @@ -48,3 +50,42 @@ bool operator==(const std::vector &vec, const TreeArray &array) { return false; return true; } + +void assert_consistent_tree_sizes(TSNode node) { + size_t child_count = ts_node_child_count(node); + size_t start_byte = ts_node_start_byte(node); + size_t end_byte = ts_node_end_byte(node); + TSPoint start_point = ts_node_start_point(node); + TSPoint end_point = ts_node_end_point(node); + bool some_child_has_changes = false; + + AssertThat(start_byte, !IsGreaterThan(end_byte)); + AssertThat(start_point, !IsGreaterThan(end_point)); + + size_t last_child_end_byte = start_byte; + TSPoint last_child_end_point = start_point; + + for (size_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + size_t child_start_byte = ts_node_start_byte(child); + TSPoint child_start_point = ts_node_start_point(child); + + AssertThat(child_start_byte, !IsLessThan(last_child_end_byte)); + AssertThat(child_start_point, !IsLessThan(last_child_end_point)); + assert_consistent_tree_sizes(child); + if (ts_node_has_changes(child)) + some_child_has_changes = true; + + last_child_end_byte = ts_node_end_byte(child); + last_child_end_point = ts_node_end_point(child); + } + + if (child_count > 0) { + AssertThat(end_byte, !IsLessThan(last_child_end_byte)); + AssertThat(end_point, !IsLessThan(last_child_end_point)); + } + + if (some_child_has_changes) { + AssertThat(ts_node_has_changes(node), IsTrue()); + } +} diff --git a/test/helpers/tree_helpers.h b/test/helpers/tree_helpers.h index e91a3c8f..19ae2c70 100644 --- a/test/helpers/tree_helpers.h +++ b/test/helpers/tree_helpers.h @@ -13,4 +13,6 @@ std::ostream &operator<<(std::ostream &stream, const TSNode &node); bool operator==(const TSNode &left, const TSNode &right); bool operator==(const std::vector &right, const TreeArray &array); +void assert_consistent_tree_sizes(TSNode node); + #endif // HELPERS_TREE_HELPERS_H_ diff --git a/test/integration/fuzzing-examples.cc b/test/integration/fuzzing-examples.cc new file mode 100644 index 00000000..5c76efb9 --- /dev/null +++ b/test/integration/fuzzing-examples.cc @@ -0,0 +1,60 @@ +#include "test_helper.h" +#include "base64.c" +#include "helpers/load_language.h" +#include "helpers/tree_helpers.h" +#include "helpers/record_alloc.h" + +START_TEST + +vector> examples({ + { + "javascript", + "Bi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0xLS0tLTYtLfpZAA==" + }, +}); + +describe("examples found via fuzzing", [&]() { + before_each([&]() { + record_alloc::start(); + }); + + after_each([&]() { + AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); + }); + + for (unsigned i = 0, n = examples.size(); i < n; i++) { + + it(("parses example number " + to_string(i)).c_str(), [&]() { + TSDocument *document = ts_document_new(); + // ts_document_print_debugging_graphs(document, true); + + const string &language_name = examples[i].first; + ts_document_set_language(document, load_real_language(language_name)); + + string input; + const string &base64_input = examples[i].second; + input.resize(base64_input.size()); + input.resize(base64_decode( + reinterpret_cast(base64_input.c_str()), + reinterpret_cast(&input[0]), + base64_input.size() + )); + + ts_document_set_input_string_with_length( + document, + input.c_str(), + input.size() + ); + + ts_document_parse(document); + + TSNode node = ts_document_root_node(document); + assert_consistent_tree_sizes(node); + + ts_document_free(document); + }); + + } +}); + +END_TEST diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index d89c97fd..dc8f82dd 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -9,47 +9,9 @@ #include "helpers/record_alloc.h" #include "helpers/random_helpers.h" #include "helpers/scope_sequence.h" +#include "helpers/tree_helpers.h" #include -static void assert_consistent_sizes(TSNode node) { - size_t child_count = ts_node_child_count(node); - size_t start_byte = ts_node_start_byte(node); - size_t end_byte = ts_node_end_byte(node); - TSPoint start_point = ts_node_start_point(node); - TSPoint end_point = ts_node_end_point(node); - bool some_child_has_changes = false; - - AssertThat(start_byte, !IsGreaterThan(end_byte)); - AssertThat(start_point, !IsGreaterThan(end_point)); - - size_t last_child_end_byte = start_byte; - TSPoint last_child_end_point = start_point; - - for (size_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - size_t child_start_byte = ts_node_start_byte(child); - TSPoint child_start_point = ts_node_start_point(child); - - AssertThat(child_start_byte, !IsLessThan(last_child_end_byte)); - AssertThat(child_start_point, !IsLessThan(last_child_end_point)); - assert_consistent_sizes(child); - if (ts_node_has_changes(child)) - some_child_has_changes = true; - - last_child_end_byte = ts_node_end_byte(child); - last_child_end_point = ts_node_end_point(child); - } - - if (child_count > 0) { - AssertThat(end_byte, !IsLessThan(last_child_end_byte)); - AssertThat(end_point, !IsLessThan(last_child_end_point)); - } - - if (some_child_has_changes) { - AssertThat(ts_node_has_changes(node), IsTrue()); - } -} - static void assert_correct_tree_size(TSDocument *document, string content) { TSNode root_node = ts_document_root_node(document); size_t expected_size = content.size(); @@ -65,7 +27,7 @@ static void assert_correct_tree_size(TSDocument *document, string content) { expected_size = content.find_last_not_of("\n ") + 1; AssertThat(ts_node_end_byte(root_node), Equals(expected_size)); - assert_consistent_sizes(root_node); + assert_consistent_tree_sizes(root_node); } START_TEST diff --git a/test/runtime/stack_test.cc b/test/runtime/stack_test.cc index 03bf7091..8d1d47f5 100644 --- a/test/runtime/stack_test.cc +++ b/test/runtime/stack_test.cc @@ -390,7 +390,7 @@ describe("Stack", [&]() { StackSlice slice2 = pop.slices.contents[1]; AssertThat(slice2.version, Equals(1)); - AssertThat(slice2.trees, Equals(vector({ trees[4], trees[5], trees[6], trees[10] }))) + AssertThat(slice2.trees, Equals(vector({ trees[4], trees[5], trees[6], trees[10] }))); AssertThat(ts_stack_version_count(stack), Equals(2)); AssertThat(ts_stack_top_state(stack, 0), Equals(stateI)); @@ -441,15 +441,15 @@ describe("Stack", [&]() { StackSlice slice1 = pop.slices.contents[0]; AssertThat(slice1.version, Equals(1)); - AssertThat(slice1.trees, Equals(vector({ trees[3], trees[10] }))) + AssertThat(slice1.trees, Equals(vector({ trees[3], trees[10] }))); StackSlice slice2 = pop.slices.contents[1]; AssertThat(slice2.version, Equals(2)); - AssertThat(slice2.trees, Equals(vector({ trees[6], trees[10] }))) + AssertThat(slice2.trees, Equals(vector({ trees[6], trees[10] }))); StackSlice slice3 = pop.slices.contents[2]; AssertThat(slice3.version, Equals(3)); - AssertThat(slice3.trees, Equals(vector({ trees[9], trees[10] }))) + AssertThat(slice3.trees, Equals(vector({ trees[9], trees[10] }))); AssertThat(ts_stack_version_count(stack), Equals(4)); AssertThat(ts_stack_top_state(stack, 0), Equals(stateI)); diff --git a/tests.gyp b/tests.gyp index dedde1ba..bede6dfe 100644 --- a/tests.gyp +++ b/tests.gyp @@ -12,6 +12,7 @@ 'test', 'externals/bandit', 'externals/utf8proc', + 'externals/crypto-algorithms', ], 'sources': [ 'test/tests.cc',