Merge pull request #85 from tree-sitter/fuzzing-examples
Fix more performance problems found by fuzzing
This commit is contained in:
commit
69500c9dd7
13 changed files with 174 additions and 71 deletions
|
|
@ -5,3 +5,4 @@
|
|||
-Iexternals/utf8proc
|
||||
-Iexternals/json-parser
|
||||
-Iexternals/bandit
|
||||
-Iexternals/crypto-algorithms
|
||||
|
|
|
|||
3
.gitmodules
vendored
3
.gitmodules
vendored
|
|
@ -10,3 +10,6 @@
|
|||
[submodule "externals/json-parser"]
|
||||
path = externals/json-parser
|
||||
url = https://github.com/udp/json-parser.git
|
||||
[submodule "externals/crypto-algorithms"]
|
||||
path = externals/crypto-algorithms
|
||||
url = https://github.com/B-Con/crypto-algorithms.git
|
||||
|
|
|
|||
1
externals/crypto-algorithms
vendored
Submodule
1
externals/crypto-algorithms
vendored
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit cfbde48414baacf51fc7c74f275190881f037d32
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
|
||||
static const unsigned MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE = 24;
|
||||
static const unsigned MAX_DEPTH_TO_ALLOW_MULTIPLE = 12;
|
||||
|
||||
ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_mergeable) {
|
||||
if (a.count < b.count) {
|
||||
|
|
@ -42,5 +43,14 @@ ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_merg
|
|||
}
|
||||
}
|
||||
|
||||
if (a.count > 0) {
|
||||
if (a.depth > MAX_DEPTH_TO_ALLOW_MULTIPLE ||
|
||||
b.depth > MAX_DEPTH_TO_ALLOW_MULTIPLE) {
|
||||
return a.depth <= b.depth ?
|
||||
ErrorComparisonTakeLeft :
|
||||
ErrorComparisonTakeRight;
|
||||
}
|
||||
}
|
||||
|
||||
return ErrorComparisonNone;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ typedef struct {
|
|||
unsigned count;
|
||||
unsigned cost;
|
||||
unsigned push_count;
|
||||
unsigned depth;
|
||||
} ErrorStatus;
|
||||
|
||||
typedef enum {
|
||||
|
|
|
|||
|
|
@ -40,7 +40,9 @@
|
|||
|
||||
#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
|
||||
|
||||
static const uint32_t MAX_STACK_VERSION_COUNT = 16;
|
||||
static const uint32_t SOFT_MAX_VERSION_COUNT = 10;
|
||||
static const uint32_t HARD_MAX_VERSION_COUNT = 18;
|
||||
static const uint32_t MAX_PRECEDING_TREES_TO_SKIP = 32;
|
||||
|
||||
typedef struct {
|
||||
Parser *parser;
|
||||
|
|
@ -214,8 +216,8 @@ static CondenseResult parser__condense_stack(Parser *self) {
|
|||
}
|
||||
}
|
||||
|
||||
while (ts_stack_version_count(self->stack) > MAX_STACK_VERSION_COUNT) {
|
||||
ts_stack_remove_version(self->stack, MAX_STACK_VERSION_COUNT);
|
||||
while (ts_stack_version_count(self->stack) > SOFT_MAX_VERSION_COUNT) {
|
||||
ts_stack_remove_version(self->stack, SOFT_MAX_VERSION_COUNT);
|
||||
result |= CondenseResultMadeChange;
|
||||
}
|
||||
|
||||
|
|
@ -476,8 +478,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version,
|
|||
return true;
|
||||
|
||||
for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
|
||||
if (i == version || ts_stack_is_halted(self->stack, i))
|
||||
continue;
|
||||
if (i == version || ts_stack_is_halted(self->stack, i)) continue;
|
||||
|
||||
switch (error_status_compare(my_error_status,
|
||||
ts_stack_error_status(self->stack, i),
|
||||
|
|
@ -487,7 +488,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version,
|
|||
ts_stack_halt(self->stack, i);
|
||||
break;
|
||||
case ErrorComparisonTakeRight:
|
||||
return true;
|
||||
if (i < version) return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -570,8 +571,7 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
|
|||
// delete the rest of the tree arrays.
|
||||
while (i + 1 < pop.slices.size) {
|
||||
StackSlice next_slice = pop.slices.contents[i + 1];
|
||||
if (next_slice.version != slice.version)
|
||||
break;
|
||||
if (next_slice.version != slice.version) break;
|
||||
i++;
|
||||
|
||||
uint32_t child_count = next_slice.trees.size;
|
||||
|
|
@ -609,8 +609,9 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
|
|||
}
|
||||
|
||||
ErrorStatus error_status = ts_stack_error_status(self->stack, other_version);
|
||||
if (parser__better_version_exists(self, version, error_status))
|
||||
if (parser__better_version_exists(self, version, error_status)) {
|
||||
ts_stack_remove_version(self->stack, other_version);
|
||||
}
|
||||
}
|
||||
|
||||
// Push the parent node onto the stack, along with any extra tokens that
|
||||
|
|
@ -962,6 +963,7 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
|
|||
|
||||
static StackIterateAction parser__skip_preceding_trees_callback(
|
||||
void *payload, TSStateId state, const TreeArray *trees, uint32_t tree_count) {
|
||||
if (trees->size > MAX_PRECEDING_TREES_TO_SKIP) return StackIterateStop;
|
||||
if (tree_count > 0 && state != ERROR_STATE) {
|
||||
uint32_t bytes_skipped = 0;
|
||||
for (uint32_t i = 0; i < trees->size; i++) {
|
||||
|
|
@ -1085,9 +1087,18 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
|
|||
}
|
||||
|
||||
LOG("recover state:%u", state);
|
||||
StackVersion new_version = ts_stack_copy_version(self->stack, version);
|
||||
bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra;
|
||||
parser__shift(self, new_version, ERROR_STATE, lookahead, can_be_extra);
|
||||
|
||||
if (ts_stack_version_count(self->stack) < HARD_MAX_VERSION_COUNT) {
|
||||
StackVersion new_version = ts_stack_copy_version(self->stack, version);
|
||||
bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra;
|
||||
parser__shift(self, new_version, ERROR_STATE, lookahead, can_be_extra);
|
||||
|
||||
ErrorStatus error_status = ts_stack_error_status(self->stack, new_version);
|
||||
if (parser__better_version_exists(self, version, error_status)) {
|
||||
ts_stack_remove_version(self->stack, new_version);
|
||||
}
|
||||
}
|
||||
|
||||
parser__shift(self, version, state, lookahead, false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -81,22 +81,31 @@ static void stack_node_retain(StackNode *self) {
|
|||
}
|
||||
|
||||
static void stack_node_release(StackNode *self, StackNodeArray *pool) {
|
||||
if (!self) return;
|
||||
recur:
|
||||
assert(self->ref_count != 0);
|
||||
self->ref_count--;
|
||||
if (self->ref_count == 0) {
|
||||
for (int i = 0; i < self->link_count; i++) {
|
||||
if (self->links[i].tree) {
|
||||
ts_tree_release(self->links[i].tree);
|
||||
}
|
||||
if (self->ref_count > 0) return;
|
||||
|
||||
StackNode *last_predecessor = NULL;
|
||||
if (self->link_count > 0) {
|
||||
unsigned i = 0;
|
||||
for (; i < self->link_count - 1; i++) {
|
||||
if (self->links[i].tree) ts_tree_release(self->links[i].tree);
|
||||
stack_node_release(self->links[i].node, pool);
|
||||
}
|
||||
if (self->links[i].tree) ts_tree_release(self->links[i].tree);
|
||||
last_predecessor = self->links[i].node;
|
||||
}
|
||||
|
||||
if (pool->size < MAX_NODE_POOL_SIZE) {
|
||||
array_push(pool, self);
|
||||
} else {
|
||||
ts_free(self);
|
||||
}
|
||||
if (pool->size < MAX_NODE_POOL_SIZE) {
|
||||
array_push(pool, self);
|
||||
} else {
|
||||
ts_free(self);
|
||||
}
|
||||
|
||||
if (last_predecessor) {
|
||||
self = last_predecessor;
|
||||
goto recur;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -294,8 +303,9 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version,
|
|||
if (!link.tree->extra) {
|
||||
next_iterator->tree_count++;
|
||||
next_iterator->depth--;
|
||||
if (!link.is_pending)
|
||||
if (!link.is_pending) {
|
||||
next_iterator->is_pending = false;
|
||||
}
|
||||
}
|
||||
array_push(&next_iterator->trees, link.tree);
|
||||
ts_tree_retain(link.tree);
|
||||
|
|
@ -384,6 +394,7 @@ ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) {
|
|||
.cost = head->node->error_cost,
|
||||
.count = head->node->error_count,
|
||||
.push_count = head->push_count,
|
||||
.depth = head->depth,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -551,9 +562,8 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi
|
|||
for (uint32_t i = 0; i < head2->node->link_count; i++) {
|
||||
stack_node_add_link(head1->node, head2->node->links[i]);
|
||||
}
|
||||
if (head2->push_count > head1->push_count) {
|
||||
head1->push_count = head2->push_count;
|
||||
}
|
||||
if (head2->push_count > head1->push_count) head1->push_count = head2->push_count;
|
||||
if (head2->depth > head1->depth) head1->depth = head2->depth;
|
||||
ts_stack_remove_version(self, version2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#include "bandit/bandit.h"
|
||||
#include "helpers/tree_helpers.h"
|
||||
#include "helpers/point_helpers.h"
|
||||
#include "runtime/document.h"
|
||||
#include "runtime/node.h"
|
||||
#include <ostream>
|
||||
|
|
@ -48,3 +50,42 @@ bool operator==(const std::vector<Tree *> &vec, const TreeArray &array) {
|
|||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void assert_consistent_tree_sizes(TSNode node) {
|
||||
size_t child_count = ts_node_child_count(node);
|
||||
size_t start_byte = ts_node_start_byte(node);
|
||||
size_t end_byte = ts_node_end_byte(node);
|
||||
TSPoint start_point = ts_node_start_point(node);
|
||||
TSPoint end_point = ts_node_end_point(node);
|
||||
bool some_child_has_changes = false;
|
||||
|
||||
AssertThat(start_byte, !IsGreaterThan(end_byte));
|
||||
AssertThat(start_point, !IsGreaterThan(end_point));
|
||||
|
||||
size_t last_child_end_byte = start_byte;
|
||||
TSPoint last_child_end_point = start_point;
|
||||
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
size_t child_start_byte = ts_node_start_byte(child);
|
||||
TSPoint child_start_point = ts_node_start_point(child);
|
||||
|
||||
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
|
||||
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
|
||||
assert_consistent_tree_sizes(child);
|
||||
if (ts_node_has_changes(child))
|
||||
some_child_has_changes = true;
|
||||
|
||||
last_child_end_byte = ts_node_end_byte(child);
|
||||
last_child_end_point = ts_node_end_point(child);
|
||||
}
|
||||
|
||||
if (child_count > 0) {
|
||||
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
|
||||
AssertThat(end_point, !IsLessThan(last_child_end_point));
|
||||
}
|
||||
|
||||
if (some_child_has_changes) {
|
||||
AssertThat(ts_node_has_changes(node), IsTrue());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,4 +13,6 @@ std::ostream &operator<<(std::ostream &stream, const TSNode &node);
|
|||
bool operator==(const TSNode &left, const TSNode &right);
|
||||
bool operator==(const std::vector<Tree *> &right, const TreeArray &array);
|
||||
|
||||
void assert_consistent_tree_sizes(TSNode node);
|
||||
|
||||
#endif // HELPERS_TREE_HELPERS_H_
|
||||
|
|
|
|||
60
test/integration/fuzzing-examples.cc
Normal file
60
test/integration/fuzzing-examples.cc
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#include "test_helper.h"
|
||||
#include "base64.c"
|
||||
#include "helpers/load_language.h"
|
||||
#include "helpers/tree_helpers.h"
|
||||
#include "helpers/record_alloc.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
vector<pair<string, string>> examples({
|
||||
{
|
||||
"javascript",
|
||||
"Bi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0xLS0tLTYtLfpZAA=="
|
||||
},
|
||||
});
|
||||
|
||||
describe("examples found via fuzzing", [&]() {
|
||||
before_each([&]() {
|
||||
record_alloc::start();
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
|
||||
});
|
||||
|
||||
for (unsigned i = 0, n = examples.size(); i < n; i++) {
|
||||
|
||||
it(("parses example number " + to_string(i)).c_str(), [&]() {
|
||||
TSDocument *document = ts_document_new();
|
||||
// ts_document_print_debugging_graphs(document, true);
|
||||
|
||||
const string &language_name = examples[i].first;
|
||||
ts_document_set_language(document, load_real_language(language_name));
|
||||
|
||||
string input;
|
||||
const string &base64_input = examples[i].second;
|
||||
input.resize(base64_input.size());
|
||||
input.resize(base64_decode(
|
||||
reinterpret_cast<const unsigned char *>(base64_input.c_str()),
|
||||
reinterpret_cast<unsigned char *>(&input[0]),
|
||||
base64_input.size()
|
||||
));
|
||||
|
||||
ts_document_set_input_string_with_length(
|
||||
document,
|
||||
input.c_str(),
|
||||
input.size()
|
||||
);
|
||||
|
||||
ts_document_parse(document);
|
||||
|
||||
TSNode node = ts_document_root_node(document);
|
||||
assert_consistent_tree_sizes(node);
|
||||
|
||||
ts_document_free(document);
|
||||
});
|
||||
|
||||
}
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -9,47 +9,9 @@
|
|||
#include "helpers/record_alloc.h"
|
||||
#include "helpers/random_helpers.h"
|
||||
#include "helpers/scope_sequence.h"
|
||||
#include "helpers/tree_helpers.h"
|
||||
#include <set>
|
||||
|
||||
static void assert_consistent_sizes(TSNode node) {
|
||||
size_t child_count = ts_node_child_count(node);
|
||||
size_t start_byte = ts_node_start_byte(node);
|
||||
size_t end_byte = ts_node_end_byte(node);
|
||||
TSPoint start_point = ts_node_start_point(node);
|
||||
TSPoint end_point = ts_node_end_point(node);
|
||||
bool some_child_has_changes = false;
|
||||
|
||||
AssertThat(start_byte, !IsGreaterThan(end_byte));
|
||||
AssertThat(start_point, !IsGreaterThan(end_point));
|
||||
|
||||
size_t last_child_end_byte = start_byte;
|
||||
TSPoint last_child_end_point = start_point;
|
||||
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
size_t child_start_byte = ts_node_start_byte(child);
|
||||
TSPoint child_start_point = ts_node_start_point(child);
|
||||
|
||||
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
|
||||
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
|
||||
assert_consistent_sizes(child);
|
||||
if (ts_node_has_changes(child))
|
||||
some_child_has_changes = true;
|
||||
|
||||
last_child_end_byte = ts_node_end_byte(child);
|
||||
last_child_end_point = ts_node_end_point(child);
|
||||
}
|
||||
|
||||
if (child_count > 0) {
|
||||
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
|
||||
AssertThat(end_point, !IsLessThan(last_child_end_point));
|
||||
}
|
||||
|
||||
if (some_child_has_changes) {
|
||||
AssertThat(ts_node_has_changes(node), IsTrue());
|
||||
}
|
||||
}
|
||||
|
||||
static void assert_correct_tree_size(TSDocument *document, string content) {
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
size_t expected_size = content.size();
|
||||
|
|
@ -65,7 +27,7 @@ static void assert_correct_tree_size(TSDocument *document, string content) {
|
|||
expected_size = content.find_last_not_of("\n ") + 1;
|
||||
|
||||
AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
|
||||
assert_consistent_sizes(root_node);
|
||||
assert_consistent_tree_sizes(root_node);
|
||||
}
|
||||
|
||||
START_TEST
|
||||
|
|
|
|||
|
|
@ -390,7 +390,7 @@ describe("Stack", [&]() {
|
|||
|
||||
StackSlice slice2 = pop.slices.contents[1];
|
||||
AssertThat(slice2.version, Equals<StackVersion>(1));
|
||||
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[4], trees[5], trees[6], trees[10] })))
|
||||
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[4], trees[5], trees[6], trees[10] })));
|
||||
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
|
||||
AssertThat(ts_stack_top_state(stack, 0), Equals(stateI));
|
||||
|
|
@ -441,15 +441,15 @@ describe("Stack", [&]() {
|
|||
|
||||
StackSlice slice1 = pop.slices.contents[0];
|
||||
AssertThat(slice1.version, Equals<StackVersion>(1));
|
||||
AssertThat(slice1.trees, Equals(vector<Tree *>({ trees[3], trees[10] })))
|
||||
AssertThat(slice1.trees, Equals(vector<Tree *>({ trees[3], trees[10] })));
|
||||
|
||||
StackSlice slice2 = pop.slices.contents[1];
|
||||
AssertThat(slice2.version, Equals<StackVersion>(2));
|
||||
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[6], trees[10] })))
|
||||
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[6], trees[10] })));
|
||||
|
||||
StackSlice slice3 = pop.slices.contents[2];
|
||||
AssertThat(slice3.version, Equals<StackVersion>(3));
|
||||
AssertThat(slice3.trees, Equals(vector<Tree *>({ trees[9], trees[10] })))
|
||||
AssertThat(slice3.trees, Equals(vector<Tree *>({ trees[9], trees[10] })));
|
||||
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(4));
|
||||
AssertThat(ts_stack_top_state(stack, 0), Equals(stateI));
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
'test',
|
||||
'externals/bandit',
|
||||
'externals/utf8proc',
|
||||
'externals/crypto-algorithms',
|
||||
],
|
||||
'sources': [
|
||||
'test/tests.cc',
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue