Merge pull request #85 from tree-sitter/fuzzing-examples

Fix more performance problems found by fuzzing
This commit is contained in:
Max Brunsfeld 2017-07-05 09:48:56 -07:00 committed by GitHub
commit 69500c9dd7
13 changed files with 174 additions and 71 deletions

View file

@ -5,3 +5,4 @@
-Iexternals/utf8proc
-Iexternals/json-parser
-Iexternals/bandit
-Iexternals/crypto-algorithms

3
.gitmodules vendored
View file

@ -10,3 +10,6 @@
[submodule "externals/json-parser"]
path = externals/json-parser
url = https://github.com/udp/json-parser.git
[submodule "externals/crypto-algorithms"]
path = externals/crypto-algorithms
url = https://github.com/B-Con/crypto-algorithms.git

1
externals/crypto-algorithms vendored Submodule

@ -0,0 +1 @@
Subproject commit cfbde48414baacf51fc7c74f275190881f037d32

View file

@ -2,6 +2,7 @@
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
static const unsigned MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE = 24;
static const unsigned MAX_DEPTH_TO_ALLOW_MULTIPLE = 12;
ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_mergeable) {
if (a.count < b.count) {
@ -42,5 +43,14 @@ ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_merg
}
}
if (a.count > 0) {
if (a.depth > MAX_DEPTH_TO_ALLOW_MULTIPLE ||
b.depth > MAX_DEPTH_TO_ALLOW_MULTIPLE) {
return a.depth <= b.depth ?
ErrorComparisonTakeLeft :
ErrorComparisonTakeRight;
}
}
return ErrorComparisonNone;
}

View file

@ -16,6 +16,7 @@ typedef struct {
unsigned count;
unsigned cost;
unsigned push_count;
unsigned depth;
} ErrorStatus;
typedef enum {

View file

@ -40,7 +40,9 @@
#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
static const uint32_t MAX_STACK_VERSION_COUNT = 16;
static const uint32_t SOFT_MAX_VERSION_COUNT = 10;
static const uint32_t HARD_MAX_VERSION_COUNT = 18;
static const uint32_t MAX_PRECEDING_TREES_TO_SKIP = 32;
typedef struct {
Parser *parser;
@ -214,8 +216,8 @@ static CondenseResult parser__condense_stack(Parser *self) {
}
}
while (ts_stack_version_count(self->stack) > MAX_STACK_VERSION_COUNT) {
ts_stack_remove_version(self->stack, MAX_STACK_VERSION_COUNT);
while (ts_stack_version_count(self->stack) > SOFT_MAX_VERSION_COUNT) {
ts_stack_remove_version(self->stack, SOFT_MAX_VERSION_COUNT);
result |= CondenseResultMadeChange;
}
@ -476,8 +478,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version,
return true;
for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
if (i == version || ts_stack_is_halted(self->stack, i))
continue;
if (i == version || ts_stack_is_halted(self->stack, i)) continue;
switch (error_status_compare(my_error_status,
ts_stack_error_status(self->stack, i),
@ -487,7 +488,7 @@ static bool parser__better_version_exists(Parser *self, StackVersion version,
ts_stack_halt(self->stack, i);
break;
case ErrorComparisonTakeRight:
return true;
if (i < version) return true;
default:
break;
}
@ -570,8 +571,7 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
// delete the rest of the tree arrays.
while (i + 1 < pop.slices.size) {
StackSlice next_slice = pop.slices.contents[i + 1];
if (next_slice.version != slice.version)
break;
if (next_slice.version != slice.version) break;
i++;
uint32_t child_count = next_slice.trees.size;
@ -609,8 +609,9 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
}
ErrorStatus error_status = ts_stack_error_status(self->stack, other_version);
if (parser__better_version_exists(self, version, error_status))
if (parser__better_version_exists(self, version, error_status)) {
ts_stack_remove_version(self->stack, other_version);
}
}
// Push the parent node onto the stack, along with any extra tokens that
@ -962,6 +963,7 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
static StackIterateAction parser__skip_preceding_trees_callback(
void *payload, TSStateId state, const TreeArray *trees, uint32_t tree_count) {
if (trees->size > MAX_PRECEDING_TREES_TO_SKIP) return StackIterateStop;
if (tree_count > 0 && state != ERROR_STATE) {
uint32_t bytes_skipped = 0;
for (uint32_t i = 0; i < trees->size; i++) {
@ -1085,9 +1087,18 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
}
LOG("recover state:%u", state);
StackVersion new_version = ts_stack_copy_version(self->stack, version);
bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra;
parser__shift(self, new_version, ERROR_STATE, lookahead, can_be_extra);
if (ts_stack_version_count(self->stack) < HARD_MAX_VERSION_COUNT) {
StackVersion new_version = ts_stack_copy_version(self->stack, version);
bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra;
parser__shift(self, new_version, ERROR_STATE, lookahead, can_be_extra);
ErrorStatus error_status = ts_stack_error_status(self->stack, new_version);
if (parser__better_version_exists(self, version, error_status)) {
ts_stack_remove_version(self->stack, new_version);
}
}
parser__shift(self, version, state, lookahead, false);
}

View file

@ -81,22 +81,31 @@ static void stack_node_retain(StackNode *self) {
}
static void stack_node_release(StackNode *self, StackNodeArray *pool) {
if (!self) return;
recur:
assert(self->ref_count != 0);
self->ref_count--;
if (self->ref_count == 0) {
for (int i = 0; i < self->link_count; i++) {
if (self->links[i].tree) {
ts_tree_release(self->links[i].tree);
}
if (self->ref_count > 0) return;
StackNode *last_predecessor = NULL;
if (self->link_count > 0) {
unsigned i = 0;
for (; i < self->link_count - 1; i++) {
if (self->links[i].tree) ts_tree_release(self->links[i].tree);
stack_node_release(self->links[i].node, pool);
}
if (self->links[i].tree) ts_tree_release(self->links[i].tree);
last_predecessor = self->links[i].node;
}
if (pool->size < MAX_NODE_POOL_SIZE) {
array_push(pool, self);
} else {
ts_free(self);
}
if (pool->size < MAX_NODE_POOL_SIZE) {
array_push(pool, self);
} else {
ts_free(self);
}
if (last_predecessor) {
self = last_predecessor;
goto recur;
}
}
@ -294,8 +303,9 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version,
if (!link.tree->extra) {
next_iterator->tree_count++;
next_iterator->depth--;
if (!link.is_pending)
if (!link.is_pending) {
next_iterator->is_pending = false;
}
}
array_push(&next_iterator->trees, link.tree);
ts_tree_retain(link.tree);
@ -384,6 +394,7 @@ ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) {
.cost = head->node->error_cost,
.count = head->node->error_count,
.push_count = head->push_count,
.depth = head->depth,
};
}
@ -551,9 +562,8 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi
for (uint32_t i = 0; i < head2->node->link_count; i++) {
stack_node_add_link(head1->node, head2->node->links[i]);
}
if (head2->push_count > head1->push_count) {
head1->push_count = head2->push_count;
}
if (head2->push_count > head1->push_count) head1->push_count = head2->push_count;
if (head2->depth > head1->depth) head1->depth = head2->depth;
ts_stack_remove_version(self, version2);
}

View file

@ -1,4 +1,6 @@
#include "bandit/bandit.h"
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "runtime/document.h"
#include "runtime/node.h"
#include <ostream>
@ -48,3 +50,42 @@ bool operator==(const std::vector<Tree *> &vec, const TreeArray &array) {
return false;
return true;
}
// Recursively checks that the byte/point extents of `node` and of every node
// beneath it are mutually consistent:
//   - a node never starts after it ends,
//   - each child starts at or after the end of the previous child (the first
//     child is compared against the parent's start),
//   - a node with children ends at or after the end of its last child,
//   - a node reports changes whenever at least one of its children does.
// Any violation is reported through the test framework's AssertThat.
void assert_consistent_tree_sizes(TSNode node) {
  size_t total_children = ts_node_child_count(node);
  size_t node_start_byte = ts_node_start_byte(node);
  size_t node_end_byte = ts_node_end_byte(node);
  TSPoint node_start_point = ts_node_start_point(node);
  TSPoint node_end_point = ts_node_end_point(node);

  AssertThat(node_start_byte, !IsGreaterThan(node_end_byte));
  AssertThat(node_start_point, !IsGreaterThan(node_end_point));

  // Running "end of the previous sibling" cursor; seeded with the parent's
  // own start so the first child is validated against it.
  size_t previous_end_byte = node_start_byte;
  TSPoint previous_end_point = node_start_point;
  bool any_child_changed = false;

  for (size_t index = 0; index != total_children; ++index) {
    TSNode current = ts_node_child(node, index);
    size_t current_start_byte = ts_node_start_byte(current);
    TSPoint current_start_point = ts_node_start_point(current);

    AssertThat(current_start_byte, !IsLessThan(previous_end_byte));
    AssertThat(current_start_point, !IsLessThan(previous_end_point));

    // Validate the whole subtree before moving on to the next sibling.
    assert_consistent_tree_sizes(current);

    any_child_changed |= ts_node_has_changes(current);

    previous_end_byte = ts_node_end_byte(current);
    previous_end_point = ts_node_end_point(current);
  }

  if (total_children != 0) {
    AssertThat(node_end_byte, !IsLessThan(previous_end_byte));
    AssertThat(node_end_point, !IsLessThan(previous_end_point));
  }

  // The "has changes" flag must propagate from children to their parent.
  if (any_child_changed) {
    AssertThat(ts_node_has_changes(node), IsTrue());
  }
}

View file

@ -13,4 +13,6 @@ std::ostream &operator<<(std::ostream &stream, const TSNode &node);
bool operator==(const TSNode &left, const TSNode &right);
bool operator==(const std::vector<Tree *> &right, const TreeArray &array);
void assert_consistent_tree_sizes(TSNode node);
#endif // HELPERS_TREE_HELPERS_H_

View file

@ -0,0 +1,60 @@
#include "test_helper.h"
#include "base64.c"
#include "helpers/load_language.h"
#include "helpers/tree_helpers.h"
#include "helpers/record_alloc.h"
// Spec for inputs that were discovered by fuzzing: every example is parsed
// with the named language and the resulting tree is checked for consistent
// node sizes.
START_TEST
// Each entry is a pair of (language name, base64-encoded input text).
vector<pair<string, string>> examples({
{
"javascript",
"Bi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0xLS0tLTYtLfpZAA=="
},
});
describe("examples found via fuzzing", [&]() {
// Begin allocation recording before each example runs.
before_each([&]() {
record_alloc::start();
});
// After each example, require that no recorded allocation is still outstanding.
after_each([&]() {
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
// Register one test case per entry in `examples`.
for (unsigned i = 0, n = examples.size(); i < n; i++) {
it(("parses example number " + to_string(i)).c_str(), [&]() {
TSDocument *document = ts_document_new();
// ts_document_print_debugging_graphs(document, true);
const string &language_name = examples[i].first;
ts_document_set_language(document, load_real_language(language_name));
// Decode the base64 payload: first size the output buffer to the encoded
// length, then resize it to the value returned by base64_decode
// (presumably the decoded byte count -- confirm against base64.c).
string input;
const string &base64_input = examples[i].second;
input.resize(base64_input.size());
input.resize(base64_decode(
reinterpret_cast<const unsigned char *>(base64_input.c_str()),
reinterpret_cast<unsigned char *>(&input[0]),
base64_input.size()
));
// Pass the length explicitly alongside the decoded buffer.
ts_document_set_input_string_with_length(
document,
input.c_str(),
input.size()
);
ts_document_parse(document);
// Check the size invariants of the parsed tree, starting from its root.
TSNode node = ts_document_root_node(document);
assert_consistent_tree_sizes(node);
ts_document_free(document);
});
}
});
END_TEST

View file

@ -9,47 +9,9 @@
#include "helpers/record_alloc.h"
#include "helpers/random_helpers.h"
#include "helpers/scope_sequence.h"
#include "helpers/tree_helpers.h"
#include <set>
// Recursively asserts that the byte and point ranges of `node` and all of its
// descendants are consistent with one another.
static void assert_consistent_sizes(TSNode node) {
size_t child_count = ts_node_child_count(node);
size_t start_byte = ts_node_start_byte(node);
size_t end_byte = ts_node_end_byte(node);
TSPoint start_point = ts_node_start_point(node);
TSPoint end_point = ts_node_end_point(node);
bool some_child_has_changes = false;
// A node must not start after it ends.
AssertThat(start_byte, !IsGreaterThan(end_byte));
AssertThat(start_point, !IsGreaterThan(end_point));
// Tracks where the previous child ended; seeded with this node's start so
// the first child is compared against the parent's start position.
size_t last_child_end_byte = start_byte;
TSPoint last_child_end_point = start_point;
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
size_t child_start_byte = ts_node_start_byte(child);
TSPoint child_start_point = ts_node_start_point(child);
// Each child must start at or after the end of the previous one.
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
// Check the child's own subtree before advancing to the next sibling.
assert_consistent_sizes(child);
if (ts_node_has_changes(child))
some_child_has_changes = true;
last_child_end_byte = ts_node_end_byte(child);
last_child_end_point = ts_node_end_point(child);
}
// A node with children must end at or after the end of its last child.
if (child_count > 0) {
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
AssertThat(end_point, !IsLessThan(last_child_end_point));
}
// If any child reports changes, the parent must report changes as well.
if (some_child_has_changes) {
AssertThat(ts_node_has_changes(node), IsTrue());
}
}
static void assert_correct_tree_size(TSDocument *document, string content) {
TSNode root_node = ts_document_root_node(document);
size_t expected_size = content.size();
@ -65,7 +27,7 @@ static void assert_correct_tree_size(TSDocument *document, string content) {
expected_size = content.find_last_not_of("\n ") + 1;
AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
assert_consistent_sizes(root_node);
assert_consistent_tree_sizes(root_node);
}
START_TEST

View file

@ -390,7 +390,7 @@ describe("Stack", [&]() {
StackSlice slice2 = pop.slices.contents[1];
AssertThat(slice2.version, Equals<StackVersion>(1));
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[4], trees[5], trees[6], trees[10] })))
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[4], trees[5], trees[6], trees[10] })));
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
AssertThat(ts_stack_top_state(stack, 0), Equals(stateI));
@ -441,15 +441,15 @@ describe("Stack", [&]() {
StackSlice slice1 = pop.slices.contents[0];
AssertThat(slice1.version, Equals<StackVersion>(1));
AssertThat(slice1.trees, Equals(vector<Tree *>({ trees[3], trees[10] })))
AssertThat(slice1.trees, Equals(vector<Tree *>({ trees[3], trees[10] })));
StackSlice slice2 = pop.slices.contents[1];
AssertThat(slice2.version, Equals<StackVersion>(2));
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[6], trees[10] })))
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[6], trees[10] })));
StackSlice slice3 = pop.slices.contents[2];
AssertThat(slice3.version, Equals<StackVersion>(3));
AssertThat(slice3.trees, Equals(vector<Tree *>({ trees[9], trees[10] })))
AssertThat(slice3.trees, Equals(vector<Tree *>({ trees[9], trees[10] })));
AssertThat(ts_stack_version_count(stack), Equals<size_t>(4));
AssertThat(ts_stack_top_state(stack, 0), Equals(stateI));

View file

@ -12,6 +12,7 @@
'test',
'externals/bandit',
'externals/utf8proc',
'externals/crypto-algorithms',
],
'sources': [
'test/tests.cc',