Merge pull request #170 from tree-sitter/pausing-and-resuming

Add APIs for pausing a parse after N operations and resuming later
This commit is contained in:
Max Brunsfeld 2018-05-23 15:20:53 -07:00 committed by GitHub
commit 986ceefc81
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 91 additions and 23 deletions

View file

@ -86,8 +86,11 @@ void ts_parser_print_dot_graphs(TSParser *, FILE *);
void ts_parser_halt_on_error(TSParser *, bool);
TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput);
TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t);
bool ts_parser_enabled(TSParser *);
bool ts_parser_enabled(const TSParser *);
void ts_parser_set_enabled(TSParser *, bool);
size_t ts_parser_operation_limit(const TSParser *);
void ts_parser_set_operation_limit(TSParser *, size_t);
TSTree *ts_parser_resume(TSParser *);
TSTree *ts_tree_copy(const TSTree *);
void ts_tree_delete(TSTree *);

View file

@ -58,9 +58,10 @@ struct TSParser {
ReusableNode reusable_node;
void *external_scanner_payload;
FILE *dot_graph_file;
bool halt_on_error;
unsigned accept_count;
size_t operation_limit;
volatile bool enabled;
bool halt_on_error;
};
typedef struct {
@ -700,19 +701,14 @@ static void ts_parser__start(TSParser *self, TSInput input, const Subtree *previ
}
ts_lexer_set_input(&self->lexer, input);
ts_stack_clear(self->stack);
reusable_node_reset(&self->reusable_node, previous_tree);
self->finished_tree = NULL;
self->accept_count = 0;
}
static void ts_parser__stop(TSParser *self) {
ts_stack_clear(self->stack);
ts_parser__set_cached_token(self, 0, NULL, NULL);
reusable_node_reset(&self->reusable_node, previous_tree);
if (self->finished_tree) {
ts_subtree_release(&self->tree_pool, self->finished_tree);
self->finished_tree = NULL;
}
self->accept_count = 0;
}
static void ts_parser__accept(TSParser *self, StackVersion version, const Subtree *lookahead) {
@ -1319,6 +1315,7 @@ TSParser *ts_parser_new() {
self->dot_graph_file = NULL;
self->halt_on_error = false;
self->enabled = true;
self->operation_limit = SIZE_MAX;
ts_parser__set_cached_token(self, 0, NULL, NULL);
return self;
}
@ -1373,7 +1370,7 @@ void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) {
self->halt_on_error = should_halt_on_error;
}
bool ts_parser_enabled(TSParser *self) {
bool ts_parser_enabled(const TSParser *self) {
return self->enabled;
}
@ -1381,20 +1378,26 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) {
self->enabled = enabled;
}
TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
if (!self->language) return NULL;
ts_parser__start(self, input, old_tree ? old_tree->root : NULL);
// Read back the operation limit currently stored on the parser.
//
// self: parser to query; it is only read, never modified.
// Returns the value of the parser's `operation_limit` field.
size_t ts_parser_operation_limit(const TSParser *self) {
  const size_t current_limit = self->operation_limit;
  return current_limit;
}
// Store a new operation limit on the parser.
//
// self:  parser to update.
// limit: value written to the parser's `operation_limit` field.
void ts_parser_set_operation_limit(TSParser *self, size_t limit) {
  (*self).operation_limit = limit;
}
TSTree *ts_parser_resume(TSParser *self) {
if (!self->language || !self->lexer.input.read) return NULL;
uint32_t position = 0, last_position = 0, version_count = 0;
size_t operation_count = 0;
do {
for (StackVersion version = 0;
version_count = ts_stack_version_count(self->stack), version < version_count;
version++) {
if (!self->enabled) {
ts_parser__stop(self);
return NULL;
}
operation_count++;
if (operation_count > self->operation_limit || !self->enabled) return NULL;
bool allow_node_reuse = version_count == 1;
while (ts_stack_is_active(self->stack, version)) {
@ -1424,19 +1427,23 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
}
} while (version_count != 0);
ts_stack_clear(self->stack);
ts_parser__set_cached_token(self, 0, NULL, NULL);
ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
TSTree *result = ts_tree_new(self->finished_tree, self->language);
LOG("done");
LOG_TREE();
self->finished_tree = NULL;
ts_parser__stop(self);
TSTree *result = ts_tree_new(self->finished_tree, self->language);
self->finished_tree = NULL;
ts_stack_clear(self->stack);
ts_parser__set_cached_token(self, 0, NULL, NULL);
ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, NULL, 0 });
return result;
}
// Begin a parse of `input` and run it through ts_parser_resume.
//
// self:     parser to drive.
// old_tree: optional previous tree; when non-NULL its root is handed to
//           ts_parser__start, otherwise NULL is passed.
// input:    input descriptor forwarded to ts_parser__start.
// Returns whatever ts_parser_resume returns for this parser.
TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
  const Subtree *previous_root = NULL;
  if (old_tree) {
    previous_root = old_tree->root;
  }
  ts_parser__start(self, input, previous_root);
  return ts_parser_resume(self);
}
TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree,
const char *string, uint32_t length) {
TSStringInput input;

View file

@ -656,6 +656,64 @@ describe("Parser", [&]() {
ts_tree_delete(tree);
});
});
// Spec for ts_parser_set_operation_limit: a parse that hits the limit returns
// null, and a later ts_parser_resume call can produce a tree.
describe("set_operation_limit(limit)", [&]() {
it("limits the amount of work the parser does on any given call to parse()", [&]() {
ts_parser_set_language(parser, load_real_language("json"));
// Mutable state shared with the read callback through the input payload.
struct InputState {
const char *string;
size_t read_count;
};
InputState state = {"[", 0};
// An input that repeats the given string forever, counting how many times
// it has been read.
TSInput infinite_input = {
&state,
// Read callback: hands back the whole current string on every call.
[](void *payload, uint32_t *bytes_read) {
InputState *state = static_cast<InputState *>(payload);
// Guard against a runaway parse: this fires once the callback has been
// invoked more than 11 times since read_count was last reset.
assert(state->read_count++ <= 10);
*bytes_read = strlen(state->string);
return state->string;
},
// Presumably the seek callback (TODO confirm against TSInput); it ignores
// its arguments and always reports success.
[](void *payload, unsigned byte, TSPoint position) -> int {
return true;
},
TSInputEncodingUTF8
};
ts_parser_set_operation_limit(parser, 10);
// With a never-ending input, the parse is expected to give up and return null.
TSTree *tree = ts_parser_parse(parser, nullptr, infinite_input);
AssertThat(tree, Equals<TSTree *>(nullptr));
// Reset the read counter and switch to an empty string, so each read now
// reports zero bytes; the resumed parse is then expected to yield a tree.
state.read_count = 0;
state.string = "";
tree = ts_parser_resume(parser);
AssertThat(tree, !Equals<TSTree *>(nullptr));
ts_tree_delete(tree);
});
});
// Spec for ts_parser_resume when there is no paused parse to continue.
describe("resume()", [&]() {
it("does nothing unless parsing was previously halted", [&]() {
ts_parser_set_language(parser, load_real_language("json"));
// No parse has been started yet: resume is expected to return null, and
// calling it a second time gives the same result.
TSTree *tree = ts_parser_resume(parser);
AssertThat(tree, Equals<TSTree *>(nullptr));
tree = ts_parser_resume(parser);
AssertThat(tree, Equals<TSTree *>(nullptr));
// A parse of a complete JSON document is expected to produce a tree.
tree = ts_parser_parse_string(parser, nullptr, "true", 4);
AssertThat(tree, !Equals<TSTree *>(nullptr));
ts_tree_delete(tree);
// After that parse has produced its tree, resume is again expected to
// return null.
tree = ts_parser_resume(parser);
AssertThat(tree, Equals<TSTree *>(nullptr));
});
});
});
END_TEST