Remove the `resume` method; make `parse` resume by default

Also, add a `reset` method to explicitly discard an outstanding parse.

Co-Authored-By: Ashi Krishnan <queerviolet@github.com>
This commit is contained in:
Max Brunsfeld 2018-06-19 15:25:49 -07:00
parent a24f7764d8
commit d7c1f84d7b
5 changed files with 140 additions and 73 deletions

View file

@ -89,7 +89,7 @@ bool ts_parser_enabled(const TSParser *);
void ts_parser_set_enabled(TSParser *, bool);
size_t ts_parser_operation_limit(const TSParser *);
void ts_parser_set_operation_limit(TSParser *, size_t);
TSTree *ts_parser_resume(TSParser *);
void ts_parser_reset(TSParser *);
TSTree *ts_tree_copy(const TSTree *);
void ts_tree_delete(TSTree *);

View file

@ -45,8 +45,7 @@ static void ts_lexer__get_lookahead(Lexer *self) {
} else {
self->lookahead_size = lookahead_size;
}
}
else {
} else {
self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead);
}
}
@ -109,7 +108,7 @@ static uint32_t ts_lexer__get_column(void *payload) {
// parsers can call them without needing to be linked against this library.
void ts_lexer_init(Lexer *self) {
*self = (Lexer){
*self = (Lexer) {
.data = {
.advance = ts_lexer__advance,
.mark_end = ts_lexer__mark_end,
@ -127,33 +126,30 @@ void ts_lexer_init(Lexer *self) {
ts_lexer_reset(self, length_zero());
}
// Repositions the lexer at `position`.
//
// The token start and current position are both set to `position`, the token
// end is marked undefined, and the buffered lookahead is cleared. The current
// input chunk is kept only when it still covers `position.bytes`; otherwise
// the chunk bookkeeping is zeroed.
static inline void ts_lexer__reset(Lexer *self, Length position) {
  self->current_position = position;
  self->token_start_position = position;
  self->token_end_position = LENGTH_UNDEFINED;

  self->data.lookahead = 0;
  self->lookahead_size = 0;

  // No chunk is loaded, so there is nothing to invalidate.
  if (!self->chunk) return;

  // The loaded chunk still contains the new byte offset: keep it.
  if (position.bytes >= self->chunk_start &&
      position.bytes < self->chunk_start + self->chunk_size) {
    return;
  }

  self->chunk = 0;
  self->chunk_start = 0;
  self->chunk_size = 0;
}
// Installs `input` as the lexer's source.
//
// The cached lookahead character and the chunk bookkeeping (pointer, start
// offset, size) are cleared so nothing read from a previous input is reused.
void ts_lexer_set_input(Lexer *self, TSInput input) {
self->input = input;
// Forget the buffered lookahead character and its size.
self->data.lookahead = 0;
self->lookahead_size = 0;
// Forget the chunk that was loaded from the previous input.
self->chunk = 0;
self->chunk_start = 0;
self->chunk_size = 0;
// Move the token and current positions to the position returned by
// length_zero().
ts_lexer__reset(self, length_zero());
}
// Moves the lexer to `position`.
//
// This is a no-op when the lexer's current byte offset already equals
// `position.bytes`; in that case the buffered lookahead and chunk are left
// untouched.
void ts_lexer_reset(Lexer *self, Length position) {
if (position.bytes != self->current_position.bytes) {
ts_lexer__reset(self, position);
// NOTE(review): the statements below repeat what ts_lexer__reset performs.
self->token_start_position = position;
self->token_end_position = LENGTH_UNDEFINED;
self->current_position = position;
// Drop the loaded chunk when the new byte offset falls outside of it.
if (self->chunk && (position.bytes < self->chunk_start ||
position.bytes >= self->chunk_start + self->chunk_size)) {
self->chunk = 0;
self->chunk_start = 0;
self->chunk_size = 0;
}
// Discard the buffered lookahead character.
self->lookahead_size = 0;
self->data.lookahead = 0;
}
}

View file

@ -61,7 +61,7 @@ struct TSParser {
size_t operation_limit;
volatile bool enabled;
bool halt_on_error;
const Subtree *old_tree;
};
typedef struct {
@ -710,28 +710,6 @@ static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, T
return pop;
}
// Prepares the parser to parse `input`.
//
// `previous_tree` may be NULL. It is handed to reusable_node_reset either way
// and otherwise only selects which message is logged. The caller must have
// assigned a language: `self->language` is dereferenced unconditionally.
static void ts_parser__start(TSParser *self, TSInput input, const Subtree *previous_tree) {
if (previous_tree) {
LOG("parse_after_edit");
} else {
LOG("new_parse");
}
// If the language has an external scanner with a deserialize hook, hand it
// an empty serialized state (NULL buffer, length 0).
if (self->language->external_scanner.deserialize) {
self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
}
ts_lexer_set_input(&self->lexer, input);
ts_stack_clear(self->stack);
// Clear the cached token.
ts_parser__set_cached_token(self, 0, NULL, NULL);
reusable_node_reset(&self->reusable_node, previous_tree);
// Release a finished tree left over from an earlier run.
if (self->finished_tree) {
ts_subtree_release(&self->tree_pool, self->finished_tree);
self->finished_tree = NULL;
}
self->accept_count = 0;
}
static void ts_parser__accept(TSParser *self, StackVersion version, const Subtree *lookahead) {
assert(lookahead->symbol == ts_builtin_sym_end);
ts_stack_push(self->stack, version, lookahead, false, 1);
@ -1345,6 +1323,13 @@ static unsigned ts_parser__condense_stack(TSParser *self) {
return min_error_cost;
}
// Reports whether a previous parse was left unfinished.
//
// A parse counts as outstanding when the lexer has moved past byte 0, or when
// the state of stack version 0 is anything other than 1.
static bool ts_parser_has_outstanding_parse(TSParser *self) {
  if (self->lexer.current_position.bytes > 0) {
    return true;
  }
  return ts_stack_state(self->stack, 0) != 1;
}
// Parser - Public
TSParser *ts_parser_new() {
@ -1360,17 +1345,21 @@ TSParser *ts_parser_new() {
self->halt_on_error = false;
self->enabled = true;
self->operation_limit = SIZE_MAX;
self->old_tree = NULL;
ts_parser__set_cached_token(self, 0, NULL, NULL);
return self;
}
void ts_parser_delete(TSParser *self) {
if (self->stack) {
ts_stack_delete(self->stack);
}
ts_stack_delete(self->stack);
if (self->reduce_actions.contents) {
array_delete(&self->reduce_actions);
}
if (self->old_tree) {
ts_subtree_release(&self->tree_pool, self->old_tree);
self->old_tree = NULL;
}
ts_parser__set_cached_token(self, 0, NULL, NULL);
ts_subtree_pool_delete(&self->tree_pool);
reusable_node_delete(&self->reusable_node);
ts_parser_set_language(self, NULL);
@ -1430,8 +1419,43 @@ void ts_parser_set_operation_limit(TSParser *self, size_t limit) {
self->operation_limit = limit;
}
TSTree *ts_parser_resume(TSParser *self) {
if (!self->language || !self->lexer.input.read) return NULL;
// Discards any outstanding parse state held by the parser.
//
// Releases the retained old tree and any finished tree, clears the
// reusable-node cursor, the stack and the cached token, moves the lexer to
// the position returned by length_zero(), and zeroes the accept count.
//
// Safe to call on a parser that has no language assigned: the external
// scanner is only touched when a language with a `deserialize` hook is set.
void ts_parser_reset(TSParser *self) {
  // Hand the external scanner an empty serialized state (NULL buffer,
  // length 0). `self->language` is checked first because this is a public
  // entry point that can be reached before a language has been assigned.
  if (self->language && self->language->external_scanner.deserialize) {
    self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
  }

  // Drop the reference that ts_parser_parse retained on the old tree's root.
  if (self->old_tree) {
    ts_subtree_release(&self->tree_pool, self->old_tree);
    self->old_tree = NULL;
  }

  reusable_node_clear(&self->reusable_node);
  ts_lexer_reset(&self->lexer, length_zero());
  ts_stack_clear(self->stack);
  ts_parser__set_cached_token(self, 0, NULL, NULL);

  // Release a finished tree that was never handed out to a caller.
  if (self->finished_tree) {
    ts_subtree_release(&self->tree_pool, self->finished_tree);
    self->finished_tree = NULL;
  }

  self->accept_count = 0;
}
TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
if (!self->language || !input.read) return NULL;
ts_lexer_set_input(&self->lexer, input);
if (ts_parser_has_outstanding_parse(self)) {
LOG("resume_parsing");
} else if (old_tree) {
ts_subtree_retain(old_tree->root);
self->old_tree = old_tree->root;
reusable_node_reset(&self->reusable_node, old_tree->root);
LOG("parse_after_edit");
} else {
reusable_node_clear(&self->reusable_node);
LOG("new_parse");
}
uint32_t position = 0, last_position = 0, version_count = 0;
size_t operation_count = 0;
@ -1440,8 +1464,8 @@ TSTree *ts_parser_resume(TSParser *self) {
for (StackVersion version = 0;
version_count = ts_stack_version_count(self->stack), version < version_count;
version++) {
operation_count++;
if (operation_count > self->operation_limit || !self->enabled) return NULL;
operation_count++;
bool allow_node_reuse = version_count == 1;
while (ts_stack_is_active(self->stack, version)) {
@ -1477,18 +1501,10 @@ TSTree *ts_parser_resume(TSParser *self) {
TSTree *result = ts_tree_new(self->finished_tree, self->language);
self->finished_tree = NULL;
ts_stack_clear(self->stack);
ts_parser__set_cached_token(self, 0, NULL, NULL);
ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, 0 });
ts_parser_reset(self);
return result;
}
// Parses `input` and returns the result of ts_parser_resume.
//
// Returns NULL immediately when no language has been assigned. When
// `old_tree` is non-NULL its root is passed to ts_parser__start as the
// previous tree; otherwise NULL is passed.
TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
  if (!self->language) {
    return NULL;
  }

  const Subtree *previous_root = NULL;
  if (old_tree) {
    previous_root = old_tree->root;
  }

  ts_parser__start(self, input, previous_root);
  return ts_parser_resume(self);
}
TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree,
const char *string, uint32_t length) {
TSStringInput input = {string, length};

View file

@ -15,14 +15,18 @@ static inline ReusableNode reusable_node_new() {
return (ReusableNode) {array_new(), NULL};
}
static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree) {
// Empties the reusable-node cursor: forgets the last external token and
// clears the entry stack.
static inline void reusable_node_clear(ReusableNode *self) {
  self->last_external_token = NULL;
  array_clear(&self->stack);
}
// Re-seeds the reusable-node cursor with `tree`.
//
// The cursor is cleared first, then a single entry for `tree` is pushed with
// child index 0 and byte offset 0. The last external token ends up NULL.
static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree) {
  reusable_node_clear(self);

  StackEntry root_entry = {
    .tree = tree,
    .child_index = 0,
    .byte_offset = 0,
  };
  array_push(&self->stack, root_entry);

  self->last_external_token = NULL;
}
static inline const Subtree *reusable_node_tree(ReusableNode *self) {

View file

@ -680,7 +680,7 @@ describe("Parser", [&]() {
&state,
[](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) {
InputState *state = static_cast<InputState *>(payload);
assert(state->read_count++ <= 10);
assert(state->read_count++ <= 11);
*bytes_read = strlen(state->string);
return state->string;
},
@ -694,27 +694,78 @@ describe("Parser", [&]() {
state.read_count = 0;
state.string = "";
tree = ts_parser_resume(parser);
tree = ts_parser_parse(parser, nullptr, infinite_input);
AssertThat(tree, !Equals<TSTree *>(nullptr));
ts_tree_delete(tree);
});
});
describe("resume()", [&]() {
it("does nothing unless parsing was previously halted", [&]() {
it("retains the old tree even if the parser halts before finishing parsing", [&]() {
ts_parser_set_language(parser, load_real_language("json"));
TSTree *tree = ts_parser_resume(parser);
AssertThat(tree, Equals<TSTree *>(nullptr));
tree = ts_parser_resume(parser);
SpyInput input("[1234, 5, 6, 4, 5]", 3);
tree = ts_parser_parse(parser, nullptr, input.input());
assert_root_node("(value (array (number) (number) (number) (number) (number)))");
input.clear();
TSInputEdit edit = input.replace(1, 4, "null");
ts_tree_edit(tree, &edit);
ts_parser_set_operation_limit(parser, 1);
TSTree *new_tree = ts_parser_parse(parser, tree, input.input());
AssertThat(new_tree, Equals<TSTree *>(nullptr));
ts_tree_delete(tree);
ts_parser_set_operation_limit(parser, SIZE_MAX);
tree = ts_parser_parse(parser, nullptr, input.input());
assert_root_node("(value (array (null) (number) (number) (number) (number)))");
AssertThat(input.strings_read(), Equals(vector<string>({
"[null,",
})));
});
it("does not leak the old tree if parsing halts and never finishes", [&]() {
ts_parser_set_language(parser, load_real_language("json"));
SpyInput input("[1234, 5, 6, 4, 5]", 3);
tree = ts_parser_parse(parser, nullptr, input.input());
assert_root_node("(value (array (number) (number) (number) (number) (number)))");
input.clear();
TSInputEdit edit = input.replace(1, 4, "null");
ts_tree_edit(tree, &edit);
ts_parser_set_operation_limit(parser, 1);
TSTree *new_tree = ts_parser_parse(parser, tree, input.input());
AssertThat(new_tree, Equals<TSTree *>(nullptr));
});
});
describe("reset()", [&]() {
it("causes the parser to parse from scratch on the next call to parse, instead of resuming", [&]() {
ts_parser_set_language(parser, load_real_language("json"));
ts_parser_set_operation_limit(parser, 3);
tree = ts_parser_parse_string(parser, nullptr, "[1234, 5, 6, 4, 5]", 18);
AssertThat(tree, Equals<TSTree *>(nullptr));
tree = ts_parser_parse_string(parser, nullptr, "true", 4);
AssertThat(tree, !Equals<TSTree *>(nullptr));
// Without calling reset, the parser continues from where it left off, so
// it does not see the changes to the beginning of the source code.
ts_parser_set_operation_limit(parser, SIZE_MAX);
tree = ts_parser_parse_string(parser, nullptr, "[null, 5, 6, 4, 5]", 18);
assert_root_node("(value (array (number) (number) (number) (number) (number)))");
ts_tree_delete(tree);
tree = ts_parser_resume(parser);
ts_parser_set_operation_limit(parser, 3);
tree = ts_parser_parse_string(parser, nullptr, "[1234, 5, 6, 4, 5]", 18);
AssertThat(tree, Equals<TSTree *>(nullptr));
// By calling reset, we force the parser to start over from scratch so
// that it sees the changes to the beginning of the source code.
ts_parser_set_operation_limit(parser, SIZE_MAX);
ts_parser_reset(parser);
tree = ts_parser_parse_string(parser, nullptr, "[null, 5, 6, 4, 5]", 18);
assert_root_node("(value (array (null) (number) (number) (number) (number)))");
});
});
});