Remove resume method, make parse resume by default
Also, add a `reset` method to explicitly discard an outstanding parse.

Co-Authored-By: Ashi Krishnan <queerviolet@github.com>
This commit is contained in:
parent
a24f7764d8
commit
d7c1f84d7b
5 changed files with 140 additions and 73 deletions
|
|
@ -89,7 +89,7 @@ bool ts_parser_enabled(const TSParser *);
|
|||
void ts_parser_set_enabled(TSParser *, bool);
|
||||
size_t ts_parser_operation_limit(const TSParser *);
|
||||
void ts_parser_set_operation_limit(TSParser *, size_t);
|
||||
TSTree *ts_parser_resume(TSParser *);
|
||||
void ts_parser_reset(TSParser *);
|
||||
|
||||
TSTree *ts_tree_copy(const TSTree *);
|
||||
void ts_tree_delete(TSTree *);
|
||||
|
|
|
|||
|
|
@ -45,8 +45,7 @@ static void ts_lexer__get_lookahead(Lexer *self) {
|
|||
} else {
|
||||
self->lookahead_size = lookahead_size;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead);
|
||||
}
|
||||
}
|
||||
|
|
@ -109,7 +108,7 @@ static uint32_t ts_lexer__get_column(void *payload) {
|
|||
// parsers can call them without needing to be linked against this library.
|
||||
|
||||
void ts_lexer_init(Lexer *self) {
|
||||
*self = (Lexer){
|
||||
*self = (Lexer) {
|
||||
.data = {
|
||||
.advance = ts_lexer__advance,
|
||||
.mark_end = ts_lexer__mark_end,
|
||||
|
|
@ -127,33 +126,30 @@ void ts_lexer_init(Lexer *self) {
|
|||
ts_lexer_reset(self, length_zero());
|
||||
}
|
||||
|
||||
// Repositions the lexer at `position`.
//
// The token start and the current position both become `position`, the token
// end is marked undefined, and any buffered lookahead character is discarded.
// If a chunk is currently held and `position` lies outside the byte range
// [chunk_start, chunk_start + chunk_size), the chunk fields are zeroed.
static inline void ts_lexer__reset(Lexer *self, Length position) {
  // Forget the buffered lookahead character.
  self->data.lookahead = 0;
  self->lookahead_size = 0;

  // Anchor both the token start and the cursor at the requested position.
  self->current_position = position;
  self->token_start_position = position;
  self->token_end_position = LENGTH_UNDEFINED;

  // Keep the held chunk only when it still covers the new position.
  if (self->chunk) {
    bool before_chunk = position.bytes < self->chunk_start;
    bool past_chunk = position.bytes >= self->chunk_start + self->chunk_size;
    if (before_chunk || past_chunk) {
      self->chunk_size = 0;
      self->chunk_start = 0;
      self->chunk = 0;
    }
  }
}
|
||||
|
||||
void ts_lexer_set_input(Lexer *self, TSInput input) {
|
||||
self->input = input;
|
||||
self->data.lookahead = 0;
|
||||
self->lookahead_size = 0;
|
||||
self->chunk = 0;
|
||||
self->chunk_start = 0;
|
||||
self->chunk_size = 0;
|
||||
ts_lexer__reset(self, length_zero());
|
||||
}
|
||||
|
||||
void ts_lexer_reset(Lexer *self, Length position) {
|
||||
if (position.bytes != self->current_position.bytes) {
|
||||
ts_lexer__reset(self, position);
|
||||
self->token_start_position = position;
|
||||
self->token_end_position = LENGTH_UNDEFINED;
|
||||
self->current_position = position;
|
||||
|
||||
if (self->chunk && (position.bytes < self->chunk_start ||
|
||||
position.bytes >= self->chunk_start + self->chunk_size)) {
|
||||
self->chunk = 0;
|
||||
self->chunk_start = 0;
|
||||
self->chunk_size = 0;
|
||||
}
|
||||
|
||||
self->lookahead_size = 0;
|
||||
self->data.lookahead = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ struct TSParser {
|
|||
size_t operation_limit;
|
||||
volatile bool enabled;
|
||||
bool halt_on_error;
|
||||
|
||||
const Subtree *old_tree;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -710,28 +710,6 @@ static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, T
|
|||
return pop;
|
||||
}
|
||||
|
||||
// Prepares the parser to begin parsing `input`.
//
// Logs whether this is a fresh parse or a parse after an edit (based on
// whether `previous_tree` is non-NULL), calls the language's external scanner
// `deserialize` hook (when one is provided) with a NULL buffer of length 0,
// installs `input` on the lexer, clears the stack and the cached token, points
// the reusable-node cursor at `previous_tree`, releases any finished tree
// still held by the parser, and zeroes the accept count.
static void ts_parser__start(TSParser *self, TSInput input, const Subtree *previous_tree) {
  if (!previous_tree) {
    LOG("new_parse");
  } else {
    LOG("parse_after_edit");
  }

  if (self->language->external_scanner.deserialize) {
    self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
  }

  ts_lexer_set_input(&self->lexer, input);
  ts_stack_clear(self->stack);
  ts_parser__set_cached_token(self, 0, NULL, NULL);
  reusable_node_reset(&self->reusable_node, previous_tree);

  self->accept_count = 0;

  // Drop a finished tree left over from an earlier parse, if any.
  if (self->finished_tree) {
    ts_subtree_release(&self->tree_pool, self->finished_tree);
    self->finished_tree = NULL;
  }
}
|
||||
|
||||
static void ts_parser__accept(TSParser *self, StackVersion version, const Subtree *lookahead) {
|
||||
assert(lookahead->symbol == ts_builtin_sym_end);
|
||||
ts_stack_push(self->stack, version, lookahead, false, 1);
|
||||
|
|
@ -1345,6 +1323,13 @@ static unsigned ts_parser__condense_stack(TSParser *self) {
|
|||
return min_error_cost;
|
||||
}
|
||||
|
||||
// Reports whether the parser still carries progress from an earlier parse.
//
// Returns true when the lexer's current byte position is past zero, or when
// stack version 0 is in a state other than 1.
// NOTE(review): state 1 is presumably the stack's initial state — confirm.
static bool ts_parser_has_outstanding_parse(TSParser *self) {
  if (self->lexer.current_position.bytes > 0) {
    return true;
  }
  return ts_stack_state(self->stack, 0) != 1;
}
|
||||
|
||||
// Parser - Public
|
||||
|
||||
TSParser *ts_parser_new() {
|
||||
|
|
@ -1360,17 +1345,21 @@ TSParser *ts_parser_new() {
|
|||
self->halt_on_error = false;
|
||||
self->enabled = true;
|
||||
self->operation_limit = SIZE_MAX;
|
||||
self->old_tree = NULL;
|
||||
ts_parser__set_cached_token(self, 0, NULL, NULL);
|
||||
return self;
|
||||
}
|
||||
|
||||
void ts_parser_delete(TSParser *self) {
|
||||
if (self->stack) {
|
||||
ts_stack_delete(self->stack);
|
||||
}
|
||||
ts_stack_delete(self->stack);
|
||||
if (self->reduce_actions.contents) {
|
||||
array_delete(&self->reduce_actions);
|
||||
}
|
||||
if (self->old_tree) {
|
||||
ts_subtree_release(&self->tree_pool, self->old_tree);
|
||||
self->old_tree = NULL;
|
||||
}
|
||||
ts_parser__set_cached_token(self, 0, NULL, NULL);
|
||||
ts_subtree_pool_delete(&self->tree_pool);
|
||||
reusable_node_delete(&self->reusable_node);
|
||||
ts_parser_set_language(self, NULL);
|
||||
|
|
@ -1430,8 +1419,43 @@ void ts_parser_set_operation_limit(TSParser *self, size_t limit) {
|
|||
self->operation_limit = limit;
|
||||
}
|
||||
|
||||
TSTree *ts_parser_resume(TSParser *self) {
|
||||
if (!self->language || !self->lexer.input.read) return NULL;
|
||||
// Discards any parse in progress so that the next call to ts_parser_parse
// starts from scratch instead of resuming.
//
// Steps performed:
//   - calls the language's external scanner `deserialize` hook (when one is
//     provided) with a NULL buffer of length 0;
//   - releases the old tree retained for an interrupted parse, if any;
//   - clears the reusable-node cursor, the parse stack and the cached token;
//   - moves the lexer back to position zero;
//   - releases any finished tree still held by the parser;
//   - zeroes the accept count.
//
// The parser may have no language assigned (ts_parser_parse checks for that
// case itself), so the external-scanner step is skipped when self->language
// is NULL rather than dereferencing it.
void ts_parser_reset(TSParser *self) {
  if (self->language && self->language->external_scanner.deserialize) {
    self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
  }

  if (self->old_tree) {
    ts_subtree_release(&self->tree_pool, self->old_tree);
    self->old_tree = NULL;
  }

  reusable_node_clear(&self->reusable_node);
  ts_lexer_reset(&self->lexer, length_zero());
  ts_stack_clear(self->stack);
  ts_parser__set_cached_token(self, 0, NULL, NULL);

  if (self->finished_tree) {
    ts_subtree_release(&self->tree_pool, self->finished_tree);
    self->finished_tree = NULL;
  }

  self->accept_count = 0;
}
|
||||
|
||||
TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
|
||||
if (!self->language || !input.read) return NULL;
|
||||
|
||||
ts_lexer_set_input(&self->lexer, input);
|
||||
|
||||
if (ts_parser_has_outstanding_parse(self)) {
|
||||
LOG("resume_parsing");
|
||||
} else if (old_tree) {
|
||||
ts_subtree_retain(old_tree->root);
|
||||
self->old_tree = old_tree->root;
|
||||
reusable_node_reset(&self->reusable_node, old_tree->root);
|
||||
LOG("parse_after_edit");
|
||||
} else {
|
||||
reusable_node_clear(&self->reusable_node);
|
||||
LOG("new_parse");
|
||||
}
|
||||
|
||||
uint32_t position = 0, last_position = 0, version_count = 0;
|
||||
size_t operation_count = 0;
|
||||
|
|
@ -1440,8 +1464,8 @@ TSTree *ts_parser_resume(TSParser *self) {
|
|||
for (StackVersion version = 0;
|
||||
version_count = ts_stack_version_count(self->stack), version < version_count;
|
||||
version++) {
|
||||
operation_count++;
|
||||
if (operation_count > self->operation_limit || !self->enabled) return NULL;
|
||||
operation_count++;
|
||||
|
||||
bool allow_node_reuse = version_count == 1;
|
||||
while (ts_stack_is_active(self->stack, version)) {
|
||||
|
|
@ -1477,18 +1501,10 @@ TSTree *ts_parser_resume(TSParser *self) {
|
|||
|
||||
TSTree *result = ts_tree_new(self->finished_tree, self->language);
|
||||
self->finished_tree = NULL;
|
||||
ts_stack_clear(self->stack);
|
||||
ts_parser__set_cached_token(self, 0, NULL, NULL);
|
||||
ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, 0 });
|
||||
ts_parser_reset(self);
|
||||
return result;
|
||||
}
|
||||
|
||||
TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
|
||||
if (!self->language) return NULL;
|
||||
ts_parser__start(self, input, old_tree ? old_tree->root : NULL);
|
||||
return ts_parser_resume(self);
|
||||
}
|
||||
|
||||
TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree,
|
||||
const char *string, uint32_t length) {
|
||||
TSStringInput input = {string, length};
|
||||
|
|
|
|||
|
|
@ -15,14 +15,18 @@ static inline ReusableNode reusable_node_new() {
|
|||
return (ReusableNode) {array_new(), NULL};
|
||||
}
|
||||
|
||||
static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree) {
|
||||
// Empties the reusable node's entry stack and sets its last external token
// to NULL.
static inline void reusable_node_clear(ReusableNode *self) {
  self->last_external_token = NULL;
  array_clear(&self->stack);
}
|
||||
|
||||
static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree) {
|
||||
reusable_node_clear(self);
|
||||
array_push(&self->stack, ((StackEntry) {
|
||||
.tree = tree,
|
||||
.child_index = 0,
|
||||
.byte_offset = 0,
|
||||
}));
|
||||
self->last_external_token = NULL;
|
||||
}
|
||||
|
||||
static inline const Subtree *reusable_node_tree(ReusableNode *self) {
|
||||
|
|
|
|||
|
|
@ -680,7 +680,7 @@ describe("Parser", [&]() {
|
|||
&state,
|
||||
[](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) {
|
||||
InputState *state = static_cast<InputState *>(payload);
|
||||
assert(state->read_count++ <= 10);
|
||||
assert(state->read_count++ <= 11);
|
||||
*bytes_read = strlen(state->string);
|
||||
return state->string;
|
||||
},
|
||||
|
|
@ -694,27 +694,78 @@ describe("Parser", [&]() {
|
|||
state.read_count = 0;
|
||||
state.string = "";
|
||||
|
||||
tree = ts_parser_resume(parser);
|
||||
tree = ts_parser_parse(parser, nullptr, infinite_input);
|
||||
AssertThat(tree, !Equals<TSTree *>(nullptr));
|
||||
ts_tree_delete(tree);
|
||||
});
|
||||
});
|
||||
|
||||
describe("resume()", [&]() {
|
||||
it("does nothing unless parsing was previously halted", [&]() {
|
||||
it("retains the old tree even if the parser halts before finishing parsing", [&]() {
|
||||
ts_parser_set_language(parser, load_real_language("json"));
|
||||
|
||||
TSTree *tree = ts_parser_resume(parser);
|
||||
AssertThat(tree, Equals<TSTree *>(nullptr));
|
||||
tree = ts_parser_resume(parser);
|
||||
SpyInput input("[1234, 5, 6, 4, 5]", 3);
|
||||
tree = ts_parser_parse(parser, nullptr, input.input());
|
||||
assert_root_node("(value (array (number) (number) (number) (number) (number)))");
|
||||
|
||||
input.clear();
|
||||
TSInputEdit edit = input.replace(1, 4, "null");
|
||||
ts_tree_edit(tree, &edit);
|
||||
|
||||
ts_parser_set_operation_limit(parser, 1);
|
||||
TSTree *new_tree = ts_parser_parse(parser, tree, input.input());
|
||||
AssertThat(new_tree, Equals<TSTree *>(nullptr));
|
||||
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_set_operation_limit(parser, SIZE_MAX);
|
||||
tree = ts_parser_parse(parser, nullptr, input.input());
|
||||
assert_root_node("(value (array (null) (number) (number) (number) (number)))");
|
||||
|
||||
AssertThat(input.strings_read(), Equals(vector<string>({
|
||||
"[null,",
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not leak the old tree if parsing halts and never finishes", [&]() {
|
||||
ts_parser_set_language(parser, load_real_language("json"));
|
||||
|
||||
SpyInput input("[1234, 5, 6, 4, 5]", 3);
|
||||
tree = ts_parser_parse(parser, nullptr, input.input());
|
||||
assert_root_node("(value (array (number) (number) (number) (number) (number)))");
|
||||
|
||||
input.clear();
|
||||
TSInputEdit edit = input.replace(1, 4, "null");
|
||||
ts_tree_edit(tree, &edit);
|
||||
|
||||
ts_parser_set_operation_limit(parser, 1);
|
||||
TSTree *new_tree = ts_parser_parse(parser, tree, input.input());
|
||||
AssertThat(new_tree, Equals<TSTree *>(nullptr));
|
||||
});
|
||||
});
|
||||
|
||||
describe("reset()", [&]() {
|
||||
it("causes the parser to parse from scratch on the next call to parse, instead of resuming", [&]() {
|
||||
ts_parser_set_language(parser, load_real_language("json"));
|
||||
|
||||
ts_parser_set_operation_limit(parser, 3);
|
||||
tree = ts_parser_parse_string(parser, nullptr, "[1234, 5, 6, 4, 5]", 18);
|
||||
AssertThat(tree, Equals<TSTree *>(nullptr));
|
||||
|
||||
tree = ts_parser_parse_string(parser, nullptr, "true", 4);
|
||||
AssertThat(tree, !Equals<TSTree *>(nullptr));
|
||||
// Without calling reset, the parser continues from where it left off, so
|
||||
// it does not see the changes to the beginning of the source code.
|
||||
ts_parser_set_operation_limit(parser, SIZE_MAX);
|
||||
tree = ts_parser_parse_string(parser, nullptr, "[null, 5, 6, 4, 5]", 18);
|
||||
assert_root_node("(value (array (number) (number) (number) (number) (number)))");
|
||||
ts_tree_delete(tree);
|
||||
|
||||
tree = ts_parser_resume(parser);
|
||||
ts_parser_set_operation_limit(parser, 3);
|
||||
tree = ts_parser_parse_string(parser, nullptr, "[1234, 5, 6, 4, 5]", 18);
|
||||
AssertThat(tree, Equals<TSTree *>(nullptr));
|
||||
|
||||
// By calling reset, we force the parser to start over from scratch so
|
||||
// that it sees the changes to the beginning of the source code.
|
||||
ts_parser_set_operation_limit(parser, SIZE_MAX);
|
||||
ts_parser_reset(parser);
|
||||
tree = ts_parser_parse_string(parser, nullptr, "[null, 5, 6, 4, 5]", 18);
|
||||
assert_root_node("(value (array (null) (number) (number) (number) (number)))");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue