diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 9c38f8fb..5ee8b2e7 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -89,7 +89,7 @@ bool ts_parser_enabled(const TSParser *); void ts_parser_set_enabled(TSParser *, bool); size_t ts_parser_operation_limit(const TSParser *); void ts_parser_set_operation_limit(TSParser *, size_t); -TSTree *ts_parser_resume(TSParser *); +void ts_parser_reset(TSParser *); TSTree *ts_tree_copy(const TSTree *); void ts_tree_delete(TSTree *); diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 26d97f75..e78a0778 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -45,8 +45,7 @@ static void ts_lexer__get_lookahead(Lexer *self) { } else { self->lookahead_size = lookahead_size; } - } - else { + } else { self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead); } } @@ -109,7 +108,7 @@ static uint32_t ts_lexer__get_column(void *payload) { // parsers can call them without needing to be linked against this library. void ts_lexer_init(Lexer *self) { - *self = (Lexer){ + *self = (Lexer) { .data = { .advance = ts_lexer__advance, .mark_end = ts_lexer__mark_end, @@ -127,33 +126,30 @@ void ts_lexer_init(Lexer *self) { ts_lexer_reset(self, length_zero()); } -static inline void ts_lexer__reset(Lexer *self, Length position) { - self->token_start_position = position; - self->token_end_position = LENGTH_UNDEFINED; - self->current_position = position; - - if (self->chunk && (position.bytes < self->chunk_start || - position.bytes >= self->chunk_start + self->chunk_size)) { - self->chunk = 0; - self->chunk_start = 0; - self->chunk_size = 0; - } - - self->lookahead_size = 0; - self->data.lookahead = 0; -} - void ts_lexer_set_input(Lexer *self, TSInput input) { self->input = input; + self->data.lookahead = 0; + self->lookahead_size = 0; self->chunk = 0; self->chunk_start = 0; self->chunk_size = 0; - ts_lexer__reset(self, length_zero()); } void ts_lexer_reset(Lexer *self, Length position) { if (position.bytes != self->current_position.bytes) { - ts_lexer__reset(self, position); + self->token_start_position = position; + self->token_end_position = LENGTH_UNDEFINED; + self->current_position = position; + + if (self->chunk && (position.bytes < self->chunk_start || + position.bytes >= self->chunk_start + self->chunk_size)) { + self->chunk = 0; + self->chunk_start = 0; + self->chunk_size = 0; + } + + self->lookahead_size = 0; + self->data.lookahead = 0; } } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 7e0e6184..ed854ea0 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -61,7 +61,7 @@ struct TSParser { size_t operation_limit; volatile bool enabled; bool halt_on_error; - + const Subtree *old_tree; }; typedef struct { @@ -710,28 +710,6 @@ static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, T return pop; } -static void ts_parser__start(TSParser *self, TSInput input, const Subtree *previous_tree) { - if (previous_tree) { - LOG("parse_after_edit"); - } else { - LOG("new_parse"); - } - - if (self->language->external_scanner.deserialize) { - self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0); - } - - ts_lexer_set_input(&self->lexer, input); - ts_stack_clear(self->stack); - ts_parser__set_cached_token(self, 0, NULL, NULL); - reusable_node_reset(&self->reusable_node, previous_tree); - if (self->finished_tree) { - ts_subtree_release(&self->tree_pool, self->finished_tree); - self->finished_tree = NULL; - } - self->accept_count = 0; -} - static void ts_parser__accept(TSParser *self, StackVersion version, const Subtree *lookahead) { assert(lookahead->symbol == ts_builtin_sym_end); ts_stack_push(self->stack, version, lookahead, false, 1); @@ -1345,6 +1323,13 @@ static unsigned ts_parser__condense_stack(TSParser *self) { return min_error_cost; } +static bool ts_parser_has_outstanding_parse(TSParser *self) { + return ( + self->lexer.current_position.bytes > 0 || + ts_stack_state(self->stack, 0) != 1 + ); +} + // Parser - Public TSParser *ts_parser_new() { @@ -1360,17 +1345,21 @@ TSParser *ts_parser_new() { self->halt_on_error = false; self->enabled = true; self->operation_limit = SIZE_MAX; + self->old_tree = NULL; ts_parser__set_cached_token(self, 0, NULL, NULL); return self; } void ts_parser_delete(TSParser *self) { - if (self->stack) { - ts_stack_delete(self->stack); - } + ts_stack_delete(self->stack); if (self->reduce_actions.contents) { array_delete(&self->reduce_actions); } + if (self->old_tree) { + ts_subtree_release(&self->tree_pool, self->old_tree); + self->old_tree = NULL; + } + ts_parser__set_cached_token(self, 0, NULL, NULL); ts_subtree_pool_delete(&self->tree_pool); reusable_node_delete(&self->reusable_node); ts_parser_set_language(self, NULL); @@ -1430,8 +1419,43 @@ void ts_parser_set_operation_limit(TSParser *self, size_t limit) { self->operation_limit = limit; } -TSTree *ts_parser_resume(TSParser *self) { - if (!self->language || !self->lexer.input.read) return NULL; +void ts_parser_reset(TSParser *self) { + if (self->language->external_scanner.deserialize) { + self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0); + } + + if (self->old_tree) { + ts_subtree_release(&self->tree_pool, self->old_tree); + self->old_tree = NULL; + } + + reusable_node_clear(&self->reusable_node); + ts_lexer_reset(&self->lexer, length_zero()); + ts_stack_clear(self->stack); + ts_parser__set_cached_token(self, 0, NULL, NULL); + if (self->finished_tree) { + ts_subtree_release(&self->tree_pool, self->finished_tree); + self->finished_tree = NULL; + } + self->accept_count = 0; +} + +TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { + if (!self->language || !input.read) return NULL; + + ts_lexer_set_input(&self->lexer, input); + + if (ts_parser_has_outstanding_parse(self)) { + LOG("resume_parsing"); + } else if (old_tree) { + ts_subtree_retain(old_tree->root); + self->old_tree = old_tree->root; + reusable_node_reset(&self->reusable_node, old_tree->root); + LOG("parse_after_edit"); + } else { + reusable_node_clear(&self->reusable_node); + LOG("new_parse"); + } uint32_t position = 0, last_position = 0, version_count = 0; size_t operation_count = 0; @@ -1440,8 +1464,8 @@ TSTree *ts_parser_resume(TSParser *self) { for (StackVersion version = 0; version_count = ts_stack_version_count(self->stack), version < version_count; version++) { - operation_count++; if (operation_count > self->operation_limit || !self->enabled) return NULL; + operation_count++; bool allow_node_reuse = version_count == 1; while (ts_stack_is_active(self->stack, version)) { @@ -1477,18 +1501,10 @@ TSTree *ts_parser_resume(TSParser *self) { TSTree *result = ts_tree_new(self->finished_tree, self->language); self->finished_tree = NULL; - ts_stack_clear(self->stack); - ts_parser__set_cached_token(self, 0, NULL, NULL); - ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, 0 }); + ts_parser_reset(self); return result; } -TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { - if (!self->language) return NULL; - ts_parser__start(self, input, old_tree ? old_tree->root : NULL); - return ts_parser_resume(self); -} - TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree, const char *string, uint32_t length) { TSStringInput input = {string, length}; diff --git a/src/runtime/reusable_node.h b/src/runtime/reusable_node.h index 5575012b..28186525 100644 --- a/src/runtime/reusable_node.h +++ b/src/runtime/reusable_node.h @@ -15,14 +15,18 @@ static inline ReusableNode reusable_node_new() { return (ReusableNode) {array_new(), NULL}; } -static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree) { +static inline void reusable_node_clear(ReusableNode *self) { array_clear(&self->stack); + self->last_external_token = NULL; +} + +static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree) { + reusable_node_clear(self); array_push(&self->stack, ((StackEntry) { .tree = tree, .child_index = 0, .byte_offset = 0, })); - self->last_external_token = NULL; } static inline const Subtree *reusable_node_tree(ReusableNode *self) { diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 37942971..b9a41715 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -680,7 +680,7 @@ describe("Parser", [&]() { &state, [](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) { InputState *state = static_cast(payload); - assert(state->read_count++ <= 10); + assert(state->read_count++ <= 11); *bytes_read = strlen(state->string); return state->string; }, @@ -694,27 +694,78 @@ describe("Parser", [&]() { state.read_count = 0; state.string = ""; - tree = ts_parser_resume(parser); + tree = ts_parser_parse(parser, nullptr, infinite_input); AssertThat(tree, !Equals(nullptr)); ts_tree_delete(tree); }); - }); - describe("resume()", [&]() { - it("does nothing unless parsing was previously halted", [&]() { + it("retains the old tree even if the parser halts before finishing parsing", [&]() { ts_parser_set_language(parser, load_real_language("json")); - TSTree *tree = ts_parser_resume(parser); - AssertThat(tree, Equals(nullptr)); - tree = ts_parser_resume(parser); + SpyInput input("[1234, 5, 6, 4, 5]", 3); + tree = ts_parser_parse(parser, nullptr, input.input()); + assert_root_node("(value (array (number) (number) (number) (number) (number)))"); + + input.clear(); + TSInputEdit edit = input.replace(1, 4, "null"); + ts_tree_edit(tree, &edit); + + ts_parser_set_operation_limit(parser, 1); + TSTree *new_tree = ts_parser_parse(parser, tree, input.input()); + AssertThat(new_tree, Equals(nullptr)); + + ts_tree_delete(tree); + ts_parser_set_operation_limit(parser, SIZE_MAX); + tree = ts_parser_parse(parser, nullptr, input.input()); + assert_root_node("(value (array (null) (number) (number) (number) (number)))"); + + AssertThat(input.strings_read(), Equals(vector({ + "[null,", + }))); + }); + + it("does not leak the old tree if parsing halts and never finishes", [&]() { + ts_parser_set_language(parser, load_real_language("json")); + + SpyInput input("[1234, 5, 6, 4, 5]", 3); + tree = ts_parser_parse(parser, nullptr, input.input()); + assert_root_node("(value (array (number) (number) (number) (number) (number)))"); + + input.clear(); + TSInputEdit edit = input.replace(1, 4, "null"); + ts_tree_edit(tree, &edit); + + ts_parser_set_operation_limit(parser, 1); + TSTree *new_tree = ts_parser_parse(parser, tree, input.input()); + AssertThat(new_tree, Equals(nullptr)); + }); + }); + + describe("reset()", [&]() { + it("causes the parser to parse from scratch on the next call to parse, instead of resuming", [&]() { + ts_parser_set_language(parser, load_real_language("json")); + + ts_parser_set_operation_limit(parser, 3); + tree = ts_parser_parse_string(parser, nullptr, "[1234, 5, 6, 4, 5]", 18); AssertThat(tree, Equals(nullptr)); - tree = ts_parser_parse_string(parser, nullptr, "true", 4); - AssertThat(tree, !Equals(nullptr)); + // Without calling reset, the parser continues from where it left off, so + // it does not see the changes to the beginning of the source code. + ts_parser_set_operation_limit(parser, SIZE_MAX); + tree = ts_parser_parse_string(parser, nullptr, "[null, 5, 6, 4, 5]", 18); + assert_root_node("(value (array (number) (number) (number) (number) (number)))"); ts_tree_delete(tree); - tree = ts_parser_resume(parser); + ts_parser_set_operation_limit(parser, 3); + tree = ts_parser_parse_string(parser, nullptr, "[1234, 5, 6, 4, 5]", 18); AssertThat(tree, Equals(nullptr)); + + // By calling reset, we force the parser to start over from scratch so + // that it sees the changes to the beginning of the source code. + ts_parser_set_operation_limit(parser, SIZE_MAX); + ts_parser_reset(parser); + tree = ts_parser_parse_string(parser, nullptr, "[null, 5, 6, 4, 5]", 18); + assert_root_node("(value (array (null) (number) (number) (number) (number)))"); }); }); });