From 36608180d243163a15705cdfb0b837c6784ed1ac Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 7 Jan 2017 21:45:28 -0800 Subject: [PATCH] Store external token states in the parse stack --- spec/runtime/parser_spec.cc | 11 ++++ src/runtime/lexer.c | 16 ++---- src/runtime/lexer.h | 3 +- src/runtime/parser.c | 104 +++++++++++++++++++----------------- src/runtime/stack.c | 13 +++-- 5 files changed, 80 insertions(+), 67 deletions(-) diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 6ac36991..88633f1f 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -60,6 +60,11 @@ describe("Parser", [&]() { replace_text(position, length, ""); }; + auto undo = [&]() { + ts_document_edit(document, input->undo()); + ts_document_parse(document); + }; + auto assert_root_node = [&](const string &expected) { TSNode node = ts_document_root_node(document); char *node_string = ts_node_string(node, document); @@ -386,6 +391,12 @@ describe("Parser", [&]() { "(if_statement (identifier) " "(print_statement (identifier)) " "(return_statement (expression_list (identifier)))))"); + + undo(); + assert_root_node("(module " + "(if_statement (identifier) " + "(print_statement (identifier))) " + "(return_statement (expression_list (identifier))))"); }); }); diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index f7ebf042..902c2d3b 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -88,8 +88,7 @@ void ts_lexer_init(Lexer *self) { .payload = NULL, .log = NULL }, - .needs_to_restore_external_scanner = false, - .last_external_token_end_byte = 0, + .last_external_token_state = NULL, }; ts_lexer_reset(self, length_zero()); } @@ -112,20 +111,11 @@ static inline void ts_lexer__reset(Lexer *self, Length position) { void ts_lexer_set_input(Lexer *self, TSInput input) { self->input = input; ts_lexer__reset(self, length_zero()); - self->needs_to_restore_external_scanner = false; - self->last_external_token_end_byte = 0; + self->last_external_token_state = NULL; } void ts_lexer_reset(Lexer *self, Length position) { - if (position.bytes > self->current_position.bytes) { - self->needs_to_restore_external_scanner = true; - self->last_external_token_end_byte = 0; - ts_lexer__reset(self, position); - } else if (position.bytes < self->current_position.bytes) { - if (position.bytes < self->last_external_token_end_byte) { - self->needs_to_restore_external_scanner = true; - self->last_external_token_end_byte = 0; - } + if (position.bytes != self->current_position.bytes) { ts_lexer__reset(self, position); } } diff --git a/src/runtime/lexer.h b/src/runtime/lexer.h index 76d863c4..67470f6f 100644 --- a/src/runtime/lexer.h +++ b/src/runtime/lexer.h @@ -25,8 +25,7 @@ typedef struct { TSInput input; TSLogger logger; char debug_buffer[TS_DEBUG_BUFFER_SIZE]; - bool needs_to_restore_external_scanner; - uint32_t last_external_token_end_byte; + const TSExternalTokenState *last_external_token_state; } Lexer; void ts_lexer_init(Lexer *); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 2184c2d5..a29810c4 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -130,13 +130,20 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead, return result; } +static inline bool ts_lex_mode_eq(TSLexMode self, TSLexMode other) { + return self.lex_state == other.lex_state && + self.external_lex_state == other.external_lex_state; +} + static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree, TableEntry *table_entry) { TSLexMode current_lex_mode = self->language->lex_modes[state]; - if (tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state && - tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state) + if (ts_lex_mode_eq(tree->first_leaf.lex_mode, current_lex_mode)) return true; - if (tree->size.bytes == 0) return false; + if (current_lex_mode.external_lex_state != 0) + return false; + if (tree->size.bytes == 0) + return false; if (!table_entry->is_reusable) return false; if (!table_entry->depends_on_lookahead) @@ -182,53 +189,26 @@ static bool parser__condense_stack(Parser *self) { return result; } -static StackIterateAction parser__restore_external_scanner_callback( - void *payload, TSStateId state, TreeArray *trees, uint32_t tree_count, - bool is_done, bool is_pending) { - Parser *self = payload; - if (tree_count > 0) { - Tree *tree = *array_back(trees); - if (tree->has_external_token_state) { - const TSExternalTokenState *state = ts_tree_last_external_token_state(tree); +static void parser__restore_external_scanner(Parser *self, StackVersion version) { + const TSExternalTokenState *state = ts_stack_external_token_state(self->stack, version); + if (self->lexer.last_external_token_state != state) { + LOG("restore_external_scanner"); + if (state) { self->language->external_scanner.deserialize( self->external_scanner_payload, *state ); - LOG("deserialized_external_scanner"); - return StackIterateStop; + } else { + self->language->external_scanner.reset(self->external_scanner_payload); } } - - if (is_done) { - LOG("no_previous_external_token"); - self->language->external_scanner.reset(self->external_scanner_payload); - return StackIterateStop; - } - - return StackIterateNone; -} - -static void parser__restore_external_scanner(Parser *self, StackVersion version) { - if (!self->lexer.needs_to_restore_external_scanner) return; - LOG("restore_external_scanner"); - StackPopResult pop = ts_stack_iterate(self->stack, version, parser__restore_external_scanner_callback, self); - if (pop.slices.size > 0) { - StackSlice slice = pop.slices.contents[0]; - for (size_t i = 1; i < slice.trees.size; i++) { - Tree *tree = slice.trees.contents[i]; - if (tree->has_external_tokens) { - printf("RE-SCANNING TREE: %s\n", ts_tree_string(tree, self->language, true)); - } - } - ts_tree_array_delete(&slice.trees); - } } static Tree *parser__lex(Parser *self, StackVersion version) { TSStateId parse_state = ts_stack_top_state(self->stack, version); Length start_position = ts_stack_top_position(self->stack, version); TSLexMode lex_mode = self->language->lex_modes[parse_state]; - const bool *external_tokens = ts_language_enabled_external_tokens( + const bool *valid_external_tokens = ts_language_enabled_external_tokens( self->language, lex_mode.external_lex_state ); @@ -243,15 +223,13 @@ static Tree *parser__lex(Parser *self, StackVersion version) { for (;;) { Length current_position = self->lexer.current_position; - if (external_tokens) { + if (valid_external_tokens) { LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state, current_position.extent.row, current_position.extent.column); parser__restore_external_scanner(self, version); ts_lexer_start(&self->lexer); if (self->language->external_scanner.scan(self->external_scanner_payload, - &self->lexer.data, external_tokens)) { - self->lexer.last_external_token_end_byte = self->lexer.current_position.bytes; - self->lexer.needs_to_restore_external_scanner = false; + &self->lexer.data, valid_external_tokens)) { found_external_token = true; break; } @@ -269,7 +247,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) { LOG("retry_in_error_mode"); found_error = true; lex_mode = self->language->lex_modes[ERROR_STATE]; - external_tokens = ts_language_enabled_external_tokens( + valid_external_tokens = ts_language_enabled_external_tokens( self->language, lex_mode.external_lex_state ); @@ -303,7 +281,9 @@ static Tree *parser__lex(Parser *self, StackVersion version) { result = ts_tree_make_error(size, padding, first_error_character); } else { TSSymbol symbol = self->lexer.data.result_symbol; - if (found_external_token) symbol = self->language->external_scanner.symbol_map[symbol]; + if (found_external_token) { + symbol = self->language->external_scanner.symbol_map[symbol]; + } Length padding = length_sub(self->lexer.token_start_position, start_position); Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position); @@ -312,10 +292,9 @@ static Tree *parser__lex(Parser *self, StackVersion version) { if (found_external_token) { result->has_external_tokens = true; - if (self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state)) { - result->has_external_token_state = true; - self->last_external_token = result; - } + result->has_external_token_state = true; + self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state); + self->lexer.last_external_token_state = &result->external_token_state; } } @@ -331,6 +310,17 @@ static void parser__clear_cached_token(Parser *self) { self->cached_token = NULL; } +static inline bool ts_external_token_state_eq(const TSExternalTokenState *self, + const TSExternalTokenState *other) { + if (self == other) { + return true; + } else if (!self || !other) { + return false; + } else { + return memcmp(self, other, sizeof(TSExternalTokenState)) == 0; + } +} + static Tree *parser__get_lookahead(Parser *self, StackVersion version, ReusableNode *reusable_node, bool *is_fresh) { @@ -370,6 +360,20 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, continue; } + if (reusable_node->tree->first_leaf.lex_mode.external_lex_state != 0 && + !ts_external_token_state_eq( + reusable_node->preceding_external_token_state, + ts_stack_external_token_state(self->stack, version))) { + LOG("cant_reuse_external_tokens tree:%s, size:%u", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.bytes); + if (!reusable_node_breakdown(reusable_node)) { + reusable_node_pop(reusable_node); + parser__breakdown_top_of_stack(self, version); + } + continue; + } + Tree *result = reusable_node->tree; ts_tree_retain(result); return result; @@ -459,6 +463,10 @@ static void parser__shift(Parser *self, StackVersion version, TSStateId state, bool is_pending = lookahead->child_count > 0; ts_stack_push(self->stack, version, lookahead, is_pending, state); + if (lookahead->has_external_token_state) { + ts_stack_set_external_token_state( + self->stack, version, ts_tree_last_external_token_state(lookahead)); + } ts_tree_release(lookahead); } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 198cce4d..934f70bb 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -169,11 +169,13 @@ static void stack_node_add_link(StackNode *self, StackLink link) { } static StackVersion ts_stack__add_version(Stack *self, StackNode *node, - unsigned push_count) { + unsigned push_count, + const TSExternalTokenState *external_token_state) { StackHead head = { .node = node, .is_halted = false, .push_count = push_count, + .external_token_state = external_token_state, }; array_push(&self->heads, head); stack_node_retain(node); @@ -181,7 +183,8 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node, } static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees, - unsigned push_count) { + unsigned push_count, + const TSExternalTokenState *external_token_state) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == node) { @@ -191,7 +194,7 @@ static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees, } } - StackVersion version = ts_stack__add_version(self, node, push_count); + StackVersion version = ts_stack__add_version(self, node, push_count, external_token_state); StackSlice slice = { *trees, version }; array_push(&self->slices, slice); } @@ -203,6 +206,7 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version, StackHead *head = array_get(&self->heads, version); unsigned push_count = head->push_count; + const TSExternalTokenState *external_token_state = head->external_token_state; Iterator iterator = { .node = head->node, .trees = array_new(), @@ -230,7 +234,8 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version, if (!should_stop) ts_tree_array_copy(trees, &trees); array_reverse(&trees); - ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count); + ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count, + external_token_state); } if (should_stop) {