From 2bcd2e4d004782da6c0f154f708c94329471799a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 21 Dec 2015 16:04:11 -0800 Subject: [PATCH] Reuse fragile tokens that came from the current lex state --- include/tree_sitter/parser.h | 1 + src/runtime/lexer.c | 5 ++- src/runtime/parser.c | 72 +++++++++++++++++++++--------------- src/runtime/tree.c | 8 ++-- src/runtime/tree.h | 3 ++ 5 files changed, 54 insertions(+), 35 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index e1da810c..9b29cf60 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -47,6 +47,7 @@ typedef struct TSLexer { size_t lookahead_size; int32_t lookahead; + TSStateId starting_state; TSInput input; TSDebugger debugger; diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index d2b69fc0..d859223b 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -45,6 +45,7 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) { LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars); LOG_LOOKAHEAD(); + self->starting_state = lex_state; if (!self->chunk) ts_lexer__get_chunk(self); if (!self->lookahead_size) @@ -101,7 +102,9 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol, result = ts_tree_make_leaf(symbol, padding, size, metadata); } - result->options.fragile_left = fragile; + if (fragile) + result->context.lex_state = self->starting_state; + return result; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 494aada0..4617eb18 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -84,19 +84,26 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) { } while (last_child->child_count > 0); } +static void ts_parser__pop_reusable_subtree(LookaheadState *state); + /* * Replace the parser's reusable_subtree with its first non-fragile descendant. * Return true if a suitable descendant is found, false otherwise. */ -static bool ts_parser__breakdown_reusable_subtree(LookaheadState *state) { +static void ts_parser__breakdown_reusable_subtree(LookaheadState *state) { do { - if (state->reusable_subtree->symbol == ts_builtin_sym_error) - return false; - if (state->reusable_subtree->child_count == 0) - return false; + if (state->reusable_subtree->symbol == ts_builtin_sym_error) { + ts_parser__pop_reusable_subtree(state); + return; + } + + if (state->reusable_subtree->child_count == 0) { + ts_parser__pop_reusable_subtree(state); + return; + } + state->reusable_subtree = state->reusable_subtree->children[0]; } while (ts_tree_is_fragile(state->reusable_subtree)); - return true; } /* @@ -118,13 +125,30 @@ static void ts_parser__pop_reusable_subtree(LookaheadState *state) { } static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { - if (!subtree || subtree->symbol == ts_builtin_sym_error || - ts_tree_is_fragile(subtree)) + if (!subtree) return false; + if (subtree->symbol == ts_builtin_sym_error) + return false; + if (ts_tree_is_fragile(subtree)) + return false; + TSStateId state = ts_stack_top_state(self->stack, head); + + if (subtree->context.lex_state != TSTREE_LEX_STATE_INDEPENDENT) { + TSStateId lex_state = self->language->lex_states[state]; + if (subtree->context.lex_state != lex_state) + return false; + } + const TSParseAction *action = ts_language_actions(self->language, state, subtree->symbol); - return action->type != TSParseActionTypeError && !action->can_hide_split; + if (action->type == TSParseActionTypeError || action->can_hide_split) + return false; + + if (ts_tree_is_extra(subtree) && !action->extra) + return false; + + return true; } /* @@ -142,33 +166,25 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { } if (state->reusable_subtree_pos < position.chars) { - LOG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + LOG("past_reusable sym:%s", SYM_NAME(state->reusable_subtree->symbol)); ts_parser__pop_reusable_subtree(state); continue; } - bool can_reuse = true; if (ts_tree_has_changes(state->reusable_subtree)) { if (state->is_verifying) { ts_parser__breakdown_top_of_stack(self, head); state->is_verifying = false; } + LOG("breakdown_changed sym:%s", SYM_NAME(state->reusable_subtree->symbol)); - can_reuse = false; - } else if (ts_tree_is_extra(state->reusable_subtree)) { - LOG("breakdown_extra sym:%s", SYM_NAME(state->reusable_subtree->symbol)); - can_reuse = false; - } else if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) { - LOG("breakdown_non_reusable sym:%s", - SYM_NAME(state->reusable_subtree->symbol)); - can_reuse = false; + ts_parser__breakdown_reusable_subtree(state); + continue; } - if (!can_reuse) { - if (!ts_parser__breakdown_reusable_subtree(state)) { - LOG("dont_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); - ts_parser__pop_reusable_subtree(state); - } + if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) { + LOG("breakdown_unreusable sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + ts_parser__breakdown_reusable_subtree(state); continue; } @@ -217,8 +233,6 @@ static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { static bool ts_parser__shift(TSParser *self, int head, TSStateId parse_state, TSTree *lookahead) { - if (self->language->symbol_metadata[lookahead->symbol].extra) - ts_tree_set_fragile(lookahead); if (ts_stack_push(self->stack, head, parse_state, lookahead)) { LOG("merge head:%d", head); vector_erase(&self->lookahead_states, head); @@ -643,10 +657,8 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { if (!ts_parser__can_reuse(self, head, lookahead) || position.chars != last_position.chars) { - TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); - if (ts_parser__can_reuse(self, head, reused_lookahead)) { - lookahead = reused_lookahead; - } else { + lookahead = ts_parser__get_next_lookahead(self, head); + if (!lookahead) { ts_lexer_reset(&self->lexer, position); TSStateId parse_state = ts_stack_top_state(self->stack, head); TSStateId lex_state = self->language->lex_states[parse_state]; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 6616335f..dc084407 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -18,10 +18,9 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, .named_child_count = 0, .children = NULL, .padding = padding, - .options = - { - .visible = metadata.visible, .named = metadata.named, - }, + .options.visible = metadata.visible, + .options.named = metadata.named, + .context.lex_state = TSTREE_LEX_STATE_INDEPENDENT, }; if (sym == ts_builtin_sym_error) { @@ -92,6 +91,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { } if (child_count > 0) { + self->context.lex_state = children[0]->context.lex_state; if (children[0]->options.fragile_left) self->options.fragile_left = true; if (children[child_count - 1]->options.fragile_right) diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 166b7c11..c2693618 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -9,11 +9,14 @@ extern "C" { #include "tree_sitter/parser.h" #include "runtime/length.h" +#define TSTREE_LEX_STATE_INDEPENDENT (unsigned short)(-1) + struct TSTree { struct { struct TSTree *parent; size_t index; TSLength offset; + TSStateId lex_state; } context; size_t child_count; size_t visible_child_count;