From c3a242740bb3d91ac3e17a5fb5f0d7abad92ee72 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 1 Jul 2016 15:08:19 -0700 Subject: [PATCH] Allow lookahead to be broken down further after performing reductions --- spec/runtime/parser_spec.cc | 2 +- src/runtime/parser.c | 252 ++++++++++++++++++++++-------------- src/runtime/parser.h | 8 ++ 3 files changed, 164 insertions(+), 98 deletions(-) diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index ff6b05fe..888a229c 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -224,7 +224,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (identifier)))))"); - insert_text(strlen("x ^ (100 + abc"), ".d"); + insert_text(strlen("x * (100 + abc"), ".d"); assert_root_node( "(program (expression_statement (math_op " diff --git a/src/runtime/parser.c b/src/runtime/parser.c index d102c682..1dba1876 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -49,11 +49,6 @@ static const unsigned ERROR_COST_THRESHOLD = 3; -typedef struct { - TSTree *tree; - size_t char_index; -} ReusableNode; - typedef struct { TSParser *parser; TSSymbol lookahead_symbol; @@ -112,8 +107,6 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self, TSStateId state = ts_stack_top_state(self->stack, slice.version); TSTree *parent = *array_front(&slice.trees); - LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); - for (size_t j = 0; j < parent->child_count; j++) { TSTree *child = parent->children[j]; pending = child->child_count > 0; @@ -136,6 +129,9 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self, CHECK(ts_parser__push(self, slice.version, tree, state)); } + LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); + LOG_STACK(); + ts_tree_release(parent); array_delete(&slice.trees); } @@ -197,41 +193,31 @@ static bool ts_parser__condense_stack(TSParser *self) { return result; } -static bool ts_parser__can_reuse(TSParser *self, StackVersion version, - TSTree *tree) { - if (tree->symbol == ts_builtin_sym_error) { - LOG("cant_reuse_error tree:%s", SYM_NAME(tree->symbol)); - return false; - } - - if (tree->has_changes) { - LOG("cant_reuse_changed tree:%s", SYM_NAME(tree->symbol)); - return false; - } - - TSStateId state = ts_stack_top_state(self->stack, version); +static bool ts_parser__can_reuse(TSParser *self, TSStateId state, + TableEntry *table_entry, TSTree *tree) { if (tree->parse_state != state) { if (ts_tree_is_fragile(tree)) { - LOG("cant_reuse_fragile sym:%s", SYM_NAME(tree->symbol)); + LOG("cant_reuse_fragile sym:%s, size:%lu", SYM_NAME(tree->symbol), + tree->size.chars); return false; } - TableEntry entry; - ts_language_table_entry(self->language, state, tree->symbol, &entry); - - if (!entry.is_reusable) { - LOG("cant_reuse_ambiguous sym:%s", SYM_NAME(tree->symbol)); + if (!table_entry->is_reusable) { + LOG("cant_reuse_ambiguous sym:%s, size:%lu", SYM_NAME(tree->symbol), + tree->size.chars); return false; } - if (entry.action_count == 0) { - LOG("cant_reuse_unexpected sym:%s", SYM_NAME(tree->symbol)); + if (table_entry->action_count == 0) { + LOG("cant_reuse_unexpected sym:%s, size:%lu", SYM_NAME(tree->symbol), + tree->size.chars); return false; } - TSParseAction action = entry.actions[entry.action_count - 1]; + TSParseAction action = table_entry->actions[table_entry->action_count - 1]; if (tree->extra != action.extra) { - LOG("cant_reuse_extra sym:%s", SYM_NAME(tree->symbol)); + LOG("cant_reuse_extra sym:%s, size:%lu", SYM_NAME(tree->symbol), + tree->size.chars); return false; } @@ -243,24 +229,28 @@ static bool ts_parser__can_reuse(TSParser *self, StackVersion version, &leaf_entry); if (!leaf_entry.is_reusable) { - LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol)); + LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s, size:%lu", + SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), + tree->size.chars); return false; } if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) { - LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol)); + LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu", + SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), + tree->size.chars); return false; } - } else if (entry.depends_on_lookahead) { - LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s", - SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol)); + } else if (table_entry->depends_on_lookahead) { + LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu", + SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol), + tree->size.chars); return false; } } } + LOG("reuse sym:%s size:%lu", SYM_NAME(tree->symbol), tree->size.chars); return true; } @@ -298,22 +288,45 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state, return result; } +static void ts_parser__clear_cached_token(TSParser *self) { + ts_tree_release(self->cached_token); + self->cached_token = NULL; +} + static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, - ReusableNode *reusable_node) { + ReusableNode *reusable_node, + bool *is_fresh) { TSLength position = ts_stack_top_position(self->stack, version); while (reusable_node->tree) { if (reusable_node->char_index > position.chars) { + LOG("before_reusable sym:%s, pos:%lu", + SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); break; } if (reusable_node->char_index < position.chars) { - LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol)); + LOG("past_reusable sym:%s, pos:%lu", + SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index); ts_parser__pop_reusable_node(reusable_node); continue; } - if (!ts_parser__can_reuse(self, version, reusable_node->tree)) { + if (reusable_node->tree->symbol == ts_builtin_sym_error) { + LOG("cant_reuse_error sym:%s, size:%lu", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.chars); + if (!ts_parser__breakdown_reusable_node(reusable_node)) { + ts_parser__pop_reusable_node(reusable_node); + CHECK(ts_parser__breakdown_top_of_stack(self, version)); + } + continue; + } + + if (reusable_node->tree->has_changes) { + LOG("cant_reuse_changed tree:%s, size:%lu", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.chars); if (!ts_parser__breakdown_reusable_node(reusable_node)) { ts_parser__pop_reusable_node(reusable_node); CHECK(ts_parser__breakdown_top_of_stack(self, version)); @@ -322,17 +335,20 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version, } TSTree *result = reusable_node->tree; - TSLength size = ts_tree_total_size(result); - LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars, - result->extra); - ts_parser__pop_reusable_node(reusable_node); ts_tree_retain(result); + *is_fresh = false; return result; } + if (self->cached_token && position.chars == self->cached_token_char_index) { + ts_tree_retain(self->cached_token); + return self->cached_token; + } + ts_lexer_reset(&self->lexer, position); TSStateId parse_state = ts_stack_top_state(self->stack, version); bool error_mode = parse_state == TS_STATE_ERROR; + *is_fresh = true; return ts_parser__lex(self, parse_state, error_mode); error: @@ -784,7 +800,8 @@ static void ts_parser__start(TSParser *self, TSInput input, ts_lexer_set_input(&self->lexer, input); ts_stack_clear(self->stack); - + self->reusable_node = (ReusableNode){ previous_tree, 0 }; + self->cached_token = NULL; self->finished_tree = NULL; } @@ -835,8 +852,9 @@ error: } static bool ts_parser__handle_error(TSParser *self, StackVersion version, - TSStateId state, TSTree *lookahead) { + TSTree *lookahead) { size_t previous_version_count = ts_stack_version_count(self->stack); + TSStateId state = ts_stack_top_state(self->stack, version); unsigned error_cost = ts_stack_error_cost(self->stack, version); unsigned error_depth = ts_stack_error_depth(self->stack, version) + 1; @@ -944,19 +962,34 @@ error: return false; } -static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, - TSTree *lookahead) { +typedef enum { + ConsumeFailed, + ConsumeInvalid, + ConsumeSucceeded, +} ConsumeResult; + +static ConsumeResult ts_parser__consume_lookahead(TSParser *self, + StackVersion version, + TSTree *lookahead, + bool lookahead_is_fresh) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, version); + + TableEntry entry; + ts_language_table_entry(self->language, state, lookahead->symbol, &entry); + + if (!lookahead_is_fresh && + !ts_parser__can_reuse(self, state, &entry, lookahead)) + return ConsumeInvalid; + + LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), + lookahead->size.chars); + bool reduction_stopped_at_error = false; StackVersion last_reduction_version = STACK_VERSION_NONE; - size_t action_count; - const TSParseAction *actions = ts_language_actions( - self->language, state, lookahead->symbol, &action_count); - - for (size_t i = 0; i < action_count; i++) { - TSParseAction action = actions[i]; + for (size_t i = 0; i < entry.action_count; i++) { + TSParseAction action = entry.actions[i]; switch (action.type) { case TSParseActionTypeShift: { @@ -971,7 +1004,7 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, CHECK(ts_parser__shift(self, version, next_state, lookahead, action.extra)); - return true; + return ConsumeSucceeded; } case TSParseActionTypeReduce: { @@ -999,7 +1032,8 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, case ReduceStoppedAtError: { reduction_stopped_at_error = true; switch (ts_parser__repair_error(self, reduction.slice, lookahead, - actions, action_count)) { + entry.actions, + entry.action_count)) { case RepairFailed: goto error; case RepairNoneFound: @@ -1020,20 +1054,58 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, LOG("accept"); CHECK(ts_parser__accept(self, version)); - return true; + return ConsumeSucceeded; } case TSParseActionTypeRecover: { CHECK(ts_parser__recover(self, version, action.to_state, lookahead)); - return true; + return ConsumeSucceeded; } } - - LOG_STACK(); } - if (last_reduction_version != STACK_VERSION_NONE) { - ts_stack_renumber_version(self->stack, last_reduction_version, version); + if (last_reduction_version == STACK_VERSION_NONE) + return ConsumeInvalid; + + ts_stack_renumber_version(self->stack, last_reduction_version, version); + LOG_STACK(); + } + +error: + return ConsumeFailed; +} + +static bool ts_parser__advance(TSParser *self, StackVersion version, + ReusableNode *reusable_node) { + TSTree *lookahead = NULL; + + for (;;) { + bool lookahead_is_fresh; + CHECK(lookahead = ts_parser__get_lookahead(self, version, reusable_node, + &lookahead_is_fresh)); + + switch (ts_parser__consume_lookahead(self, version, lookahead, + lookahead_is_fresh)) { + case ConsumeFailed: + goto error; + case ConsumeSucceeded: + if (lookahead == reusable_node->tree) + ts_parser__pop_reusable_node(reusable_node); + ts_tree_release(lookahead); + return true; + case ConsumeInvalid: + break; + } + + if (!lookahead_is_fresh) { + if (lookahead == reusable_node->tree) { + if (!ts_parser__breakdown_reusable_node(reusable_node)) + ts_parser__pop_reusable_node(reusable_node); + } else { + ts_parser__clear_cached_token(self); + } + + ts_tree_release(lookahead); continue; } @@ -1041,15 +1113,22 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version, case BreakdownFailed: goto error; case BreakdownPerformed: - break; + ts_tree_release(lookahead); + continue; case BreakdownAborted: - CHECK(ts_parser__handle_error(self, version, state, lookahead)); - if (ts_stack_is_halted(self->stack, version)) - return true; + break; } + + CHECK(ts_parser__handle_error(self, version, lookahead)); + ts_tree_release(lookahead); + + if (ts_stack_is_halted(self->stack, version)) + return true; } error: + if (lookahead) + ts_tree_release(lookahead); return false; } @@ -1087,16 +1166,14 @@ void ts_parser_destroy(TSParser *self) { TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) { ts_parser__start(self, input, old_tree); - StackVersion version = 0; - size_t last_position = 0, position = 0; - ReusableNode reusable_node, current_reusable_node = { old_tree, 0 }; - for (;;) { - TSTree *lookahead = NULL; - size_t lookahead_position = 0; + StackVersion version = STACK_VERSION_NONE; + size_t position = 0, last_position = 0; + ReusableNode reusable_node; + do { for (version = 0; version < ts_stack_version_count(self->stack); version++) { - reusable_node = current_reusable_node; + reusable_node = self->reusable_node; last_position = position; while (!ts_stack_is_halted(self->stack, version)) { @@ -1111,44 +1188,25 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) { ts_stack_top_position(self->stack, version).rows + 1, ts_stack_top_position(self->stack, version).columns + 1); - if (!lookahead || (position != lookahead_position) || - !ts_parser__can_reuse(self, version, lookahead)) { - ts_tree_release(lookahead); - lookahead = ts_parser__get_lookahead(self, version, &reusable_node); - lookahead_position = position; - CHECK(lookahead); - } - - LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), - ts_tree_total_chars(lookahead)); - - if (!ts_parser__consume_lookahead(self, version, lookahead)) { - ts_tree_release(lookahead); - goto error; - } - + CHECK(ts_parser__advance(self, version, &reusable_node)); LOG_STACK(); } } - current_reusable_node = reusable_node; + self->reusable_node = reusable_node; if (ts_parser__condense_stack(self)) { LOG("condense"); LOG_STACK(); } - ts_tree_release(lookahead); - - if (version == 0) - break; - else - self->is_split = (version > 1); - } + self->is_split = (version > 1); + } while (version != 0); LOG("done"); LOG_TREE(); ts_stack_clear(self->stack); + ts_parser__clear_cached_token(self); ts_tree_assign_parents(self->finished_tree); return self->finished_tree; diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 9a2f4015..9d827d39 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -9,6 +9,11 @@ extern "C" { #include "runtime/array.h" #include "runtime/reduce_action.h" +typedef struct { + TSTree *tree; + size_t char_index; +} ReusableNode; + typedef struct { TSLexer lexer; Stack *stack; @@ -18,6 +23,9 @@ typedef struct { bool is_split; bool print_debugging_graphs; TSTree scratch_tree; + TSTree *cached_token; + size_t cached_token_char_index; + ReusableNode reusable_node; } TSParser; bool ts_parser_init(TSParser *);