diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 319830c3..7c825642 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -1,15 +1,16 @@ #include "runtime/runtime_spec_helper.h" +#include "runtime/helpers/tree_helpers.h" #include "runtime/stack.h" #include "runtime/tree.h" #include "runtime/length.h" enum { - stateA, stateB, stateC, stateD, stateE, stateF, stateG, stateH + stateA, stateB, stateC, stateD, stateE, stateF, stateG, stateH, stateI, stateJ }; enum { symbol0 = ts_builtin_sym_start, - symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7 + symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8 }; struct TreeSelectionSpy { @@ -31,7 +32,7 @@ START_TEST describe("Stack", [&]() { Stack *stack; - const size_t tree_count = 8; + const size_t tree_count = 10; TSTree *trees[tree_count]; TreeSelectionSpy tree_selection_spy{0, NULL, {NULL, NULL}}; @@ -43,7 +44,7 @@ describe("Stack", [&]() { TSLength len = ts_length_make(2, 2); for (size_t i = 0; i < tree_count; i++) - trees[i] = ts_tree_make_leaf(ts_builtin_sym_start + i, len, len, {1, 1}, {1, 2}, TSNodeTypeNamed); + trees[i] = ts_tree_make_leaf(i, len, len, {1, 1}, {1, 2}, TSNodeTypeNamed); }); after_each([&]() { @@ -87,8 +88,6 @@ describe("Stack", [&]() { }); describe("popping nodes from the stack", [&]() { - StackPopResultList pop; - before_each([&]() { /* * A0__B1__C2. @@ -102,43 +101,47 @@ describe("Stack", [&]() { /* * A0. */ - pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(2)); - AssertThat(pop.contents[0].trees[0], Equals(trees[1])); - AssertThat(pop.contents[0].trees[1], Equals(trees[2])); + Vector pop = ts_stack_pop(stack, 0, 2, false); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[1])); + AssertThat(pop1.trees[1], Equals(trees[2])); AssertThat(*ts_stack_head(stack, 0), Equals({trees[0], stateA})); /* * . */ pop = ts_stack_pop(stack, 0, 1, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(1)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); + pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(1)); + AssertThat(pop1.trees[0], Equals(trees[0])); AssertThat(ts_stack_head(stack, 0), Equals(nullptr)); }); it("does not count 'extra' trees toward the count", [&]() { ts_tree_set_extra(trees[1]); - pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); - AssertThat(pop.contents[0].trees[1], Equals(trees[1])); - AssertThat(pop.contents[0].trees[2], Equals(trees[2])); + Vector pop = ts_stack_pop(stack, 0, 2, false); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[0])); + AssertThat(pop1.trees[1], Equals(trees[1])); + AssertThat(pop1.trees[2], Equals(trees[2])); AssertThat(ts_stack_head(stack, 0), Equals(nullptr)); }); it("pops the entire stack when given a negative count", [&]() { - pop = ts_stack_pop(stack, 0, -1, false); + Vector pop = ts_stack_pop(stack, 0, -1, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); - AssertThat(pop.contents[0].trees[1], Equals(trees[1])); - AssertThat(pop.contents[0].trees[2], Equals(trees[2])); + AssertThat(pop.size, Equals(1)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[0])); + AssertThat(pop1.trees[1], Equals(trees[1])); + AssertThat(pop1.trees[2], Equals(trees[2])); }); }); @@ -316,6 +319,9 @@ describe("Stack", [&]() { ts_stack_push(stack, 1, stateE, trees[4]); ts_stack_push(stack, 1, stateF, trees[5]); ts_stack_push(stack, 1, stateG, trees[6]); + + AssertThat(ts_stack_head_count(stack), Equals(1)); + AssertThat(ts_stack_entry_next_count(ts_stack_head(stack, 0)), Equals(2)); }); describe("when there are two paths that lead to two different heads", [&]() { @@ -324,18 +330,18 @@ describe("Stack", [&]() { * A0__B1__C2. * \__E4. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 2, false); + Vector pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(2)); - StackPopResult pop1 = pop.contents[0]; - AssertThat(pop1.index, Equals(0)); - AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); AssertThat(pop1.trees[0], Equals(trees[3])); AssertThat(pop1.trees[1], Equals(trees[6])); - StackPopResult pop2 = pop.contents[1]; - AssertThat(pop2.index, Equals(1)); - AssertThat(pop2.tree_count, Equals(2)); + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); AssertThat(pop2.trees[0], Equals(trees[5])); AssertThat(pop2.trees[1], Equals(trees[6])); @@ -359,9 +365,9 @@ describe("Stack", [&]() { * A0__B1__C2__D3__G6. * \__E4__F5__/ */ - StackPopResultList pop = ts_stack_pop(stack, 0, 1, false); + Vector pop = ts_stack_pop(stack, 0, 1, false); - AssertThat(pop.size, Equals(1)); + AssertThat(pop.size, Equals(1)); AssertThat(ts_stack_head_count(stack), Equals(1)); }); }); @@ -379,19 +385,21 @@ describe("Stack", [&]() { * A0__B1__C2__D3. * \__E4__F5. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 2, false); + Vector pop = ts_stack_pop(stack, 0, 2, false); AssertThat(ts_stack_head_count(stack), Equals(2)); - AssertThat(pop.size, Equals(2)); - AssertThat(pop.contents[0].index, Equals(0)); - AssertThat(pop.contents[0].tree_count, Equals(2)); - AssertThat(pop.contents[0].trees[0], Equals(trees[6])); - AssertThat(pop.contents[0].trees[1], Equals(trees[7])); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[6])); + AssertThat(pop1.trees[1], Equals(trees[7])); - AssertThat(pop.contents[1].index, Equals(1)); - AssertThat(pop.contents[1].tree_count, Equals(2)); - AssertThat(pop.contents[1].trees[0], Equals(trees[6])); - AssertThat(pop.contents[1].trees[1], Equals(trees[7])); + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); + AssertThat(pop2.trees[0], Equals(trees[6])); + AssertThat(pop2.trees[1], Equals(trees[7])); }); }); @@ -400,17 +408,121 @@ describe("Stack", [&]() { /* * A0__B1. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 3, false); + Vector pop = ts_stack_pop(stack, 0, 3, false); AssertThat(ts_stack_head_count(stack), Equals(1)); AssertThat(*ts_stack_head(stack, 0), Equals({trees[1], stateB})); - AssertThat(pop.size, Equals(2)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].index, Equals(0)); - AssertThat(pop.contents[0].trees[0], Equals(trees[2])); - AssertThat(pop.contents[1].tree_count, Equals(3)); - AssertThat(pop.contents[1].index, Equals(0)); - AssertThat(pop.contents[1].trees[0], Equals(trees[4])); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.trees[0], Equals(trees[2])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.tree_count, Equals(3)); + AssertThat(pop2.head_index, Equals(0)); + AssertThat(pop2.trees[0], Equals(trees[4])); + }); + }); + }); + + describe("popping from a stack head that has been 3-way merged", [&]() { + before_each([&]() { + /* + * A0__B1__C2__D3__I8__J9. + * \__E4__F5__/ + * \__G6__H7__/ + */ + ts_stack_clear(stack); + ts_stack_push(stack, 0, stateA, trees[0]); + ts_stack_push(stack, 0, stateB, trees[1]); + ts_stack_split(stack, 0); + ts_stack_split(stack, 1); + ts_stack_push(stack, 0, stateC, trees[2]); + ts_stack_push(stack, 1, stateE, trees[4]); + ts_stack_push(stack, 2, stateG, trees[6]); + ts_stack_push(stack, 0, stateD, trees[3]); + ts_stack_push(stack, 1, stateF, trees[5]); + ts_stack_push(stack, 2, stateH, trees[7]); + ts_stack_push(stack, 0, stateI, trees[8]); + ts_stack_push(stack, 1, stateI, trees[8]); + ts_stack_push(stack, 1, stateI, trees[8]); + ts_stack_push(stack, 0, stateJ, trees[9]); + + AssertThat(ts_stack_head_count(stack), Equals(1)); + StackEntry *head = ts_stack_head(stack, 0); + AssertThat(ts_stack_entry_next_count(head), Equals(1)); + AssertThat(ts_stack_entry_next_count(ts_stack_entry_next(head, 0)), Equals(3)); + }); + + describe("when there is one path that leads to three different heads", [&]() { + it("returns three entries with the same array of trees", [&]() { + /* + * A0__B1__C2__D3. + * \__E4__F5. + * \__G6__H7. + */ + Vector pop = ts_stack_pop(stack, 0, 2, false); + AssertThat(ts_stack_head_count(stack), Equals(3)); + + AssertThat(pop.size, Equals(3)); + + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(ts_stack_top_tree(stack, 0), Equals(trees[3])); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[8])); + AssertThat(pop1.trees[1], Equals(trees[9])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(ts_stack_top_tree(stack, 1), Equals(trees[5])); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); + AssertThat(pop2.trees, Equals(pop1.trees)); + + StackPopResult pop3 = *(StackPopResult *)vector_get(&pop, 2); + AssertThat(ts_stack_top_tree(stack, 2), Equals(trees[7])); + AssertThat(pop3.head_index, Equals(2)); + AssertThat(pop3.tree_count, Equals(2)); + AssertThat(pop3.trees, Equals(pop1.trees)); + }); + }); + + describe("when there are three different paths that lead to three different heads", [&]() { + it("returns three entries with different arrays of trees", [&]() { + /* + * A0__B1__C2. + * \__E4. + * \__G6. + */ + Vector pop = ts_stack_pop(stack, 0, 3, false); + AssertThat(ts_stack_head_count(stack), Equals(3)); + + AssertThat(pop.size, Equals(3)); + + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(ts_stack_top_tree(stack, 0), Equals(trees[2])); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[3])); + AssertThat(pop1.trees[1], Equals(trees[8])); + AssertThat(pop1.trees[2], Equals(trees[9])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(ts_stack_top_tree(stack, 1), Equals(trees[4])); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(3)); + AssertThat(pop2.trees[0], Equals(trees[5])); + AssertThat(pop2.trees[1], Equals(trees[8])); + AssertThat(pop2.trees[2], Equals(trees[9])); + + StackPopResult pop3 = *(StackPopResult *)vector_get(&pop, 2); + AssertThat(ts_stack_top_tree(stack, 2), Equals(trees[6])); + AssertThat(pop3.head_index, Equals(2)); + AssertThat(pop3.tree_count, Equals(3)); + AssertThat(pop3.trees[0], Equals(trees[7])); + AssertThat(pop3.trees[1], Equals(trees[8])); + AssertThat(pop3.trees[2], Equals(trees[9])); }); }); }); diff --git a/src/runtime/language.c b/src/runtime/language.c index ece2dec4..086ab3bb 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -1,5 +1,24 @@ #include "tree_sitter/parser.h" +static const TSParseAction ERROR_ACTIONS[2] = { + {.type = TSParseActionTypeError }, {.type = 0 } +}; + +const TSParseAction *ts_language_actions(const TSLanguage *language, + TSStateId state, TSSymbol sym) { + const TSParseAction *actions = + (language->parse_table + (state * language->symbol_count))[sym]; + return actions ? actions : ERROR_ACTIONS; +} + +TSParseAction ts_language_last_action(const TSLanguage *language, + TSStateId state, TSSymbol sym) { + const TSParseAction *action = ts_language_actions(language, state, sym); + while ((action + 1)->type) + action++; + return *action; +} + size_t ts_language_symbol_count(const TSLanguage *language) { return language->symbol_count; } diff --git a/src/runtime/language.h b/src/runtime/language.h new file mode 100644 index 00000000..e8f5ae99 --- /dev/null +++ b/src/runtime/language.h @@ -0,0 +1,18 @@ +#ifndef RUNTIME_LANGUAGE_H_ +#define RUNTIME_LANGUAGE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tree_sitter/parser.h" + +const TSParseAction *ts_language_actions(const TSLanguage *, TSStateId, + TSSymbol); +TSParseAction ts_language_last_action(const TSLanguage *, TSStateId, TSSymbol); + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_LANGUAGE_H_ diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 5a6fcf1d..061e6315 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -6,17 +6,17 @@ #include "runtime/debugger.h" #include "utf8proc.h" -#define DEBUG(...) \ +#define LOG(...) \ if (self->debugger.debug_fn) { \ snprintf(self->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ self->debugger.debug_fn(self->debugger.payload, TSDebugTypeLex, \ self->debug_buffer); \ } -#define DEBUG_LOOKAHEAD() \ - DEBUG((0 < self->lookahead && self->lookahead < 256) ? "lookahead char:'%c'" \ - : "lookahead char:%d", \ - self->lookahead); +#define LOG_LOOKAHEAD() \ + LOG((0 < self->lookahead && self->lookahead < 256) ? "lookahead char:'%c'" \ + : "lookahead char:%d", \ + self->lookahead); static const char *empty_chunk = ""; @@ -37,12 +37,12 @@ static void ts_lexer__get_lookahead(TSLexer *self) { self->lookahead_size = utf8proc_iterate( (const uint8_t *)self->chunk + position_in_chunk, self->chunk_size - position_in_chunk + 1, &self->lookahead); - DEBUG_LOOKAHEAD(); + LOG_LOOKAHEAD(); } static void ts_lexer__start(TSLexer *self, TSStateId lex_state) { - DEBUG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars); - DEBUG_LOOKAHEAD(); + LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars); + LOG_LOOKAHEAD(); if (!self->chunk) ts_lexer__get_chunk(self); @@ -51,7 +51,7 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) { } static void ts_lexer__start_token(TSLexer *self) { - DEBUG("start_token chars:%lu", self->current_position.chars); + LOG("start_token chars:%lu", self->current_position.chars); self->token_start_position = self->current_position; DEBUG("start_token row:%lu", self->current_point.row); @@ -60,7 +60,7 @@ static void ts_lexer__start_token(TSLexer *self) { } static bool ts_lexer__advance(TSLexer *self, TSStateId state) { - DEBUG("advance state:%d", state); + LOG("advance state:%d", state); if (self->chunk == empty_chunk) return false; @@ -97,14 +97,11 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol, self->token_end_point = self->current_point; if (symbol == ts_builtin_sym_error) { - DEBUG("error_char"); - return ts_tree_make_error(size, padding, size_point, - padding_point, self->lookahead); + LOG("error_char"); + return ts_tree_make_error(size, padding, size_point, padding_point, self->lookahead); } else { - DEBUG("accept_token sym:%s", symbol_name); - return ts_tree_make_leaf(symbol, padding, size, - padding_point, - size_point, node_type); + LOG("accept_token sym:%s", symbol_name); + return ts_tree_make_leaf(symbol, padding, size, padding_point, size_point, node_type); } } @@ -128,6 +125,9 @@ TSLexer ts_lexer_make() { } void ts_lexer_reset(TSLexer *self, TSLength position, TSPoint point) { + if (ts_length_eq(position, self->current_position)) + return; + self->token_start_position = position; self->token_end_position = position; self->current_position = position; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index bd477d3d..e16b8de5 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1,4 +1,5 @@ #include "runtime/parser.h" +#include #include #include #include "tree_sitter/runtime.h" @@ -6,12 +7,14 @@ #include "runtime/tree.h" #include "runtime/lexer.h" #include "runtime/length.h" +#include "runtime/vector.h" +#include "runtime/language.h" /* * Debugging */ -#define DEBUG(...) \ +#define LOG(...) \ if (self->lexer.debugger.debug_fn) { \ snprintf(self->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ self->lexer.debugger.debug_fn(self->lexer.debugger.payload, \ @@ -20,6 +23,12 @@ #define SYM_NAME(sym) self->language->symbol_names[sym] +typedef struct { + TSTree *reusable_subtree; + size_t reusable_subtree_pos; + TSLength position; +} LookaheadState; + typedef enum { ConsumeResultShifted, ConsumeResultRemoved, @@ -30,37 +39,18 @@ typedef enum { * Private */ -static const TSParseAction ERROR_ACTIONS[2] = { - {.type = TSParseActionTypeError }, {.type = 0 } -}; - -static const TSParseAction *ts_language__actions(const TSLanguage *language, - TSStateId state, TSSymbol sym) { - const TSParseAction *actions = - (language->parse_table + (state * language->symbol_count))[sym]; - return actions ? actions : ERROR_ACTIONS; -} - -static TSParseAction ts_language__last_action(const TSLanguage *language, - TSStateId state, TSSymbol sym) { - const TSParseAction *action = ts_language__actions(language, state, sym); - while ((action + 1)->type) - action++; - return *action; -} - /* * Replace the parser's reusable_subtree with its first non-fragile descendant. * Return true if a suitable descendant is found, false otherwise. */ -static bool ts_parser__breakdown_reusable_subtree(TSParser *self) { +static bool ts_parser__breakdown_reusable_subtree(LookaheadState *state) { do { - if (self->reusable_subtree->symbol == ts_builtin_sym_error) + if (state->reusable_subtree->symbol == ts_builtin_sym_error) return false; - if (self->reusable_subtree->child_count == 0) + if (state->reusable_subtree->child_count == 0) return false; - self->reusable_subtree = self->reusable_subtree->children[0]; - } while (ts_tree_is_fragile(self->reusable_subtree)); + state->reusable_subtree = state->reusable_subtree->children[0]; + } while (ts_tree_is_fragile(state->reusable_subtree)); return true; } @@ -68,83 +58,87 @@ static bool ts_parser__breakdown_reusable_subtree(TSParser *self) { * Replace the parser's reusable_subtree with its largest right neighbor, or * NULL if no right neighbor exists. */ -static void ts_parser__pop_reusable_subtree(TSParser *self) { - self->reusable_subtree_pos += ts_tree_total_size(self->reusable_subtree).chars; +static void ts_parser__pop_reusable_subtree(LookaheadState *state) { + state->reusable_subtree_pos += + ts_tree_total_size(state->reusable_subtree).chars; - while (self->reusable_subtree) { - TSTree *parent = self->reusable_subtree->context.parent; - size_t next_index = self->reusable_subtree->context.index + 1; + while (state->reusable_subtree) { + TSTree *parent = state->reusable_subtree->context.parent; + size_t next_index = state->reusable_subtree->context.index + 1; if (parent && parent->child_count > next_index) { - self->reusable_subtree = parent->children[next_index]; + state->reusable_subtree = parent->children[next_index]; return; } - self->reusable_subtree = parent; + state->reusable_subtree = parent; } } +static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { + if (!subtree || subtree->symbol == ts_builtin_sym_error) + return false; + TSStateId state = ts_stack_top_state(self->stack, head); + const TSParseAction *action = + ts_language_actions(self->language, state, subtree->symbol); + return action->type != TSParseActionTypeError; +} + /* * Advance the parser's lookahead subtree. If there is a reusable subtree * at the correct position in the parser's previous tree, use that. Otherwise, * run the lexer. */ -static void ts_parser__get_next_lookahead(TSParser *self) { - while (self->reusable_subtree) { - if (self->reusable_subtree_pos > self->lexer.current_position.chars) { +static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { + LookaheadState *state = vector_get(&self->lookahead_states, head); + + while (state->reusable_subtree) { + if (state->reusable_subtree_pos > state->position.chars) { break; } - if (self->reusable_subtree_pos < self->lexer.current_position.chars) { - DEBUG("past_reuse sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - ts_parser__pop_reusable_subtree(self); + if (state->reusable_subtree_pos < state->position.chars) { + LOG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + ts_parser__pop_reusable_subtree(state); continue; } - if (ts_tree_has_changes(self->reusable_subtree) || - ts_tree_is_fragile(self->reusable_subtree) || - ts_tree_is_extra(self->reusable_subtree)) { - DEBUG("breakdown sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - if (!ts_parser__breakdown_reusable_subtree(self)) - ts_parser__pop_reusable_subtree(self); + if (ts_tree_has_changes(state->reusable_subtree) || + ts_tree_is_fragile(state->reusable_subtree) || + ts_tree_is_extra(state->reusable_subtree) || + (state->reusable_subtree->child_count > 0 && + !ts_parser__can_reuse(self, head, state->reusable_subtree))) { + LOG("breakdown sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + if (!ts_parser__breakdown_reusable_subtree(state)) + ts_parser__pop_reusable_subtree(state); continue; } - TSStateId top_state = ts_stack_top_state(self->stack, 0); - TSSymbol symbol = self->reusable_subtree->symbol; - if (ts_language__last_action(self->language, top_state, symbol).type == - TSParseActionTypeError) { - DEBUG("cant_reuse sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - ts_parser__pop_reusable_subtree(self); - continue; - } - - self->lookahead = self->reusable_subtree; - TSLength size = ts_tree_total_size(self->lookahead); - TSPoint offset_point = ts_tree_offset_point(self->lookahead); - DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(self->lookahead->symbol), - size.chars, self->lookahead->options.extra); - ts_lexer_reset(&self->lexer, - ts_length_add(self->lexer.current_position, size), - ts_point_add(self->lexer.current_point, offset_point)); - ts_parser__pop_reusable_subtree(self); - return; + TSTree *result = state->reusable_subtree; + TSLength size = ts_tree_total_size(result); + LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars, + result->options.extra); + ts_parser__pop_reusable_subtree(state); + return result; } - TSLength position = self->lexer.current_position; - TSPoint point = self->lexer.current_point; - for (size_t i = 0, count = ts_stack_head_count(self->stack); i < count; i++) { - if (i > 0) { - ts_lexer_reset(&self->lexer, position, point); - ts_tree_release(self->lookahead); - } + return NULL; +} - TSStateId parse_state = ts_stack_top_state(self->stack, i); - TSStateId lex_state = self->language->lex_states[parse_state]; - DEBUG("lex state:%d", lex_state); - self->lookahead = self->language->lex_fn(&self->lexer, lex_state); +static int ts_parser__split(TSParser *self, int head) { + int result = ts_stack_split(self->stack, head); + assert(result == (int)self->lookahead_states.size); + LookaheadState head_state = + *(LookaheadState *)vector_get(&self->lookahead_states, head); + vector_push(&self->lookahead_states, &head_state); + return result; +} - if (self->lookahead->symbol != ts_builtin_sym_error) - break; - } +static void ts_parser__remove_head(TSParser *self, int head) { + vector_erase(&self->lookahead_states, head); + ts_stack_remove_head(self->stack, head); +} + +static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { + return ts_tree_compare(left, right) <= 0 ? left : right; } /* @@ -152,59 +146,114 @@ static void ts_parser__get_next_lookahead(TSParser *self) { */ static ConsumeResult ts_parser__shift(TSParser *self, int head, - TSStateId parse_state) { - if (ts_stack_push(self->stack, head, parse_state, self->lookahead)) + TSStateId parse_state, TSTree *lookahead) { + LookaheadState *head_state = vector_get(&self->lookahead_states, head); + head_state->position = + ts_length_add(head_state->position, ts_tree_total_size(lookahead)); + if (ts_stack_push(self->stack, head, parse_state, lookahead)) { + LOG("merge head:%d", head); + vector_erase(&self->lookahead_states, head); return ConsumeResultRemoved; - else + } else { return ConsumeResultShifted; + } } -static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) { - ts_tree_set_extra(self->lookahead); - return ts_parser__shift(self, head, state); +static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state, + TSTree *lookahead) { + ts_tree_set_extra(lookahead); + return ts_parser__shift(self, head, state, lookahead); } static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, - size_t child_count, bool extra, - bool count_extra) { + int child_count, bool extra, bool count_extra) { + vector_clear(&self->reduce_parents); TSNodeType node_type = self->language->node_types[symbol]; - StackPopResultList pop_results = - ts_stack_pop(self->stack, head, child_count, count_extra); + Vector pop_results = ts_stack_pop(self->stack, head, child_count, count_extra); - TSTree *parent = NULL; - TSTree **last_children = NULL; - int last_index = -1; + int last_head_index = -1; + int removed_heads = 0; - for (int i = 0; i < pop_results.size; i++) { - StackPopResult pop_result = pop_results.contents[i]; + for (size_t i = 0; i < pop_results.size; i++) { + StackPopResult *pop_result = vector_get(&pop_results, i); - if (pop_result.trees != last_children) { - parent = ts_tree_make_node(symbol, pop_result.tree_count, - pop_result.trees, node_type); - } - - if (pop_result.index == last_index) { - ts_stack_add_alternative(self->stack, pop_result.index, parent); - } else { - TSStateId top_state = ts_stack_top_state(self->stack, pop_result.index); - TSStateId state; - - if (extra) { - ts_tree_set_extra(parent); - state = top_state; - } else { - state = ts_language__last_action(self->language, top_state, symbol) - .data.to_state; + /* + * If the same set of trees led to a previous stack head, reuse the parent + * tree that was added to that head. + */ + TSTree *parent = NULL; + for (size_t j = 0; j < i; j++) { + StackPopResult *prior_result = vector_get(&pop_results, j); + if (pop_result->trees == prior_result->trees) { + TSTree **existing_parent = vector_get(&self->reduce_parents, j); + parent = *existing_parent; + break; } - - ts_stack_push(self->stack, pop_result.index, state, parent); } - last_index = pop_result.index; - last_children = pop_result.trees; + /* + * Otherwise, create a new parent node for this set of trees. + */ + if (!parent) + parent = ts_tree_make_node(symbol, pop_result->tree_count, + pop_result->trees, node_type); + vector_push(&self->reduce_parents, &parent); + + /* + * If another path led to the same stack head, add this new parent tree + * as an alternative for that stack head. + */ + int new_head = pop_result->head_index - removed_heads; + if (pop_result->head_index == last_head_index) { + ts_stack_add_alternative(self->stack, new_head, parent); + continue; + } + + /* + * If the stack has split in the process of popping, create a duplicate of + * the lookahead state for this head, for the new head. + */ + if (i > 0) { + LOG("split_during_reduce new_head:%d", new_head); + LookaheadState *head_state = vector_get(&self->lookahead_states, head); + vector_push(&self->lookahead_states, head_state); + } + + /* + * If the parent node is extra, then do not change the state when pushing + * it. Otherwise, proceed to the state given in the parse table for the + * new parent symbol. + */ + TSStateId state; + TSStateId top_state = ts_stack_top_state(self->stack, new_head); + if (extra) { + ts_tree_set_extra(parent); + state = top_state; + } else { + TSParseAction action = + ts_language_last_action(self->language, top_state, symbol); + if (child_count == -1) { + state = 0; + } else { + assert(action.type == TSParseActionTypeShift); + state = action.data.to_state; + } + } + + /* + * If the given state already existed at a different head of the stack, + * then remove the lookahead state for the head. + */ + if (ts_stack_push(self->stack, new_head, state, parent)) { + vector_erase(&self->lookahead_states, new_head); + removed_heads++; + } + + last_head_index = pop_result->head_index; } - return parent; + TSTree **last_parent = vector_back(&self->reduce_parents); + return *last_parent; } static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, @@ -216,16 +265,18 @@ static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, } static void ts_parser__reduce_error(TSParser *self, int head, - size_t child_count) { + size_t child_count, TSTree *lookahead) { + LookaheadState *head_state = vector_get(&self->lookahead_states, head); TSTree *reduced = ts_parser__reduce(self, head, ts_builtin_sym_error, child_count, false, true); - reduced->size = ts_length_add(reduced->size, self->lookahead->padding); - self->lookahead->padding = ts_length_zero(); + reduced->size = ts_length_add(reduced->size, lookahead->padding); + head_state->position = ts_length_add(head_state->position, lookahead->padding); + lookahead->padding = ts_length_zero(); ts_tree_set_fragile_left(reduced); ts_tree_set_fragile_right(reduced); } -static bool ts_parser__handle_error(TSParser *self, int head) { +static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) { size_t error_token_count = 1; StackEntry *entry_before_error = ts_stack_head(self->stack, head); @@ -237,20 +288,20 @@ static bool ts_parser__handle_error(TSParser *self, int head) { */ int i = -1; for (StackEntry *entry = entry_before_error; true; - entry = ts_stack_entry_next(entry, head), i++) { + entry = ts_stack_entry_next(entry, 0), i++) { TSStateId stack_state = entry ? entry->state : 0; - TSParseAction action_on_error = ts_language__last_action( + TSParseAction action_on_error = ts_language_last_action( self->language, stack_state, ts_builtin_sym_error); if (action_on_error.type == TSParseActionTypeShift) { TSStateId state_after_error = action_on_error.data.to_state; - TSParseAction action_after_error = ts_language__last_action( - self->language, state_after_error, self->lookahead->symbol); + TSParseAction action_after_error = ts_language_last_action( + self->language, state_after_error, lookahead->symbol); if (action_after_error.type != TSParseActionTypeError) { - DEBUG("recover state:%u, count:%lu", state_after_error, - error_token_count + i); - ts_parser__reduce_error(self, head, error_token_count + i); + LOG("recover state:%u, count:%lu", state_after_error, + error_token_count + i); + ts_parser__reduce_error(self, head, error_token_count + i, lookahead); return true; } } @@ -263,17 +314,18 @@ static bool ts_parser__handle_error(TSParser *self, int head) { * If there is no state in the stack for which we can recover with the * current lookahead token, advance to the next token. */ - DEBUG("skip token:%s", SYM_NAME(self->lookahead->symbol)); - ts_parser__shift(self, head, ts_stack_top_state(self->stack, head)); - self->lookahead = self->language->lex_fn(&self->lexer, ts_lex_state_error); + LOG("skip token:%s", SYM_NAME(lookahead->symbol)); + ts_parser__shift(self, head, ts_stack_top_state(self->stack, head), + lookahead); + lookahead = self->language->lex_fn(&self->lexer, ts_lex_state_error); error_token_count++; /* * If the end of input is reached, exit. */ - if (self->lookahead->symbol == ts_builtin_sym_end) { - DEBUG("fail_to_recover"); - ts_parser__reduce_error(self, head, error_token_count - 1); + if (lookahead->symbol == ts_builtin_sym_end) { + LOG("fail_to_recover"); + ts_parser__reduce_error(self, head, -1, lookahead); return false; } } @@ -282,28 +334,33 @@ static bool ts_parser__handle_error(TSParser *self, int head) { static void ts_parser__start(TSParser *self, TSInput input, TSTree *previous_tree) { if (previous_tree) { - DEBUG("parse_after_edit"); + LOG("parse_after_edit"); } else { - DEBUG("new_parse"); + LOG("new_parse"); } self->lexer.input = input; ts_lexer_reset(&self->lexer, ts_length_zero(), ts_point_zero()); ts_stack_clear(self->stack); - self->reusable_subtree = previous_tree; - self->reusable_subtree_pos = 0; - self->lookahead = NULL; + LookaheadState head_state = { + .position = ts_length_zero(), + .reusable_subtree = previous_tree, + .reusable_subtree_pos = 0, + }; + vector_clear(&self->lookahead_states); + vector_push(&self->lookahead_states, &head_state); } static TSTree *ts_parser__finish(TSParser *self) { - StackPopResult pop_result = ts_stack_pop(self->stack, 0, -1, true).contents[0]; + Vector pop_results = ts_stack_pop(self->stack, 0, -1, true); + StackPopResult *pop_result = vector_get(&pop_results, 0); - TSTree **trees = pop_result.trees; - size_t extra_count = pop_result.tree_count - 1; - TSTree *root = trees[extra_count]; + size_t extra_count = pop_result->tree_count - 1; + TSTree *root = pop_result->trees[extra_count]; - ts_tree_prepend_children(root, extra_count, trees); + ts_tree_prepend_children(root, extra_count, pop_result->trees); + ts_tree_assign_parents(root); return root; } @@ -311,11 +368,12 @@ static TSTree *ts_parser__finish(TSParser *self) { * Continue performing parse actions for the given head until the current * lookahead symbol is consumed. */ -static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { +static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head, + TSTree *lookahead) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, head); const TSParseAction *next_action = - ts_language__actions(self->language, state, self->lookahead->symbol); + ts_language_actions(self->language, state, lookahead->symbol); /* * If there are multiple actions for the current state and lookahead symbol, @@ -330,95 +388,67 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { int current_head; if (next_action->type == 0) { current_head = head; - DEBUG("action current_head:%d, state:%d", current_head, state); } else { - current_head = ts_stack_split(self->stack, head); - DEBUG("split_action from_head:%d, current_head:%d, state:%d", head, - current_head, state); + current_head = ts_parser__split(self, head); + LOG("split_action from_head:%d, new_head:%d", head, current_head); } // TODO: Remove this by making a separate symbol for errors returned from // the lexer. - if (self->lookahead->symbol == ts_builtin_sym_error) + if (lookahead->symbol == ts_builtin_sym_error) action.type = TSParseActionTypeError; switch (action.type) { case TSParseActionTypeError: - DEBUG("error_sym"); + LOG("error_sym"); if (ts_stack_head_count(self->stack) == 1) { - if (ts_parser__handle_error(self, current_head)) - break; + if (ts_parser__handle_error(self, current_head, lookahead)) + return ConsumeResultShifted; else return ConsumeResultFinished; } else { - DEBUG("bail current_head:%d", current_head); - ts_stack_remove_head(self->stack, current_head); + LOG("bail current_head:%d", current_head); + ts_parser__remove_head(self, current_head); return ConsumeResultRemoved; } case TSParseActionTypeShift: - DEBUG("shift state:%u", action.data.to_state); - return ts_parser__shift(self, current_head, action.data.to_state); + LOG("shift state:%u", action.data.to_state); + return ts_parser__shift(self, current_head, action.data.to_state, + lookahead); case TSParseActionTypeShiftExtra: - DEBUG("shift_extra"); - return ts_parser__shift_extra(self, current_head, state); + LOG("shift_extra"); + return ts_parser__shift_extra(self, current_head, state, lookahead); case TSParseActionTypeReduce: - DEBUG("reduce sym:%s, child_count:%u", SYM_NAME(action.data.symbol), - action.data.child_count); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.data.symbol), + action.data.child_count); ts_parser__reduce(self, current_head, action.data.symbol, action.data.child_count, false, false); break; case TSParseActionTypeReduceExtra: - DEBUG("reduce_extra sym:%s", SYM_NAME(action.data.symbol)); + LOG("reduce_extra sym:%s", SYM_NAME(action.data.symbol)); ts_parser__reduce(self, current_head, action.data.symbol, 1, true, false); break; case TSParseActionTypeReduceFragile: - DEBUG("reduce_fragile sym:%s, count:%u", SYM_NAME(action.data.symbol), - action.data.child_count); + LOG("reduce_fragile sym:%s, count:%u", SYM_NAME(action.data.symbol), + action.data.child_count); ts_parser__reduce_fragile(self, current_head, action.data.symbol, action.data.child_count); break; case TSParseActionTypeAccept: - DEBUG("accept"); + LOG("accept"); return ConsumeResultFinished; } } } } -static int ts_tree__compare(TSTree *left, TSTree *right) { - if (left->symbol < right->symbol) return -1; - if (right->symbol < left->symbol) return 1; - if (left->child_count < right->child_count) return -1; - if (right->child_count < left->child_count) return 1; - for (size_t i = 0; i < left->child_count; i++) { - TSTree *left_child = left->children[i]; - TSTree *right_child = right->children[i]; - switch (ts_tree__compare(left_child, right_child)) { - case -1: - return -1; - case 1: - return 1; - default: - break; - } - } - return 0; -} - -static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { - if (ts_tree__compare(left, right) <= 0) - return left; - else - return right; -} - /* * Public */ @@ -429,14 +459,13 @@ TSParser ts_parser_make() { .stack = ts_stack_new((TreeSelectionCallback){ NULL, ts_parser__select_tree, }), - .lookahead = NULL, + .lookahead_states = vector_new(sizeof(LookaheadState), 4), + .reduce_parents = vector_new(sizeof(TSTree *), 4), }; } void ts_parser_destroy(TSParser *self) { ts_stack_delete(self->stack); - if (self->lookahead) - ts_tree_release(self->lookahead); } TSDebugger ts_parser_debugger(const TSParser *self) { @@ -451,14 +480,36 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { ts_parser__start(self, input, previous_tree); for (;;) { - ts_parser__get_next_lookahead(self); - - DEBUG("lookahead sym:%s, pos:%lu, head_count:%d", - SYM_NAME(self->lookahead->symbol), self->lexer.current_position.chars, - ts_stack_head_count(self->stack)); + TSTree *lookahead = NULL; + TSLength position = ts_length_zero(); + TSPoint point = ts_point_zero(); for (int head = 0; head < ts_stack_head_count(self->stack);) { - switch (ts_parser__consume_lookahead(self, head)) { + LookaheadState *state = vector_get(&self->lookahead_states, head); + + LOG("process head:%d, head_count:%d, state:%d, pos:%lu", head, + ts_stack_head_count(self->stack), + ts_stack_top_state(self->stack, head), state->position.chars); + + if (!ts_parser__can_reuse(self, head, lookahead) || + !ts_length_eq(state->position, position)) { + TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); + if (ts_parser__can_reuse(self, head, reused_lookahead)) { + lookahead = reused_lookahead; + } else { + position = state->position; + point = state->current_point; + ts_lexer_reset(&self->lexer, position, point); + TSStateId parse_state = ts_stack_top_state(self->stack, head); + TSStateId lex_state = self->language->lex_states[parse_state]; + lookahead = self->language->lex_fn(&self->lexer, lex_state); + } + } + + LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), + ts_tree_total_size(lookahead).chars); + + switch (ts_parser__consume_lookahead(self, head, lookahead)) { case ConsumeResultRemoved: break; case ConsumeResultShifted: diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 50370ba0..6c5cfffb 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -6,14 +6,14 @@ extern "C" { #endif #include "runtime/stack.h" +#include "runtime/vector.h" typedef struct { TSLexer lexer; Stack *stack; - TSTree *lookahead; - TSTree *reusable_subtree; - size_t reusable_subtree_pos; const TSLanguage *language; + Vector lookahead_states; + Vector reduce_parents; } TSParser; TSParser ts_parser_make(); diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 3a4dd308..b82ac91b 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,17 +1,17 @@ #include "tree_sitter/parser.h" #include "runtime/tree.h" -#include "runtime/tree_vector.h" +#include "runtime/vector.h" #include "runtime/stack.h" #include "runtime/length.h" #include -#define MAX_POP_PATH_COUNT 8 +#define MAX_SUCCESSOR_COUNT 8 #define INITIAL_HEAD_CAPACITY 3 #define STARTING_TREE_CAPACITY 10 typedef struct StackNode { StackEntry entry; - struct StackNode *successors[MAX_POP_PATH_COUNT]; + struct StackNode *successors[MAX_SUCCESSOR_COUNT]; short unsigned int successor_count; short unsigned int ref_count; } StackNode; @@ -20,10 +20,18 @@ struct Stack { StackNode **heads; int head_count; int head_capacity; - StackPopResult last_pop_results[MAX_POP_PATH_COUNT]; + Vector pop_results; + Vector pop_paths; TreeSelectionCallback tree_selection_callback; }; +typedef struct { + size_t goal_tree_count; + StackNode *node; + Vector trees; + bool is_shared; +} PopPath; + /* * Section: Stack lifecycle */ @@ -35,11 +43,15 @@ Stack *ts_stack_new(TreeSelectionCallback tree_selection_callback) { .head_count = 1, .head_capacity = INITIAL_HEAD_CAPACITY, .tree_selection_callback = tree_selection_callback, + .pop_results = vector_new(sizeof(StackPopResult), 4), + .pop_paths = vector_new(sizeof(PopPath), 4), }; return self; } void ts_stack_delete(Stack *self) { + vector_delete(&self->pop_results); + vector_delete(&self->pop_paths); free(self->heads); free(self); } @@ -164,12 +176,12 @@ static int ts_stack__add_head(Stack *self, StackNode *node) { return new_index; } -static int ts_stack__find_or_add_head(Stack *self, StackNode *node) { +static int ts_stack__find_head(Stack *self, StackNode *node) { for (int i = 0; i < self->head_count; i++) if (self->heads[i] == node) { return i; } - return ts_stack__add_head(self, node); + return -1; } void ts_stack_remove_head(Stack *self, int head_index) { @@ -221,16 +233,20 @@ int ts_stack_split(Stack *self, int head_index) { return ts_stack__add_head(self, self->heads[head_index]); } -StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, - bool count_extra) { +Vector ts_stack_pop(Stack *self, int head_index, int child_count, + bool count_extra) { StackNode *previous_head = self->heads[head_index]; - - int path_count = 1; int capacity = (child_count == -1) ? STARTING_TREE_CAPACITY : child_count; - size_t tree_counts_by_path[MAX_POP_PATH_COUNT] = { child_count }; - StackNode *nodes_by_path[MAX_POP_PATH_COUNT] = { previous_head }; - TreeVector trees_by_path[MAX_POP_PATH_COUNT] = { tree_vector_new(capacity) }; - bool is_shared_by_path[MAX_POP_PATH_COUNT] = { false }; + PopPath initial_path = { + .goal_tree_count = child_count, + .node = previous_head, + .trees = vector_new(sizeof(TSTree *), capacity), + .is_shared = false, + }; + + vector_clear(&self->pop_results); + vector_clear(&self->pop_paths); + vector_push(&self->pop_paths, &initial_path); /* * Reduce along every possible path in parallel. Stop when the given number @@ -239,71 +255,72 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, bool all_paths_done = false; while (!all_paths_done) { all_paths_done = true; - int current_path_count = path_count; - for (int path = 0; path < current_path_count; path++) { - StackNode *node = nodes_by_path[path]; - if (!node || (trees_by_path[path].size == tree_counts_by_path[path])) + + for (size_t i = 0; i < self->pop_paths.size; i++) { + PopPath *path = vector_get(&self->pop_paths, i); + StackNode *node = path->node; + + if (!node || path->trees.size == path->goal_tree_count) continue; + all_paths_done = false; /* * Children that are 'extra' do not count towards the total child count. */ if (ts_tree_is_extra(node->entry.tree) && !count_extra) - tree_counts_by_path[path]++; + path->goal_tree_count++; /* * If a node has more than one successor, create new paths for each of * the additional successors. */ - if (is_shared_by_path[path]) { - trees_by_path[path] = tree_vector_copy(&trees_by_path[path]); - is_shared_by_path[path] = false; + if (path->is_shared) { + path->trees = vector_copy(&path->trees); + path->is_shared = false; } - tree_vector_push(&trees_by_path[path], node->entry.tree); - for (int i = 0; i < node->successor_count; i++) { - int next_path; - if (i > 0) { - if (path_count == MAX_POP_PATH_COUNT) - break; - next_path = path_count; - tree_counts_by_path[next_path] = tree_counts_by_path[path]; - trees_by_path[next_path] = trees_by_path[path]; - is_shared_by_path[next_path] = true; - path_count++; - } else { - next_path = path; - } + ts_tree_retain(node->entry.tree); + vector_push(&path->trees, &node->entry.tree); - nodes_by_path[next_path] = node->successors[i]; + path->node = path->node->successors[0]; + for (int j = 1; j < node->successor_count; j++) { + PopPath path_copy = *path; + vector_push(&self->pop_paths, &path_copy); + PopPath *next_path = vector_back(&self->pop_paths); + next_path->node = node->successors[j]; + next_path->is_shared = true; } } } - for (int path = 0; path < path_count; path++) { - if (!is_shared_by_path[path]) - tree_vector_reverse(&trees_by_path[path]); - int index = -1; - if (path == 0) { - stack_node_retain(nodes_by_path[path]); - self->heads[head_index] = nodes_by_path[path]; - index = head_index; + for (size_t i = 0; i < self->pop_paths.size; i++) { + PopPath *path = vector_get(&self->pop_paths, i); + + if (!path->is_shared) + vector_reverse(&path->trees); + + StackPopResult result = { + .trees = path->trees.contents, + .tree_count = path->trees.size, + .head_index = -1, + }; + + if (i == 0) { + stack_node_retain(path->node); + self->heads[head_index] = path->node; + result.head_index = head_index; } else { - index = ts_stack__find_or_add_head(self, nodes_by_path[path]); + result.head_index = ts_stack__find_head(self, path->node); + if (result.head_index == -1) + result.head_index = ts_stack__add_head(self, path->node); } - self->last_pop_results[path] = (StackPopResult){ - .index = index, - .tree_count = trees_by_path[path].size, - .trees = trees_by_path[path].contents, - }; + vector_push(&self->pop_results, &result); } stack_node_release(previous_head); - return (StackPopResultList){ - .size = path_count, .contents = self->last_pop_results, - }; + return self->pop_results; } void ts_stack_shrink(Stack *self, int head_index, int count) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 2aab77e9..3b44f351 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -6,6 +6,7 @@ extern "C" { #endif #include "tree_sitter/parser.h" +#include "runtime/vector.h" typedef struct Stack Stack; @@ -15,16 +16,11 @@ typedef struct { } StackEntry; typedef struct { - int index; - int tree_count; TSTree **trees; + size_t tree_count; + int head_index; } StackPopResult; -typedef struct { - int size; - StackPopResult *contents; -} StackPopResultList; - typedef struct { void *data; TSTree *(*callback)(void *data, TSTree *, TSTree *); @@ -90,7 +86,7 @@ void ts_stack_add_alternative(Stack *, int head, TSTree *); * which had previously been merged. It returns a struct that indicates the * index of each revealed head and the trees removed from that head. */ -StackPopResultList ts_stack_pop(Stack *, int head, int count, bool count_extra); +Vector ts_stack_pop(Stack *, int head, int count, bool count_extra); /* * Remove the given number of entries from the given head of the stack. diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 32547532..dbc4b4ca 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -44,6 +44,21 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, return result; } +void ts_tree_assign_parents(TSTree *self) { + TSLength offset = ts_length_zero(); + for (size_t i = 0; i < self->child_count; i++) { + TSTree *child = self->children[i]; + if (child->context.parent != self) { + child->context.parent = self; + child->context.index = i; + child->context.offset = offset; + child->context.offset_point = ts_tree_offset_point(self); + ts_tree_assign_parents(child); + } + offset = ts_length_add(offset, ts_tree_total_size(child)); + } +} + static void ts_tree__set_children(TSTree *self, TSTree **children, size_t child_count) { self->children = children; @@ -52,10 +67,6 @@ static void ts_tree__set_children(TSTree *self, TSTree **children, for (size_t i = 0; i < child_count; i++) { TSTree *child = children[i]; ts_tree_retain(child); - child->context.parent = self; - child->context.index = i; - child->context.offset = ts_tree_total_size(self); - child->context.offset_point = ts_tree_offset_point(self); if (i == 0) { self->padding = child->padding; @@ -166,6 +177,30 @@ bool ts_tree_eq(const TSTree *self, const TSTree *other) { return true; } +int ts_tree_compare(const TSTree *left, const TSTree *right) { + if (left->symbol < right->symbol) + return -1; + if (right->symbol < left->symbol) + return 1; + if (left->child_count < right->child_count) + return -1; + if (right->child_count < left->child_count) + return 1; + for (size_t i = 0; i < left->child_count; i++) { + TSTree *left_child = left->children[i]; + TSTree *right_child = right->children[i]; + switch (ts_tree_compare(left_child, right_child)) { + case -1: + return -1; + case 1: + return 1; + default: + break; + } + } + return 0; +} + static size_t write_lookahead_to_string(char *string, size_t limit, char lookahead) { switch (lookahead) { diff --git a/src/runtime/tree.h b/src/runtime/tree.h index f373928b..5f82ed53 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -49,6 +49,7 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, void ts_tree_retain(TSTree *tree); void ts_tree_release(TSTree *tree); bool ts_tree_eq(const TSTree *tree1, const TSTree *tree2); +int ts_tree_compare(const TSTree *tree1, const TSTree *tree2); char *ts_tree_string(const TSTree *tree, const char **names, bool include_anonymous); @@ -56,6 +57,7 @@ size_t ts_tree_offset_column(const TSTree *self); TSLength ts_tree_total_size(const TSTree *tree); TSPoint ts_tree_offset_point(const TSTree *self); void ts_tree_prepend_children(TSTree *, size_t, TSTree **); +void ts_tree_assign_parents(TSTree *); void ts_tree_edit(TSTree *, TSInputEdit); static inline bool ts_tree_is_extra(const TSTree *tree) { diff --git a/src/runtime/tree_vector.h b/src/runtime/tree_vector.h deleted file mode 100644 index 4464e52c..00000000 --- a/src/runtime/tree_vector.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef RUNTIME_TREE_VECTOR_H_ -#define RUNTIME_TREE_VECTOR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include "./tree.h" - -typedef struct { - TSTree **contents; - size_t capacity; - size_t size; -} TreeVector; - -static inline TreeVector tree_vector_new(size_t size) { - return (TreeVector){ - .contents = malloc(size * sizeof(TSTree *)), .capacity = size, .size = 0, - }; -} - -static inline void tree_vector_push(TreeVector *self, TSTree *tree) { - if (self->size == self->capacity) { - self->capacity += 4; - self->contents = realloc(self->contents, self->capacity * sizeof(TSTree *)); - } - ts_tree_retain(tree); - self->contents[self->size++] = tree; -} - -static inline void tree_vector_reverse(TreeVector *self) { - TSTree *swap; - size_t limit = self->size / 2; - for (size_t i = 0; i < limit; i++) { - swap = self->contents[i]; - self->contents[i] = self->contents[self->size - 1 - i]; - self->contents[self->size - 1 - i] = swap; - } -} - -static inline TreeVector tree_vector_copy(TreeVector *self) { - return (TreeVector){ - .contents = memcpy(malloc(self->capacity * sizeof(TSTree *)), - self->contents, self->size * sizeof(TSTree *)), - .capacity = self->capacity, - .size = self->size, - }; -} - -#ifdef __cplusplus -} -#endif - -#endif // RUNTIME_TREE_VECTOR_H_ diff --git a/src/runtime/vector.h b/src/runtime/vector.h new file mode 100644 index 00000000..00019ca0 --- /dev/null +++ b/src/runtime/vector.h @@ -0,0 +1,92 @@ +#ifndef RUNTIME_VECTOR_H_ +#define RUNTIME_VECTOR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +typedef struct { + void *contents; + size_t size; + size_t capacity; + size_t element_size; +} Vector; + +static inline Vector vector_new(size_t element_size, size_t capacity) { + Vector result; + result.contents = malloc(capacity * element_size); + result.size = 0; + result.capacity = capacity; + result.element_size = element_size; + return result; +} + +static inline void vector_delete(Vector *self) { + free(self->contents); +} + +static inline void *vector_get(Vector *self, size_t index) { + assert(index < self->size); + return (void *)((char *)self->contents + index * self->element_size); +} + +static inline void *vector_back(Vector *self) { + assert(self->size > 0); + return vector_get(self, self->size - 1); +} + +static inline void vector_clear(Vector *self) { + self->size = 0; +} + +static inline void vector_erase(Vector *self, size_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * self->element_size, + contents + (index + 1) * self->element_size, + (self->size - index - 1) * self->element_size); + self->size--; +} + +static inline void vector_push(Vector *self, void *entry) { + if (self->size == self->capacity) { + self->capacity += 4; + self->contents = + realloc(self->contents, self->capacity * self->element_size); + } + + char *contents = (char *)self->contents; + memcpy(contents + (self->size * self->element_size), (char *)entry, + self->element_size); + self->size++; +} + +static inline void vector_reverse(Vector *self) { + char swap[self->element_size]; + char *contents = (char *)self->contents; + size_t limit = self->size / 2; + for (size_t i = 0; i < limit; i++) { + size_t offset = i * self->element_size; + size_t reverse_offset = (self->size - 1 - i) * self->element_size; + memcpy(&swap, contents + offset, self->element_size); + memcpy(contents + offset, contents + reverse_offset, self->element_size); + memcpy(contents + reverse_offset, &swap, self->element_size); + } +} + +static inline Vector vector_copy(Vector *self) { + Vector copy = *self; + copy.contents = memcpy(malloc(self->capacity * self->element_size), + self->contents, self->size * self->element_size); + return copy; +} + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_VECTOR_H_