From 0824d3e1f3cb3a0bcb6967c682aa5c2a77552293 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 14 Nov 2015 12:37:21 -0800 Subject: [PATCH 1/6] Only use first parse stack path during error recovery --- src/runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index b338adaf..36d35e94 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -234,7 +234,7 @@ static bool ts_parser__handle_error(TSParser *self, int head) { */ int i = -1; for (StackEntry *entry = entry_before_error; true; - entry = ts_stack_entry_next(entry, head), i++) { + entry = ts_stack_entry_next(entry, 0), i++) { TSStateId stack_state = entry ? entry->state : 0; TSParseAction action_on_error = ts_language__last_action( self->language, stack_state, ts_builtin_sym_error); From ab34cfecd9c7ce98b15c20edfa2d221f030f86b5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 15 Nov 2015 09:55:36 -0800 Subject: [PATCH 2/6] Replace TreeVector with a more generic Vector struct --- spec/runtime/stack_spec.cc | 3 +- src/runtime/stack.c | 12 ++++--- src/runtime/tree_vector.h | 55 ------------------------------- src/runtime/vector.h | 67 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 61 deletions(-) delete mode 100644 src/runtime/tree_vector.h create mode 100644 src/runtime/vector.h diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 05be07e8..b2d12208 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -1,4 +1,5 @@ #include "runtime/runtime_spec_helper.h" +#include "runtime/helpers/tree_helpers.h" #include "runtime/stack.h" #include "runtime/tree.h" #include "runtime/length.h" @@ -43,7 +44,7 @@ describe("Stack", [&]() { TSLength len = ts_length_make(2, 2); for (size_t i = 0; i < tree_count; i++) - trees[i] = ts_tree_make_leaf(ts_builtin_sym_start + i, len, len, TSNodeTypeNamed); + trees[i] = ts_tree_make_leaf(i, len, len, TSNodeTypeNamed); }); after_each([&]() { diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 3a4dd308..89efb4b8 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,6 +1,6 @@ #include "tree_sitter/parser.h" #include "runtime/tree.h" -#include "runtime/tree_vector.h" +#include "runtime/vector.h" #include "runtime/stack.h" #include "runtime/length.h" #include @@ -229,7 +229,8 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, int capacity = (child_count == -1) ? STARTING_TREE_CAPACITY : child_count; size_t tree_counts_by_path[MAX_POP_PATH_COUNT] = { child_count }; StackNode *nodes_by_path[MAX_POP_PATH_COUNT] = { previous_head }; - TreeVector trees_by_path[MAX_POP_PATH_COUNT] = { tree_vector_new(capacity) }; + Vector trees_by_path[MAX_POP_PATH_COUNT] = { vector_new(sizeof(TSTree *), + capacity) }; bool is_shared_by_path[MAX_POP_PATH_COUNT] = { false }; /* @@ -257,10 +258,11 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, * the additional successors. */ if (is_shared_by_path[path]) { - trees_by_path[path] = tree_vector_copy(&trees_by_path[path]); + trees_by_path[path] = vector_copy(&trees_by_path[path]); is_shared_by_path[path] = false; } - tree_vector_push(&trees_by_path[path], node->entry.tree); + ts_tree_retain(node->entry.tree); + vector_push(&trees_by_path[path], &node->entry.tree); for (int i = 0; i < node->successor_count; i++) { int next_path; @@ -283,7 +285,7 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, for (int path = 0; path < path_count; path++) { if (!is_shared_by_path[path]) - tree_vector_reverse(&trees_by_path[path]); + vector_reverse(&trees_by_path[path]); int index = -1; if (path == 0) { stack_node_retain(nodes_by_path[path]); diff --git a/src/runtime/tree_vector.h b/src/runtime/tree_vector.h deleted file mode 100644 index 4464e52c..00000000 --- a/src/runtime/tree_vector.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef RUNTIME_TREE_VECTOR_H_ -#define RUNTIME_TREE_VECTOR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include "./tree.h" - -typedef struct { - TSTree **contents; - size_t capacity; - size_t size; -} TreeVector; - -static inline TreeVector tree_vector_new(size_t size) { - return (TreeVector){ - .contents = malloc(size * sizeof(TSTree *)), .capacity = size, .size = 0, - }; -} - -static inline void tree_vector_push(TreeVector *self, TSTree *tree) { - if (self->size == self->capacity) { - self->capacity += 4; - self->contents = realloc(self->contents, self->capacity * sizeof(TSTree *)); - } - ts_tree_retain(tree); - self->contents[self->size++] = tree; -} - -static inline void tree_vector_reverse(TreeVector *self) { - TSTree *swap; - size_t limit = self->size / 2; - for (size_t i = 0; i < limit; i++) { - swap = self->contents[i]; - self->contents[i] = self->contents[self->size - 1 - i]; - self->contents[self->size - 1 - i] = swap; - } -} - -static inline TreeVector tree_vector_copy(TreeVector *self) { - return (TreeVector){ - .contents = memcpy(malloc(self->capacity * sizeof(TSTree *)), - self->contents, self->size * sizeof(TSTree *)), - .capacity = self->capacity, - .size = self->size, - }; -} - -#ifdef __cplusplus -} -#endif - -#endif // RUNTIME_TREE_VECTOR_H_ diff --git a/src/runtime/vector.h b/src/runtime/vector.h new file mode 100644 index 00000000..c62a43c9 --- /dev/null +++ b/src/runtime/vector.h @@ -0,0 +1,67 @@ +#ifndef RUNTIME_VECTOR_H_ +#define RUNTIME_VECTOR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +typedef struct { + void *contents; + size_t size; + size_t capacity; + size_t element_size; +} Vector; + +static inline Vector vector_new(size_t element_size, size_t capacity) { + return (Vector){ + .contents = malloc(capacity * element_size), + .size = 0, + .capacity = capacity, + .element_size = element_size, + }; +} + +static inline void vector_push(Vector *self, void *entry) { + if (self->size == self->capacity) { + self->capacity += 4; + self->contents = + realloc(self->contents, self->capacity * self->element_size); + } + + char *contents = (char *)self->contents; + memcpy(contents + (self->size * self->element_size), (char *)entry, + self->element_size); + self->size++; +} + +static inline void vector_reverse(Vector *self) { + char swap[self->element_size]; + char *contents = (char *)self->contents; + size_t limit = self->size / 2; + for (size_t i = 0; i < limit; i++) { + size_t offset = i * self->element_size; + size_t reverse_offset = (self->size - 1 - i) * self->element_size; + memcpy(&swap, contents + offset, self->element_size); + memcpy(contents + offset, contents + reverse_offset, self->element_size); + memcpy(contents + reverse_offset, &swap, self->element_size); + } +} + +static inline Vector vector_copy(Vector *self) { + return (Vector){ + .contents = memcpy(malloc(self->capacity * self->element_size), + self->contents, self->size * self->element_size), + .size = self->size, + .capacity = self->capacity, + .element_size = self->element_size, + }; +} + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_VECTOR_H_ From 484721b0c20f5252065f56c980a444e7ce3f5b57 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 15 Nov 2015 12:21:16 -0800 Subject: [PATCH 3/6] Assign tree parent pointers after parse is complete --- src/runtime/parser.c | 1 + src/runtime/tree.c | 17 ++++++++++++++--- src/runtime/tree.h | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 36d35e94..8668fe04 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -301,6 +301,7 @@ static TSTree *ts_parser__finish(TSParser *self) { TSTree *root = trees[extra_count]; ts_tree_prepend_children(root, extra_count, trees); + ts_tree_assign_parents(root); return root; } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 822e51f8..4c04e9b8 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -36,6 +36,20 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char) return result; } +void ts_tree_assign_parents(TSTree *self) { + TSLength offset = ts_length_zero(); + for (size_t i = 0; i < self->child_count; i++) { + TSTree *child = self->children[i]; + if (child->context.parent != self) { + child->context.parent = self; + child->context.index = i; + child->context.offset = offset; + ts_tree_assign_parents(child); + } + offset = ts_length_add(offset, ts_tree_total_size(child)); + } +} + static void ts_tree__set_children(TSTree *self, TSTree **children, size_t child_count) { self->children = children; @@ -44,9 +58,6 @@ static void ts_tree__set_children(TSTree *self, TSTree **children, for (size_t i = 0; i < child_count; i++) { TSTree *child = children[i]; ts_tree_retain(child); - child->context.parent = self; - child->context.index = i; - child->context.offset = ts_tree_total_size(self); if (i == 0) { self->padding = child->padding; diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 88208831..b443868e 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -44,6 +44,7 @@ char *ts_tree_string(const TSTree *tree, const char **names, bool include_anonymous); TSLength ts_tree_total_size(const TSTree *tree); void ts_tree_prepend_children(TSTree *, size_t, TSTree **); +void ts_tree_assign_parents(TSTree *); void ts_tree_edit(TSTree *, TSInputEdit); static inline bool ts_tree_is_extra(const TSTree *tree) { From 64874449e4d7c9b2bf8dea9d6fa453862415a7dc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 18 Nov 2015 08:47:15 -0800 Subject: [PATCH 4/6] Allow different parse stack heads to lex differently --- src/runtime/lexer.c | 3 + src/runtime/parser.c | 202 +++++++++++++++++++++++++++---------------- src/runtime/parser.h | 4 +- src/runtime/vector.h | 19 ++++ 4 files changed, 152 insertions(+), 76 deletions(-) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index b2b21819..bbcfd84d 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -110,6 +110,9 @@ TSLexer ts_lexer_make() { } void ts_lexer_reset(TSLexer *self, TSLength position) { + if (ts_length_eq(position, self->current_position)) + return; + self->token_start_position = position; self->token_end_position = position; self->current_position = position; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 8668fe04..8321a2dc 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -6,6 +6,9 @@ #include "runtime/tree.h" #include "runtime/lexer.h" #include "runtime/length.h" +#include "runtime/vector.h" + +#include /* * Debugging @@ -20,6 +23,12 @@ #define SYM_NAME(sym) self->language->symbol_names[sym] +typedef struct { + TSTree *reusable_subtree; + size_t reusable_subtree_pos; + TSLength position; +} HeadState; + typedef enum { ConsumeResultShifted, ConsumeResultRemoved, @@ -53,14 +62,14 @@ static TSParseAction ts_language__last_action(const TSLanguage *language, * Replace the parser's reusable_subtree with its first non-fragile descendant. * Return true if a suitable descendant is found, false otherwise. */ -static bool ts_parser__breakdown_reusable_subtree(TSParser *self) { +static bool ts_parser__breakdown_reusable_subtree(HeadState *state) { do { - if (self->reusable_subtree->symbol == ts_builtin_sym_error) + if (state->reusable_subtree->symbol == ts_builtin_sym_error) return false; - if (self->reusable_subtree->child_count == 0) + if (state->reusable_subtree->child_count == 0) return false; - self->reusable_subtree = self->reusable_subtree->children[0]; - } while (ts_tree_is_fragile(self->reusable_subtree)); + state->reusable_subtree = state->reusable_subtree->children[0]; + } while (ts_tree_is_fragile(state->reusable_subtree)); return true; } @@ -68,80 +77,82 @@ static bool ts_parser__breakdown_reusable_subtree(TSParser *self) { * Replace the parser's reusable_subtree with its largest right neighbor, or * NULL if no right neighbor exists. */ -static void ts_parser__pop_reusable_subtree(TSParser *self) { - self->reusable_subtree_pos += ts_tree_total_size(self->reusable_subtree).chars; +static void ts_parser__pop_reusable_subtree(HeadState *state) { + state->reusable_subtree_pos += + ts_tree_total_size(state->reusable_subtree).chars; - while (self->reusable_subtree) { - TSTree *parent = self->reusable_subtree->context.parent; - size_t next_index = self->reusable_subtree->context.index + 1; + while (state->reusable_subtree) { + TSTree *parent = state->reusable_subtree->context.parent; + size_t next_index = state->reusable_subtree->context.index + 1; if (parent && parent->child_count > next_index) { - self->reusable_subtree = parent->children[next_index]; + state->reusable_subtree = parent->children[next_index]; return; } - self->reusable_subtree = parent; + state->reusable_subtree = parent; } } +static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { + if (subtree->symbol == ts_builtin_sym_error) + return false; + TSStateId state = ts_stack_top_state(self->stack, head); + const TSParseAction *action = + ts_language__actions(self->language, state, subtree->symbol); + return action->type != TSParseActionTypeError; +} + /* * Advance the parser's lookahead subtree. If there is a reusable subtree * at the correct position in the parser's previous tree, use that. Otherwise, * run the lexer. */ -static void ts_parser__get_next_lookahead(TSParser *self) { - while (self->reusable_subtree) { - if (self->reusable_subtree_pos > self->lexer.current_position.chars) { +static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { + HeadState *state = vector_get(&self->head_states, head); + + while (state->reusable_subtree) { + if (state->reusable_subtree_pos > state->position.chars) { break; } - if (self->reusable_subtree_pos < self->lexer.current_position.chars) { - DEBUG("past_reuse sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - ts_parser__pop_reusable_subtree(self); + if (state->reusable_subtree_pos < state->position.chars) { + DEBUG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + ts_parser__pop_reusable_subtree(state); continue; } - if (ts_tree_has_changes(self->reusable_subtree) || - ts_tree_is_fragile(self->reusable_subtree) || - ts_tree_is_extra(self->reusable_subtree)) { - DEBUG("breakdown sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - if (!ts_parser__breakdown_reusable_subtree(self)) - ts_parser__pop_reusable_subtree(self); + if (ts_tree_has_changes(state->reusable_subtree) || + ts_tree_is_fragile(state->reusable_subtree) || + ts_tree_is_extra(state->reusable_subtree) || + (state->reusable_subtree->child_count > 0 && + !ts_parser__can_reuse(self, head, state->reusable_subtree))) { + DEBUG("breakdown sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + if (!ts_parser__breakdown_reusable_subtree(state)) + ts_parser__pop_reusable_subtree(state); continue; } - TSStateId top_state = ts_stack_top_state(self->stack, 0); - TSSymbol symbol = self->reusable_subtree->symbol; - if (ts_language__last_action(self->language, top_state, symbol).type == - TSParseActionTypeError) { - DEBUG("cant_reuse sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - ts_parser__pop_reusable_subtree(self); - continue; - } - - self->lookahead = self->reusable_subtree; - TSLength size = ts_tree_total_size(self->lookahead); - DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(self->lookahead->symbol), - size.chars, self->lookahead->options.extra); - ts_lexer_reset(&self->lexer, - ts_length_add(self->lexer.current_position, size)); - ts_parser__pop_reusable_subtree(self); - return; + TSTree *result = state->reusable_subtree; + TSLength size = ts_tree_total_size(result); + DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), + size.chars, result->options.extra); + ts_parser__pop_reusable_subtree(state); + return result; } - TSLength position = self->lexer.current_position; - for (size_t i = 0, count = ts_stack_head_count(self->stack); i < count; i++) { - if (i > 0) { - ts_lexer_reset(&self->lexer, position); - ts_tree_release(self->lookahead); - } + return NULL; +} - TSStateId parse_state = ts_stack_top_state(self->stack, i); - TSStateId lex_state = self->language->lex_states[parse_state]; - DEBUG("lex state:%d", lex_state); - self->lookahead = self->language->lex_fn(&self->lexer, lex_state); +static int ts_parser__split(TSParser *self, int head) { + int result = ts_stack_split(self->stack, head); + assert(result == self->head_states.size); + HeadState head_state = *(HeadState *)vector_get(&self->head_states, head); + vector_push(&self->head_states, &head_state); + return result; +} - if (self->lookahead->symbol != ts_builtin_sym_error) - break; - } +static void ts_parser__remove_head(TSParser *self, int head) { + vector_erase(&self->head_states, head); + ts_stack_remove_head(self->stack, head); } /* @@ -150,10 +161,16 @@ static void ts_parser__get_next_lookahead(TSParser *self) { static ConsumeResult ts_parser__shift(TSParser *self, int head, TSStateId parse_state) { - if (ts_stack_push(self->stack, head, parse_state, self->lookahead)) + HeadState *head_state = vector_get(&self->head_states, head); + head_state->position = + ts_length_add(head_state->position, ts_tree_total_size(self->lookahead)); + if (ts_stack_push(self->stack, head, parse_state, self->lookahead)) { + DEBUG("merge head:%d", head); + vector_erase(&self->head_states, head); return ConsumeResultRemoved; - else + } else { return ConsumeResultShifted; + } } static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) { @@ -175,6 +192,13 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, for (int i = 0; i < pop_results.size; i++) { StackPopResult pop_result = pop_results.contents[i]; + if (i > 0) { + assert(pop_result.index == self->head_states.size); + DEBUG("split_during_reduce new_head:%d", pop_result.index); + HeadState *head_state = vector_get(&self->head_states, head); + vector_push(&self->head_states, head_state); + } + if (pop_result.trees != last_children) { parent = ts_tree_make_node(symbol, pop_result.tree_count, pop_result.trees, node_type); @@ -194,7 +218,8 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, .data.to_state; } - ts_stack_push(self->stack, pop_result.index, state, parent); + if (ts_stack_push(self->stack, pop_result.index, state, parent)) + vector_erase(&self->head_states, pop_result.index); } last_index = pop_result.index; @@ -214,9 +239,12 @@ static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, static void ts_parser__reduce_error(TSParser *self, int head, size_t child_count) { + HeadState *head_state = vector_get(&self->head_states, head); TSTree *reduced = ts_parser__reduce(self, head, ts_builtin_sym_error, child_count, false, true); reduced->size = ts_length_add(reduced->size, self->lookahead->padding); + head_state->position = + ts_length_add(head_state->position, self->lookahead->padding); self->lookahead->padding = ts_length_zero(); ts_tree_set_fragile_left(reduced); ts_tree_set_fragile_right(reduced); @@ -288,8 +316,14 @@ static void ts_parser__start(TSParser *self, TSInput input, ts_lexer_reset(&self->lexer, ts_length_zero()); ts_stack_clear(self->stack); - self->reusable_subtree = previous_tree; - self->reusable_subtree_pos = 0; + HeadState head_state = { + .position = ts_length_zero(), + .reusable_subtree = previous_tree, + .reusable_subtree_pos = 0, + }; + vector_clear(&self->head_states); + vector_push(&self->head_states, &head_state); + self->lookahead = NULL; } @@ -328,11 +362,9 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { int current_head; if (next_action->type == 0) { current_head = head; - DEBUG("action current_head:%d, state:%d", current_head, state); } else { - current_head = ts_stack_split(self->stack, head); - DEBUG("split_action from_head:%d, current_head:%d, state:%d", head, - current_head, state); + current_head = ts_parser__split(self, head); + DEBUG("split_action from_head:%d, new_head:%d", head, current_head); } // TODO: Remove this by making a separate symbol for errors returned from @@ -350,7 +382,7 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { return ConsumeResultFinished; } else { DEBUG("bail current_head:%d", current_head); - ts_stack_remove_head(self->stack, current_head); + ts_parser__remove_head(self, current_head); return ConsumeResultRemoved; } @@ -391,10 +423,14 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { } static int ts_tree__compare(TSTree *left, TSTree *right) { - if (left->symbol < right->symbol) return -1; - if (right->symbol < left->symbol) return 1; - if (left->child_count < right->child_count) return -1; - if (right->child_count < left->child_count) return 1; + if (left->symbol < right->symbol) + return -1; + if (right->symbol < left->symbol) + return 1; + if (left->child_count < right->child_count) + return -1; + if (right->child_count < left->child_count) + return 1; for (size_t i = 0; i < left->child_count; i++) { TSTree *left_child = left->children[i]; TSTree *right_child = right->children[i]; @@ -427,6 +463,7 @@ TSParser ts_parser_make() { .stack = ts_stack_new((TreeSelectionCallback){ NULL, ts_parser__select_tree, }), + .head_states = vector_new(sizeof(HeadState), 3), .lookahead = NULL, }; } @@ -449,13 +486,28 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { ts_parser__start(self, input, previous_tree); for (;;) { - ts_parser__get_next_lookahead(self); - - DEBUG("lookahead sym:%s, pos:%lu, head_count:%d", - SYM_NAME(self->lookahead->symbol), self->lexer.current_position.chars, - ts_stack_head_count(self->stack)); - for (int head = 0; head < ts_stack_head_count(self->stack);) { + HeadState *state = vector_get(&self->head_states, head); + + DEBUG("process head:%d, head_count:%d, state:%d, pos:%lu", head, + ts_stack_head_count(self->stack), + ts_stack_top_state(self->stack, head), state->position.chars); + + TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); + if (reused_lookahead && + ts_parser__can_reuse(self, head, reused_lookahead)) { + self->lookahead = reused_lookahead; + } else if (!(self->lookahead && + ts_parser__can_reuse(self, head, self->lookahead))) { + ts_lexer_reset(&self->lexer, state->position); + TSStateId parse_state = ts_stack_top_state(self->stack, head); + TSStateId lex_state = self->language->lex_states[parse_state]; + self->lookahead = self->language->lex_fn(&self->lexer, lex_state); + } + + DEBUG("lookahead sym:%s, size:%lu", SYM_NAME(self->lookahead->symbol), + ts_tree_total_size(self->lookahead).chars); + switch (ts_parser__consume_lookahead(self, head)) { case ConsumeResultRemoved: break; @@ -466,5 +518,7 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { return ts_parser__finish(self); } } + + self->lookahead = NULL; } } diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 50370ba0..53cb6a0f 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -6,14 +6,14 @@ extern "C" { #endif #include "runtime/stack.h" +#include "runtime/vector.h" typedef struct { TSLexer lexer; Stack *stack; TSTree *lookahead; - TSTree *reusable_subtree; - size_t reusable_subtree_pos; const TSLanguage *language; + Vector head_states; } TSParser; TSParser ts_parser_make(); diff --git a/src/runtime/vector.h b/src/runtime/vector.h index c62a43c9..28437764 100644 --- a/src/runtime/vector.h +++ b/src/runtime/vector.h @@ -7,6 +7,7 @@ extern "C" { #include #include +#include typedef struct { void *contents; @@ -24,6 +25,24 @@ static inline Vector vector_new(size_t element_size, size_t capacity) { }; } +static inline void *vector_get(Vector *self, size_t index) { + assert(index < self->size); + return (void *)((char *)self->contents + index * self->element_size); +} + +static inline void vector_clear(Vector *self) { + self->size = 0; +} + +static inline void vector_erase(Vector *self, size_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * self->element_size, + contents + (index + 1) * self->element_size, + (self->size - index - 1) * self->element_size); + self->size--; +} + static inline void vector_push(Vector *self, void *entry) { if (self->size == self->capacity) { self->capacity += 4; From c88e9044d5fb1d395aa986e6a109f468cf1355b8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 00:01:53 -0800 Subject: [PATCH 5/6] Make stack popping more robust --- spec/runtime/stack_spec.cc | 217 ++++++++++++++++++++++++++++--------- src/runtime/parser.c | 111 +++++++++++++------ src/runtime/parser.h | 1 + src/runtime/stack.c | 129 ++++++++++++---------- src/runtime/stack.h | 12 +- src/runtime/vector.h | 9 ++ 6 files changed, 328 insertions(+), 151 deletions(-) diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index b2d12208..7c3cc1c7 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -5,12 +5,12 @@ #include "runtime/length.h" enum { - stateA, stateB, stateC, stateD, stateE, stateF, stateG, stateH + stateA, stateB, stateC, stateD, stateE, stateF, stateG, stateH, stateI, stateJ }; enum { symbol0 = ts_builtin_sym_start, - symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7 + symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8 }; struct TreeSelectionSpy { @@ -32,7 +32,7 @@ START_TEST describe("Stack", [&]() { Stack *stack; - const size_t tree_count = 8; + const size_t tree_count = 10; TSTree *trees[tree_count]; TreeSelectionSpy tree_selection_spy{0, NULL, {NULL, NULL}}; @@ -88,8 +88,6 @@ describe("Stack", [&]() { }); describe("popping nodes from the stack", [&]() { - StackPopResultList pop; - before_each([&]() { /* * A0__B1__C2. @@ -103,43 +101,47 @@ describe("Stack", [&]() { /* * A0. */ - pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(2)); - AssertThat(pop.contents[0].trees[0], Equals(trees[1])); - AssertThat(pop.contents[0].trees[1], Equals(trees[2])); + Vector pop = ts_stack_pop(stack, 0, 2, false); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[1])); + AssertThat(pop1.trees[1], Equals(trees[2])); AssertThat(*ts_stack_head(stack, 0), Equals({trees[0], stateA})); /* * . */ pop = ts_stack_pop(stack, 0, 1, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(1)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); + pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(1)); + AssertThat(pop1.trees[0], Equals(trees[0])); AssertThat(ts_stack_head(stack, 0), Equals(nullptr)); }); it("does not count 'extra' trees toward the count", [&]() { ts_tree_set_extra(trees[1]); - pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); - AssertThat(pop.contents[0].trees[1], Equals(trees[1])); - AssertThat(pop.contents[0].trees[2], Equals(trees[2])); + Vector pop = ts_stack_pop(stack, 0, 2, false); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[0])); + AssertThat(pop1.trees[1], Equals(trees[1])); + AssertThat(pop1.trees[2], Equals(trees[2])); AssertThat(ts_stack_head(stack, 0), Equals(nullptr)); }); it("pops the entire stack when given a negative count", [&]() { - pop = ts_stack_pop(stack, 0, -1, false); + Vector pop = ts_stack_pop(stack, 0, -1, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); - AssertThat(pop.contents[0].trees[1], Equals(trees[1])); - AssertThat(pop.contents[0].trees[2], Equals(trees[2])); + AssertThat(pop.size, Equals(1)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[0])); + AssertThat(pop1.trees[1], Equals(trees[1])); + AssertThat(pop1.trees[2], Equals(trees[2])); }); }); @@ -317,6 +319,9 @@ describe("Stack", [&]() { ts_stack_push(stack, 1, stateE, trees[4]); ts_stack_push(stack, 1, stateF, trees[5]); ts_stack_push(stack, 1, stateG, trees[6]); + + AssertThat(ts_stack_head_count(stack), Equals(1)); + AssertThat(ts_stack_entry_next_count(ts_stack_head(stack, 0)), Equals(2)); }); describe("when there are two paths that lead to two different heads", [&]() { @@ -325,18 +330,18 @@ describe("Stack", [&]() { * A0__B1__C2. * \__E4. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 2, false); + Vector pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(2)); - StackPopResult pop1 = pop.contents[0]; - AssertThat(pop1.index, Equals(0)); - AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); AssertThat(pop1.trees[0], Equals(trees[3])); AssertThat(pop1.trees[1], Equals(trees[6])); - StackPopResult pop2 = pop.contents[1]; - AssertThat(pop2.index, Equals(1)); - AssertThat(pop2.tree_count, Equals(2)); + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); AssertThat(pop2.trees[0], Equals(trees[5])); AssertThat(pop2.trees[1], Equals(trees[6])); @@ -360,9 +365,9 @@ describe("Stack", [&]() { * A0__B1__C2__D3__G6. * \__E4__F5__/ */ - StackPopResultList pop = ts_stack_pop(stack, 0, 1, false); + Vector pop = ts_stack_pop(stack, 0, 1, false); - AssertThat(pop.size, Equals(1)); + AssertThat(pop.size, Equals(1)); AssertThat(ts_stack_head_count(stack), Equals(1)); }); }); @@ -380,19 +385,21 @@ describe("Stack", [&]() { * A0__B1__C2__D3. * \__E4__F5. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 2, false); + Vector pop = ts_stack_pop(stack, 0, 2, false); AssertThat(ts_stack_head_count(stack), Equals(2)); - AssertThat(pop.size, Equals(2)); - AssertThat(pop.contents[0].index, Equals(0)); - AssertThat(pop.contents[0].tree_count, Equals(2)); - AssertThat(pop.contents[0].trees[0], Equals(trees[6])); - AssertThat(pop.contents[0].trees[1], Equals(trees[7])); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[6])); + AssertThat(pop1.trees[1], Equals(trees[7])); - AssertThat(pop.contents[1].index, Equals(1)); - AssertThat(pop.contents[1].tree_count, Equals(2)); - AssertThat(pop.contents[1].trees[0], Equals(trees[6])); - AssertThat(pop.contents[1].trees[1], Equals(trees[7])); + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); + AssertThat(pop2.trees[0], Equals(trees[6])); + AssertThat(pop2.trees[1], Equals(trees[7])); }); }); @@ -401,17 +408,121 @@ describe("Stack", [&]() { /* * A0__B1. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 3, false); + Vector pop = ts_stack_pop(stack, 0, 3, false); AssertThat(ts_stack_head_count(stack), Equals(1)); AssertThat(*ts_stack_head(stack, 0), Equals({trees[1], stateB})); - AssertThat(pop.size, Equals(2)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].index, Equals(0)); - AssertThat(pop.contents[0].trees[0], Equals(trees[2])); - AssertThat(pop.contents[1].tree_count, Equals(3)); - AssertThat(pop.contents[1].index, Equals(0)); - AssertThat(pop.contents[1].trees[0], Equals(trees[4])); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.trees[0], Equals(trees[2])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.tree_count, Equals(3)); + AssertThat(pop2.head_index, Equals(0)); + AssertThat(pop2.trees[0], Equals(trees[4])); + }); + }); + }); + + describe("popping from a stack head that has been 3-way merged", [&]() { + before_each([&]() { + /* + * A0__B1__C2__D3__I8__J9. + * \__E4__F5__/ + * \__G6__H7__/ + */ + ts_stack_clear(stack); + ts_stack_push(stack, 0, stateA, trees[0]); + ts_stack_push(stack, 0, stateB, trees[1]); + ts_stack_split(stack, 0); + ts_stack_split(stack, 1); + ts_stack_push(stack, 0, stateC, trees[2]); + ts_stack_push(stack, 1, stateE, trees[4]); + ts_stack_push(stack, 2, stateG, trees[6]); + ts_stack_push(stack, 0, stateD, trees[3]); + ts_stack_push(stack, 1, stateF, trees[5]); + ts_stack_push(stack, 2, stateH, trees[7]); + ts_stack_push(stack, 0, stateI, trees[8]); + ts_stack_push(stack, 1, stateI, trees[8]); + ts_stack_push(stack, 1, stateI, trees[8]); + ts_stack_push(stack, 0, stateJ, trees[9]); + + AssertThat(ts_stack_head_count(stack), Equals(1)); + StackEntry *head = ts_stack_head(stack, 0); + AssertThat(ts_stack_entry_next_count(head), Equals(1)); + AssertThat(ts_stack_entry_next_count(ts_stack_entry_next(head, 0)), Equals(3)); + }); + + describe("when there is one path that leads to three different heads", [&]() { + it("returns three entries with the same array of trees", [&]() { + /* + * A0__B1__C2__D3. + * \__E4__F5. + * \__G6__H7. + */ + Vector pop = ts_stack_pop(stack, 0, 2, false); + AssertThat(ts_stack_head_count(stack), Equals(3)); + + AssertThat(pop.size, Equals(3)); + + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(ts_stack_top_tree(stack, 0), Equals(trees[3])); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[8])); + AssertThat(pop1.trees[1], Equals(trees[9])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(ts_stack_top_tree(stack, 1), Equals(trees[5])); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); + AssertThat(pop2.trees, Equals(pop1.trees)); + + StackPopResult pop3 = *(StackPopResult *)vector_get(&pop, 2); + AssertThat(ts_stack_top_tree(stack, 2), Equals(trees[7])); + AssertThat(pop3.head_index, Equals(2)); + AssertThat(pop3.tree_count, Equals(2)); + AssertThat(pop3.trees, Equals(pop1.trees)); + }); + }); + + describe("when there are three different paths that lead to three different heads", [&]() { + it("returns three entries with different arrays of trees", [&]() { + /* + * A0__B1__C2. + * \__E4. + * \__G6. + */ + Vector pop = ts_stack_pop(stack, 0, 3, false); + AssertThat(ts_stack_head_count(stack), Equals(3)); + + AssertThat(pop.size, Equals(3)); + + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(ts_stack_top_tree(stack, 0), Equals(trees[2])); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[3])); + AssertThat(pop1.trees[1], Equals(trees[8])); + AssertThat(pop1.trees[2], Equals(trees[9])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(ts_stack_top_tree(stack, 1), Equals(trees[4])); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(3)); + AssertThat(pop2.trees[0], Equals(trees[5])); + AssertThat(pop2.trees[1], Equals(trees[8])); + AssertThat(pop2.trees[2], Equals(trees[9])); + + StackPopResult pop3 = *(StackPopResult *)vector_get(&pop, 2); + AssertThat(ts_stack_top_tree(stack, 2), Equals(trees[6])); + AssertThat(pop3.head_index, Equals(2)); + AssertThat(pop3.tree_count, Equals(3)); + AssertThat(pop3.trees[0], Equals(trees[7])); + AssertThat(pop3.trees[1], Equals(trees[8])); + AssertThat(pop3.trees[2], Equals(trees[9])); }); }); }); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 8321a2dc..1b9cf0f6 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -181,52 +181,91 @@ static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) { static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, size_t child_count, bool extra, bool count_extra) { + vector_clear(&self->reduce_parents); TSNodeType node_type = self->language->node_types[symbol]; - StackPopResultList pop_results = - ts_stack_pop(self->stack, head, child_count, count_extra); + Vector pop_results = ts_stack_pop(self->stack, head, child_count, count_extra); - TSTree *parent = NULL; - TSTree **last_children = NULL; - int last_index = -1; + int last_head_index = -1; + int removed_heads = 0; - for (int i = 0; i < pop_results.size; i++) { - StackPopResult pop_result = pop_results.contents[i]; + for (size_t i = 0; i < pop_results.size; i++) { + StackPopResult *pop_result = vector_get(&pop_results, i); + /* + * If the same set of trees led to a previous stack head, reuse the parent + * tree that was added to that head. + */ + TSTree *parent = NULL; + for (size_t j = 0; j < i; j++) { + StackPopResult *prior_result = vector_get(&pop_results, j); + if (pop_result->trees == prior_result->trees) { + TSTree **existing_parent = vector_get(&self->reduce_parents, j); + parent = *existing_parent; + break; + } + } + + /* + * Otherwise, create a new parent node for this set of trees. + */ + if (!parent) + parent = ts_tree_make_node(symbol, pop_result->tree_count, pop_result->trees, node_type); + vector_push(&self->reduce_parents, &parent); + + /* + * If another path led to the same stack head, add this new parent tree + * as an alternative for that stack head. + */ + int new_head = pop_result->head_index - removed_heads; + if (pop_result->head_index == last_head_index) { + ts_stack_add_alternative(self->stack, new_head, parent); + continue; + } + + /* + * If the stack has split in the process of popping, create a duplicate of + * the lookahead state for this head, for the new head. + */ if (i > 0) { - assert(pop_result.index == self->head_states.size); - DEBUG("split_during_reduce new_head:%d", pop_result.index); + DEBUG("split_during_reduce new_head:%d", new_head); HeadState *head_state = vector_get(&self->head_states, head); vector_push(&self->head_states, head_state); } - if (pop_result.trees != last_children) { - parent = ts_tree_make_node(symbol, pop_result.tree_count, - pop_result.trees, node_type); - } - - if (pop_result.index == last_index) { - ts_stack_add_alternative(self->stack, pop_result.index, parent); + /* + * If the parent node is extra, then do not change the state when pushing + * it. Otherwise, proceed to the state given in the parse table for the + * new parent symbol. + */ + TSStateId state; + TSStateId top_state = ts_stack_top_state(self->stack, new_head); + if (extra) { + ts_tree_set_extra(parent); + state = top_state; } else { - TSStateId top_state = ts_stack_top_state(self->stack, pop_result.index); - TSStateId state; - - if (extra) { - ts_tree_set_extra(parent); - state = top_state; + TSParseAction action = ts_language__last_action(self->language, top_state, symbol); + if (child_count == -1) { + state = 0; } else { - state = ts_language__last_action(self->language, top_state, symbol) - .data.to_state; + assert(action.type == TSParseActionTypeShift); + state = action.data.to_state; } - - if (ts_stack_push(self->stack, pop_result.index, state, parent)) - vector_erase(&self->head_states, pop_result.index); } - last_index = pop_result.index; - last_children = pop_result.trees; + /* + * If the given state already existed at a different head of the stack, + * then remove the lookahead state for the head. + */ + if (ts_stack_push(self->stack, new_head, state, parent)) { + vector_erase(&self->head_states, new_head); + removed_heads++; + } + + last_head_index = pop_result->head_index; } - return parent; + TSTree **last_parent = vector_back(&self->reduce_parents); + return *last_parent; } static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, @@ -298,7 +337,7 @@ static bool ts_parser__handle_error(TSParser *self, int head) { */ if (self->lookahead->symbol == ts_builtin_sym_end) { DEBUG("fail_to_recover"); - ts_parser__reduce_error(self, head, error_token_count - 1); + ts_parser__reduce_error(self, head, -1); return false; } } @@ -328,10 +367,11 @@ static void ts_parser__start(TSParser *self, TSInput input, } static TSTree *ts_parser__finish(TSParser *self) { - StackPopResult pop_result = ts_stack_pop(self->stack, 0, -1, true).contents[0]; + Vector pop_results = ts_stack_pop(self->stack, 0, -1, true); + StackPopResult *pop_result = vector_get(&pop_results, 0); - TSTree **trees = pop_result.trees; - size_t extra_count = pop_result.tree_count - 1; + TSTree **trees = pop_result->trees; + size_t extra_count = pop_result->tree_count - 1; TSTree *root = trees[extra_count]; ts_tree_prepend_children(root, extra_count, trees); @@ -463,7 +503,8 @@ TSParser ts_parser_make() { .stack = ts_stack_new((TreeSelectionCallback){ NULL, ts_parser__select_tree, }), - .head_states = vector_new(sizeof(HeadState), 3), + .head_states = vector_new(sizeof(HeadState), 4), + .reduce_parents = vector_new(sizeof(TSTree *), 4), .lookahead = NULL, }; } diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 53cb6a0f..b15c06f2 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -14,6 +14,7 @@ typedef struct { TSTree *lookahead; const TSLanguage *language; Vector head_states; + Vector reduce_parents; } TSParser; TSParser ts_parser_make(); diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 89efb4b8..3236b763 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -5,13 +5,13 @@ #include "runtime/length.h" #include -#define MAX_POP_PATH_COUNT 8 +#define MAX_SUCCESSOR_COUNT 8 #define INITIAL_HEAD_CAPACITY 3 #define STARTING_TREE_CAPACITY 10 typedef struct StackNode { StackEntry entry; - struct StackNode *successors[MAX_POP_PATH_COUNT]; + struct StackNode *successors[MAX_SUCCESSOR_COUNT]; short unsigned int successor_count; short unsigned int ref_count; } StackNode; @@ -20,10 +20,18 @@ struct Stack { StackNode **heads; int head_count; int head_capacity; - StackPopResult last_pop_results[MAX_POP_PATH_COUNT]; + Vector pop_results; + Vector pop_paths; TreeSelectionCallback tree_selection_callback; }; +typedef struct { + size_t goal_tree_count; + StackNode *node; + Vector trees; + bool is_shared; +} PopPath; + /* * Section: Stack lifecycle */ @@ -35,11 +43,15 @@ Stack *ts_stack_new(TreeSelectionCallback tree_selection_callback) { .head_count = 1, .head_capacity = INITIAL_HEAD_CAPACITY, .tree_selection_callback = tree_selection_callback, + .pop_results = vector_new(sizeof(StackPopResult), 4), + .pop_paths = vector_new(sizeof(PopPath), 4), }; return self; } void ts_stack_delete(Stack *self) { + vector_delete(&self->pop_results); + vector_delete(&self->pop_paths); free(self->heads); free(self); } @@ -164,12 +176,12 @@ static int ts_stack__add_head(Stack *self, StackNode *node) { return new_index; } -static int ts_stack__find_or_add_head(Stack *self, StackNode *node) { +static int ts_stack__find_head(Stack *self, StackNode *node) { for (int i = 0; i < self->head_count; i++) if (self->heads[i] == node) { return i; } - return ts_stack__add_head(self, node); + return -1; } void ts_stack_remove_head(Stack *self, int head_index) { @@ -221,17 +233,24 @@ int ts_stack_split(Stack *self, int head_index) { return ts_stack__add_head(self, self->heads[head_index]); } -StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, - bool count_extra) { - StackNode *previous_head = self->heads[head_index]; +const char *symbol_names[] = { + "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "ten", "eleven", "twelve" +}; - int path_count = 1; +Vector ts_stack_pop(Stack *self, int head_index, int child_count, bool count_extra) { + StackNode *previous_head = self->heads[head_index]; int capacity = (child_count == -1) ? STARTING_TREE_CAPACITY : child_count; - size_t tree_counts_by_path[MAX_POP_PATH_COUNT] = { child_count }; - StackNode *nodes_by_path[MAX_POP_PATH_COUNT] = { previous_head }; - Vector trees_by_path[MAX_POP_PATH_COUNT] = { vector_new(sizeof(TSTree *), - capacity) }; - bool is_shared_by_path[MAX_POP_PATH_COUNT] = { false }; + PopPath initial_path = { + .goal_tree_count = child_count, + .node = previous_head, + .trees = vector_new(sizeof(TSTree *), capacity), + .is_shared = false, + }; + + vector_clear(&self->pop_results); + vector_clear(&self->pop_paths); + vector_push(&self->pop_paths, &initial_path); /* * Reduce along every possible path in parallel. Stop when the given number @@ -240,72 +259,72 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, bool all_paths_done = false; while (!all_paths_done) { all_paths_done = true; - int current_path_count = path_count; - for (int path = 0; path < current_path_count; path++) { - StackNode *node = nodes_by_path[path]; - if (!node || (trees_by_path[path].size == tree_counts_by_path[path])) + + for (size_t i = 0; i < self->pop_paths.size; i++) { + PopPath *path = vector_get(&self->pop_paths, i); + StackNode *node = path->node; + + if (!node || path->trees.size == path->goal_tree_count) continue; + all_paths_done = false; /* * Children that are 'extra' do not count towards the total child count. */ if (ts_tree_is_extra(node->entry.tree) && !count_extra) - tree_counts_by_path[path]++; + path->goal_tree_count++; /* * If a node has more than one successor, create new paths for each of * the additional successors. */ - if (is_shared_by_path[path]) { - trees_by_path[path] = vector_copy(&trees_by_path[path]); - is_shared_by_path[path] = false; + if (path->is_shared) { + path->trees = vector_copy(&path->trees); + path->is_shared = false; } + ts_tree_retain(node->entry.tree); - vector_push(&trees_by_path[path], &node->entry.tree); + vector_push(&path->trees, &node->entry.tree); - for (int i = 0; i < node->successor_count; i++) { - int next_path; - if (i > 0) { - if (path_count == MAX_POP_PATH_COUNT) - break; - next_path = path_count; - tree_counts_by_path[next_path] = tree_counts_by_path[path]; - trees_by_path[next_path] = trees_by_path[path]; - is_shared_by_path[next_path] = true; - path_count++; - } else { - next_path = path; - } - - nodes_by_path[next_path] = node->successors[i]; + path->node = path->node->successors[0]; + for (int j = 1; j < node->successor_count; j++) { + PopPath path_copy = *path; + vector_push(&self->pop_paths, &path_copy); + PopPath *next_path = vector_back(&self->pop_paths); + next_path->node = node->successors[j]; + next_path->is_shared = true; } } } - for (int path = 0; path < path_count; path++) { - if (!is_shared_by_path[path]) - vector_reverse(&trees_by_path[path]); - int index = -1; - if (path == 0) { - stack_node_retain(nodes_by_path[path]); - self->heads[head_index] = nodes_by_path[path]; - index = head_index; + for (size_t i = 0; i < self->pop_paths.size; i++) { + PopPath *path = vector_get(&self->pop_paths, i); + + if (!path->is_shared) + vector_reverse(&path->trees); + + StackPopResult result = { + .trees = path->trees.contents, + .tree_count = path->trees.size, + .head_index = -1, + }; + + if (i == 0) { + stack_node_retain(path->node); + self->heads[head_index] = path->node; + result.head_index = head_index; } else { - index = ts_stack__find_or_add_head(self, nodes_by_path[path]); + result.head_index = ts_stack__find_head(self, path->node); + if (result.head_index == -1) + result.head_index = ts_stack__add_head(self, path->node); } - self->last_pop_results[path] = (StackPopResult){ - .index = index, - .tree_count = trees_by_path[path].size, - .trees = trees_by_path[path].contents, - }; + vector_push(&self->pop_results, &result); } stack_node_release(previous_head); - return (StackPopResultList){ - .size = path_count, .contents = self->last_pop_results, - }; + return self->pop_results; } void ts_stack_shrink(Stack *self, int head_index, int count) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 2aab77e9..3b44f351 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -6,6 +6,7 @@ extern "C" { #endif #include "tree_sitter/parser.h" +#include "runtime/vector.h" typedef struct Stack Stack; @@ -15,16 +16,11 @@ typedef struct { } StackEntry; typedef struct { - int index; - int tree_count; TSTree **trees; + size_t tree_count; + int head_index; } StackPopResult; -typedef struct { - int size; - StackPopResult *contents; -} StackPopResultList; - typedef struct { void *data; TSTree *(*callback)(void *data, TSTree *, TSTree *); @@ -90,7 +86,7 @@ void ts_stack_add_alternative(Stack *, int head, TSTree *); * which had previously been merged. It returns a struct that indicates the * index of each revealed head and the trees removed from that head. */ -StackPopResultList ts_stack_pop(Stack *, int head, int count, bool count_extra); +Vector ts_stack_pop(Stack *, int head, int count, bool count_extra); /* * Remove the given number of entries from the given head of the stack. diff --git a/src/runtime/vector.h b/src/runtime/vector.h index 28437764..434ddefe 100644 --- a/src/runtime/vector.h +++ b/src/runtime/vector.h @@ -25,11 +25,20 @@ static inline Vector vector_new(size_t element_size, size_t capacity) { }; } +static inline void vector_delete(Vector *self) { + free(self->contents); +} + static inline void *vector_get(Vector *self, size_t index) { assert(index < self->size); return (void *)((char *)self->contents + index * self->element_size); } +static inline void *vector_back(Vector *self) { + assert(self->size > 0); + return vector_get(self, self->size - 1); +} + static inline void vector_clear(Vector *self) { self->size = 0; } From ee66f1e774efc94f212211fba60fab54ddbc026c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 00:07:05 -0800 Subject: [PATCH 6/6] Make vector.h compile when included by C++, w/o clang extensions --- src/runtime/vector.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/runtime/vector.h b/src/runtime/vector.h index 434ddefe..ed09af33 100644 --- a/src/runtime/vector.h +++ b/src/runtime/vector.h @@ -17,12 +17,12 @@ typedef struct { } Vector; static inline Vector vector_new(size_t element_size, size_t capacity) { - return (Vector){ - .contents = malloc(capacity * element_size), - .size = 0, - .capacity = capacity, - .element_size = element_size, - }; + Vector result; + result.contents = malloc(capacity * element_size); + result.size = 0; + result.capacity = capacity; + result.element_size = element_size; + return result; } static inline void vector_delete(Vector *self) { @@ -79,13 +79,10 @@ static inline void vector_reverse(Vector *self) { } static inline Vector vector_copy(Vector *self) { - return (Vector){ - .contents = memcpy(malloc(self->capacity * self->element_size), - self->contents, self->size * self->element_size), - .size = self->size, - .capacity = self->capacity, - .element_size = self->element_size, - }; + Vector copy = *self; + copy.contents = memcpy(malloc(self->capacity * self->element_size), + self->contents, self->size * self->element_size); + return copy; } #ifdef __cplusplus