From 0824d3e1f3cb3a0bcb6967c682aa5c2a77552293 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 14 Nov 2015 12:37:21 -0800 Subject: [PATCH 01/13] Only use first parse stack path during error recovery --- src/runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index b338adaf..36d35e94 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -234,7 +234,7 @@ static bool ts_parser__handle_error(TSParser *self, int head) { */ int i = -1; for (StackEntry *entry = entry_before_error; true; - entry = ts_stack_entry_next(entry, head), i++) { + entry = ts_stack_entry_next(entry, 0), i++) { TSStateId stack_state = entry ? entry->state : 0; TSParseAction action_on_error = ts_language__last_action( self->language, stack_state, ts_builtin_sym_error); From ab34cfecd9c7ce98b15c20edfa2d221f030f86b5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 15 Nov 2015 09:55:36 -0800 Subject: [PATCH 02/13] Replace TreeVector with a more generic Vector struct --- spec/runtime/stack_spec.cc | 3 +- src/runtime/stack.c | 12 ++++--- src/runtime/tree_vector.h | 55 ------------------------------- src/runtime/vector.h | 67 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 61 deletions(-) delete mode 100644 src/runtime/tree_vector.h create mode 100644 src/runtime/vector.h diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 05be07e8..b2d12208 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -1,4 +1,5 @@ #include "runtime/runtime_spec_helper.h" +#include "runtime/helpers/tree_helpers.h" #include "runtime/stack.h" #include "runtime/tree.h" #include "runtime/length.h" @@ -43,7 +44,7 @@ describe("Stack", [&]() { TSLength len = ts_length_make(2, 2); for (size_t i = 0; i < tree_count; i++) - trees[i] = ts_tree_make_leaf(ts_builtin_sym_start + i, len, len, TSNodeTypeNamed); + trees[i] = ts_tree_make_leaf(i, len, len, TSNodeTypeNamed); }); after_each([&]() { diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 3a4dd308..89efb4b8 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,6 +1,6 @@ #include "tree_sitter/parser.h" #include "runtime/tree.h" -#include "runtime/tree_vector.h" +#include "runtime/vector.h" #include "runtime/stack.h" #include "runtime/length.h" #include @@ -229,7 +229,8 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, int capacity = (child_count == -1) ? STARTING_TREE_CAPACITY : child_count; size_t tree_counts_by_path[MAX_POP_PATH_COUNT] = { child_count }; StackNode *nodes_by_path[MAX_POP_PATH_COUNT] = { previous_head }; - TreeVector trees_by_path[MAX_POP_PATH_COUNT] = { tree_vector_new(capacity) }; + Vector trees_by_path[MAX_POP_PATH_COUNT] = { vector_new(sizeof(TSTree *), + capacity) }; bool is_shared_by_path[MAX_POP_PATH_COUNT] = { false }; /* @@ -257,10 +258,11 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, * the additional successors. */ if (is_shared_by_path[path]) { - trees_by_path[path] = tree_vector_copy(&trees_by_path[path]); + trees_by_path[path] = vector_copy(&trees_by_path[path]); is_shared_by_path[path] = false; } - tree_vector_push(&trees_by_path[path], node->entry.tree); + ts_tree_retain(node->entry.tree); + vector_push(&trees_by_path[path], &node->entry.tree); for (int i = 0; i < node->successor_count; i++) { int next_path; @@ -283,7 +285,7 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, for (int path = 0; path < path_count; path++) { if (!is_shared_by_path[path]) - tree_vector_reverse(&trees_by_path[path]); + vector_reverse(&trees_by_path[path]); int index = -1; if (path == 0) { stack_node_retain(nodes_by_path[path]); diff --git a/src/runtime/tree_vector.h b/src/runtime/tree_vector.h deleted file mode 100644 index 4464e52c..00000000 --- a/src/runtime/tree_vector.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef RUNTIME_TREE_VECTOR_H_ -#define RUNTIME_TREE_VECTOR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include "./tree.h" - -typedef struct { - TSTree **contents; - size_t capacity; - size_t size; -} TreeVector; - -static inline TreeVector tree_vector_new(size_t size) { - return (TreeVector){ - .contents = malloc(size * sizeof(TSTree *)), .capacity = size, .size = 0, - }; -} - -static inline void tree_vector_push(TreeVector *self, TSTree *tree) { - if (self->size == self->capacity) { - self->capacity += 4; - self->contents = realloc(self->contents, self->capacity * sizeof(TSTree *)); - } - ts_tree_retain(tree); - self->contents[self->size++] = tree; -} - -static inline void tree_vector_reverse(TreeVector *self) { - TSTree *swap; - size_t limit = self->size / 2; - for (size_t i = 0; i < limit; i++) { - swap = self->contents[i]; - self->contents[i] = self->contents[self->size - 1 - i]; - self->contents[self->size - 1 - i] = swap; - } -} - -static inline TreeVector tree_vector_copy(TreeVector *self) { - return (TreeVector){ - .contents = memcpy(malloc(self->capacity * sizeof(TSTree *)), - self->contents, self->size * sizeof(TSTree *)), - .capacity = self->capacity, - .size = self->size, - }; -} - -#ifdef __cplusplus -} -#endif - -#endif // RUNTIME_TREE_VECTOR_H_ diff --git a/src/runtime/vector.h b/src/runtime/vector.h new file mode 100644 index 00000000..c62a43c9 --- /dev/null +++ b/src/runtime/vector.h @@ -0,0 +1,67 @@ +#ifndef RUNTIME_VECTOR_H_ +#define RUNTIME_VECTOR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +typedef struct { + void *contents; + size_t size; + size_t capacity; + size_t element_size; +} Vector; + +static inline Vector vector_new(size_t element_size, size_t capacity) { + return (Vector){ + .contents = malloc(capacity * element_size), + .size = 0, + .capacity = capacity, + .element_size = element_size, + }; +} + +static inline void vector_push(Vector *self, void *entry) { + if (self->size == self->capacity) { + self->capacity += 4; + self->contents = + realloc(self->contents, self->capacity * self->element_size); + } + + char *contents = (char *)self->contents; + memcpy(contents + (self->size * self->element_size), (char *)entry, + self->element_size); + self->size++; +} + +static inline void vector_reverse(Vector *self) { + char swap[self->element_size]; + char *contents = (char *)self->contents; + size_t limit = self->size / 2; + for (size_t i = 0; i < limit; i++) { + size_t offset = i * self->element_size; + size_t reverse_offset = (self->size - 1 - i) * self->element_size; + memcpy(&swap, contents + offset, self->element_size); + memcpy(contents + offset, contents + reverse_offset, self->element_size); + memcpy(contents + reverse_offset, &swap, self->element_size); + } +} + +static inline Vector vector_copy(Vector *self) { + return (Vector){ + .contents = memcpy(malloc(self->capacity * self->element_size), + self->contents, self->size * self->element_size), + .size = self->size, + .capacity = self->capacity, + .element_size = self->element_size, + }; +} + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_VECTOR_H_ From 484721b0c20f5252065f56c980a444e7ce3f5b57 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 15 Nov 2015 12:21:16 -0800 Subject: [PATCH 03/13] Assign tree parent pointers after parse is complete --- src/runtime/parser.c | 1 + src/runtime/tree.c | 17 ++++++++++++++--- src/runtime/tree.h | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 36d35e94..8668fe04 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -301,6 +301,7 @@ static TSTree *ts_parser__finish(TSParser *self) { TSTree *root = trees[extra_count]; ts_tree_prepend_children(root, extra_count, trees); + ts_tree_assign_parents(root); return root; } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 822e51f8..4c04e9b8 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -36,6 +36,20 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char) return result; } +void ts_tree_assign_parents(TSTree *self) { + TSLength offset = ts_length_zero(); + for (size_t i = 0; i < self->child_count; i++) { + TSTree *child = self->children[i]; + if (child->context.parent != self) { + child->context.parent = self; + child->context.index = i; + child->context.offset = offset; + ts_tree_assign_parents(child); + } + offset = ts_length_add(offset, ts_tree_total_size(child)); + } +} + static void ts_tree__set_children(TSTree *self, TSTree **children, size_t child_count) { self->children = children; @@ -44,9 +58,6 @@ static void ts_tree__set_children(TSTree *self, TSTree **children, for (size_t i = 0; i < child_count; i++) { TSTree *child = children[i]; ts_tree_retain(child); - child->context.parent = self; - child->context.index = i; - child->context.offset = ts_tree_total_size(self); if (i == 0) { self->padding = child->padding; diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 88208831..b443868e 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -44,6 +44,7 @@ char *ts_tree_string(const TSTree *tree, const char **names, bool include_anonymous); TSLength ts_tree_total_size(const TSTree *tree); void ts_tree_prepend_children(TSTree *, size_t, TSTree **); +void ts_tree_assign_parents(TSTree *); void ts_tree_edit(TSTree *, TSInputEdit); static inline bool ts_tree_is_extra(const TSTree *tree) { From 64874449e4d7c9b2bf8dea9d6fa453862415a7dc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 18 Nov 2015 08:47:15 -0800 Subject: [PATCH 04/13] Allow different parse stack heads to lex differently --- src/runtime/lexer.c | 3 + src/runtime/parser.c | 202 +++++++++++++++++++++++++++---------------- src/runtime/parser.h | 4 +- src/runtime/vector.h | 19 ++++ 4 files changed, 152 insertions(+), 76 deletions(-) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index b2b21819..bbcfd84d 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -110,6 +110,9 @@ TSLexer ts_lexer_make() { } void ts_lexer_reset(TSLexer *self, TSLength position) { + if (ts_length_eq(position, self->current_position)) + return; + self->token_start_position = position; self->token_end_position = position; self->current_position = position; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 8668fe04..8321a2dc 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -6,6 +6,9 @@ #include "runtime/tree.h" #include "runtime/lexer.h" #include "runtime/length.h" +#include "runtime/vector.h" + +#include /* * Debugging @@ -20,6 +23,12 @@ #define SYM_NAME(sym) self->language->symbol_names[sym] +typedef struct { + TSTree *reusable_subtree; + size_t reusable_subtree_pos; + TSLength position; +} HeadState; + typedef enum { ConsumeResultShifted, ConsumeResultRemoved, @@ -53,14 +62,14 @@ static TSParseAction ts_language__last_action(const TSLanguage *language, * Replace the parser's reusable_subtree with its first non-fragile descendant. * Return true if a suitable descendant is found, false otherwise. */ -static bool ts_parser__breakdown_reusable_subtree(TSParser *self) { +static bool ts_parser__breakdown_reusable_subtree(HeadState *state) { do { - if (self->reusable_subtree->symbol == ts_builtin_sym_error) + if (state->reusable_subtree->symbol == ts_builtin_sym_error) return false; - if (self->reusable_subtree->child_count == 0) + if (state->reusable_subtree->child_count == 0) return false; - self->reusable_subtree = self->reusable_subtree->children[0]; - } while (ts_tree_is_fragile(self->reusable_subtree)); + state->reusable_subtree = state->reusable_subtree->children[0]; + } while (ts_tree_is_fragile(state->reusable_subtree)); return true; } @@ -68,80 +77,82 @@ static bool ts_parser__breakdown_reusable_subtree(TSParser *self) { * Replace the parser's reusable_subtree with its largest right neighbor, or * NULL if no right neighbor exists. */ -static void ts_parser__pop_reusable_subtree(TSParser *self) { - self->reusable_subtree_pos += ts_tree_total_size(self->reusable_subtree).chars; +static void ts_parser__pop_reusable_subtree(HeadState *state) { + state->reusable_subtree_pos += + ts_tree_total_size(state->reusable_subtree).chars; - while (self->reusable_subtree) { - TSTree *parent = self->reusable_subtree->context.parent; - size_t next_index = self->reusable_subtree->context.index + 1; + while (state->reusable_subtree) { + TSTree *parent = state->reusable_subtree->context.parent; + size_t next_index = state->reusable_subtree->context.index + 1; if (parent && parent->child_count > next_index) { - self->reusable_subtree = parent->children[next_index]; + state->reusable_subtree = parent->children[next_index]; return; } - self->reusable_subtree = parent; + state->reusable_subtree = parent; } } +static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { + if (subtree->symbol == ts_builtin_sym_error) + return false; + TSStateId state = ts_stack_top_state(self->stack, head); + const TSParseAction *action = + ts_language__actions(self->language, state, subtree->symbol); + return action->type != TSParseActionTypeError; +} + /* * Advance the parser's lookahead subtree. If there is a reusable subtree * at the correct position in the parser's previous tree, use that. Otherwise, * run the lexer. */ -static void ts_parser__get_next_lookahead(TSParser *self) { - while (self->reusable_subtree) { - if (self->reusable_subtree_pos > self->lexer.current_position.chars) { +static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { + HeadState *state = vector_get(&self->head_states, head); + + while (state->reusable_subtree) { + if (state->reusable_subtree_pos > state->position.chars) { break; } - if (self->reusable_subtree_pos < self->lexer.current_position.chars) { - DEBUG("past_reuse sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - ts_parser__pop_reusable_subtree(self); + if (state->reusable_subtree_pos < state->position.chars) { + DEBUG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + ts_parser__pop_reusable_subtree(state); continue; } - if (ts_tree_has_changes(self->reusable_subtree) || - ts_tree_is_fragile(self->reusable_subtree) || - ts_tree_is_extra(self->reusable_subtree)) { - DEBUG("breakdown sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - if (!ts_parser__breakdown_reusable_subtree(self)) - ts_parser__pop_reusable_subtree(self); + if (ts_tree_has_changes(state->reusable_subtree) || + ts_tree_is_fragile(state->reusable_subtree) || + ts_tree_is_extra(state->reusable_subtree) || + (state->reusable_subtree->child_count > 0 && + !ts_parser__can_reuse(self, head, state->reusable_subtree))) { + DEBUG("breakdown sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + if (!ts_parser__breakdown_reusable_subtree(state)) + ts_parser__pop_reusable_subtree(state); continue; } - TSStateId top_state = ts_stack_top_state(self->stack, 0); - TSSymbol symbol = self->reusable_subtree->symbol; - if (ts_language__last_action(self->language, top_state, symbol).type == - TSParseActionTypeError) { - DEBUG("cant_reuse sym:%s", SYM_NAME(self->reusable_subtree->symbol)); - ts_parser__pop_reusable_subtree(self); - continue; - } - - self->lookahead = self->reusable_subtree; - TSLength size = ts_tree_total_size(self->lookahead); - DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(self->lookahead->symbol), - size.chars, self->lookahead->options.extra); - ts_lexer_reset(&self->lexer, - ts_length_add(self->lexer.current_position, size)); - ts_parser__pop_reusable_subtree(self); - return; + TSTree *result = state->reusable_subtree; + TSLength size = ts_tree_total_size(result); + DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), + size.chars, result->options.extra); + ts_parser__pop_reusable_subtree(state); + return result; } - TSLength position = self->lexer.current_position; - for (size_t i = 0, count = ts_stack_head_count(self->stack); i < count; i++) { - if (i > 0) { - ts_lexer_reset(&self->lexer, position); - ts_tree_release(self->lookahead); - } + return NULL; +} - TSStateId parse_state = ts_stack_top_state(self->stack, i); - TSStateId lex_state = self->language->lex_states[parse_state]; - DEBUG("lex state:%d", lex_state); - self->lookahead = self->language->lex_fn(&self->lexer, lex_state); +static int ts_parser__split(TSParser *self, int head) { + int result = ts_stack_split(self->stack, head); + assert(result == self->head_states.size); + HeadState head_state = *(HeadState *)vector_get(&self->head_states, head); + vector_push(&self->head_states, &head_state); + return result; +} - if (self->lookahead->symbol != ts_builtin_sym_error) - break; - } +static void ts_parser__remove_head(TSParser *self, int head) { + vector_erase(&self->head_states, head); + ts_stack_remove_head(self->stack, head); } /* @@ -150,10 +161,16 @@ static void ts_parser__get_next_lookahead(TSParser *self) { static ConsumeResult ts_parser__shift(TSParser *self, int head, TSStateId parse_state) { - if (ts_stack_push(self->stack, head, parse_state, self->lookahead)) + HeadState *head_state = vector_get(&self->head_states, head); + head_state->position = + ts_length_add(head_state->position, ts_tree_total_size(self->lookahead)); + if (ts_stack_push(self->stack, head, parse_state, self->lookahead)) { + DEBUG("merge head:%d", head); + vector_erase(&self->head_states, head); return ConsumeResultRemoved; - else + } else { return ConsumeResultShifted; + } } static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) { @@ -175,6 +192,13 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, for (int i = 0; i < pop_results.size; i++) { StackPopResult pop_result = pop_results.contents[i]; + if (i > 0) { + assert(pop_result.index == self->head_states.size); + DEBUG("split_during_reduce new_head:%d", pop_result.index); + HeadState *head_state = vector_get(&self->head_states, head); + vector_push(&self->head_states, head_state); + } + if (pop_result.trees != last_children) { parent = ts_tree_make_node(symbol, pop_result.tree_count, pop_result.trees, node_type); @@ -194,7 +218,8 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, .data.to_state; } - ts_stack_push(self->stack, pop_result.index, state, parent); + if (ts_stack_push(self->stack, pop_result.index, state, parent)) + vector_erase(&self->head_states, pop_result.index); } last_index = pop_result.index; @@ -214,9 +239,12 @@ static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, static void ts_parser__reduce_error(TSParser *self, int head, size_t child_count) { + HeadState *head_state = vector_get(&self->head_states, head); TSTree *reduced = ts_parser__reduce(self, head, ts_builtin_sym_error, child_count, false, true); reduced->size = ts_length_add(reduced->size, self->lookahead->padding); + head_state->position = + ts_length_add(head_state->position, self->lookahead->padding); self->lookahead->padding = ts_length_zero(); ts_tree_set_fragile_left(reduced); ts_tree_set_fragile_right(reduced); @@ -288,8 +316,14 @@ static void ts_parser__start(TSParser *self, TSInput input, ts_lexer_reset(&self->lexer, ts_length_zero()); ts_stack_clear(self->stack); - self->reusable_subtree = previous_tree; - self->reusable_subtree_pos = 0; + HeadState head_state = { + .position = ts_length_zero(), + .reusable_subtree = previous_tree, + .reusable_subtree_pos = 0, + }; + vector_clear(&self->head_states); + vector_push(&self->head_states, &head_state); + self->lookahead = NULL; } @@ -328,11 +362,9 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { int current_head; if (next_action->type == 0) { current_head = head; - DEBUG("action current_head:%d, state:%d", current_head, state); } else { - current_head = ts_stack_split(self->stack, head); - DEBUG("split_action from_head:%d, current_head:%d, state:%d", head, - current_head, state); + current_head = ts_parser__split(self, head); + DEBUG("split_action from_head:%d, new_head:%d", head, current_head); } // TODO: Remove this by making a separate symbol for errors returned from @@ -350,7 +382,7 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { return ConsumeResultFinished; } else { DEBUG("bail current_head:%d", current_head); - ts_stack_remove_head(self->stack, current_head); + ts_parser__remove_head(self, current_head); return ConsumeResultRemoved; } @@ -391,10 +423,14 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { } static int ts_tree__compare(TSTree *left, TSTree *right) { - if (left->symbol < right->symbol) return -1; - if (right->symbol < left->symbol) return 1; - if (left->child_count < right->child_count) return -1; - if (right->child_count < left->child_count) return 1; + if (left->symbol < right->symbol) + return -1; + if (right->symbol < left->symbol) + return 1; + if (left->child_count < right->child_count) + return -1; + if (right->child_count < left->child_count) + return 1; for (size_t i = 0; i < left->child_count; i++) { TSTree *left_child = left->children[i]; TSTree *right_child = right->children[i]; @@ -427,6 +463,7 @@ TSParser ts_parser_make() { .stack = ts_stack_new((TreeSelectionCallback){ NULL, ts_parser__select_tree, }), + .head_states = vector_new(sizeof(HeadState), 3), .lookahead = NULL, }; } @@ -449,13 +486,28 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { ts_parser__start(self, input, previous_tree); for (;;) { - ts_parser__get_next_lookahead(self); - - DEBUG("lookahead sym:%s, pos:%lu, head_count:%d", - SYM_NAME(self->lookahead->symbol), self->lexer.current_position.chars, - ts_stack_head_count(self->stack)); - for (int head = 0; head < ts_stack_head_count(self->stack);) { + HeadState *state = vector_get(&self->head_states, head); + + DEBUG("process head:%d, head_count:%d, state:%d, pos:%lu", head, + ts_stack_head_count(self->stack), + ts_stack_top_state(self->stack, head), state->position.chars); + + TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); + if (reused_lookahead && + ts_parser__can_reuse(self, head, reused_lookahead)) { + self->lookahead = reused_lookahead; + } else if (!(self->lookahead && + ts_parser__can_reuse(self, head, self->lookahead))) { + ts_lexer_reset(&self->lexer, state->position); + TSStateId parse_state = ts_stack_top_state(self->stack, head); + TSStateId lex_state = self->language->lex_states[parse_state]; + self->lookahead = self->language->lex_fn(&self->lexer, lex_state); + } + + DEBUG("lookahead sym:%s, size:%lu", SYM_NAME(self->lookahead->symbol), + ts_tree_total_size(self->lookahead).chars); + switch (ts_parser__consume_lookahead(self, head)) { case ConsumeResultRemoved: break; @@ -466,5 +518,7 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { return ts_parser__finish(self); } } + + self->lookahead = NULL; } } diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 50370ba0..53cb6a0f 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -6,14 +6,14 @@ extern "C" { #endif #include "runtime/stack.h" +#include "runtime/vector.h" typedef struct { TSLexer lexer; Stack *stack; TSTree *lookahead; - TSTree *reusable_subtree; - size_t reusable_subtree_pos; const TSLanguage *language; + Vector head_states; } TSParser; TSParser ts_parser_make(); diff --git a/src/runtime/vector.h b/src/runtime/vector.h index c62a43c9..28437764 100644 --- a/src/runtime/vector.h +++ b/src/runtime/vector.h @@ -7,6 +7,7 @@ extern "C" { #include #include +#include typedef struct { void *contents; @@ -24,6 +25,24 @@ static inline Vector vector_new(size_t element_size, size_t capacity) { }; } +static inline void *vector_get(Vector *self, size_t index) { + assert(index < self->size); + return (void *)((char *)self->contents + index * self->element_size); +} + +static inline void vector_clear(Vector *self) { + self->size = 0; +} + +static inline void vector_erase(Vector *self, size_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * self->element_size, + contents + (index + 1) * self->element_size, + (self->size - index - 1) * self->element_size); + self->size--; +} + static inline void vector_push(Vector *self, void *entry) { if (self->size == self->capacity) { self->capacity += 4; From c88e9044d5fb1d395aa986e6a109f468cf1355b8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 00:01:53 -0800 Subject: [PATCH 05/13] Make stack popping more robust --- spec/runtime/stack_spec.cc | 217 ++++++++++++++++++++++++++++--------- src/runtime/parser.c | 111 +++++++++++++------ src/runtime/parser.h | 1 + src/runtime/stack.c | 129 ++++++++++++---------- src/runtime/stack.h | 12 +- src/runtime/vector.h | 9 ++ 6 files changed, 328 insertions(+), 151 deletions(-) diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index b2d12208..7c3cc1c7 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -5,12 +5,12 @@ #include "runtime/length.h" enum { - stateA, stateB, stateC, stateD, stateE, stateF, stateG, stateH + stateA, stateB, stateC, stateD, stateE, stateF, stateG, stateH, stateI, stateJ }; enum { symbol0 = ts_builtin_sym_start, - symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7 + symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8 }; struct TreeSelectionSpy { @@ -32,7 +32,7 @@ START_TEST describe("Stack", [&]() { Stack *stack; - const size_t tree_count = 8; + const size_t tree_count = 10; TSTree *trees[tree_count]; TreeSelectionSpy tree_selection_spy{0, NULL, {NULL, NULL}}; @@ -88,8 +88,6 @@ describe("Stack", [&]() { }); describe("popping nodes from the stack", [&]() { - StackPopResultList pop; - before_each([&]() { /* * A0__B1__C2. @@ -103,43 +101,47 @@ describe("Stack", [&]() { /* * A0. */ - pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(2)); - AssertThat(pop.contents[0].trees[0], Equals(trees[1])); - AssertThat(pop.contents[0].trees[1], Equals(trees[2])); + Vector pop = ts_stack_pop(stack, 0, 2, false); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[1])); + AssertThat(pop1.trees[1], Equals(trees[2])); AssertThat(*ts_stack_head(stack, 0), Equals({trees[0], stateA})); /* * . */ pop = ts_stack_pop(stack, 0, 1, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(1)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); + pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(1)); + AssertThat(pop1.trees[0], Equals(trees[0])); AssertThat(ts_stack_head(stack, 0), Equals(nullptr)); }); it("does not count 'extra' trees toward the count", [&]() { ts_tree_set_extra(trees[1]); - pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); - AssertThat(pop.contents[0].trees[1], Equals(trees[1])); - AssertThat(pop.contents[0].trees[2], Equals(trees[2])); + Vector pop = ts_stack_pop(stack, 0, 2, false); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop.size, Equals(1)); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[0])); + AssertThat(pop1.trees[1], Equals(trees[1])); + AssertThat(pop1.trees[2], Equals(trees[2])); AssertThat(ts_stack_head(stack, 0), Equals(nullptr)); }); it("pops the entire stack when given a negative count", [&]() { - pop = ts_stack_pop(stack, 0, -1, false); + Vector pop = ts_stack_pop(stack, 0, -1, false); - AssertThat(pop.size, Equals(1)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].trees[0], Equals(trees[0])); - AssertThat(pop.contents[0].trees[1], Equals(trees[1])); - AssertThat(pop.contents[0].trees[2], Equals(trees[2])); + AssertThat(pop.size, Equals(1)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[0])); + AssertThat(pop1.trees[1], Equals(trees[1])); + AssertThat(pop1.trees[2], Equals(trees[2])); }); }); @@ -317,6 +319,9 @@ describe("Stack", [&]() { ts_stack_push(stack, 1, stateE, trees[4]); ts_stack_push(stack, 1, stateF, trees[5]); ts_stack_push(stack, 1, stateG, trees[6]); + + AssertThat(ts_stack_head_count(stack), Equals(1)); + AssertThat(ts_stack_entry_next_count(ts_stack_head(stack, 0)), Equals(2)); }); describe("when there are two paths that lead to two different heads", [&]() { @@ -325,18 +330,18 @@ describe("Stack", [&]() { * A0__B1__C2. * \__E4. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 2, false); + Vector pop = ts_stack_pop(stack, 0, 2, false); - AssertThat(pop.size, Equals(2)); - StackPopResult pop1 = pop.contents[0]; - AssertThat(pop1.index, Equals(0)); - AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); AssertThat(pop1.trees[0], Equals(trees[3])); AssertThat(pop1.trees[1], Equals(trees[6])); - StackPopResult pop2 = pop.contents[1]; - AssertThat(pop2.index, Equals(1)); - AssertThat(pop2.tree_count, Equals(2)); + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); AssertThat(pop2.trees[0], Equals(trees[5])); AssertThat(pop2.trees[1], Equals(trees[6])); @@ -360,9 +365,9 @@ describe("Stack", [&]() { * A0__B1__C2__D3__G6. * \__E4__F5__/ */ - StackPopResultList pop = ts_stack_pop(stack, 0, 1, false); + Vector pop = ts_stack_pop(stack, 0, 1, false); - AssertThat(pop.size, Equals(1)); + AssertThat(pop.size, Equals(1)); AssertThat(ts_stack_head_count(stack), Equals(1)); }); }); @@ -380,19 +385,21 @@ describe("Stack", [&]() { * A0__B1__C2__D3. * \__E4__F5. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 2, false); + Vector pop = ts_stack_pop(stack, 0, 2, false); AssertThat(ts_stack_head_count(stack), Equals(2)); - AssertThat(pop.size, Equals(2)); - AssertThat(pop.contents[0].index, Equals(0)); - AssertThat(pop.contents[0].tree_count, Equals(2)); - AssertThat(pop.contents[0].trees[0], Equals(trees[6])); - AssertThat(pop.contents[0].trees[1], Equals(trees[7])); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[6])); + AssertThat(pop1.trees[1], Equals(trees[7])); - AssertThat(pop.contents[1].index, Equals(1)); - AssertThat(pop.contents[1].tree_count, Equals(2)); - AssertThat(pop.contents[1].trees[0], Equals(trees[6])); - AssertThat(pop.contents[1].trees[1], Equals(trees[7])); + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); + AssertThat(pop2.trees[0], Equals(trees[6])); + AssertThat(pop2.trees[1], Equals(trees[7])); }); }); @@ -401,17 +408,121 @@ describe("Stack", [&]() { /* * A0__B1. */ - StackPopResultList pop = ts_stack_pop(stack, 0, 3, false); + Vector pop = ts_stack_pop(stack, 0, 3, false); AssertThat(ts_stack_head_count(stack), Equals(1)); AssertThat(*ts_stack_head(stack, 0), Equals({trees[1], stateB})); - AssertThat(pop.size, Equals(2)); - AssertThat(pop.contents[0].tree_count, Equals(3)); - AssertThat(pop.contents[0].index, Equals(0)); - AssertThat(pop.contents[0].trees[0], Equals(trees[2])); - AssertThat(pop.contents[1].tree_count, Equals(3)); - AssertThat(pop.contents[1].index, Equals(0)); - AssertThat(pop.contents[1].trees[0], Equals(trees[4])); + AssertThat(pop.size, Equals(2)); + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.trees[0], Equals(trees[2])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(pop2.tree_count, Equals(3)); + AssertThat(pop2.head_index, Equals(0)); + AssertThat(pop2.trees[0], Equals(trees[4])); + }); + }); + }); + + describe("popping from a stack head that has been 3-way merged", [&]() { + before_each([&]() { + /* + * A0__B1__C2__D3__I8__J9. + * \__E4__F5__/ + * \__G6__H7__/ + */ + ts_stack_clear(stack); + ts_stack_push(stack, 0, stateA, trees[0]); + ts_stack_push(stack, 0, stateB, trees[1]); + ts_stack_split(stack, 0); + ts_stack_split(stack, 1); + ts_stack_push(stack, 0, stateC, trees[2]); + ts_stack_push(stack, 1, stateE, trees[4]); + ts_stack_push(stack, 2, stateG, trees[6]); + ts_stack_push(stack, 0, stateD, trees[3]); + ts_stack_push(stack, 1, stateF, trees[5]); + ts_stack_push(stack, 2, stateH, trees[7]); + ts_stack_push(stack, 0, stateI, trees[8]); + ts_stack_push(stack, 1, stateI, trees[8]); + ts_stack_push(stack, 1, stateI, trees[8]); + ts_stack_push(stack, 0, stateJ, trees[9]); + + AssertThat(ts_stack_head_count(stack), Equals(1)); + StackEntry *head = ts_stack_head(stack, 0); + AssertThat(ts_stack_entry_next_count(head), Equals(1)); + AssertThat(ts_stack_entry_next_count(ts_stack_entry_next(head, 0)), Equals(3)); + }); + + describe("when there is one path that leads to three different heads", [&]() { + it("returns three entries with the same array of trees", [&]() { + /* + * A0__B1__C2__D3. + * \__E4__F5. + * \__G6__H7. + */ + Vector pop = ts_stack_pop(stack, 0, 2, false); + AssertThat(ts_stack_head_count(stack), Equals(3)); + + AssertThat(pop.size, Equals(3)); + + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(ts_stack_top_tree(stack, 0), Equals(trees[3])); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(2)); + AssertThat(pop1.trees[0], Equals(trees[8])); + AssertThat(pop1.trees[1], Equals(trees[9])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(ts_stack_top_tree(stack, 1), Equals(trees[5])); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(2)); + AssertThat(pop2.trees, Equals(pop1.trees)); + + StackPopResult pop3 = *(StackPopResult *)vector_get(&pop, 2); + AssertThat(ts_stack_top_tree(stack, 2), Equals(trees[7])); + AssertThat(pop3.head_index, Equals(2)); + AssertThat(pop3.tree_count, Equals(2)); + AssertThat(pop3.trees, Equals(pop1.trees)); + }); + }); + + describe("when there are three different paths that lead to three different heads", [&]() { + it("returns three entries with different arrays of trees", [&]() { + /* + * A0__B1__C2. + * \__E4. + * \__G6. + */ + Vector pop = ts_stack_pop(stack, 0, 3, false); + AssertThat(ts_stack_head_count(stack), Equals(3)); + + AssertThat(pop.size, Equals(3)); + + StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); + AssertThat(ts_stack_top_tree(stack, 0), Equals(trees[2])); + AssertThat(pop1.head_index, Equals(0)); + AssertThat(pop1.tree_count, Equals(3)); + AssertThat(pop1.trees[0], Equals(trees[3])); + AssertThat(pop1.trees[1], Equals(trees[8])); + AssertThat(pop1.trees[2], Equals(trees[9])); + + StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1); + AssertThat(ts_stack_top_tree(stack, 1), Equals(trees[4])); + AssertThat(pop2.head_index, Equals(1)); + AssertThat(pop2.tree_count, Equals(3)); + AssertThat(pop2.trees[0], Equals(trees[5])); + AssertThat(pop2.trees[1], Equals(trees[8])); + AssertThat(pop2.trees[2], Equals(trees[9])); + + StackPopResult pop3 = *(StackPopResult *)vector_get(&pop, 2); + AssertThat(ts_stack_top_tree(stack, 2), Equals(trees[6])); + AssertThat(pop3.head_index, Equals(2)); + AssertThat(pop3.tree_count, Equals(3)); + AssertThat(pop3.trees[0], Equals(trees[7])); + AssertThat(pop3.trees[1], Equals(trees[8])); + AssertThat(pop3.trees[2], Equals(trees[9])); }); }); }); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 8321a2dc..1b9cf0f6 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -181,52 +181,91 @@ static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) { static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, size_t child_count, bool extra, bool count_extra) { + vector_clear(&self->reduce_parents); TSNodeType node_type = self->language->node_types[symbol]; - StackPopResultList pop_results = - ts_stack_pop(self->stack, head, child_count, count_extra); + Vector pop_results = ts_stack_pop(self->stack, head, child_count, count_extra); - TSTree *parent = NULL; - TSTree **last_children = NULL; - int last_index = -1; + int last_head_index = -1; + int removed_heads = 0; - for (int i = 0; i < pop_results.size; i++) { - StackPopResult pop_result = pop_results.contents[i]; + for (size_t i = 0; i < pop_results.size; i++) { + StackPopResult *pop_result = vector_get(&pop_results, i); + /* + * If the same set of trees led to a previous stack head, reuse the parent + * tree that was added to that head. + */ + TSTree *parent = NULL; + for (size_t j = 0; j < i; j++) { + StackPopResult *prior_result = vector_get(&pop_results, j); + if (pop_result->trees == prior_result->trees) { + TSTree **existing_parent = vector_get(&self->reduce_parents, j); + parent = *existing_parent; + break; + } + } + + /* + * Otherwise, create a new parent node for this set of trees. + */ + if (!parent) + parent = ts_tree_make_node(symbol, pop_result->tree_count, pop_result->trees, node_type); + vector_push(&self->reduce_parents, &parent); + + /* + * If another path led to the same stack head, add this new parent tree + * as an alternative for that stack head. + */ + int new_head = pop_result->head_index - removed_heads; + if (pop_result->head_index == last_head_index) { + ts_stack_add_alternative(self->stack, new_head, parent); + continue; + } + + /* + * If the stack has split in the process of popping, create a duplicate of + * the lookahead state for this head, for the new head. + */ if (i > 0) { - assert(pop_result.index == self->head_states.size); - DEBUG("split_during_reduce new_head:%d", pop_result.index); + DEBUG("split_during_reduce new_head:%d", new_head); HeadState *head_state = vector_get(&self->head_states, head); vector_push(&self->head_states, head_state); } - if (pop_result.trees != last_children) { - parent = ts_tree_make_node(symbol, pop_result.tree_count, - pop_result.trees, node_type); - } - - if (pop_result.index == last_index) { - ts_stack_add_alternative(self->stack, pop_result.index, parent); + /* + * If the parent node is extra, then do not change the state when pushing + * it. Otherwise, proceed to the state given in the parse table for the + * new parent symbol. + */ + TSStateId state; + TSStateId top_state = ts_stack_top_state(self->stack, new_head); + if (extra) { + ts_tree_set_extra(parent); + state = top_state; } else { - TSStateId top_state = ts_stack_top_state(self->stack, pop_result.index); - TSStateId state; - - if (extra) { - ts_tree_set_extra(parent); - state = top_state; + TSParseAction action = ts_language__last_action(self->language, top_state, symbol); + if (child_count == -1) { + state = 0; } else { - state = ts_language__last_action(self->language, top_state, symbol) - .data.to_state; + assert(action.type == TSParseActionTypeShift); + state = action.data.to_state; } - - if (ts_stack_push(self->stack, pop_result.index, state, parent)) - vector_erase(&self->head_states, pop_result.index); } - last_index = pop_result.index; - last_children = pop_result.trees; + /* + * If the given state already existed at a different head of the stack, + * then remove the lookahead state for the head. + */ + if (ts_stack_push(self->stack, new_head, state, parent)) { + vector_erase(&self->head_states, new_head); + removed_heads++; + } + + last_head_index = pop_result->head_index; } - return parent; + TSTree **last_parent = vector_back(&self->reduce_parents); + return *last_parent; } static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, @@ -298,7 +337,7 @@ static bool ts_parser__handle_error(TSParser *self, int head) { */ if (self->lookahead->symbol == ts_builtin_sym_end) { DEBUG("fail_to_recover"); - ts_parser__reduce_error(self, head, error_token_count - 1); + ts_parser__reduce_error(self, head, -1); return false; } } @@ -328,10 +367,11 @@ static void ts_parser__start(TSParser *self, TSInput input, } static TSTree *ts_parser__finish(TSParser *self) { - StackPopResult pop_result = ts_stack_pop(self->stack, 0, -1, true).contents[0]; + Vector pop_results = ts_stack_pop(self->stack, 0, -1, true); + StackPopResult *pop_result = vector_get(&pop_results, 0); - TSTree **trees = pop_result.trees; - size_t extra_count = pop_result.tree_count - 1; + TSTree **trees = pop_result->trees; + size_t extra_count = pop_result->tree_count - 1; TSTree *root = trees[extra_count]; ts_tree_prepend_children(root, extra_count, trees); @@ -463,7 +503,8 @@ TSParser ts_parser_make() { .stack = ts_stack_new((TreeSelectionCallback){ NULL, ts_parser__select_tree, }), - .head_states = vector_new(sizeof(HeadState), 3), + .head_states = vector_new(sizeof(HeadState), 4), + .reduce_parents = vector_new(sizeof(TSTree *), 4), .lookahead = NULL, }; } diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 53cb6a0f..b15c06f2 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -14,6 +14,7 @@ typedef struct { TSTree *lookahead; const TSLanguage *language; Vector head_states; + Vector reduce_parents; } TSParser; TSParser ts_parser_make(); diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 89efb4b8..3236b763 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -5,13 +5,13 @@ #include "runtime/length.h" #include -#define MAX_POP_PATH_COUNT 8 +#define MAX_SUCCESSOR_COUNT 8 #define INITIAL_HEAD_CAPACITY 3 #define STARTING_TREE_CAPACITY 10 typedef struct StackNode { StackEntry entry; - struct StackNode *successors[MAX_POP_PATH_COUNT]; + struct StackNode *successors[MAX_SUCCESSOR_COUNT]; short unsigned int successor_count; short unsigned int ref_count; } StackNode; @@ -20,10 +20,18 @@ struct Stack { StackNode **heads; int head_count; int head_capacity; - StackPopResult last_pop_results[MAX_POP_PATH_COUNT]; + Vector pop_results; + Vector pop_paths; TreeSelectionCallback tree_selection_callback; }; +typedef struct { + size_t goal_tree_count; + StackNode *node; + Vector trees; + bool is_shared; +} PopPath; + /* * Section: Stack lifecycle */ @@ -35,11 +43,15 @@ Stack *ts_stack_new(TreeSelectionCallback tree_selection_callback) { .head_count = 1, .head_capacity = INITIAL_HEAD_CAPACITY, .tree_selection_callback = tree_selection_callback, + .pop_results = vector_new(sizeof(StackPopResult), 4), + .pop_paths = vector_new(sizeof(PopPath), 4), }; return self; } void ts_stack_delete(Stack *self) { + vector_delete(&self->pop_results); + vector_delete(&self->pop_paths); free(self->heads); free(self); } @@ -164,12 +176,12 @@ static int ts_stack__add_head(Stack *self, StackNode *node) { return new_index; } -static int ts_stack__find_or_add_head(Stack *self, StackNode *node) { +static int ts_stack__find_head(Stack *self, StackNode *node) { for (int i = 0; i < self->head_count; i++) if (self->heads[i] == node) { return i; } - return ts_stack__add_head(self, node); + return -1; } void ts_stack_remove_head(Stack *self, int head_index) { @@ -221,17 +233,24 @@ int ts_stack_split(Stack *self, int head_index) { return ts_stack__add_head(self, self->heads[head_index]); } -StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, - bool count_extra) { - StackNode *previous_head = self->heads[head_index]; +const char *symbol_names[] = { + "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "ten", "eleven", "twelve" +}; - int path_count = 1; +Vector ts_stack_pop(Stack *self, int head_index, int child_count, bool count_extra) { + StackNode *previous_head = self->heads[head_index]; int capacity = (child_count == -1) ? STARTING_TREE_CAPACITY : child_count; - size_t tree_counts_by_path[MAX_POP_PATH_COUNT] = { child_count }; - StackNode *nodes_by_path[MAX_POP_PATH_COUNT] = { previous_head }; - Vector trees_by_path[MAX_POP_PATH_COUNT] = { vector_new(sizeof(TSTree *), - capacity) }; - bool is_shared_by_path[MAX_POP_PATH_COUNT] = { false }; + PopPath initial_path = { + .goal_tree_count = child_count, + .node = previous_head, + .trees = vector_new(sizeof(TSTree *), capacity), + .is_shared = false, + }; + + vector_clear(&self->pop_results); + vector_clear(&self->pop_paths); + vector_push(&self->pop_paths, &initial_path); /* * Reduce along every possible path in parallel. Stop when the given number @@ -240,72 +259,72 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count, bool all_paths_done = false; while (!all_paths_done) { all_paths_done = true; - int current_path_count = path_count; - for (int path = 0; path < current_path_count; path++) { - StackNode *node = nodes_by_path[path]; - if (!node || (trees_by_path[path].size == tree_counts_by_path[path])) + + for (size_t i = 0; i < self->pop_paths.size; i++) { + PopPath *path = vector_get(&self->pop_paths, i); + StackNode *node = path->node; + + if (!node || path->trees.size == path->goal_tree_count) continue; + all_paths_done = false; /* * Children that are 'extra' do not count towards the total child count. */ if (ts_tree_is_extra(node->entry.tree) && !count_extra) - tree_counts_by_path[path]++; + path->goal_tree_count++; /* * If a node has more than one successor, create new paths for each of * the additional successors. */ - if (is_shared_by_path[path]) { - trees_by_path[path] = vector_copy(&trees_by_path[path]); - is_shared_by_path[path] = false; + if (path->is_shared) { + path->trees = vector_copy(&path->trees); + path->is_shared = false; } + ts_tree_retain(node->entry.tree); - vector_push(&trees_by_path[path], &node->entry.tree); + vector_push(&path->trees, &node->entry.tree); - for (int i = 0; i < node->successor_count; i++) { - int next_path; - if (i > 0) { - if (path_count == MAX_POP_PATH_COUNT) - break; - next_path = path_count; - tree_counts_by_path[next_path] = tree_counts_by_path[path]; - trees_by_path[next_path] = trees_by_path[path]; - is_shared_by_path[next_path] = true; - path_count++; - } else { - next_path = path; - } - - nodes_by_path[next_path] = node->successors[i]; + path->node = path->node->successors[0]; + for (int j = 1; j < node->successor_count; j++) { + PopPath path_copy = *path; + vector_push(&self->pop_paths, &path_copy); + PopPath *next_path = vector_back(&self->pop_paths); + next_path->node = node->successors[j]; + next_path->is_shared = true; } } } - for (int path = 0; path < path_count; path++) { - if (!is_shared_by_path[path]) - vector_reverse(&trees_by_path[path]); - int index = -1; - if (path == 0) { - stack_node_retain(nodes_by_path[path]); - self->heads[head_index] = nodes_by_path[path]; - index = head_index; + for (size_t i = 0; i < self->pop_paths.size; i++) { + PopPath *path = vector_get(&self->pop_paths, i); + + if (!path->is_shared) + vector_reverse(&path->trees); + + StackPopResult result = { + .trees = path->trees.contents, + .tree_count = path->trees.size, + .head_index = -1, + }; + + if (i == 0) { + stack_node_retain(path->node); + self->heads[head_index] = path->node; + result.head_index = head_index; } else { - index = ts_stack__find_or_add_head(self, nodes_by_path[path]); + result.head_index = ts_stack__find_head(self, path->node); + if (result.head_index == -1) + result.head_index = ts_stack__add_head(self, path->node); } - self->last_pop_results[path] = (StackPopResult){ - .index = index, - .tree_count = trees_by_path[path].size, - .trees = trees_by_path[path].contents, - }; + vector_push(&self->pop_results, &result); } stack_node_release(previous_head); - return (StackPopResultList){ - .size = path_count, .contents = self->last_pop_results, - }; + return self->pop_results; } void ts_stack_shrink(Stack *self, int head_index, int count) { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 2aab77e9..3b44f351 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -6,6 +6,7 @@ extern "C" { #endif #include "tree_sitter/parser.h" +#include "runtime/vector.h" typedef struct Stack Stack; @@ -15,16 +16,11 @@ typedef struct { } StackEntry; typedef struct { - int index; - int tree_count; TSTree **trees; + size_t tree_count; + int head_index; } StackPopResult; -typedef struct { - int size; - StackPopResult *contents; -} StackPopResultList; - typedef struct { void *data; TSTree *(*callback)(void *data, TSTree *, TSTree *); @@ -90,7 +86,7 @@ void ts_stack_add_alternative(Stack *, int head, TSTree *); * which had previously been merged. It returns a struct that indicates the * index of each revealed head and the trees removed from that head. */ -StackPopResultList ts_stack_pop(Stack *, int head, int count, bool count_extra); +Vector ts_stack_pop(Stack *, int head, int count, bool count_extra); /* * Remove the given number of entries from the given head of the stack. diff --git a/src/runtime/vector.h b/src/runtime/vector.h index 28437764..434ddefe 100644 --- a/src/runtime/vector.h +++ b/src/runtime/vector.h @@ -25,11 +25,20 @@ static inline Vector vector_new(size_t element_size, size_t capacity) { }; } +static inline void vector_delete(Vector *self) { + free(self->contents); +} + static inline void *vector_get(Vector *self, size_t index) { assert(index < self->size); return (void *)((char *)self->contents + index * self->element_size); } +static inline void *vector_back(Vector *self) { + assert(self->size > 0); + return vector_get(self, self->size - 1); +} + static inline void vector_clear(Vector *self) { self->size = 0; } From ee66f1e774efc94f212211fba60fab54ddbc026c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 00:07:05 -0800 Subject: [PATCH 06/13] Make vector.h compile when included by C++, w/o clang extensions --- src/runtime/vector.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/runtime/vector.h b/src/runtime/vector.h index 434ddefe..ed09af33 100644 --- a/src/runtime/vector.h +++ b/src/runtime/vector.h @@ -17,12 +17,12 @@ typedef struct { } Vector; static inline Vector vector_new(size_t element_size, size_t capacity) { - return (Vector){ - .contents = malloc(capacity * element_size), - .size = 0, - .capacity = capacity, - .element_size = element_size, - }; + Vector result; + result.contents = malloc(capacity * element_size); + result.size = 0; + result.capacity = capacity; + result.element_size = element_size; + return result; } static inline void vector_delete(Vector *self) { @@ -79,13 +79,10 @@ static inline void vector_reverse(Vector *self) { } static inline Vector vector_copy(Vector *self) { - return (Vector){ - .contents = memcpy(malloc(self->capacity * self->element_size), - self->contents, self->size * self->element_size), - .size = self->size, - .capacity = self->capacity, - .element_size = self->element_size, - }; + Vector copy = *self; + copy.contents = memcpy(malloc(self->capacity * self->element_size), + self->contents, self->size * self->element_size); + return copy; } #ifdef __cplusplus From 8d1b9501abb846e635ea59623cfbb3ce8b5437f3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 11:47:39 -0800 Subject: [PATCH 07/13] Fix integer comparison warnings --- src/runtime/parser.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 1b9cf0f6..ef6dc466 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1,6 +1,7 @@ #include "runtime/parser.h" #include #include +#include #include "tree_sitter/runtime.h" #include "tree_sitter/parser.h" #include "runtime/tree.h" @@ -8,8 +9,6 @@ #include "runtime/length.h" #include "runtime/vector.h" -#include - /* * Debugging */ @@ -144,7 +143,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { static int ts_parser__split(TSParser *self, int head) { int result = ts_stack_split(self->stack, head); - assert(result == self->head_states.size); + assert(result == (int)self->head_states.size); HeadState head_state = *(HeadState *)vector_get(&self->head_states, head); vector_push(&self->head_states, &head_state); return result; @@ -179,7 +178,7 @@ static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) { } static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, - size_t child_count, bool extra, + int child_count, bool extra, bool count_extra) { vector_clear(&self->reduce_parents); TSNodeType node_type = self->language->node_types[symbol]; From 7aba2a07169c4af5285e9465d2b6306ebe5d36bd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 11:49:04 -0800 Subject: [PATCH 08/13] Rename DEBUG macro to LOG DEBUG is already used as the symbol to enable/disable assert() calls --- src/runtime/lexer.c | 24 ++++++++-------- src/runtime/parser.c | 67 ++++++++++++++++++++++---------------------- src/runtime/stack.c | 10 +++---- src/runtime/vector.h | 4 +-- 4 files changed, 53 insertions(+), 52 deletions(-) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index bbcfd84d..de4c9bf0 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -6,17 +6,17 @@ #include "runtime/debugger.h" #include "utf8proc.h" -#define DEBUG(...) \ +#define LOG(...) \ if (self->debugger.debug_fn) { \ snprintf(self->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ self->debugger.debug_fn(self->debugger.payload, TSDebugTypeLex, \ self->debug_buffer); \ } -#define DEBUG_LOOKAHEAD() \ - DEBUG((0 < self->lookahead && self->lookahead < 256) ? "lookahead char:'%c'" \ - : "lookahead char:%d", \ - self->lookahead); +#define LOG_LOOKAHEAD() \ + LOG((0 < self->lookahead && self->lookahead < 256) ? "lookahead char:'%c'" \ + : "lookahead char:%d", \ + self->lookahead); static const char *empty_chunk = ""; @@ -37,12 +37,12 @@ static void ts_lexer__get_lookahead(TSLexer *self) { self->lookahead_size = utf8proc_iterate( (const uint8_t *)self->chunk + position_in_chunk, self->chunk_size - position_in_chunk + 1, &self->lookahead); - DEBUG_LOOKAHEAD(); + LOG_LOOKAHEAD(); } static void ts_lexer__start(TSLexer *self, TSStateId lex_state) { - DEBUG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars); - DEBUG_LOOKAHEAD(); + LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars); + LOG_LOOKAHEAD(); if (!self->chunk) ts_lexer__get_chunk(self); @@ -51,12 +51,12 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) { } static void ts_lexer__start_token(TSLexer *self) { - DEBUG("start_token chars:%lu", self->current_position.chars); + LOG("start_token chars:%lu", self->current_position.chars); self->token_start_position = self->current_position; } static bool ts_lexer__advance(TSLexer *self, TSStateId state) { - DEBUG("advance state:%d", state); + LOG("advance state:%d", state); if (self->chunk == empty_chunk) return false; @@ -82,10 +82,10 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol, self->token_end_position = self->current_position; if (symbol == ts_builtin_sym_error) { - DEBUG("error_char"); + LOG("error_char"); return ts_tree_make_error(size, padding, self->lookahead); } else { - DEBUG("accept_token sym:%s", symbol_name); + LOG("accept_token sym:%s", symbol_name); return ts_tree_make_leaf(symbol, padding, size, node_type); } } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index ef6dc466..6201ace3 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -13,7 +13,7 @@ * Debugging */ -#define DEBUG(...) \ +#define LOG(...) \ if (self->lexer.debugger.debug_fn) { \ snprintf(self->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ self->lexer.debugger.debug_fn(self->lexer.debugger.payload, \ @@ -114,7 +114,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { } if (state->reusable_subtree_pos < state->position.chars) { - DEBUG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + LOG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); ts_parser__pop_reusable_subtree(state); continue; } @@ -124,7 +124,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { ts_tree_is_extra(state->reusable_subtree) || (state->reusable_subtree->child_count > 0 && !ts_parser__can_reuse(self, head, state->reusable_subtree))) { - DEBUG("breakdown sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + LOG("breakdown sym:%s", SYM_NAME(state->reusable_subtree->symbol)); if (!ts_parser__breakdown_reusable_subtree(state)) ts_parser__pop_reusable_subtree(state); continue; @@ -132,8 +132,8 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { TSTree *result = state->reusable_subtree; TSLength size = ts_tree_total_size(result); - DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), - size.chars, result->options.extra); + LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars, + result->options.extra); ts_parser__pop_reusable_subtree(state); return result; } @@ -164,7 +164,7 @@ static ConsumeResult ts_parser__shift(TSParser *self, int head, head_state->position = ts_length_add(head_state->position, ts_tree_total_size(self->lookahead)); if (ts_stack_push(self->stack, head, parse_state, self->lookahead)) { - DEBUG("merge head:%d", head); + LOG("merge head:%d", head); vector_erase(&self->head_states, head); return ConsumeResultRemoved; } else { @@ -178,8 +178,7 @@ static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) { } static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, - int child_count, bool extra, - bool count_extra) { + int child_count, bool extra, bool count_extra) { vector_clear(&self->reduce_parents); TSNodeType node_type = self->language->node_types[symbol]; Vector pop_results = ts_stack_pop(self->stack, head, child_count, count_extra); @@ -208,7 +207,8 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, * Otherwise, create a new parent node for this set of trees. */ if (!parent) - parent = ts_tree_make_node(symbol, pop_result->tree_count, pop_result->trees, node_type); + parent = ts_tree_make_node(symbol, pop_result->tree_count, + pop_result->trees, node_type); vector_push(&self->reduce_parents, &parent); /* @@ -226,7 +226,7 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, * the lookahead state for this head, for the new head. */ if (i > 0) { - DEBUG("split_during_reduce new_head:%d", new_head); + LOG("split_during_reduce new_head:%d", new_head); HeadState *head_state = vector_get(&self->head_states, head); vector_push(&self->head_states, head_state); } @@ -242,7 +242,8 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, ts_tree_set_extra(parent); state = top_state; } else { - TSParseAction action = ts_language__last_action(self->language, top_state, symbol); + TSParseAction action = + ts_language__last_action(self->language, top_state, symbol); if (child_count == -1) { state = 0; } else { @@ -311,8 +312,8 @@ static bool ts_parser__handle_error(TSParser *self, int head) { self->language, state_after_error, self->lookahead->symbol); if (action_after_error.type != TSParseActionTypeError) { - DEBUG("recover state:%u, count:%lu", state_after_error, - error_token_count + i); + LOG("recover state:%u, count:%lu", state_after_error, + error_token_count + i); ts_parser__reduce_error(self, head, error_token_count + i); return true; } @@ -326,7 +327,7 @@ static bool ts_parser__handle_error(TSParser *self, int head) { * If there is no state in the stack for which we can recover with the * current lookahead token, advance to the next token. */ - DEBUG("skip token:%s", SYM_NAME(self->lookahead->symbol)); + LOG("skip token:%s", SYM_NAME(self->lookahead->symbol)); ts_parser__shift(self, head, ts_stack_top_state(self->stack, head)); self->lookahead = self->language->lex_fn(&self->lexer, ts_lex_state_error); error_token_count++; @@ -335,7 +336,7 @@ static bool ts_parser__handle_error(TSParser *self, int head) { * If the end of input is reached, exit. */ if (self->lookahead->symbol == ts_builtin_sym_end) { - DEBUG("fail_to_recover"); + LOG("fail_to_recover"); ts_parser__reduce_error(self, head, -1); return false; } @@ -345,9 +346,9 @@ static bool ts_parser__handle_error(TSParser *self, int head) { static void ts_parser__start(TSParser *self, TSInput input, TSTree *previous_tree) { if (previous_tree) { - DEBUG("parse_after_edit"); + LOG("parse_after_edit"); } else { - DEBUG("new_parse"); + LOG("new_parse"); } self->lexer.input = input; @@ -403,7 +404,7 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { current_head = head; } else { current_head = ts_parser__split(self, head); - DEBUG("split_action from_head:%d, new_head:%d", head, current_head); + LOG("split_action from_head:%d, new_head:%d", head, current_head); } // TODO: Remove this by making a separate symbol for errors returned from @@ -413,48 +414,48 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { switch (action.type) { case TSParseActionTypeError: - DEBUG("error_sym"); + LOG("error_sym"); if (ts_stack_head_count(self->stack) == 1) { if (ts_parser__handle_error(self, current_head)) break; else return ConsumeResultFinished; } else { - DEBUG("bail current_head:%d", current_head); + LOG("bail current_head:%d", current_head); ts_parser__remove_head(self, current_head); return ConsumeResultRemoved; } case TSParseActionTypeShift: - DEBUG("shift state:%u", action.data.to_state); + LOG("shift state:%u", action.data.to_state); return ts_parser__shift(self, current_head, action.data.to_state); case TSParseActionTypeShiftExtra: - DEBUG("shift_extra"); + LOG("shift_extra"); return ts_parser__shift_extra(self, current_head, state); case TSParseActionTypeReduce: - DEBUG("reduce sym:%s, child_count:%u", SYM_NAME(action.data.symbol), - action.data.child_count); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.data.symbol), + action.data.child_count); ts_parser__reduce(self, current_head, action.data.symbol, action.data.child_count, false, false); break; case TSParseActionTypeReduceExtra: - DEBUG("reduce_extra sym:%s", SYM_NAME(action.data.symbol)); + LOG("reduce_extra sym:%s", SYM_NAME(action.data.symbol)); ts_parser__reduce(self, current_head, action.data.symbol, 1, true, false); break; case TSParseActionTypeReduceFragile: - DEBUG("reduce_fragile sym:%s, count:%u", SYM_NAME(action.data.symbol), - action.data.child_count); + LOG("reduce_fragile sym:%s, count:%u", SYM_NAME(action.data.symbol), + action.data.child_count); ts_parser__reduce_fragile(self, current_head, action.data.symbol, action.data.child_count); break; case TSParseActionTypeAccept: - DEBUG("accept"); + LOG("accept"); return ConsumeResultFinished; } } @@ -529,9 +530,9 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { for (int head = 0; head < ts_stack_head_count(self->stack);) { HeadState *state = vector_get(&self->head_states, head); - DEBUG("process head:%d, head_count:%d, state:%d, pos:%lu", head, - ts_stack_head_count(self->stack), - ts_stack_top_state(self->stack, head), state->position.chars); + LOG("process head:%d, head_count:%d, state:%d, pos:%lu", head, + ts_stack_head_count(self->stack), + ts_stack_top_state(self->stack, head), state->position.chars); TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); if (reused_lookahead && @@ -545,8 +546,8 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { self->lookahead = self->language->lex_fn(&self->lexer, lex_state); } - DEBUG("lookahead sym:%s, size:%lu", SYM_NAME(self->lookahead->symbol), - ts_tree_total_size(self->lookahead).chars); + LOG("lookahead sym:%s, size:%lu", SYM_NAME(self->lookahead->symbol), + ts_tree_total_size(self->lookahead).chars); switch (ts_parser__consume_lookahead(self, head)) { case ConsumeResultRemoved: diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 3236b763..83901f61 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -233,12 +233,12 @@ int ts_stack_split(Stack *self, int head_index) { return ts_stack__add_head(self, self->heads[head_index]); } -const char *symbol_names[] = { - "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", - "ten", "eleven", "twelve" -}; +const char *symbol_names[] = { "zero", "one", "two", "three", "four", + "five", "six", "seven", "eight", "nine", + "ten", "eleven", "twelve" }; -Vector ts_stack_pop(Stack *self, int head_index, int child_count, bool count_extra) { +Vector ts_stack_pop(Stack *self, int head_index, int child_count, + bool count_extra) { StackNode *previous_head = self->heads[head_index]; int capacity = (child_count == -1) ? STARTING_TREE_CAPACITY : child_count; PopPath initial_path = { diff --git a/src/runtime/vector.h b/src/runtime/vector.h index ed09af33..00019ca0 100644 --- a/src/runtime/vector.h +++ b/src/runtime/vector.h @@ -34,7 +34,7 @@ static inline void *vector_get(Vector *self, size_t index) { return (void *)((char *)self->contents + index * self->element_size); } -static inline void *vector_back(Vector *self) { +static inline void *vector_back(Vector *self) { assert(self->size > 0); return vector_get(self, self->size - 1); } @@ -81,7 +81,7 @@ static inline void vector_reverse(Vector *self) { static inline Vector vector_copy(Vector *self) { Vector copy = *self; copy.contents = memcpy(malloc(self->capacity * self->element_size), - self->contents, self->size * self->element_size); + self->contents, self->size * self->element_size); return copy; } From 5c95d02bd07436d0f84f3bb8ef5c44e480d26894 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 11:53:03 -0800 Subject: [PATCH 09/13] Move ts_tree_compare to the right file --- src/runtime/parser.c | 26 +------------------------- src/runtime/tree.c | 24 ++++++++++++++++++++++++ src/runtime/tree.h | 1 + 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 6201ace3..c064403b 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -462,32 +462,8 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { } } -static int ts_tree__compare(TSTree *left, TSTree *right) { - if (left->symbol < right->symbol) - return -1; - if (right->symbol < left->symbol) - return 1; - if (left->child_count < right->child_count) - return -1; - if (right->child_count < left->child_count) - return 1; - for (size_t i = 0; i < left->child_count; i++) { - TSTree *left_child = left->children[i]; - TSTree *right_child = right->children[i]; - switch (ts_tree__compare(left_child, right_child)) { - case -1: - return -1; - case 1: - return 1; - default: - break; - } - } - return 0; -} - static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { - if (ts_tree__compare(left, right) <= 0) + if (ts_tree_compare(left, right) <= 0) return left; else return right; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 4c04e9b8..0591ba03 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -143,6 +143,30 @@ bool ts_tree_eq(const TSTree *self, const TSTree *other) { return true; } +int ts_tree_compare(const TSTree *left, const TSTree *right) { + if (left->symbol < right->symbol) + return -1; + if (right->symbol < left->symbol) + return 1; + if (left->child_count < right->child_count) + return -1; + if (right->child_count < left->child_count) + return 1; + for (size_t i = 0; i < left->child_count; i++) { + TSTree *left_child = left->children[i]; + TSTree *right_child = right->children[i]; + switch (ts_tree_compare(left_child, right_child)) { + case -1: + return -1; + case 1: + return 1; + default: + break; + } + } + return 0; +} + static size_t write_lookahead_to_string(char *string, size_t limit, char lookahead) { switch (lookahead) { diff --git a/src/runtime/tree.h b/src/runtime/tree.h index b443868e..93bfdecf 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -40,6 +40,7 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char) void ts_tree_retain(TSTree *tree); void ts_tree_release(TSTree *tree); bool ts_tree_eq(const TSTree *tree1, const TSTree *tree2); +int ts_tree_compare(const TSTree *tree1, const TSTree *tree2); char *ts_tree_string(const TSTree *tree, const char **names, bool include_anonymous); TSLength ts_tree_total_size(const TSTree *tree); From 467930a785180ad8fcdc527a3d7e4d0b38f24364 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 12:00:49 -0800 Subject: [PATCH 10/13] Move some ts_language functions to the right file --- src/runtime/language.c | 19 +++++++++++++++++++ src/runtime/language.h | 18 ++++++++++++++++++ src/runtime/parser.c | 35 ++++++++--------------------------- 3 files changed, 45 insertions(+), 27 deletions(-) create mode 100644 src/runtime/language.h diff --git a/src/runtime/language.c b/src/runtime/language.c index ece2dec4..086ab3bb 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -1,5 +1,24 @@ #include "tree_sitter/parser.h" +static const TSParseAction ERROR_ACTIONS[2] = { + {.type = TSParseActionTypeError }, {.type = 0 } +}; + +const TSParseAction *ts_language_actions(const TSLanguage *language, + TSStateId state, TSSymbol sym) { + const TSParseAction *actions = + (language->parse_table + (state * language->symbol_count))[sym]; + return actions ? actions : ERROR_ACTIONS; +} + +TSParseAction ts_language_last_action(const TSLanguage *language, + TSStateId state, TSSymbol sym) { + const TSParseAction *action = ts_language_actions(language, state, sym); + while ((action + 1)->type) + action++; + return *action; +} + size_t ts_language_symbol_count(const TSLanguage *language) { return language->symbol_count; } diff --git a/src/runtime/language.h b/src/runtime/language.h new file mode 100644 index 00000000..e8f5ae99 --- /dev/null +++ b/src/runtime/language.h @@ -0,0 +1,18 @@ +#ifndef RUNTIME_LANGUAGE_H_ +#define RUNTIME_LANGUAGE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tree_sitter/parser.h" + +const TSParseAction *ts_language_actions(const TSLanguage *, TSStateId, + TSSymbol); +TSParseAction ts_language_last_action(const TSLanguage *, TSStateId, TSSymbol); + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_LANGUAGE_H_ diff --git a/src/runtime/parser.c b/src/runtime/parser.c index c064403b..59500922 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -8,6 +8,7 @@ #include "runtime/lexer.h" #include "runtime/length.h" #include "runtime/vector.h" +#include "runtime/language.h" /* * Debugging @@ -38,25 +39,6 @@ typedef enum { * Private */ -static const TSParseAction ERROR_ACTIONS[2] = { - {.type = TSParseActionTypeError }, {.type = 0 } -}; - -static const TSParseAction *ts_language__actions(const TSLanguage *language, - TSStateId state, TSSymbol sym) { - const TSParseAction *actions = - (language->parse_table + (state * language->symbol_count))[sym]; - return actions ? actions : ERROR_ACTIONS; -} - -static TSParseAction ts_language__last_action(const TSLanguage *language, - TSStateId state, TSSymbol sym) { - const TSParseAction *action = ts_language__actions(language, state, sym); - while ((action + 1)->type) - action++; - return *action; -} - /* * Replace the parser's reusable_subtree with its first non-fragile descendant. * Return true if a suitable descendant is found, false otherwise. @@ -96,7 +78,7 @@ static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { return false; TSStateId state = ts_stack_top_state(self->stack, head); const TSParseAction *action = - ts_language__actions(self->language, state, subtree->symbol); + ts_language_actions(self->language, state, subtree->symbol); return action->type != TSParseActionTypeError; } @@ -243,7 +225,7 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, state = top_state; } else { TSParseAction action = - ts_language__last_action(self->language, top_state, symbol); + ts_language_last_action(self->language, top_state, symbol); if (child_count == -1) { state = 0; } else { @@ -303,12 +285,12 @@ static bool ts_parser__handle_error(TSParser *self, int head) { for (StackEntry *entry = entry_before_error; true; entry = ts_stack_entry_next(entry, 0), i++) { TSStateId stack_state = entry ? entry->state : 0; - TSParseAction action_on_error = ts_language__last_action( + TSParseAction action_on_error = ts_language_last_action( self->language, stack_state, ts_builtin_sym_error); if (action_on_error.type == TSParseActionTypeShift) { TSStateId state_after_error = action_on_error.data.to_state; - TSParseAction action_after_error = ts_language__last_action( + TSParseAction action_after_error = ts_language_last_action( self->language, state_after_error, self->lookahead->symbol); if (action_after_error.type != TSParseActionTypeError) { @@ -370,11 +352,10 @@ static TSTree *ts_parser__finish(TSParser *self) { Vector pop_results = ts_stack_pop(self->stack, 0, -1, true); StackPopResult *pop_result = vector_get(&pop_results, 0); - TSTree **trees = pop_result->trees; size_t extra_count = pop_result->tree_count - 1; - TSTree *root = trees[extra_count]; + TSTree *root = pop_result->trees[extra_count]; - ts_tree_prepend_children(root, extra_count, trees); + ts_tree_prepend_children(root, extra_count, pop_result->trees); ts_tree_assign_parents(root); return root; } @@ -387,7 +368,7 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, head); const TSParseAction *next_action = - ts_language__actions(self->language, state, self->lookahead->symbol); + ts_language_actions(self->language, state, self->lookahead->symbol); /* * If there are multiple actions for the current state and lookahead symbol, From 32b10888232d4f252e2b6c0c0aa95a00e604ff2c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 12:55:01 -0800 Subject: [PATCH 11/13] Rename HeadState -> LookaheadState, remove parser.lookahead field --- src/runtime/parser.c | 123 +++++++++++++++++++++---------------------- src/runtime/parser.h | 3 +- 2 files changed, 61 insertions(+), 65 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 59500922..f1973302 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1,7 +1,7 @@ #include "runtime/parser.h" +#include #include #include -#include #include "tree_sitter/runtime.h" #include "tree_sitter/parser.h" #include "runtime/tree.h" @@ -27,7 +27,7 @@ typedef struct { TSTree *reusable_subtree; size_t reusable_subtree_pos; TSLength position; -} HeadState; +} LookaheadState; typedef enum { ConsumeResultShifted, @@ -43,7 +43,7 @@ typedef enum { * Replace the parser's reusable_subtree with its first non-fragile descendant. * Return true if a suitable descendant is found, false otherwise. */ -static bool ts_parser__breakdown_reusable_subtree(HeadState *state) { +static bool ts_parser__breakdown_reusable_subtree(LookaheadState *state) { do { if (state->reusable_subtree->symbol == ts_builtin_sym_error) return false; @@ -58,7 +58,7 @@ static bool ts_parser__breakdown_reusable_subtree(HeadState *state) { * Replace the parser's reusable_subtree with its largest right neighbor, or * NULL if no right neighbor exists. */ -static void ts_parser__pop_reusable_subtree(HeadState *state) { +static void ts_parser__pop_reusable_subtree(LookaheadState *state) { state->reusable_subtree_pos += ts_tree_total_size(state->reusable_subtree).chars; @@ -74,7 +74,7 @@ static void ts_parser__pop_reusable_subtree(HeadState *state) { } static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { - if (subtree->symbol == ts_builtin_sym_error) + if (!subtree || subtree->symbol == ts_builtin_sym_error) return false; TSStateId state = ts_stack_top_state(self->stack, head); const TSParseAction *action = @@ -88,7 +88,7 @@ static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { * run the lexer. */ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { - HeadState *state = vector_get(&self->head_states, head); + LookaheadState *state = vector_get(&self->lookahead_states, head); while (state->reusable_subtree) { if (state->reusable_subtree_pos > state->position.chars) { @@ -125,14 +125,15 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { static int ts_parser__split(TSParser *self, int head) { int result = ts_stack_split(self->stack, head); - assert(result == (int)self->head_states.size); - HeadState head_state = *(HeadState *)vector_get(&self->head_states, head); - vector_push(&self->head_states, &head_state); + assert(result == (int)self->lookahead_states.size); + LookaheadState head_state = + *(LookaheadState *)vector_get(&self->lookahead_states, head); + vector_push(&self->lookahead_states, &head_state); return result; } static void ts_parser__remove_head(TSParser *self, int head) { - vector_erase(&self->head_states, head); + vector_erase(&self->lookahead_states, head); ts_stack_remove_head(self->stack, head); } @@ -141,22 +142,23 @@ static void ts_parser__remove_head(TSParser *self, int head) { */ static ConsumeResult ts_parser__shift(TSParser *self, int head, - TSStateId parse_state) { - HeadState *head_state = vector_get(&self->head_states, head); + TSStateId parse_state, TSTree *lookahead) { + LookaheadState *head_state = vector_get(&self->lookahead_states, head); head_state->position = - ts_length_add(head_state->position, ts_tree_total_size(self->lookahead)); - if (ts_stack_push(self->stack, head, parse_state, self->lookahead)) { + ts_length_add(head_state->position, ts_tree_total_size(lookahead)); + if (ts_stack_push(self->stack, head, parse_state, lookahead)) { LOG("merge head:%d", head); - vector_erase(&self->head_states, head); + vector_erase(&self->lookahead_states, head); return ConsumeResultRemoved; } else { return ConsumeResultShifted; } } -static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) { - ts_tree_set_extra(self->lookahead); - return ts_parser__shift(self, head, state); +static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state, + TSTree *lookahead) { + ts_tree_set_extra(lookahead); + return ts_parser__shift(self, head, state, lookahead); } static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, @@ -209,8 +211,8 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, */ if (i > 0) { LOG("split_during_reduce new_head:%d", new_head); - HeadState *head_state = vector_get(&self->head_states, head); - vector_push(&self->head_states, head_state); + LookaheadState *head_state = vector_get(&self->lookahead_states, head); + vector_push(&self->lookahead_states, head_state); } /* @@ -239,7 +241,7 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, * then remove the lookahead state for the head. */ if (ts_stack_push(self->stack, new_head, state, parent)) { - vector_erase(&self->head_states, new_head); + vector_erase(&self->lookahead_states, new_head); removed_heads++; } @@ -259,19 +261,18 @@ static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, } static void ts_parser__reduce_error(TSParser *self, int head, - size_t child_count) { - HeadState *head_state = vector_get(&self->head_states, head); + size_t child_count, TSTree *lookahead) { + LookaheadState *head_state = vector_get(&self->lookahead_states, head); TSTree *reduced = ts_parser__reduce(self, head, ts_builtin_sym_error, child_count, false, true); - reduced->size = ts_length_add(reduced->size, self->lookahead->padding); - head_state->position = - ts_length_add(head_state->position, self->lookahead->padding); - self->lookahead->padding = ts_length_zero(); + reduced->size = ts_length_add(reduced->size, lookahead->padding); + head_state->position = ts_length_add(head_state->position, lookahead->padding); + lookahead->padding = ts_length_zero(); ts_tree_set_fragile_left(reduced); ts_tree_set_fragile_right(reduced); } -static bool ts_parser__handle_error(TSParser *self, int head) { +static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) { size_t error_token_count = 1; StackEntry *entry_before_error = ts_stack_head(self->stack, head); @@ -291,12 +292,12 @@ static bool ts_parser__handle_error(TSParser *self, int head) { if (action_on_error.type == TSParseActionTypeShift) { TSStateId state_after_error = action_on_error.data.to_state; TSParseAction action_after_error = ts_language_last_action( - self->language, state_after_error, self->lookahead->symbol); + self->language, state_after_error, lookahead->symbol); if (action_after_error.type != TSParseActionTypeError) { LOG("recover state:%u, count:%lu", state_after_error, error_token_count + i); - ts_parser__reduce_error(self, head, error_token_count + i); + ts_parser__reduce_error(self, head, error_token_count + i, lookahead); return true; } } @@ -309,17 +310,18 @@ static bool ts_parser__handle_error(TSParser *self, int head) { * If there is no state in the stack for which we can recover with the * current lookahead token, advance to the next token. */ - LOG("skip token:%s", SYM_NAME(self->lookahead->symbol)); - ts_parser__shift(self, head, ts_stack_top_state(self->stack, head)); - self->lookahead = self->language->lex_fn(&self->lexer, ts_lex_state_error); + LOG("skip token:%s", SYM_NAME(lookahead->symbol)); + ts_parser__shift(self, head, ts_stack_top_state(self->stack, head), + lookahead); + lookahead = self->language->lex_fn(&self->lexer, ts_lex_state_error); error_token_count++; /* * If the end of input is reached, exit. */ - if (self->lookahead->symbol == ts_builtin_sym_end) { + if (lookahead->symbol == ts_builtin_sym_end) { LOG("fail_to_recover"); - ts_parser__reduce_error(self, head, -1); + ts_parser__reduce_error(self, head, -1, lookahead); return false; } } @@ -337,15 +339,13 @@ static void ts_parser__start(TSParser *self, TSInput input, ts_lexer_reset(&self->lexer, ts_length_zero()); ts_stack_clear(self->stack); - HeadState head_state = { + LookaheadState head_state = { .position = ts_length_zero(), .reusable_subtree = previous_tree, .reusable_subtree_pos = 0, }; - vector_clear(&self->head_states); - vector_push(&self->head_states, &head_state); - - self->lookahead = NULL; + vector_clear(&self->lookahead_states); + vector_push(&self->lookahead_states, &head_state); } static TSTree *ts_parser__finish(TSParser *self) { @@ -364,11 +364,12 @@ static TSTree *ts_parser__finish(TSParser *self) { * Continue performing parse actions for the given head until the current * lookahead symbol is consumed. */ -static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { +static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head, + TSTree *lookahead) { for (;;) { TSStateId state = ts_stack_top_state(self->stack, head); const TSParseAction *next_action = - ts_language_actions(self->language, state, self->lookahead->symbol); + ts_language_actions(self->language, state, lookahead->symbol); /* * If there are multiple actions for the current state and lookahead symbol, @@ -390,15 +391,15 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { // TODO: Remove this by making a separate symbol for errors returned from // the lexer. - if (self->lookahead->symbol == ts_builtin_sym_error) + if (lookahead->symbol == ts_builtin_sym_error) action.type = TSParseActionTypeError; switch (action.type) { case TSParseActionTypeError: LOG("error_sym"); if (ts_stack_head_count(self->stack) == 1) { - if (ts_parser__handle_error(self, current_head)) - break; + if (ts_parser__handle_error(self, current_head, lookahead)) + return ConsumeResultShifted; else return ConsumeResultFinished; } else { @@ -409,11 +410,12 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) { case TSParseActionTypeShift: LOG("shift state:%u", action.data.to_state); - return ts_parser__shift(self, current_head, action.data.to_state); + return ts_parser__shift(self, current_head, action.data.to_state, + lookahead); case TSParseActionTypeShiftExtra: LOG("shift_extra"); - return ts_parser__shift_extra(self, current_head, state); + return ts_parser__shift_extra(self, current_head, state, lookahead); case TSParseActionTypeReduce: LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.data.symbol), @@ -460,16 +462,13 @@ TSParser ts_parser_make() { .stack = ts_stack_new((TreeSelectionCallback){ NULL, ts_parser__select_tree, }), - .head_states = vector_new(sizeof(HeadState), 4), + .lookahead_states = vector_new(sizeof(LookaheadState), 4), .reduce_parents = vector_new(sizeof(TSTree *), 4), - .lookahead = NULL, }; } void ts_parser_destroy(TSParser *self) { ts_stack_delete(self->stack); - if (self->lookahead) - ts_tree_release(self->lookahead); } TSDebugger ts_parser_debugger(const TSParser *self) { @@ -484,29 +483,29 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { ts_parser__start(self, input, previous_tree); for (;;) { + TSTree *lookahead = NULL; + for (int head = 0; head < ts_stack_head_count(self->stack);) { - HeadState *state = vector_get(&self->head_states, head); + LookaheadState *state = vector_get(&self->lookahead_states, head); LOG("process head:%d, head_count:%d, state:%d, pos:%lu", head, ts_stack_head_count(self->stack), ts_stack_top_state(self->stack, head), state->position.chars); TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); - if (reused_lookahead && - ts_parser__can_reuse(self, head, reused_lookahead)) { - self->lookahead = reused_lookahead; - } else if (!(self->lookahead && - ts_parser__can_reuse(self, head, self->lookahead))) { + if (ts_parser__can_reuse(self, head, reused_lookahead)) { + lookahead = reused_lookahead; + } else if (!ts_parser__can_reuse(self, head, lookahead)) { ts_lexer_reset(&self->lexer, state->position); TSStateId parse_state = ts_stack_top_state(self->stack, head); TSStateId lex_state = self->language->lex_states[parse_state]; - self->lookahead = self->language->lex_fn(&self->lexer, lex_state); + lookahead = self->language->lex_fn(&self->lexer, lex_state); } - LOG("lookahead sym:%s, size:%lu", SYM_NAME(self->lookahead->symbol), - ts_tree_total_size(self->lookahead).chars); + LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), + ts_tree_total_size(lookahead).chars); - switch (ts_parser__consume_lookahead(self, head)) { + switch (ts_parser__consume_lookahead(self, head, lookahead)) { case ConsumeResultRemoved: break; case ConsumeResultShifted: @@ -516,7 +515,5 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { return ts_parser__finish(self); } } - - self->lookahead = NULL; } } diff --git a/src/runtime/parser.h b/src/runtime/parser.h index b15c06f2..6c5cfffb 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -11,9 +11,8 @@ extern "C" { typedef struct { TSLexer lexer; Stack *stack; - TSTree *lookahead; const TSLanguage *language; - Vector head_states; + Vector lookahead_states; Vector reduce_parents; } TSParser; From c28db44cd9b2df5a1e329c33c6932edf687ea12a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 13:10:11 -0800 Subject: [PATCH 12/13] :art: --- src/runtime/parser.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index f1973302..eec1a31c 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -137,6 +137,10 @@ static void ts_parser__remove_head(TSParser *self, int head) { ts_stack_remove_head(self->stack, head); } +static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { + return ts_tree_compare(left, right) <= 0 ? left : right; +} + /* * Parse Actions */ @@ -445,13 +449,6 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head, } } -static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { - if (ts_tree_compare(left, right) <= 0) - return left; - else - return right; -} - /* * Public */ @@ -484,6 +481,7 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { for (;;) { TSTree *lookahead = NULL; + TSLength position = ts_length_zero(); for (int head = 0; head < ts_stack_head_count(self->stack);) { LookaheadState *state = vector_get(&self->lookahead_states, head); @@ -492,14 +490,18 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { ts_stack_head_count(self->stack), ts_stack_top_state(self->stack, head), state->position.chars); - TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); - if (ts_parser__can_reuse(self, head, reused_lookahead)) { - lookahead = reused_lookahead; - } else if (!ts_parser__can_reuse(self, head, lookahead)) { - ts_lexer_reset(&self->lexer, state->position); - TSStateId parse_state = ts_stack_top_state(self->stack, head); - TSStateId lex_state = self->language->lex_states[parse_state]; - lookahead = self->language->lex_fn(&self->lexer, lex_state); + if (!ts_parser__can_reuse(self, head, lookahead) || + !ts_length_eq(state->position, position)) { + TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); + if (ts_parser__can_reuse(self, head, reused_lookahead)) { + lookahead = reused_lookahead; + } else { + position = state->position; + ts_lexer_reset(&self->lexer, position); + TSStateId parse_state = ts_stack_top_state(self->stack, head); + TSStateId lex_state = self->language->lex_states[parse_state]; + lookahead = self->language->lex_fn(&self->lexer, lex_state); + } } LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), From ce27c2ee97238fdacdefadf56434f14fd080a094 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Nov 2015 13:12:32 -0800 Subject: [PATCH 13/13] :fire: variable accidentally left in for debugging --- src/runtime/stack.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 83901f61..b82ac91b 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -233,10 +233,6 @@ int ts_stack_split(Stack *self, int head_index) { return ts_stack__add_head(self, self->heads[head_index]); } -const char *symbol_names[] = { "zero", "one", "two", "three", "four", - "five", "six", "seven", "eight", "nine", - "ten", "eleven", "twelve" }; - Vector ts_stack_pop(Stack *self, int head_index, int child_count, bool count_extra) { StackNode *previous_head = self->heads[head_index];