From 1a99bfd9ff2819f428c8c935857f56b5f9fec2c2 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 26 Sep 2024 17:51:46 -0400 Subject: [PATCH] wow --- lib/src/language.h | 1 + lib/src/parser.c | 69 ++- lib/src/stack.c | 1470 +++++++++++++++++++++++--------------------- lib/src/stack.h | 11 +- lib/src/subtree.c | 49 +- lib/src/subtree.h | 1 + 6 files changed, 891 insertions(+), 710 deletions(-) diff --git a/lib/src/language.h b/lib/src/language.h index 4e2769b4..daaee9a4 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -272,6 +272,7 @@ static inline void ts_language_write_symbol_as_dot_string( TSSymbol symbol ) { const char *name = ts_language_symbol_name(self, symbol); + printf("name: %s\n", name); for (const char *chr = name; *chr; chr++) { switch (*chr) { case '"': diff --git a/lib/src/parser.c b/lib/src/parser.c index 5db2cf50..3d40c06e 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -206,12 +206,18 @@ static bool ts_parser__breakdown_top_of_stack( } ts_subtree_retain(child); - ts_stack_push(self->stack, slice.version, child, pending, state); + ts_stack_push(self->stack, slice.version, child, pending, state, self->language); + LOG("push 1"); + printf("push 1\n"); + LOG_STACK(); } for (uint32_t j = 1; j < slice.subtrees.size; j++) { Subtree tree = slice.subtrees.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, state); + ts_stack_push(self->stack, slice.version, tree, false, state, self->language); + LOG("push 2"); + printf("push 2\n"); + LOG_STACK(); } ts_subtree_release(&self->tree_pool, parent); @@ -913,7 +919,10 @@ static void ts_parser__shift( subtree_to_push = ts_subtree_from_mut(result); } - ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); + ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state, self->language); + LOG("push 3"); + printf("push 3\n"); + LOG_STACK(); if (ts_subtree_has_external_tokens(subtree_to_push)) { ts_stack_set_last_external_token( self->stack, version, ts_subtree_last_external_token(subtree_to_push) @@ -1016,9 +1025,15 @@ static StackVersion ts_parser__reduce( // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. - ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); + ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state, self->language); + LOG("push 4"); + printf("push 4\n"); + LOG_STACK(); for (uint32_t j = 0; j < self->trailing_extras.size; j++) { - ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); + ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state, self->language); + LOG("push 5"); + printf("push 5\n"); + LOG_STACK(); } for (StackVersion j = 0; j < slice_version; j++) { @@ -1042,9 +1057,15 @@ static void ts_parser__accept( Subtree lookahead ) { assert(ts_subtree_is_eof(lookahead)); - ts_stack_push(self->stack, version, lookahead, false, 1); + ts_stack_push(self->stack, version, lookahead, false, 1, self->language); + LOG("push 6"); + printf("push 6\n"); + LOG_STACK(); - StackSliceArray pop = ts_stack_pop_all(self->stack, version); + LOG("POP ALL") + printf("POP ALL\n"); + StackSliceArray pop = ts_stack_pop_all(self->stack, version, self->dot_graph_file); + LOG_STACK(); for (uint32_t i = 0; i < pop.size; i++) { SubtreeArray trees = pop.contents[i].subtrees; @@ -1222,14 +1243,20 @@ static bool ts_parser__recover_to_state( if (slice.subtrees.size > 0) { Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); - ts_stack_push(self->stack, slice.version, error, false, goal_state); + ts_stack_push(self->stack, slice.version, error, false, goal_state, self->language); + LOG("push 7"); + printf("push 7\n"); + LOG_STACK(); } else { array_delete(&slice.subtrees); } for (unsigned j = 0; j < self->trailing_extras.size; j++) { Subtree tree = self->trailing_extras.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, goal_state); + ts_stack_push(self->stack, slice.version, tree, false, goal_state, self->language); + LOG("push 8"); + printf("push 8\n"); + LOG_STACK(); } previous_version = slice.version; @@ -1339,7 +1366,10 @@ static void ts_parser__recover( LOG("recover_eof"); SubtreeArray children = array_new(); Subtree parent = ts_subtree_new_error_node(&children, false, self->language); - ts_stack_push(self->stack, version, parent, false, 1); + ts_stack_push(self->stack, version, parent, false, 1, self->language); + LOG("push 9"); + printf("push 9\n"); + LOG_STACK(); ts_parser__accept(self, version, lookahead); return; } @@ -1407,7 +1437,10 @@ static void ts_parser__recover( } // Push the new ERROR onto the stack. - ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE); + ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE, self->language); + LOG("push 10"); + printf("push 10\n"); + LOG_STACK(); if (ts_subtree_has_external_tokens(lookahead)) { ts_stack_set_last_external_token( self->stack, version, ts_subtree_last_external_token(lookahead) @@ -1469,8 +1502,11 @@ static void ts_parser__handle_error( ts_stack_push( self->stack, version_with_missing_tree, missing_tree, false, - state_after_missing_symbol + state_after_missing_symbol, self->language ); + LOG("push 11"); + printf("push 11\n"); + LOG_STACK(); if (ts_parser__do_all_potential_reductions( self, version_with_missing_tree, @@ -1488,7 +1524,10 @@ static void ts_parser__handle_error( } } - ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); + ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE, self->language); + LOG("push 12"); + printf("push 12\n"); + LOG_STACK(); v = (v == version) ? previous_version_count : v + 1; } @@ -1847,7 +1886,7 @@ TSParser *ts_parser_new(void) { array_init(&self->reduce_actions); array_reserve(&self->reduce_actions, 4); self->tree_pool = ts_subtree_pool_new(32); - self->stack = ts_stack_new(&self->tree_pool); + self->stack = ts_stack_new(&self->tree_pool, self->language); self->finished_tree = NULL_SUBTREE; self->reusable_node = reusable_node_new(); self->dot_graph_file = NULL; @@ -1915,6 +1954,8 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { } self->language = ts_language_copy(language); + ts_stack_set_language(self->stack, ts_language_copy(language)); + ts_stack_set_lexer(self->stack, &self->lexer); return true; } diff --git a/lib/src/stack.c b/lib/src/stack.c index 98d8c561..ac2f3bd6 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -1,9 +1,10 @@ -#include "./alloc.h" -#include "./language.h" -#include "./subtree.h" -#include "./array.h" #include "./stack.h" +#include "./alloc.h" +#include "./array.h" +#include "./language.h" #include "./length.h" +#include "./lexer.h" +#include "./subtree.h" #include #include #include @@ -18,882 +19,973 @@ #define forceinline static inline __attribute__((always_inline)) #endif +#define LOG(...) \ + if (self->lexer->logger.log || dot_graph_file) { \ + snprintf(self->lexer->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ + ts_stack__log(self, dot_graph_file); \ + } + +#define LOG_TREE(subtree) \ + if (dot_graph_file) { \ + printf("self->language:%p\n", (void *)self->language); \ + ts_subtree_print_dot_graph(subtree, self->language, dot_graph_file); \ + fputs("\n", dot_graph_file); \ + } + +#define LOG_LINKS() \ + for (uint32_t k = 0; k < node->link_count; k++) { \ + printf("link %d\n", k); \ + printf("node:%p\n", node->links[k].node); \ + printf("subtree:%p\n", node->links[k].subtree.ptr); \ + if (node->links[k].subtree.ptr) { \ + LOG("LINK TREE %d", k); \ + LOG_TREE(node->links[k].subtree); \ + } else { \ + LOG("LINK NO TREE %d", k); \ + } \ + } + typedef struct StackNode StackNode; typedef struct { - StackNode *node; - Subtree subtree; - bool is_pending; + StackNode *node; + Subtree subtree; + bool is_pending; } StackLink; struct StackNode { - TSStateId state; - Length position; - StackLink links[MAX_LINK_COUNT]; - short unsigned int link_count; - uint32_t ref_count; - unsigned error_cost; - unsigned node_count; - int dynamic_precedence; + TSStateId state; + Length position; + StackLink links[MAX_LINK_COUNT]; + short unsigned int link_count; + uint32_t ref_count; + unsigned error_cost; + unsigned node_count; + int dynamic_precedence; }; typedef struct { - StackNode *node; - SubtreeArray subtrees; - uint32_t subtree_count; - bool is_pending; + StackNode *node; + SubtreeArray subtrees; + uint32_t subtree_count; + bool is_pending; } StackIterator; typedef Array(StackNode *) StackNodeArray; typedef enum { - StackStatusActive, - StackStatusPaused, - StackStatusHalted, + StackStatusActive, + StackStatusPaused, + StackStatusHalted, } StackStatus; typedef struct { - StackNode *node; - StackSummary *summary; - unsigned node_count_at_last_error; - Subtree last_external_token; - Subtree lookahead_when_paused; - StackStatus status; + StackNode *node; + StackSummary *summary; + unsigned node_count_at_last_error; + Subtree last_external_token; + Subtree lookahead_when_paused; + StackStatus status; } StackHead; struct Stack { - Array(StackHead) heads; - StackSliceArray slices; - Array(StackIterator) iterators; - StackNodeArray node_pool; - StackNode *base_node; - SubtreePool *subtree_pool; + Array(StackHead) heads; + StackSliceArray slices; + Array(StackIterator) iterators; + StackNodeArray node_pool; + StackNode *base_node; + SubtreePool *subtree_pool; + const TSLanguage *language; + Lexer *lexer; }; typedef unsigned StackAction; + enum { - StackActionNone, - StackActionStop = 1, - StackActionPop = 2, + StackActionNone, + StackActionStop = 1, + StackActionPop = 2, }; typedef StackAction (*StackCallback)(void *, const StackIterator *); static void stack_node_retain(StackNode *self) { - if (!self) - return; - assert(self->ref_count > 0); - self->ref_count++; - assert(self->ref_count != 0); + if (!self) + return; + assert(self->ref_count > 0); + self->ref_count++; + assert(self->ref_count != 0); } -static void stack_node_release( - StackNode *self, - StackNodeArray *pool, - SubtreePool *subtree_pool -) { -recur: - assert(self->ref_count != 0); - self->ref_count--; - if (self->ref_count > 0) return; - - StackNode *first_predecessor = NULL; - if (self->link_count > 0) { - for (unsigned i = self->link_count - 1; i > 0; i--) { - StackLink link = self->links[i]; - if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); - stack_node_release(link.node, pool, subtree_pool); +static void ts_stack__log(Stack *self, FILE *dot_graph_file) { + if (self->lexer->logger.log) { + self->lexer->logger.log(self->lexer->logger.payload, TSLogTypeParse, self->lexer->debug_buffer); } - StackLink link = self->links[0]; - if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); - first_predecessor = self->links[0].node; - } - if (pool->size < MAX_NODE_POOL_SIZE) { - array_push(pool, self); - } else { - ts_free(self); - } + if (dot_graph_file) { + fprintf(dot_graph_file, "graph {\nlabel=\""); + for (char *chr = &self->lexer->debug_buffer[0]; *chr != 0; chr++) { + if (*chr == '"' || *chr == '\\') + fputc('\\', dot_graph_file); + fputc(*chr, dot_graph_file); + } + fprintf(dot_graph_file, "\"\n}\n\n"); + } +} - if (first_predecessor) { - self = first_predecessor; - goto recur; - } +static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) { +recur: + assert(self->ref_count != 0); + self->ref_count--; + if (self->ref_count > 0) + return; + + StackNode *first_predecessor = NULL; + if (self->link_count > 0) { + for (unsigned i = self->link_count - 1; i > 0; i--) { + StackLink link = self->links[i]; + if (link.subtree.ptr) + ts_subtree_release(subtree_pool, link.subtree); + stack_node_release(link.node, pool, subtree_pool); + } + StackLink link = self->links[0]; + if (link.subtree.ptr) + ts_subtree_release(subtree_pool, link.subtree); + first_predecessor = self->links[0].node; + } + + if (pool->size < MAX_NODE_POOL_SIZE) { + array_push(pool, self); + } else { + ts_free(self); + } + + if (first_predecessor) { + self = first_predecessor; + goto recur; + } } /// Get the number of nodes in the subtree, for the purpose of measuring /// how much progress has been made by a given version of the stack. static uint32_t stack__subtree_node_count(Subtree subtree) { - uint32_t count = ts_subtree_visible_descendant_count(subtree); - if (ts_subtree_visible(subtree)) count++; + uint32_t count = ts_subtree_visible_descendant_count(subtree); + if (ts_subtree_visible(subtree)) + count++; - // Count intermediate error nodes even though they are not visible, - // because a stack version's node count is used to check whether it - // has made any progress since the last time it encountered an error. - if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++; + // Count intermediate error nodes even though they are not visible, + // because a stack version's node count is used to check whether it + // has made any progress since the last time it encountered an error. + if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) + count++; - return count; + return count; } -static StackNode *stack_node_new( - StackNode *previous_node, - Subtree subtree, - bool is_pending, - TSStateId state, - StackNodeArray *pool -) { - StackNode *node = pool->size > 0 - ? array_pop(pool) - : ts_malloc(sizeof(StackNode)); - *node = (StackNode) { - .ref_count = 1, - .link_count = 0, - .state = state - }; +static StackNode *stack_node_new(StackNode *previous_node, Subtree subtree, bool is_pending, TSStateId state, + StackNodeArray *pool) { + StackNode *node = pool->size > 0 ? array_pop(pool) : ts_malloc(sizeof(StackNode)); + *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; - if (previous_node) { - node->link_count = 1; - node->links[0] = (StackLink) { - .node = previous_node, - .subtree = subtree, - .is_pending = is_pending, - }; + if (previous_node) { + node->link_count = 1; + node->links[0] = (StackLink){ + .node = previous_node, + .subtree = subtree, + .is_pending = is_pending, + }; - node->position = previous_node->position; - node->error_cost = previous_node->error_cost; - node->dynamic_precedence = previous_node->dynamic_precedence; - node->node_count = previous_node->node_count; + node->position = previous_node->position; + node->error_cost = previous_node->error_cost; + node->dynamic_precedence = previous_node->dynamic_precedence; + node->node_count = previous_node->node_count; - if (subtree.ptr) { - node->error_cost += ts_subtree_error_cost(subtree); - node->position = length_add(node->position, ts_subtree_total_size(subtree)); - node->node_count += stack__subtree_node_count(subtree); - node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree); + if (subtree.ptr) { + node->error_cost += ts_subtree_error_cost(subtree); + node->position = length_add(node->position, ts_subtree_total_size(subtree)); + node->node_count += stack__subtree_node_count(subtree); + node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree); + } + } else { + node->position = length_zero(); + // node->error_cost = 0; } - } else { - node->position = length_zero(); - node->error_cost = 0; - } - return node; + return node; } static bool stack__subtree_is_equivalent(Subtree left, Subtree right) { - if (left.ptr == right.ptr) return true; - if (!left.ptr || !right.ptr) return false; + if (left.ptr == right.ptr) + return true; + if (!left.ptr || !right.ptr) + return false; - // Symbols must match - if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false; + // Symbols must match + if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) + return false; - // If both have errors, don't bother keeping both. - if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true; + // If both have errors, don't bother keeping both. + if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) + return true; - return ( - ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && - ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && - ts_subtree_child_count(left) == ts_subtree_child_count(right) && - ts_subtree_extra(left) == ts_subtree_extra(right) && - ts_subtree_external_scanner_state_eq(left, right) - ); + return (ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && + ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && + ts_subtree_child_count(left) == ts_subtree_child_count(right) && + ts_subtree_extra(left) == ts_subtree_extra(right) && ts_subtree_external_scanner_state_eq(left, right)); } -static void stack_node_add_link( - StackNode *self, - StackLink link, - SubtreePool *subtree_pool -) { - if (link.node == self) return; - - for (int i = 0; i < self->link_count; i++) { - StackLink *existing_link = &self->links[i]; - if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) { - // In general, we preserve ambiguities until they are removed from the stack - // during a pop operation where multiple paths lead to the same node. But in - // the special case where two links directly connect the same pair of nodes, - // we can safely remove the ambiguity ahead of time without changing behavior. - if (existing_link->node == link.node) { - if ( - ts_subtree_dynamic_precedence(link.subtree) > - ts_subtree_dynamic_precedence(existing_link->subtree) - ) { - ts_subtree_retain(link.subtree); - ts_subtree_release(subtree_pool, existing_link->subtree); - existing_link->subtree = link.subtree; - self->dynamic_precedence = - link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree); - } +static void stack_node_add_link(StackNode *self, StackLink link, SubtreePool *subtree_pool) { + if (link.node == self) return; - } - // If the previous nodes are mergeable, merge them recursively. - if ( - existing_link->node->state == link.node->state && - existing_link->node->position.bytes == link.node->position.bytes && - existing_link->node->error_cost == link.node->error_cost - ) { - for (int j = 0; j < link.node->link_count; j++) { - stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); - } - int32_t dynamic_precedence = link.node->dynamic_precedence; - if (link.subtree.ptr) { - dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); - } - if (dynamic_precedence > self->dynamic_precedence) { - self->dynamic_precedence = dynamic_precedence; - } - return; - } - } - } - - if (self->link_count == MAX_LINK_COUNT) return; - - stack_node_retain(link.node); - unsigned node_count = link.node->node_count; - int dynamic_precedence = link.node->dynamic_precedence; - self->links[self->link_count++] = link; - - if (link.subtree.ptr) { - ts_subtree_retain(link.subtree); - node_count += stack__subtree_node_count(link.subtree); - dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); - } - - if (node_count > self->node_count) self->node_count = node_count; - if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence; -} - -static void stack_head_delete( - StackHead *self, - StackNodeArray *pool, - SubtreePool *subtree_pool -) { - if (self->node) { - if (self->last_external_token.ptr) { - ts_subtree_release(subtree_pool, self->last_external_token); - } - if (self->lookahead_when_paused.ptr) { - ts_subtree_release(subtree_pool, self->lookahead_when_paused); - } - if (self->summary) { - array_delete(self->summary); - ts_free(self->summary); - } - stack_node_release(self->node, pool, subtree_pool); - } -} - -static StackVersion ts_stack__add_version( - Stack *self, - StackVersion original_version, - StackNode *node -) { - StackHead head = { - .node = node, - .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, - .last_external_token = self->heads.contents[original_version].last_external_token, - .status = StackStatusActive, - .lookahead_when_paused = NULL_SUBTREE, - }; - array_push(&self->heads, head); - stack_node_retain(node); - if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token); - return (StackVersion)(self->heads.size - 1); -} - -static void ts_stack__add_slice( - Stack *self, - StackVersion original_version, - StackNode *node, - SubtreeArray *subtrees -) { - for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { - StackVersion version = self->slices.contents[i].version; - if (self->heads.contents[version].node == node) { - StackSlice slice = {*subtrees, version}; - array_insert(&self->slices, i + 1, slice); - return; - } - } - - StackVersion version = ts_stack__add_version(self, original_version, node); - StackSlice slice = { *subtrees, version }; - array_push(&self->slices, slice); -} - -static StackSliceArray stack__iter( - Stack *self, - StackVersion version, - StackCallback callback, - void *payload, - int goal_subtree_count -) { - array_clear(&self->slices); - array_clear(&self->iterators); - - StackHead *head = array_get(&self->heads, version); - StackIterator new_iterator = { - .node = head->node, - .subtrees = array_new(), - .subtree_count = 0, - .is_pending = true, - }; - - bool include_subtrees = false; - if (goal_subtree_count >= 0) { - include_subtrees = true; - array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); - } - - array_push(&self->iterators, new_iterator); - - while (self->iterators.size > 0) { - for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { - StackIterator *iterator = &self->iterators.contents[i]; - StackNode *node = iterator->node; - - StackAction action = callback(payload, iterator); - bool should_pop = action & StackActionPop; - bool should_stop = action & StackActionStop || node->link_count == 0; - - if (should_pop) { - SubtreeArray subtrees = iterator->subtrees; - if (!should_stop) { - ts_subtree_array_copy(subtrees, &subtrees); - } - ts_subtree_array_reverse(&subtrees); - ts_stack__add_slice( - self, - version, - node, - &subtrees - ); - } - - if (should_stop) { - if (!should_pop) { - ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); - } - array_erase(&self->iterators, i); - i--, size--; - continue; - } - - for (uint32_t j = 1; j <= node->link_count; j++) { - StackIterator *next_iterator; - StackLink link; - if (j == node->link_count) { - link = node->links[0]; - next_iterator = &self->iterators.contents[i]; - } else { - if (self->iterators.size >= MAX_ITERATOR_COUNT) continue; - link = node->links[j]; - StackIterator current_iterator = self->iterators.contents[i]; - array_push(&self->iterators, current_iterator); - next_iterator = array_back(&self->iterators); - ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); - } - - next_iterator->node = link.node; - if (link.subtree.ptr) { - if (include_subtrees) { - array_push(&next_iterator->subtrees, link.subtree); - ts_subtree_retain(link.subtree); - } - - if (!ts_subtree_extra(link.subtree)) { - next_iterator->subtree_count++; - if (!link.is_pending) { - next_iterator->is_pending = false; + for (int i = 0; i < self->link_count; i++) { + StackLink *existing_link = &self->links[i]; + if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) { + // In general, we preserve ambiguities until they are removed from the stack + // during a pop operation where multiple paths lead to the same node. But in + // the special case where two links directly connect the same pair of nodes, + // we can safely remove the ambiguity ahead of time without changing behavior. + if (existing_link->node == link.node) { + if (ts_subtree_dynamic_precedence(link.subtree) > + ts_subtree_dynamic_precedence(existing_link->subtree)) { + ts_subtree_retain(link.subtree); + ts_subtree_release(subtree_pool, existing_link->subtree); + existing_link->subtree = link.subtree; + self->dynamic_precedence = + link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree); + } + return; } - } - } else { - next_iterator->subtree_count++; - next_iterator->is_pending = false; - } - } - } - } - return self->slices; + // If the previous nodes are mergeable, merge them recursively. + if (existing_link->node->state == link.node->state && + existing_link->node->position.bytes == link.node->position.bytes && + existing_link->node->error_cost == link.node->error_cost) { + for (int j = 0; j < link.node->link_count; j++) { + stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); + } + int32_t dynamic_precedence = link.node->dynamic_precedence; + if (link.subtree.ptr) { + dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); + } + if (dynamic_precedence > self->dynamic_precedence) { + self->dynamic_precedence = dynamic_precedence; + } + return; + } + } + } + + if (self->link_count == MAX_LINK_COUNT) + return; + + stack_node_retain(link.node); + unsigned node_count = link.node->node_count; + int dynamic_precedence = link.node->dynamic_precedence; + self->links[self->link_count++] = link; + + if (link.subtree.ptr) { + ts_subtree_retain(link.subtree); + node_count += stack__subtree_node_count(link.subtree); + dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); + } + + if (node_count > self->node_count) + self->node_count = node_count; + if (dynamic_precedence > self->dynamic_precedence) + self->dynamic_precedence = dynamic_precedence; } -Stack *ts_stack_new(SubtreePool *subtree_pool) { - Stack *self = ts_calloc(1, sizeof(Stack)); +static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) { + if (self->node) { + if (self->last_external_token.ptr) { + ts_subtree_release(subtree_pool, self->last_external_token); + } + if (self->lookahead_when_paused.ptr) { + ts_subtree_release(subtree_pool, self->lookahead_when_paused); + } + if (self->summary) { + array_delete(self->summary); + ts_free(self->summary); + } + stack_node_release(self->node, pool, subtree_pool); + } +} - array_init(&self->heads); - array_init(&self->slices); - array_init(&self->iterators); - array_init(&self->node_pool); - array_reserve(&self->heads, 4); - array_reserve(&self->slices, 4); - array_reserve(&self->iterators, 4); - array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); +static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, StackNode *node) { + StackHead head = { + .node = node, + .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, + .last_external_token = self->heads.contents[original_version].last_external_token, + .status = StackStatusActive, + .lookahead_when_paused = NULL_SUBTREE, + }; + array_push(&self->heads, head); + stack_node_retain(node); + if (head.last_external_token.ptr) + ts_subtree_retain(head.last_external_token); + return (StackVersion)(self->heads.size - 1); +} - self->subtree_pool = subtree_pool; - self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool); - ts_stack_clear(self); +static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node, SubtreeArray *subtrees) { + for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { + StackVersion version = self->slices.contents[i].version; + if (self->heads.contents[version].node == node) { + StackSlice slice = {*subtrees, version}; + array_insert(&self->slices, i + 1, slice); + return; + } + } - return self; + StackVersion version = ts_stack__add_version(self, original_version, node); + StackSlice slice = {*subtrees, version}; + array_push(&self->slices, slice); +} + +static StackSliceArray stack__iter(Stack *self, StackVersion version, StackCallback callback, void *payload, + int goal_subtree_count, FILE *dot_graph_file) { + array_clear(&self->slices); + array_clear(&self->iterators); + + StackHead *head = array_get(&self->heads, version); + StackIterator new_iterator = { + .node = head->node, + .subtrees = array_new(), + .subtree_count = 0, + .is_pending = true, + }; + + bool include_subtrees = false; + if (goal_subtree_count >= 0) { + include_subtrees = true; + array_reserve(&new_iterator.subtrees, + (uint32_t)ts_subtree_alloc_size(goal_subtree_count ? 1 : goal_subtree_count) / sizeof(Subtree)); + } + + printf("iterator len: %d\n", self->iterators.size); + array_push(&self->iterators, new_iterator); + printf("iterator len: %d\n", self->iterators.size); + + while (self->iterators.size > 0) { + for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { + StackIterator *iterator = &self->iterators.contents[i]; + StackNode *node = iterator->node; + + StackAction action = callback(payload, iterator); + bool should_pop = action & StackActionPop; + bool should_stop = action & StackActionStop || node->link_count == 0; + printf("should_pop: %d\n", should_pop); + printf("should_stop: %d\n", should_stop); + + if (should_pop) { + SubtreeArray subtrees = iterator->subtrees; + for (uint32_t j = 0; j < subtrees.size; j++) { + LOG("TREE %d", j); + LOG_TREE(subtrees.contents[j]); + } + if (!should_stop) { + ts_subtree_array_copy(subtrees, &subtrees); + LOG("[0] PRE COPY LEN %d", subtrees.size); + for (uint32_t k = 0; k < subtrees.size; k++) { + LOG("[0] PRE COPY TREE %d", k); + LOG_TREE(subtrees.contents[k]); + } + LOG("[0] NEW PTR: %p\n", (void *)subtrees.contents) + LOG("[0] POST COPY LEN %d", subtrees.size); + for (uint32_t k = 0; k < subtrees.size; k++) { + LOG("[0] POST COPY TREE %d", k); + LOG_TREE(subtrees.contents[k]); + } + } + for (uint32_t k = 0; k < subtrees.size; k++) { + LOG("[0] PRE REVERSE TREE %d", k); + LOG_TREE(subtrees.contents[k]); + } + ts_subtree_array_reverse(&subtrees); + for (uint32_t k = 0; k < subtrees.size; k++) { + LOG("[0] POST REVERSE TREE %d", k); + LOG_TREE(subtrees.contents[k]); + } + ts_stack__add_slice(self, version, node, &subtrees); + printf("ADD SLICE\n"); + LOG("ADD SLICE\n"); + for (uint32_t j = 0; j < subtrees.size; j++) { + LOG("ADD SLICE TREE %d", j); + LOG_TREE(subtrees.contents[j]); + } + } + + if (should_stop) { + if (!should_pop) { + ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); + } + array_erase(&self->iterators, i); + i--, size--; + continue; + } + + for (uint32_t j = 1; j <= node->link_count; j++) { + StackIterator *next_iterator; + StackLink link; + if (j == node->link_count) { + printf("link set 1\n"); + link = node->links[0]; + LOG("# of links=%d", node->link_count); + for (uint32_t k = 0; k < node->link_count; k++) { + printf("link %d\n", k); + printf("node:%p\n", node->links[k].node); + printf("subtree:%p\n", node->links[k].subtree.ptr); + if (node->links[k].subtree.ptr) { + LOG("LINK TREE %d", k); + LOG_TREE(node->links[k].subtree); + } else { + LOG("LINK NO TREE %d", k); + } + } + next_iterator = &self->iterators.contents[i]; + } else { + if (self->iterators.size >= MAX_ITERATOR_COUNT) + continue; + link = node->links[j]; + StackIterator current_iterator = self->iterators.contents[i]; + array_push(&self->iterators, current_iterator); + next_iterator = array_back(&self->iterators); + ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); + LOG("[1] PRE COPY LEN %d", next_iterator->subtrees.size); + for (uint32_t k = 0; k < next_iterator->subtrees.size; k++) { + LOG("[1] PRE COPY TREE %d", k); + LOG_TREE(next_iterator->subtrees.contents[k]); + } + LOG("[1] NEW PTR: %p\n", (void *)next_iterator->subtrees.contents) + LOG("[1] POST COPY LEN %d", next_iterator->subtrees.size); + for (uint32_t k = 0; k < next_iterator->subtrees.size; k++) { + LOG("[1] POST COPY TREE %d", k); + LOG_TREE(next_iterator->subtrees.contents[k]); + } + } + + next_iterator->node = link.node; + if (link.subtree.ptr) { + if (include_subtrees) { + printf("PUSH INCLUDE SUBTREE\n"); + LOG("PUSH INCLUDING SUBTREE"); + LOG_TREE(link.subtree); + array_push(&next_iterator->subtrees, link.subtree); + + ts_subtree_retain(link.subtree); + } + + if (!ts_subtree_extra(link.subtree)) { + next_iterator->subtree_count++; + if (!link.is_pending) { + next_iterator->is_pending = false; + } + } + } else { + next_iterator->subtree_count++; + next_iterator->is_pending = false; + } + } + } + } + + return self->slices; +} + +Stack *ts_stack_new(SubtreePool *subtree_pool, const TSLanguage *language) { + Stack *self = ts_calloc(1, sizeof(Stack)); + + array_init(&self->heads); + array_init(&self->slices); + array_init(&self->iterators); + array_init(&self->node_pool); + array_reserve(&self->heads, 4); + array_reserve(&self->slices, 4); + array_reserve(&self->iterators, 4); + array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); + + self->subtree_pool = subtree_pool; + self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool); + self->language = language; + printf("language:%p\n", language); + ts_stack_clear(self); + + return self; } void ts_stack_delete(Stack *self) { - if (self->slices.contents) - array_delete(&self->slices); - if (self->iterators.contents) - array_delete(&self->iterators); - stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); - for (uint32_t i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); - } - array_clear(&self->heads); - if (self->node_pool.contents) { - for (uint32_t i = 0; i < self->node_pool.size; i++) - ts_free(self->node_pool.contents[i]); - array_delete(&self->node_pool); - } - array_delete(&self->heads); - ts_free(self); + if (self->slices.contents) + array_delete(&self->slices); + if (self->iterators.contents) + array_delete(&self->iterators); + stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); + for (uint32_t i = 0; i < self->heads.size; i++) { + stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); + } + array_clear(&self->heads); + if (self->node_pool.contents) { + for (uint32_t i = 0; i < self->node_pool.size; i++) + ts_free(self->node_pool.contents[i]); + array_delete(&self->node_pool); + } + array_delete(&self->heads); + ts_free(self); } -uint32_t ts_stack_version_count(const Stack *self) { - return self->heads.size; -} +uint32_t ts_stack_version_count(const Stack *self) { return self->heads.size; } TSStateId ts_stack_state(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->state; + return array_get(&self->heads, version)->node->state; } Length ts_stack_position(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->position; + return array_get(&self->heads, version)->node->position; } Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->last_external_token; + return array_get(&self->heads, version)->last_external_token; } void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) { - StackHead *head = array_get(&self->heads, version); - if (token.ptr) ts_subtree_retain(token); - if (head->last_external_token.ptr) ts_subtree_release(self->subtree_pool, head->last_external_token); - head->last_external_token = token; + StackHead *head = array_get(&self->heads, version); + if (token.ptr) + ts_subtree_retain(token); + if (head->last_external_token.ptr) + ts_subtree_release(self->subtree_pool, head->last_external_token); + head->last_external_token = token; } +void ts_stack_set_language(Stack *self, const TSLanguage *language) { self->language = language; } + +void ts_stack_set_lexer(Stack *self, Lexer *lexer) { self->lexer = lexer; } + unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - unsigned result = head->node->error_cost; - if ( - head->status == StackStatusPaused || - (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) { - result += ERROR_COST_PER_RECOVERY; - } - return result; + StackHead *head = array_get(&self->heads, version); + unsigned result = head->node->error_cost; + if (head->status == StackStatusPaused || (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) { + if (head->node->links[0].subtree.data.is_inline) { + printf("DID += FOR %s\n", + ts_language_symbol_name(self->language, ts_subtree_symbol(head->node->links[0].subtree))); + } + result += ERROR_COST_PER_RECOVERY; + } + return result; } unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - if (head->node->node_count < head->node_count_at_last_error) { - head->node_count_at_last_error = head->node->node_count; - } - return head->node->node_count - head->node_count_at_last_error; + StackHead *head = array_get(&self->heads, version); + if (head->node->node_count < head->node_count_at_last_error) { + head->node_count_at_last_error = head->node->node_count; + } + return head->node->node_count - head->node_count_at_last_error; } -void ts_stack_push( - Stack *self, - StackVersion version, - Subtree subtree, - bool pending, - TSStateId state -) { - StackHead *head = array_get(&self->heads, version); - StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); - if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count; - head->node = new_node; +void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state, + const TSLanguage *l) { + StackHead *head = array_get(&self->heads, version); + StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); + if (!subtree.ptr) + head->node_count_at_last_error = new_node->node_count; + else { + const char *s = ts_language_symbol_name(l, ts_subtree_symbol(subtree)); + printf("sym=%s\n", s); + if (strcmp(s, "ERROR") == 0) { + (void)s; + } + } + head->node = new_node; } forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { - unsigned *goal_subtree_count = payload; - if (iterator->subtree_count == *goal_subtree_count) { - return StackActionPop | StackActionStop; - } else { - return StackActionNone; - } + unsigned *goal_subtree_count = payload; + if (iterator->subtree_count == *goal_subtree_count) { + return StackActionPop | StackActionStop; + } else { + return StackActionNone; + } } StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { - return stack__iter(self, version, pop_count_callback, &count, (int)count); + return stack__iter(self, version, pop_count_callback, &count, (int)count, NULL); } forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { - (void)payload; - if (iterator->subtree_count >= 1) { - if (iterator->is_pending) { - return StackActionPop | StackActionStop; + (void)payload; + if (iterator->subtree_count >= 1) { + if (iterator->is_pending) { + return StackActionPop | StackActionStop; + } else { + return StackActionStop; + } } else { - return StackActionStop; + return StackActionNone; } - } else { - return StackActionNone; - } } StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { - StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0); - if (pop.size > 0) { - ts_stack_renumber_version(self, pop.contents[0].version, version); - pop.contents[0].version = version; - } - return pop; + StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0, NULL); + if (pop.size > 0) { + ts_stack_renumber_version(self, pop.contents[0].version, version); + pop.contents[0].version = version; + } + return pop; } forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { - if (iterator->subtrees.size > 0) { - bool *found_error = payload; - if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) { - *found_error = true; - return StackActionPop | StackActionStop; + if (iterator->subtrees.size > 0) { + bool *found_error = payload; + if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) { + *found_error = true; + return StackActionPop | StackActionStop; + } else { + return StackActionStop; + } } else { - return StackActionStop; + return StackActionNone; } - } else { - return StackActionNone; - } } SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) { - StackNode *node = array_get(&self->heads, version)->node; - for (unsigned i = 0; i < node->link_count; i++) { - if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) { - bool found_error = false; - StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1); - if (pop.size > 0) { - assert(pop.size == 1); - ts_stack_renumber_version(self, pop.contents[0].version, version); - return pop.contents[0].subtrees; - } - break; + StackNode *node = array_get(&self->heads, version)->node; + for (unsigned i = 0; i < node->link_count; i++) { + if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) { + bool found_error = false; + StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1, NULL); + if (pop.size > 0) { + assert(pop.size == 1); + ts_stack_renumber_version(self, pop.contents[0].version, version); + return pop.contents[0].subtrees; + } + break; + } } - } - return (SubtreeArray) {.size = 0}; + return (SubtreeArray){.size = 0}; } forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { - (void)payload; - return iterator->node->link_count == 0 ? StackActionPop : StackActionNone; + (void)payload; + return iterator->node->link_count == 0 ? StackActionPop : StackActionNone; } -StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) { - return stack__iter(self, version, pop_all_callback, NULL, 0); +StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version, FILE *dot_graph_file) { + return stack__iter(self, version, pop_all_callback, NULL, 0, dot_graph_file); } typedef struct { - StackSummary *summary; - unsigned max_depth; + StackSummary *summary; + unsigned max_depth; } SummarizeStackSession; forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { - SummarizeStackSession *session = payload; - TSStateId state = iterator->node->state; - unsigned depth = iterator->subtree_count; - if (depth > session->max_depth) return StackActionStop; - for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { - StackSummaryEntry entry = session->summary->contents[i]; - if (entry.depth < depth) break; - if (entry.depth == depth && entry.state == state) return StackActionNone; - } - array_push(session->summary, ((StackSummaryEntry) { - .position = iterator->node->position, - .depth = depth, - .state = state, - })); - return StackActionNone; + SummarizeStackSession *session = payload; + TSStateId state = iterator->node->state; + unsigned depth = iterator->subtree_count; + if (depth > session->max_depth) + return StackActionStop; + for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { + StackSummaryEntry entry = session->summary->contents[i]; + if (entry.depth < depth) + break; + if (entry.depth == depth && entry.state == state) + return StackActionNone; + } + array_push(session->summary, ((StackSummaryEntry){ + .position = iterator->node->position, + .depth = depth, + .state = state, + })); + return StackActionNone; } void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { - SummarizeStackSession session = { - .summary = ts_malloc(sizeof(StackSummary)), - .max_depth = max_depth - }; - array_init(session.summary); - stack__iter(self, version, summarize_stack_callback, &session, -1); - StackHead *head = &self->heads.contents[version]; - if (head->summary) { - array_delete(head->summary); - ts_free(head->summary); - } - head->summary = session.summary; + SummarizeStackSession session = {.summary = ts_malloc(sizeof(StackSummary)), .max_depth = max_depth}; + array_init(session.summary); + stack__iter(self, version, summarize_stack_callback, &session, -1, NULL); + StackHead *head = &self->heads.contents[version]; + if (head->summary) { + array_delete(head->summary); + ts_free(head->summary); + } + head->summary = session.summary; } StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->summary; + return array_get(&self->heads, version)->summary; } int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->dynamic_precedence; + return array_get(&self->heads, version)->node->dynamic_precedence; } bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) { - const StackHead *head = array_get(&self->heads, version); - const StackNode *node = head->node; - if (node->error_cost == 0) return true; - while (node) { - if (node->link_count > 0) { - Subtree subtree = node->links[0].subtree; - if (subtree.ptr) { - if (ts_subtree_total_bytes(subtree) > 0) { - return true; - } else if ( - node->node_count > head->node_count_at_last_error && - ts_subtree_error_cost(subtree) == 0 - ) { - node = node->links[0].node; - continue; + const StackHead *head = array_get(&self->heads, version); + const StackNode *node = head->node; + if (node->error_cost == 0) + return true; + while (node) { + if (node->link_count > 0) { + Subtree subtree = node->links[0].subtree; + if (subtree.ptr) { + if (ts_subtree_total_bytes(subtree) > 0) { + return true; + } else if (node->node_count > head->node_count_at_last_error && ts_subtree_error_cost(subtree) == 0) { + node = node->links[0].node; + continue; + } + } } - } + break; } - break; - } - return false; + return false; } void ts_stack_remove_version(Stack *self, StackVersion version) { - stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); - array_erase(&self->heads, version); + stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); + array_erase(&self->heads, version); } void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { - if (v1 == v2) return; - assert(v2 < v1); - assert((uint32_t)v1 < self->heads.size); - StackHead *source_head = &self->heads.contents[v1]; - StackHead *target_head = &self->heads.contents[v2]; - if (target_head->summary && !source_head->summary) { - source_head->summary = target_head->summary; - target_head->summary = NULL; - } - stack_head_delete(target_head, &self->node_pool, self->subtree_pool); - *target_head = *source_head; - array_erase(&self->heads, v1); + if (v1 == v2) + return; + assert(v2 < v1); + assert((uint32_t)v1 < self->heads.size); + StackHead *source_head = &self->heads.contents[v1]; + StackHead *target_head = &self->heads.contents[v2]; + if (target_head->summary && !source_head->summary) { + source_head->summary = target_head->summary; + target_head->summary = NULL; + } + stack_head_delete(target_head, &self->node_pool, self->subtree_pool); + *target_head = *source_head; + array_erase(&self->heads, v1); } void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) { - StackHead temporary_head = self->heads.contents[v1]; - self->heads.contents[v1] = self->heads.contents[v2]; - self->heads.contents[v2] = temporary_head; + StackHead temporary_head = self->heads.contents[v1]; + self->heads.contents[v1] = self->heads.contents[v2]; + self->heads.contents[v2] = temporary_head; } StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { - assert(version < self->heads.size); - array_push(&self->heads, self->heads.contents[version]); - StackHead *head = array_back(&self->heads); - stack_node_retain(head->node); - if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token); - head->summary = NULL; - return self->heads.size - 1; + assert(version < self->heads.size); + array_push(&self->heads, self->heads.contents[version]); + StackHead *head = array_back(&self->heads); + stack_node_retain(head->node); + if (head->last_external_token.ptr) + ts_subtree_retain(head->last_external_token); + head->summary = NULL; + return self->heads.size - 1; } bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { - if (!ts_stack_can_merge(self, version1, version2)) return false; - StackHead *head1 = &self->heads.contents[version1]; - StackHead *head2 = &self->heads.contents[version2]; - for (uint32_t i = 0; i < head2->node->link_count; i++) { - stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); - } - if (head1->node->state == ERROR_STATE) { - head1->node_count_at_last_error = head1->node->node_count; - } - ts_stack_remove_version(self, version2); - return true; + if (!ts_stack_can_merge(self, version1, version2)) + return false; + StackHead *head1 = &self->heads.contents[version1]; + StackHead *head2 = &self->heads.contents[version2]; + for (uint32_t i = 0; i < head2->node->link_count; i++) { + stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); + } + if (head1->node->state == ERROR_STATE) { + head1->node_count_at_last_error = head1->node->node_count; + } + ts_stack_remove_version(self, version2); + return true; } bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) { - StackHead *head1 = &self->heads.contents[version1]; - StackHead *head2 = &self->heads.contents[version2]; - return - head1->status == StackStatusActive && - head2->status == StackStatusActive && - head1->node->state == head2->node->state && - head1->node->position.bytes == head2->node->position.bytes && - head1->node->error_cost == head2->node->error_cost && - ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token); + StackHead *head1 = &self->heads.contents[version1]; + StackHead *head2 = &self->heads.contents[version2]; + return head1->status == StackStatusActive && head2->status == StackStatusActive && + head1->node->state == head2->node->state && head1->node->position.bytes == head2->node->position.bytes && + head1->node->error_cost == head2->node->error_cost && + ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token); } -void ts_stack_halt(Stack *self, StackVersion version) { - array_get(&self->heads, version)->status = StackStatusHalted; -} +void ts_stack_halt(Stack *self, StackVersion version) { array_get(&self->heads, version)->status = StackStatusHalted; } void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) { - StackHead *head = array_get(&self->heads, version); - head->status = StackStatusPaused; - head->lookahead_when_paused = lookahead; - head->node_count_at_last_error = head->node->node_count; + StackHead *head = array_get(&self->heads, version); + head->status = StackStatusPaused; + head->lookahead_when_paused = lookahead; + head->node_count_at_last_error = head->node->node_count; } bool ts_stack_is_active(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusActive; + return array_get(&self->heads, version)->status == StackStatusActive; } bool ts_stack_is_halted(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusHalted; + return array_get(&self->heads, version)->status == StackStatusHalted; } bool ts_stack_is_paused(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusPaused; + return array_get(&self->heads, version)->status == StackStatusPaused; } Subtree ts_stack_resume(Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - assert(head->status == StackStatusPaused); - Subtree result = head->lookahead_when_paused; - head->status = StackStatusActive; - head->lookahead_when_paused = NULL_SUBTREE; - return result; + StackHead *head = array_get(&self->heads, version); + assert(head->status == StackStatusPaused); + Subtree result = head->lookahead_when_paused; + head->status = StackStatusActive; + head->lookahead_when_paused = NULL_SUBTREE; + return result; } void ts_stack_clear(Stack *self) { - stack_node_retain(self->base_node); - for (uint32_t i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); - } - array_clear(&self->heads); - array_push(&self->heads, ((StackHead) { - .node = self->base_node, - .status = StackStatusActive, - .last_external_token = NULL_SUBTREE, - .lookahead_when_paused = NULL_SUBTREE, - })); + stack_node_retain(self->base_node); + for (uint32_t i = 0; i < self->heads.size; i++) { + stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); + } + array_clear(&self->heads); + array_push(&self->heads, ((StackHead){ + .node = self->base_node, + .status = StackStatusActive, + .last_external_token = NULL_SUBTREE, + .lookahead_when_paused = NULL_SUBTREE, + })); } bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) { - array_reserve(&self->iterators, 32); - if (!f) f = stderr; + array_reserve(&self->iterators, 32); + if (!f) + f = stderr; - fprintf(f, "digraph stack {\n"); - fprintf(f, "rankdir=\"RL\";\n"); - fprintf(f, "edge [arrowhead=none]\n"); + fprintf(f, "digraph stack {\n"); + fprintf(f, "rankdir=\"RL\";\n"); + fprintf(f, "edge [arrowhead=none]\n"); - Array(StackNode *) visited_nodes = array_new(); + Array(StackNode *) visited_nodes = array_new(); - array_clear(&self->iterators); - for (uint32_t i = 0; i < self->heads.size; i++) { - StackHead *head = &self->heads.contents[i]; - if (head->status == StackStatusHalted) continue; + array_clear(&self->iterators); + for (uint32_t i = 0; i < self->heads.size; i++) { + StackHead *head = &self->heads.contents[i]; + if (head->status == StackStatusHalted) + continue; - fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); - fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); + fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); + fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); - if (head->status == StackStatusPaused) { - fprintf(f, "color=red "); - } - fprintf(f, - "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u", - i, - ts_stack_node_count_since_error(self, i), - ts_stack_error_cost(self, i) - ); - - if (head->summary) { - fprintf(f, "\nsummary:"); - for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state); - } - - if (head->last_external_token.ptr) { - const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; - const char *data = ts_external_scanner_state_data(state); - fprintf(f, "\nexternal_scanner_state:"); - for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); - } - - fprintf(f, "\"]\n"); - array_push(&self->iterators, ((StackIterator) { - .node = head->node - })); - } - - bool all_iterators_done = false; - while (!all_iterators_done) { - all_iterators_done = true; - - for (uint32_t i = 0; i < self->iterators.size; i++) { - StackIterator iterator = self->iterators.contents[i]; - StackNode *node = iterator.node; - - for (uint32_t j = 0; j < visited_nodes.size; j++) { - if (visited_nodes.contents[j] == node) { - node = NULL; - break; + if (head->status == StackStatusPaused) { + fprintf(f, "color=red "); } - } + fprintf(f, "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u", i, + ts_stack_node_count_since_error(self, i), ts_stack_error_cost(self, i)); - if (!node) continue; - all_iterators_done = false; - - fprintf(f, "node_%p [", (void *)node); - if (node->state == ERROR_STATE) { - fprintf(f, "label=\"?\""); - } else if ( - node->link_count == 1 && - node->links[0].subtree.ptr && - ts_subtree_extra(node->links[0].subtree) - ) { - fprintf(f, "shape=point margin=0 label=\"\""); - } else { - fprintf(f, "label=\"%d\"", node->state); - } - - fprintf( - f, - " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", - node->position.extent.row + 1, - node->position.extent.column, - node->node_count, - node->error_cost, - node->dynamic_precedence - ); - - for (int j = 0; j < node->link_count; j++) { - StackLink link = node->links[j]; - fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); - if (link.is_pending) fprintf(f, "style=dashed "); - if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray "); - - if (!link.subtree.ptr) { - fprintf(f, "color=red"); - } else { - fprintf(f, "label=\""); - bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree); - if (quoted) fprintf(f, "'"); - ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree)); - if (quoted) fprintf(f, "'"); - fprintf(f, "\""); - fprintf( - f, - "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"", - ts_subtree_error_cost(link.subtree), - ts_subtree_dynamic_precedence(link.subtree) - ); + if (head->summary) { + fprintf(f, "\nsummary:"); + for (uint32_t j = 0; j < head->summary->size; j++) + fprintf(f, " %u", head->summary->contents[j].state); } - fprintf(f, "];\n"); - - StackIterator *next_iterator; - if (j == 0) { - next_iterator = &self->iterators.contents[i]; - } else { - array_push(&self->iterators, iterator); - next_iterator = array_back(&self->iterators); + if (head->last_external_token.ptr) { + const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; + const char *data = ts_external_scanner_state_data(state); + fprintf(f, "\nexternal_scanner_state:"); + for (uint32_t j = 0; j < state->length; j++) + fprintf(f, " %2X", data[j]); } - next_iterator->node = link.node; - } - array_push(&visited_nodes, node); + fprintf(f, "\"]\n"); + array_push(&self->iterators, ((StackIterator){.node = head->node})); } - } - fprintf(f, "}\n"); + bool all_iterators_done = false; + while (!all_iterators_done) { + all_iterators_done = true; - array_delete(&visited_nodes); - return true; + for (uint32_t i = 0; i < self->iterators.size; i++) { + StackIterator iterator = self->iterators.contents[i]; + StackNode *node = iterator.node; + + for (uint32_t j = 0; j < visited_nodes.size; j++) { + if (visited_nodes.contents[j] == node) { + node = NULL; + break; + } + } + + if (!node) + continue; + all_iterators_done = false; + + fprintf(f, "node_%p [", (void *)node); + if (node->state == ERROR_STATE) { + fprintf(f, "label=\"?\""); + } else if (node->link_count == 1 && node->links[0].subtree.ptr && + ts_subtree_extra(node->links[0].subtree)) { + fprintf(f, "shape=point margin=0 label=\"\""); + } else { + fprintf(f, "label=\"%d\"", node->state); + } + + fprintf(f, " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", + node->position.extent.row + 1, node->position.extent.column, node->node_count, node->error_cost, + node->dynamic_precedence); + + for (int j = 0; j < node->link_count; j++) { + StackLink link = node->links[j]; + fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); + if (link.is_pending) + fprintf(f, "style=dashed "); + if (link.subtree.ptr && ts_subtree_extra(link.subtree)) + fprintf(f, "fontcolor=gray "); + + if (!link.subtree.ptr) { + fprintf(f, "color=red"); + } else { + fprintf(f, "label=\""); + bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree); + if (quoted) + fprintf(f, "'"); + ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree)); + printf("[1]ts_subtree_error_cost(link.subtree)=%u\n", ts_subtree_error_cost(link.subtree)); + if (quoted) + fprintf(f, "'"); + fprintf(f, "\""); + fprintf(f, "labeltooltip=\"ptr: %p\nerror_cost: %u\ndynamic_precedence: %" PRId32, + (void *)link.subtree.ptr, ts_subtree_error_cost(link.subtree), + ts_subtree_dynamic_precedence(link.subtree)); + if (ts_subtree_is_error(link.subtree) && ts_subtree_child_count(link.subtree) == 0 && + link.subtree.ptr->lookahead_char != 0) { + fprintf(f, "\ncharacter: '%c'", link.subtree.ptr->lookahead_char); + } else { + fprintf(f, "\nno character"); + } + if (link.subtree.data.is_inline) { + fprintf(f, "\nis_inline:%d", link.subtree.data.is_inline); + } else { + fprintf(f, "\nog_ptr:%p", (void *)link.subtree.ptr->og_ptr); + } + fprintf(f, "\""); + if (ts_subtree_error_cost(link.subtree) == 500 && + ts_language_symbol_name(language, ts_subtree_symbol(link.subtree)) != NULL) { + printf("ptr to target stack tree node: %p\n", (void *)link.subtree.ptr); + } else if (ts_subtree_error_cost(link.subtree) == 0 && + ts_language_symbol_name(language, ts_subtree_symbol(link.subtree)) != NULL) { + printf("ptr to bad target stack tree node: %p\n", (void *)link.subtree.ptr); + } + } + + fprintf(f, "];\n"); + + StackIterator *next_iterator; + if (j == 0) { + next_iterator = &self->iterators.contents[i]; + } else { + array_push(&self->iterators, iterator); + next_iterator = array_back(&self->iterators); + } + next_iterator->node = link.node; + } + + array_push(&visited_nodes, node); + } + } + + fprintf(f, "}\n"); + + array_delete(&visited_nodes); + return true; } #undef forceinline diff --git a/lib/src/stack.h b/lib/src/stack.h index 86abbc9d..c70f3c6e 100644 --- a/lib/src/stack.h +++ b/lib/src/stack.h @@ -7,6 +7,7 @@ extern "C" { #include "./array.h" #include "./subtree.h" +#include "./lexer.h" #include "./error_costs.h" #include @@ -29,7 +30,7 @@ typedef struct { typedef Array(StackSummaryEntry) StackSummary; // Create a stack. -Stack *ts_stack_new(SubtreePool *); +Stack *ts_stack_new(SubtreePool *, const TSLanguage*); // Release the memory reserved for a given stack. void ts_stack_delete(Stack *); @@ -47,6 +48,10 @@ Subtree ts_stack_last_external_token(const Stack *, StackVersion); // Set the last external token associated with a given version of the stack. void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree ); +void ts_stack_set_language(Stack *self, const TSLanguage *language); + +void ts_stack_set_lexer(Stack *self, Lexer *lexer); + // Get the position of the given version of the stack within the document. Length ts_stack_position(const Stack *, StackVersion); @@ -55,7 +60,7 @@ Length ts_stack_position(const Stack *, StackVersion); // This transfers ownership of the tree to the Stack. Callers that // need to retain ownership of the tree for their own purposes should // first retain the tree. -void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId); +void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId, const TSLanguage*); // Pop the given number of entries from the given version of the stack. This // operation can increase the number of stack versions by revealing multiple @@ -71,7 +76,7 @@ SubtreeArray ts_stack_pop_error(Stack *, StackVersion); StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); // Remove any all trees from the given version of the stack. -StackSliceArray ts_stack_pop_all(Stack *, StackVersion); +StackSliceArray ts_stack_pop_all(Stack *, StackVersion, FILE*); // Get the maximum number of tree nodes reachable from this version of the stack // since the last error was detected. diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 4524e182..be78af15 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -22,6 +22,7 @@ typedef struct { #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX #define TS_MAX_TREE_POOL_SIZE 32 + // ExternalScannerState void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) { @@ -196,8 +197,10 @@ Subtree ts_subtree_new_leaf( }}; } else { SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + printf("[1] ALLOCATED %p\n", (void *)data); *data = (SubtreeHeapData) { .ref_count = 1, + .og_ptr = (size_t)data, .padding = padding, .size = size, .lookahead_bytes = lookahead_bytes, @@ -272,6 +275,7 @@ MutableSubtree ts_subtree_clone(Subtree self) { ); } result->ref_count = 1; + printf("NEW SUBTREE: %p\n", result); return (MutableSubtree) {.ptr = result}; } @@ -375,7 +379,10 @@ void ts_subtree_summarize_children( self.ptr->named_child_count = 0; self.ptr->visible_child_count = 0; - self.ptr->error_cost = 0; + if (!ts_subtree_is_error(ts_subtree_from_mut(self))) { + printf("SET 0: %s\n", ts_language_symbol_name(language, self.ptr->symbol)); + self.ptr->error_cost = 0; + } self.ptr->repeat_depth = 0; self.ptr->visible_descendant_count = 0; self.ptr->has_external_tokens = false; @@ -422,16 +429,27 @@ void ts_subtree_summarize_children( } uint32_t grandchild_count = ts_subtree_child_count(child); + printf("HI: %s\n", ts_language_symbol_name(language, self.ptr->symbol)); if ( self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat ) { + printf("THE FIRST BLOCK\n"); if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { if (ts_subtree_visible(child)) { self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; } else if (grandchild_count > 0) { self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; } + } else { + printf("FALSE BLOCK\n"); + } + + if (ts_subtree_is_error(child) && grandchild_count == 0) { + MutableSubtree child_mut = ts_subtree_to_mut_unsafe(child); + child_mut.ptr->error_cost = ERROR_COST_PER_RECOVERY + + ERROR_COST_PER_SKIPPED_CHAR * child.ptr->size.bytes + + ERROR_COST_PER_SKIPPED_LINE * child.ptr->size.extent.row; } } @@ -465,10 +483,13 @@ void ts_subtree_summarize_children( self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; + printf("HI 2: %d\n", self.ptr->symbol); + printf("HI 2: %s\n", ts_language_symbol_name(language, self.ptr->symbol)); if ( self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat ) { + printf("HI 3\n"); self.ptr->error_cost += ERROR_COST_PER_RECOVERY + ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + @@ -519,9 +540,10 @@ MutableSubtree ts_subtree_new_node( children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); } SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; - + printf("[0]ALLOCATED %p\n", (void *)data); *data = (SubtreeHeapData) { .ref_count = 1, + .og_ptr = (size_t)data, .symbol = symbol, .child_count = children->size, .visible = metadata.visible, @@ -538,6 +560,11 @@ MutableSubtree ts_subtree_new_node( }} }; MutableSubtree result = {.ptr = data}; + if (symbol == ts_builtin_sym_error_repeat) { + printf("ptr: %p\n", (void *)result.ptr); + } else if (symbol == ts_builtin_sym_error) { + printf("ptr: %p\n", (void *)result.ptr); + } ts_subtree_summarize_children(result, language); return result; } @@ -981,6 +1008,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); fprintf(f, "tree_%p [label=\"", (void *)self); ts_language_write_symbol_as_dot_string(language, f, symbol); + printf("[0]ts_subtree_error_cost(link.subtree)=%u\n", ts_subtree_error_cost(*self)); fprintf(f, "\""); if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext"); @@ -994,7 +1022,8 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, "depends-on-column: %u\n" "descendant-count: %u\n" "repeat-depth: %u\n" - "lookahead-bytes: %u", + "lookahead-bytes: %u\n" + "ptr: %p", start_offset, end_offset, ts_subtree_parse_state(*self), ts_subtree_error_cost(*self), @@ -1002,11 +1031,23 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, ts_subtree_depends_on_column(*self), ts_subtree_visible_descendant_count(*self), ts_subtree_repeat_depth(*self), - ts_subtree_lookahead_bytes(*self) + ts_subtree_lookahead_bytes(*self), + (void*)self->ptr ); + if (self->data.is_inline) { + fprintf(f, "\nis_inline: %d", self->data.is_inline); + } else { + fprintf(f, "\nog_ptr: %p", (void*)self->ptr->og_ptr); + } + if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); + printf("ptr %p is error with an error cost of %d\n", (void*)self->ptr, ts_subtree_error_cost(*self)); + printf("branch [1] %d %p\n", ts_subtree_missing(*self), self->ptr); + printf("branch [2] %d\n", self->data.is_inline); + } else { + printf("no call!\n"); } fprintf(f, "\"]\n"); diff --git a/lib/src/subtree.h b/lib/src/subtree.h index f140ecdb..97608f98 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -110,6 +110,7 @@ struct SubtreeInlineData { // the inline representation. typedef struct { volatile uint32_t ref_count; + size_t og_ptr; Length padding; Length size; uint32_t lookahead_bytes;