diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 52e7c96b..0906f754 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -619,15 +619,32 @@ static bool ts_parser__replace_children(TSParser *self, Subtree *tree, SubtreeAr } } -static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, - uint32_t count, int dynamic_precedence, - uint16_t alias_sequence_id, bool fragile) { +static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, + uint32_t count, int dynamic_precedence, + uint16_t alias_sequence_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); - + uint32_t removed_version_count = 0; StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); for (uint32_t i = 0; i < pop.size; i++) { StackSlice slice = pop.contents[i]; + StackVersion slice_version = slice.version - removed_version_count; + + // Error recovery can sometimes cause lots of stack versions to merge, + // such that a single pop operation can produce a lot of slices. + // Avoid creating too many stack versions in that situation. + if (i > 0 && slice_version > MAX_VERSION_COUNT) { + ts_stack_remove_version(self->stack, slice_version); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + removed_version_count++; + while (i + 1 < pop.size) { + StackSlice next_slice = pop.contents[i + 1]; + if (next_slice.version != slice.version) break; + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); + i++; + } + continue; + } // Extra tokens on top of the stack should not be included in this new parent // node. 
They will be re-pushed onto the stack after the parent node is @@ -666,7 +683,7 @@ static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, T parent->dynamic_precedence += dynamic_precedence; parent->alias_sequence_id = alias_sequence_id; - TSStateId state = ts_stack_state(self->stack, slice.version); + TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); if (fragile || pop.size > 1 || initial_version_count > 1) { parent->fragile_left = true; @@ -678,36 +695,24 @@ static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, T // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. - ts_stack_push(self->stack, slice.version, parent, false, next_state); + ts_stack_push(self->stack, slice_version, parent, false, next_state); for (uint32_t j = parent->children.size; j < slice.subtrees.size; j++) { - ts_stack_push(self->stack, slice.version, slice.subtrees.contents[j], false, next_state); + ts_stack_push(self->stack, slice_version, slice.subtrees.contents[j], false, next_state); } - if (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { - i++; - while (i < pop.size) { - StackSlice slice = pop.contents[i]; - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - ts_stack_halt(self->stack, slice.version); - i++; - } - while (ts_stack_version_count(self->stack) > slice.version + 1) { - ts_stack_remove_version(self->stack, slice.version + 1); - } - break; - } - } - - for (StackVersion i = initial_version_count; i < ts_stack_version_count(self->stack); i++) { - for (StackVersion j = initial_version_count; j < i; j++) { - if (ts_stack_merge(self->stack, j, i)) { - i--; + for (StackVersion j = 0; j < slice_version; j++) { + if (j == version) continue; + if (ts_stack_merge(self->stack, j, slice_version)) { + removed_version_count++; break; } } } - return pop; + // Return the first 
new stack version that was created. + return ts_stack_version_count(self->stack) > initial_version_count + ? initial_version_count + : STACK_VERSION_NONE; } static void ts_parser__accept(TSParser *self, StackVersion version, const Subtree *lookahead) { @@ -754,8 +759,9 @@ static void ts_parser__accept(TSParser *self, StackVersion version, const Subtre ts_stack_halt(self->stack, version); } -static bool ts_parser__do_all_potential_reductions(TSParser *self, StackVersion starting_version, - TSSymbol lookahead_symbol) { +static bool ts_parser__do_all_potential_reductions(TSParser *self, + StackVersion starting_version, + TSSymbol lookahead_symbol) { uint32_t initial_version_count = ts_stack_version_count(self->stack); bool can_shift_lookahead_symbol = false; @@ -810,10 +816,11 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, StackVersion } } + StackVersion reduction_version = STACK_VERSION_NONE; for (uint32_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; - ts_parser__reduce( + reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, action.dynamic_precedence, action.alias_sequence_id, true @@ -822,8 +829,8 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, StackVersion if (has_shift_action) { can_shift_lookahead_symbol = true; - } else if (self->reduce_actions.size > 0 && i < MAX_VERSION_COUNT) { - ts_stack_renumber_version(self->stack, version_count, version); + } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) { + ts_stack_renumber_version(self->stack, reduction_version, version); continue; } else if (lookahead_symbol != 0) { ts_stack_remove_version(self->stack, version); @@ -1168,13 +1175,14 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), 
action.params.child_count); - StackSliceArray reduction = ts_parser__reduce( + StackVersion reduction_version = ts_parser__reduce( self, version, action.params.symbol, action.params.child_count, action.params.dynamic_precedence, action.params.alias_sequence_id, is_fragile ); - StackSlice slice = *array_front(&reduction); - last_reduction_version = slice.version; + if (reduction_version != STACK_VERSION_NONE) { + last_reduction_version = reduction_version; + } break; } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 8921d559..0e78663c 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -160,24 +160,42 @@ static bool stack__subtree_is_equivalent(const Subtree *left, const Subtree *rig right && left->symbol == right->symbol && ((left->error_cost > 0 && right->error_cost > 0) || - (left->children.size == 0 && right->children.size == 0 && - left->padding.bytes == right->padding.bytes && + (left->padding.bytes == right->padding.bytes && left->size.bytes == right->size.bytes && left->extra == right->extra && ts_subtree_external_scanner_state_eq(left, right)))); } -static void stack_node_add_link(StackNode *self, StackLink link) { +static void stack_node_add_link(StackNode *self, StackLink link, SubtreePool *subtree_pool) { if (link.node == self) return; for (int i = 0; i < self->link_count; i++) { - StackLink existing_link = self->links[i]; - if (stack__subtree_is_equivalent(existing_link.subtree, link.subtree)) { - if (existing_link.node == link.node) return; - if (existing_link.node->state == link.node->state && - existing_link.node->position.bytes == link.node->position.bytes) { + StackLink *existing_link = &self->links[i]; + if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) { + // In general, we preserve ambiguities until they are removed from the stack + // during a pop operation where multiple paths lead to the same node. 
But in + // the special case where two links directly connect the same pair of nodes, + // we can safely remove the ambiguity ahead of time without changing behavior. + if (existing_link->node == link.node) { + if (link.subtree->dynamic_precedence > existing_link->subtree->dynamic_precedence) { + ts_subtree_retain(link.subtree); + ts_subtree_release(subtree_pool, existing_link->subtree); + existing_link->subtree = link.subtree; + self->dynamic_precedence = link.node->dynamic_precedence + link.subtree->dynamic_precedence; + } + return; + } + + // If the previous nodes are mergeable, merge them recursively. + if (existing_link->node->state == link.node->state && + existing_link->node->position.bytes == link.node->position.bytes) { for (int j = 0; j < link.node->link_count; j++) { - stack_node_add_link(existing_link.node, link.node->links[j]); + stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); + } + int dynamic_precedence = link.node->dynamic_precedence; + if (link.subtree) dynamic_precedence += link.subtree->dynamic_precedence; + if (dynamic_precedence > self->dynamic_precedence) { + self->dynamic_precedence = dynamic_precedence; } return; } @@ -193,6 +211,10 @@ static void stack_node_add_link(StackNode *self, StackLink link) { unsigned node_count = link.node->node_count; if (link.subtree) node_count += link.subtree->node_count; if (node_count > self->node_count) self->node_count = node_count; + + int dynamic_precedence = link.node->dynamic_precedence; + if (link.subtree) dynamic_precedence += link.subtree->dynamic_precedence; + if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence; } static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) { @@ -554,6 +576,7 @@ void ts_stack_remove_version(Stack *self, StackVersion version) { } void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { + if (v1 == v2) return; assert(v2 < v1); 
assert((uint32_t)v1 < self->heads.size); StackHead *source_head = &self->heads.contents[v1]; @@ -588,7 +611,7 @@ bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { StackHead *head1 = &self->heads.contents[version1]; StackHead *head2 = &self->heads.contents[version2]; for (uint32_t i = 0; i < head2->node->link_count; i++) { - stack_node_add_link(head1->node, head2->node->links[i]); + stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); } if (head1->node->state == ERROR_STATE) { head1->node_count_at_last_error = head1->node->node_count;