Restructure query_cursor_advance to explicitly control which hidden nodes it descends into

This commit is contained in:
Max Brunsfeld 2023-02-15 14:03:15 -08:00
parent 29c9073177
commit fa869cf3ed
2 changed files with 492 additions and 428 deletions

View file

@ -309,6 +309,7 @@ struct TSQueryCursor {
TSPoint start_point;
TSPoint end_point;
uint32_t next_state_id;
bool on_visible_node;
bool ascending;
bool halted;
bool did_exceed_match_limit;
@ -1163,12 +1164,12 @@ static void ts_query__perform_analysis(
#ifdef DEBUG_ANALYZE_QUERY
printf("Iteration: %u. Final step indices:", iteration);
for (unsigned j = 0; j < final_step_indices->size; j++) {
printf(" %4u", final_step_indices->contents[j]);
for (unsigned j = 0; j < analysis->final_step_indices.size; j++) {
printf(" %4u", analysis->final_step_indices.contents[j]);
}
printf("\n");
for (unsigned j = 0; j < states->size; j++) {
AnalysisState *state = states->contents[j];
for (unsigned j = 0; j < analysis->states.size; j++) {
AnalysisState *state = analysis->states.contents[j];
printf(" %3u: step: %u, stack: [", j, state->step_index);
for (unsigned k = 0; k < state->depth; k++) {
printf(
@ -1710,7 +1711,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
}
#ifdef DEBUG_ANALYZE_QUERY
printf("\nWalk states for %s:\n", ts_language_symbol_name(self->language, states.contents[0]->stack[0].parent_symbol));
printf(
"\nWalk states for %s:\n",
ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol)
);
#endif
analysis.did_abort = false;
@ -1911,7 +1915,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
#ifdef DEBUG_ANALYZE_QUERY
if (self->repeat_symbols_with_rootless_patterns.size > 0) {
printf("\nRepetition symbols with rootless patterns:\n");
printf("aborted analysis: %d\n", analyzer.did_abort);
printf("aborted analysis: %d\n", analysis.did_abort);
for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) {
TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i];
printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol));
@ -2986,6 +2990,7 @@ void ts_query_cursor_exec(
array_clear(&self->finished_states);
ts_tree_cursor_reset(&self->cursor, node);
capture_list_pool_reset(&self->capture_list_pool);
self->on_visible_node = true;
self->next_state_id = 0;
self->depth = 0;
self->ascending = false;
@ -3320,6 +3325,50 @@ static QueryState *ts_query_cursor__copy_state(
return &self->states.contents[state_index + 1];
}
// Decide whether the cursor should descend into the current node even though
// that node does not intersect the cursor's range (the caller only consults
// this function when the node is outside of the range).
//
// Returns `true` when either:
//   * some in-progress state's next step lies deeper in the tree than the
//     cursor's current depth, or
//   * the current node is hidden, and it is either not a repetition node or
//     it is a repetition node whose symbol is found in the query's
//     `repeat_symbols_with_rootless_patterns` list.
// Returns `false` otherwise. The cursor and its states are not modified.
static inline bool ts_query_cursor__should_descend_outside_of_range(
  TSQueryCursor *self
) {
  // If there are in-progress matches whose remaining steps occur
  // deeper in the tree, then descend.
  for (unsigned i = 0; i < self->states.size; i++) {
    QueryState *state = &self->states.contents[i];
    QueryStep *next_step = &self->query->steps.contents[state->step_index];
    // The explicit casts mirror the identical depth comparison performed
    // when entering and leaving nodes in `ts_query_cursor__advance`.
    if (
      next_step->depth != PATTERN_DONE_MARKER &&
      (uint32_t)state->start_depth + (uint32_t)next_step->depth > self->depth
    ) {
      return true;
    }
  }

  // If the current node is hidden, then a non-rooted pattern might match
  // one of its roots inside of this node, and match another of its roots
  // as part of a sibling node, so we may need to descend.
  if (!self->on_visible_node) {
    // Descending into a repetition node outside of the range can be
    // expensive, because these nodes can have many visible children.
    // Avoid descending into repetition nodes unless we have already
    // determined that this query can match rootless patterns inside
    // of this type of repetition node.
    Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor);
    if (ts_subtree_is_repetition(subtree)) {
      bool exists = false;
      uint32_t index = 0;
      // NOTE(review): the empty second argument (`,,`) appears intentional;
      // presumably `array_search_sorted_by` is a macro that accepts an empty
      // field argument. Confirm against its definition before "fixing" it.
      array_search_sorted_by(
        &self->query->repeat_symbols_with_rootless_patterns,,
        ts_subtree_symbol(subtree),
        &index,
        &exists
      );
      return exists;
    }

    return true;
  }

  return false;
}
// Walk the tree, processing patterns until at least one pattern finishes,
// If one or more patterns finish, return `true` and store their states in the
// `finished_states` array. Multiple patterns can finish on the same node. If
@ -3351,219 +3400,49 @@ static inline bool ts_query_cursor__advance(
);
// Leave this node by stepping to its next sibling or to its parent.
if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
self->ascending = false;
} else if (ts_tree_cursor_goto_parent(&self->cursor)) {
self->depth--;
} else {
LOG("halt at root\n");
self->halted = true;
}
// After leaving a node, remove any states that cannot make further progress.
uint32_t deleted_count = 0;
for (unsigned i = 0, n = self->states.size; i < n; i++) {
QueryState *state = &self->states.contents[i];
QueryStep *step = &self->query->steps.contents[state->step_index];
// If a state completed its pattern inside of this node, but was deferred from finishing
// in order to search for longer matches, mark it as finished.
if (step->depth == PATTERN_DONE_MARKER) {
if (state->start_depth > self->depth || self->halted) {
LOG(" finish pattern %u\n", state->pattern_index);
array_push(&self->finished_states, *state);
did_match = true;
deleted_count++;
continue;
}
}
// If a state needed to match something within this node, then remove that state
// as it has failed to match.
else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) {
LOG(
" failed to match. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(
&self->capture_list_pool,
state->capture_list_id
);
deleted_count++;
continue;
}
if (deleted_count > 0) {
self->states.contents[i - deleted_count] = *state;
}
}
self->states.size -= deleted_count;
}
// Enter a new node.
else {
// Get the properties of the current node.
TSNode node = ts_tree_cursor_current_node(&self->cursor);
TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
TSSymbol symbol = ts_node_symbol(node);
bool is_named = ts_node_is_named(node);
bool has_later_siblings;
bool has_later_named_siblings;
bool can_have_later_siblings_with_this_field;
TSFieldId field_id = 0;
TSSymbol supertypes[8] = {0};
unsigned supertype_count = 8;
ts_tree_cursor_current_status(
&self->cursor,
&field_id,
&has_later_siblings,
&has_later_named_siblings,
&can_have_later_siblings_with_this_field,
supertypes,
&supertype_count
);
LOG(
"enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
self->depth,
ts_node_type(node),
ts_language_field_name_for_id(self->query->language, field_id),
ts_node_start_point(node).row,
self->states.size,
self->finished_states.size
);
bool parent_intersects_range = ts_node_is_null(parent_node) || (
ts_node_end_byte(parent_node) > self->start_byte &&
ts_node_start_byte(parent_node) < self->end_byte &&
point_gt(ts_node_end_point(parent_node), self->start_point) &&
point_lt(ts_node_start_point(parent_node), self->end_point)
);
bool node_intersects_range = parent_intersects_range && (
ts_node_end_byte(node) > self->start_byte &&
ts_node_start_byte(node) < self->end_byte &&
point_gt(ts_node_end_point(node), self->start_point) &&
point_lt(ts_node_start_point(node), self->end_point)
);
bool node_is_error = symbol == ts_builtin_sym_error;
bool parent_is_error =
!ts_node_is_null(parent_node) &&
ts_node_symbol(parent_node) == ts_builtin_sym_error;
// Add new states for any patterns whose root node is a wildcard.
if (!node_is_error) {
for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
PatternEntry *pattern = &self->query->pattern_map.contents[i];
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
QueryStep *step = &self->query->steps.contents[pattern->step_index];
if (
(pattern->is_rooted ?
node_intersects_range :
(parent_intersects_range && !parent_is_error)) &&
(!step->field || field_id == step->field) &&
(!step->supertype_symbol || supertype_count > 0)
) {
ts_query_cursor__add_state(self, pattern);
}
}
}
// Add new states for any patterns whose root node matches this node.
unsigned i;
if (ts_query__pattern_map_search(self->query, symbol, &i)) {
PatternEntry *pattern = &self->query->pattern_map.contents[i];
QueryStep *step = &self->query->steps.contents[pattern->step_index];
do {
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (
(pattern->is_rooted ?
node_intersects_range :
(parent_intersects_range && !parent_is_error)) &&
(!step->field || field_id == step->field)
) {
ts_query_cursor__add_state(self, pattern);
}
// Advance to the next pattern whose root node matches this node.
i++;
if (i == self->query->pattern_map.size) break;
pattern = &self->query->pattern_map.contents[i];
step = &self->query->steps.contents[pattern->step_index];
} while (step->symbol == symbol);
}
// Update all of the in-progress states with current node.
for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) {
QueryState *state = &self->states.contents[i];
QueryStep *step = &self->query->steps.contents[state->step_index];
state->has_in_progress_alternatives = false;
copy_count = 0;
// Check that the node matches all of the criteria for the next
// step of the pattern.
if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
// Determine if this node matches this step of the pattern, and also
// if this node can have later siblings that match this step of the
// pattern.
bool node_does_match = false;
if (step->symbol == WILDCARD_SYMBOL) {
node_does_match = !node_is_error && (is_named || !step->is_named);
} else {
node_does_match = symbol == step->symbol;
}
bool later_sibling_can_match = has_later_siblings;
if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
later_sibling_can_match = false;
}
if (step->is_last_child && has_later_named_siblings) {
node_does_match = false;
}
if (step->supertype_symbol) {
bool has_supertype = false;
for (unsigned j = 0; j < supertype_count; j++) {
if (supertypes[j] == step->supertype_symbol) {
has_supertype = true;
break;
}
}
if (!has_supertype) node_does_match = false;
}
if (step->field) {
if (step->field == field_id) {
if (!can_have_later_siblings_with_this_field) {
later_sibling_can_match = false;
}
switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) {
case TreeCursorStepVisible:
self->on_visible_node = true;
self->ascending = false;
break;
case TreeCursorStepHidden:
self->depth--;
self->on_visible_node = false;
self->ascending = false;
break;
default:
if (ts_tree_cursor_goto_parent(&self->cursor)) {
self->depth--;
} else {
node_does_match = false;
LOG("halt at root\n");
self->halted = true;
}
}
}
if (step->negated_field_list_id) {
TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
for (;;) {
TSFieldId negated_field_id = *negated_field_ids;
if (negated_field_id) {
negated_field_ids++;
if (ts_node_child_by_field_id(node, negated_field_id).id) {
node_does_match = false;
break;
}
} else {
break;
if (self->on_visible_node) {
// After leaving a node, remove any states that cannot make further progress.
uint32_t deleted_count = 0;
for (unsigned i = 0, n = self->states.size; i < n; i++) {
QueryState *state = &self->states.contents[i];
QueryStep *step = &self->query->steps.contents[state->step_index];
// If a state completed its pattern inside of this node, but was deferred from finishing
// in order to search for longer matches, mark it as finished.
if (step->depth == PATTERN_DONE_MARKER) {
if (state->start_depth > self->depth || self->halted) {
LOG(" finish pattern %u\n", state->pattern_index);
array_push(&self->finished_states, *state);
did_match = true;
deleted_count++;
continue;
}
}
}
// Remove states immediately if it is ever clear that they cannot match.
if (!node_does_match) {
if (!later_sibling_can_match) {
// If a state needed to match something within this node, then remove that state
// as it has failed to match.
else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) {
LOG(
" discard state. pattern:%u, step:%u\n",
" failed to match. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
@ -3571,249 +3450,428 @@ static inline bool ts_query_cursor__advance(
&self->capture_list_pool,
state->capture_list_id
);
array_erase(&self->states, i);
i--;
deleted_count++;
continue;
}
continue;
}
// Some patterns can match their root node in multiple ways, capturing different
// children. If this pattern step could match later children within the same
// parent, then this query state cannot simply be updated in place. It must be
// split into two states: one that matches this node, and one which skips over
// this node, to preserve the possibility of matching later siblings.
if (later_sibling_can_match && (
step->contains_captures ||
ts_query__step_is_fallible(self->query, state->step_index)
)) {
if (ts_query_cursor__copy_state(self, &state)) {
LOG(
" split state for capture. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
copy_count++;
if (deleted_count > 0) {
self->states.contents[i - deleted_count] = *state;
}
}
self->states.size -= deleted_count;
}
}
// If this pattern started with a wildcard, such that the pattern map
// actually points to the *second* step of the pattern, then check
// that the node has a parent, and capture the parent node if necessary.
if (state->needs_parent) {
TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
if (ts_node_is_null(parent)) {
LOG(" missing parent node\n");
state->dead = true;
} else {
state->needs_parent = false;
QueryStep *skipped_wildcard_step = step;
do {
skipped_wildcard_step--;
} while (
skipped_wildcard_step->is_dead_end ||
skipped_wildcard_step->is_pass_through ||
skipped_wildcard_step->depth > 0
);
if (skipped_wildcard_step->capture_ids[0] != NONE) {
LOG(" capture wildcard parent\n");
ts_query_cursor__capture(
self,
state,
skipped_wildcard_step,
parent
);
}
}
}
// Enter a new node.
else {
// Get the properties of the current node.
TSNode node = ts_tree_cursor_current_node(&self->cursor);
TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
// If the current node is captured in this pattern, add it to the capture list.
if (step->capture_ids[0] != NONE) {
ts_query_cursor__capture(self, state, step, node);
}
bool parent_precedes_range = !ts_node_is_null(parent_node) && (
ts_node_end_byte(parent_node) <= self->start_byte ||
point_lte(ts_node_end_point(parent_node), self->start_point)
);
bool parent_follows_range = !ts_node_is_null(parent_node) && (
ts_node_start_byte(parent_node) >= self->end_byte ||
point_gte(ts_node_start_point(parent_node), self->end_point)
);
bool node_precedes_range = parent_precedes_range || (
ts_node_end_byte(node) <= self->start_byte ||
point_lte(ts_node_end_point(node), self->start_point)
);
bool node_follows_range = parent_follows_range || (
ts_node_start_byte(node) >= self->end_byte ||
point_gte(ts_node_start_point(node), self->end_point)
);
bool parent_intersects_range = !parent_precedes_range && !parent_follows_range;
bool node_intersects_range = !node_precedes_range && !node_follows_range;
if (state->dead) {
array_erase(&self->states, i);
i--;
continue;
}
// Advance this state to the next step of its pattern.
state->step_index++;
state->seeking_immediate_match = false;
if (self->on_visible_node) {
TSSymbol symbol = ts_node_symbol(node);
bool is_named = ts_node_is_named(node);
bool has_later_siblings;
bool has_later_named_siblings;
bool can_have_later_siblings_with_this_field;
TSFieldId field_id = 0;
TSSymbol supertypes[8] = {0};
unsigned supertype_count = 8;
ts_tree_cursor_current_status(
&self->cursor,
&field_id,
&has_later_siblings,
&has_later_named_siblings,
&can_have_later_siblings_with_this_field,
supertypes,
&supertype_count
);
LOG(
" advance state. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
"enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
self->depth,
ts_node_type(node),
ts_language_field_name_for_id(self->query->language, field_id),
ts_node_start_point(node).row,
self->states.size,
self->finished_states.size
);
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
bool node_is_error = symbol == ts_builtin_sym_error;
bool parent_is_error =
!ts_node_is_null(parent_node) &&
ts_node_symbol(parent_node) == ts_builtin_sym_error;
// If this state's next step has an alternative step, then copy the state in order
// to pursue both alternatives. The alternative step itself may have an alternative,
// so this is an iterative process.
unsigned end_index = i + 1;
for (unsigned j = i; j < end_index; j++) {
QueryState *state = &self->states.contents[j];
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (next_step->alternative_index != NONE) {
// A "dead-end" step exists only to add a non-sequential jump into the step sequence,
// via its alternative index. When a state reaches a dead-end step, it jumps straight
// to the step's alternative.
if (next_step->is_dead_end) {
state->step_index = next_step->alternative_index;
j--;
continue;
// Add new states for any patterns whose root node is a wildcard.
if (!node_is_error) {
for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
PatternEntry *pattern = &self->query->pattern_map.contents[i];
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
QueryStep *step = &self->query->steps.contents[pattern->step_index];
if (
(pattern->is_rooted ?
node_intersects_range :
(parent_intersects_range && !parent_is_error)) &&
(!step->field || field_id == step->field) &&
(!step->supertype_symbol || supertype_count > 0)
) {
ts_query_cursor__add_state(self, pattern);
}
}
}
// Add new states for any patterns whose root node matches this node.
unsigned i;
if (ts_query__pattern_map_search(self->query, symbol, &i)) {
PatternEntry *pattern = &self->query->pattern_map.contents[i];
QueryStep *step = &self->query->steps.contents[pattern->step_index];
do {
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (
(pattern->is_rooted ?
node_intersects_range :
(parent_intersects_range && !parent_is_error)) &&
(!step->field || field_id == step->field)
) {
ts_query_cursor__add_state(self, pattern);
}
// A "pass-through" step exists only to add a branch into the step sequence,
// via its alternative_index. When a state reaches a pass-through step, it splits
// in order to process the alternative step, and then it advances to the next step.
if (next_step->is_pass_through) {
state->step_index++;
j--;
}
// Advance to the next pattern whose root node matches this node.
i++;
if (i == self->query->pattern_map.size) break;
pattern = &self->query->pattern_map.contents[i];
step = &self->query->steps.contents[pattern->step_index];
} while (step->symbol == symbol);
}
QueryState *copy = ts_query_cursor__copy_state(self, &state);
if (copy) {
// Update all of the in-progress states with current node.
for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) {
QueryState *state = &self->states.contents[i];
QueryStep *step = &self->query->steps.contents[state->step_index];
state->has_in_progress_alternatives = false;
copy_count = 0;
// Check that the node matches all of the criteria for the next
// step of the pattern.
if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
// Determine if this node matches this step of the pattern, and also
// if this node can have later siblings that match this step of the
// pattern.
bool node_does_match = false;
if (step->symbol == WILDCARD_SYMBOL) {
node_does_match = !node_is_error && (is_named || !step->is_named);
} else {
node_does_match = symbol == step->symbol;
}
bool later_sibling_can_match = has_later_siblings;
if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
later_sibling_can_match = false;
}
if (step->is_last_child && has_later_named_siblings) {
node_does_match = false;
}
if (step->supertype_symbol) {
bool has_supertype = false;
for (unsigned j = 0; j < supertype_count; j++) {
if (supertypes[j] == step->supertype_symbol) {
has_supertype = true;
break;
}
}
if (!has_supertype) node_does_match = false;
}
if (step->field) {
if (step->field == field_id) {
if (!can_have_later_siblings_with_this_field) {
later_sibling_can_match = false;
}
} else {
node_does_match = false;
}
}
if (step->negated_field_list_id) {
TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
for (;;) {
TSFieldId negated_field_id = *negated_field_ids;
if (negated_field_id) {
negated_field_ids++;
if (ts_node_child_by_field_id(node, negated_field_id).id) {
node_does_match = false;
break;
}
} else {
break;
}
}
}
// Remove states immediately if it is ever clear that they cannot match.
if (!node_does_match) {
if (!later_sibling_can_match) {
LOG(
" split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
copy->pattern_index,
copy->step_index,
next_step->alternative_index,
next_step->alternative_is_immediate,
capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
" discard state. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(
&self->capture_list_pool,
state->capture_list_id
);
array_erase(&self->states, i);
i--;
}
continue;
}
// Some patterns can match their root node in multiple ways, capturing different
// children. If this pattern step could match later children within the same
// parent, then this query state cannot simply be updated in place. It must be
// split into two states: one that matches this node, and one which skips over
// this node, to preserve the possibility of matching later siblings.
if (later_sibling_can_match && (
step->contains_captures ||
ts_query__step_is_fallible(self->query, state->step_index)
)) {
if (ts_query_cursor__copy_state(self, &state)) {
LOG(
" split state for capture. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
end_index++;
copy_count++;
copy->step_index = next_step->alternative_index;
if (next_step->alternative_is_immediate) {
copy->seeking_immediate_match = true;
}
}
// If this pattern started with a wildcard, such that the pattern map
// actually points to the *second* step of the pattern, then check
// that the node has a parent, and capture the parent node if necessary.
if (state->needs_parent) {
TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
if (ts_node_is_null(parent)) {
LOG(" missing parent node\n");
state->dead = true;
} else {
state->needs_parent = false;
QueryStep *skipped_wildcard_step = step;
do {
skipped_wildcard_step--;
} while (
skipped_wildcard_step->is_dead_end ||
skipped_wildcard_step->is_pass_through ||
skipped_wildcard_step->depth > 0
);
if (skipped_wildcard_step->capture_ids[0] != NONE) {
LOG(" capture wildcard parent\n");
ts_query_cursor__capture(
self,
state,
skipped_wildcard_step,
parent
);
}
}
}
// If the current node is captured in this pattern, add it to the capture list.
if (step->capture_ids[0] != NONE) {
ts_query_cursor__capture(self, state, step, node);
}
if (state->dead) {
array_erase(&self->states, i);
i--;
continue;
}
// Advance this state to the next step of its pattern.
state->step_index++;
state->seeking_immediate_match = false;
LOG(
" advance state. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
// If this state's next step has an alternative step, then copy the state in order
// to pursue both alternatives. The alternative step itself may have an alternative,
// so this is an iterative process.
unsigned end_index = i + 1;
for (unsigned j = i; j < end_index; j++) {
QueryState *state = &self->states.contents[j];
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (next_step->alternative_index != NONE) {
// A "dead-end" step exists only to add a non-sequential jump into the step sequence,
// via its alternative index. When a state reaches a dead-end step, it jumps straight
// to the step's alternative.
if (next_step->is_dead_end) {
state->step_index = next_step->alternative_index;
j--;
continue;
}
// A "pass-through" step exists only to add a branch into the step sequence,
// via its alternative_index. When a state reaches a pass-through step, it splits
// in order to process the alternative step, and then it advances to the next step.
if (next_step->is_pass_through) {
state->step_index++;
j--;
}
QueryState *copy = ts_query_cursor__copy_state(self, &state);
if (copy) {
LOG(
" split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
copy->pattern_index,
copy->step_index,
next_step->alternative_index,
next_step->alternative_is_immediate,
capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
);
end_index++;
copy_count++;
copy->step_index = next_step->alternative_index;
if (next_step->alternative_is_immediate) {
copy->seeking_immediate_match = true;
}
}
}
}
}
for (unsigned i = 0; i < self->states.size; i++) {
QueryState *state = &self->states.contents[i];
if (state->dead) {
array_erase(&self->states, i);
i--;
continue;
}
// Enforce the longest-match criteria. When a query pattern contains optional or
// repeated nodes, this is necessary to avoid multiple redundant states, where
// one state has a strict subset of another state's captures.
bool did_remove = false;
for (unsigned j = i + 1; j < self->states.size; j++) {
QueryState *other_state = &self->states.contents[j];
// Query states are kept in ascending order of start_depth and pattern_index.
// Since the longest-match criteria is only used for deduping matches of the same
// pattern and root node, we only need to perform pairwise comparisons within a
// small slice of the states array.
if (
other_state->start_depth != state->start_depth ||
other_state->pattern_index != state->pattern_index
) break;
bool left_contains_right, right_contains_left;
ts_query_cursor__compare_captures(
self,
state,
other_state,
&left_contains_right,
&right_contains_left
);
if (left_contains_right) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
array_erase(&self->states, j);
j--;
continue;
}
other_state->has_in_progress_alternatives = true;
}
if (right_contains_left) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
array_erase(&self->states, i);
i--;
did_remove = true;
break;
}
state->has_in_progress_alternatives = true;
}
}
// If the state is at the end of its pattern, remove it from the list
// of in-progress states and add it to the list of finished states.
if (!did_remove) {
LOG(
" keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
state->pattern_index,
state->start_depth,
state->step_index,
capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
);
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (next_step->depth == PATTERN_DONE_MARKER) {
if (state->has_in_progress_alternatives) {
LOG(" defer finishing pattern %u\n", state->pattern_index);
} else {
LOG(" finish pattern %u\n", state->pattern_index);
array_push(&self->finished_states, *state);
array_erase(&self->states, (uint32_t)(state - self->states.contents));
did_match = true;
i--;
}
}
}
}
}
for (unsigned i = 0; i < self->states.size; i++) {
QueryState *state = &self->states.contents[i];
if (state->dead) {
array_erase(&self->states, i);
i--;
continue;
}
// Enforce the longest-match criteria. When a query pattern contains optional or
// repeated nodes, this is necessary to avoid multiple redundant states, where
// one state has a strict subset of another state's captures.
bool did_remove = false;
for (unsigned j = i + 1; j < self->states.size; j++) {
QueryState *other_state = &self->states.contents[j];
// Query states are kept in ascending order of start_depth and pattern_index.
// Since the longest-match criteria is only used for deduping matches of the same
// pattern and root node, we only need to perform pairwise comparisons within a
// small slice of the states array.
if (
other_state->start_depth != state->start_depth ||
other_state->pattern_index != state->pattern_index
) break;
bool left_contains_right, right_contains_left;
ts_query_cursor__compare_captures(
self,
state,
other_state,
&left_contains_right,
&right_contains_left
);
if (left_contains_right) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
array_erase(&self->states, j);
j--;
continue;
}
other_state->has_in_progress_alternatives = true;
}
if (right_contains_left) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
array_erase(&self->states, i);
i--;
did_remove = true;
break;
}
state->has_in_progress_alternatives = true;
}
}
// If the state is at the end of its pattern, remove it from the list
// of in-progress states and add it to the list of finished states.
if (!did_remove) {
LOG(
" keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
state->pattern_index,
state->start_depth,
state->step_index,
capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
);
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (next_step->depth == PATTERN_DONE_MARKER) {
if (state->has_in_progress_alternatives) {
LOG(" defer finishing pattern %u\n", state->pattern_index);
} else {
LOG(" finish pattern %u\n", state->pattern_index);
array_push(&self->finished_states, *state);
array_erase(&self->states, (uint32_t)(state - self->states.contents));
did_match = true;
i--;
}
}
}
}
// When the current node ends prior to the desired start offset,
// only descend for the purpose of continuing in-progress matches.
bool has_in_progress_matches = false;
if (!node_intersects_range) {
for (unsigned i = 0; i < self->states.size; i++) {
QueryState *state = &self->states.contents[i];;
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (
next_step->depth != PATTERN_DONE_MARKER &&
state->start_depth + next_step->depth > self->depth
) {
has_in_progress_matches = true;
bool should_descend =
node_intersects_range ||
ts_query_cursor__should_descend_outside_of_range(self);
if (should_descend) {
switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) {
case TreeCursorStepVisible:
self->depth++;
self->on_visible_node = true;
continue;
case TreeCursorStepHidden:
self->on_visible_node = false;
continue;
default:
break;
}
}
}
bool should_descend = node_intersects_range || has_in_progress_matches;
if (!should_descend) {
LOG(
" not descending. node end byte: %u, start byte: %u\n",
ts_node_end_byte(node),
self->start_byte
);
}
if (should_descend && ts_tree_cursor_goto_first_child(&self->cursor)) {
self->depth++;
} else {
self->ascending = true;
}
self->ascending = true;
}
}
}

View file

@ -291,6 +291,12 @@ static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
return self.data.is_inline ? 0 : self.ptr->repeat_depth;
}
// Report whether a subtree is treated as a repetition node: a heap-allocated
// subtree that is neither named nor visible and has at least one child.
// Yields 1 for such subtrees and 0 for everything else, including every
// inline subtree.
static inline uint32_t ts_subtree_is_repetition(Subtree self) {
  // Inline subtrees are never reported as repetitions; `self.ptr` is only
  // consulted for non-inline subtrees.
  if (self.data.is_inline) {
    return 0;
  }

  // Named or visible nodes are not repetitions.
  if (self.ptr->named || self.ptr->visible) {
    return 0;
  }

  // A hidden, anonymous node only counts when it actually has children.
  return self.ptr->child_count != 0;
}
static inline uint32_t ts_subtree_node_count(Subtree self) {
return (self.data.is_inline || self.ptr->child_count == 0) ? 1 : self.ptr->node_count;
}