diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 914d41cd..a377ca51 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -382,7 +382,7 @@ fn test_query_matches_with_many_overlapping_results() { ) .unwrap(); - let count = 80; + let count = 1024; // Deeply nested chained function calls: // a @@ -547,8 +547,8 @@ fn test_query_matches_with_immediate_siblings() { &[ (0, vec![("parent", "a"), ("child", "b")]), (0, vec![("parent", "b"), ("child", "c")]), - (1, vec![("last-child", "d")]), (0, vec![("parent", "c"), ("child", "d")]), + (1, vec![("last-child", "d")]), (2, vec![("first-element", "w")]), (2, vec![("first-element", "1")]), ], @@ -732,6 +732,55 @@ fn test_query_matches_with_nested_repetitions() { }); } +#[test] +fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + + // When this query sees a comment, it must keep track of several potential + // matches: up to two for each pattern that begins with a comment. + let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @name)) @ref.method + + ((comment)* @doc (function_declaration)) + ((comment)* @doc (generator_function_declaration)) + ((comment)* @doc (class_declaration)) + ((comment)* @doc (lexical_declaration)) + ((comment)* @doc (variable_declaration)) + ((comment)* @doc (method_definition)) + + (comment) @comment + "#, + ) + .unwrap(); + + // Here, a series of comments occurs in the middle of a match of the first + // pattern. To avoid exceeding the storage limits and discarding that outer + // match, the comment-related matches need to be managed efficiently. 
+ let source = format!( + "theObject\n{}\n.theMethod()", + " // the comment\n".repeat(64) + ); + + assert_query_matches( + language, + &query, + &source, + &vec![(7, vec![("comment", "// the comment")]); 64] + .into_iter() + .chain(vec![( + 0, + vec![("ref.method", source.as_str()), ("name", "theMethod")], + )]) + .collect::<Vec<_>>(), + ); + }); +} + #[test] fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index c839c299..8c8bd4c3 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -48,7 +48,6 @@ typedef struct { uint16_t alternative_index; uint16_t depth; bool contains_captures: 1; - bool is_pattern_start: 1; bool is_immediate: 1; bool is_last_child: 1; bool is_pass_through: 1; @@ -449,7 +448,6 @@ static QueryStep query_step__new( .alternative_index = NONE, .contains_captures = false, .is_last_child = false, - .is_pattern_start = false, .is_pass_through = false, .is_dead_end = false, .is_immediate = is_immediate, @@ -547,6 +545,23 @@ static inline void ts_query__pattern_map_insert( ) { uint32_t index; ts_query__pattern_map_search(self, symbol, &index); + + // Ensure that the entries are sorted not only by symbol, but also + // by pattern_index. This way, states for earlier patterns will be + // initiated first, which allows the ordering of the states array + // to be maintained more efficiently. + while (index < self->pattern_map.size) { + PatternEntry *entry = &self->pattern_map.contents[index]; + if ( + self->steps.contents[entry->step_index].symbol == symbol && + entry->pattern_index < pattern_index + ) { + index++; + } else { + break; + } + } + array_insert(&self->pattern_map, index, ((PatternEntry) { .step_index = start_step_index, .pattern_index = pattern_index, @@ -1168,7 +1183,6 @@ TSQuery *ts_query_new( // Maintain a map that can look up patterns for a given root symbol. 
for (;;) { QueryStep *step = &self->steps.contents[start_step_index]; - step->is_pattern_start = true; ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index); if (step->symbol == WILDCARD_SYMBOL) { self->wildcard_root_pattern_count++; @@ -1178,6 +1192,7 @@ TSQuery *ts_query_new( // then add multiple entries to the pattern map. if (step->alternative_index != NONE) { start_step_index = step->alternative_index; + step->alternative_index = NONE; } else { break; } @@ -1460,27 +1475,62 @@ void ts_query_cursor__compare_captures( } } -static bool ts_query_cursor__add_state( +static void ts_query_cursor__add_state( TSQueryCursor *self, const PatternEntry *pattern ) { + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + + // Keep the states array in ascending order of start_depth and pattern_index, + // so that it can be processed more efficiently elsewhere. Usually, there is + // no work to do here because of two facts: + // * States with lower start_depth are naturally added first due to the + // order in which nodes are visited. + // * Earlier patterns are naturally added first because of the ordering of the + // pattern_map data structure that's used to initiate matches. + // + // This loop is only needed in cases where two conditions hold: + // * A pattern consists of more than one sibling node, so that its states + // remain in progress after exiting the node that started the match. + // * The first node in the pattern matches against multiple nodes at the + // same depth. + // + // An example of this is the pattern '((comment)* (function))'. If multiple + // `comment` nodes appear in a row, then we may initiate a new state for this + // pattern while another state for the same pattern is already in progress. + // If there are multiple patterns like this in a query, then this loop will + // need to execute in order to keep the states ordered by pattern_index. 
+ uint32_t index = self->states.size; + while (index > 0) { + QueryState *prev_state = &self->states.contents[index - 1]; + if (prev_state->start_depth < start_depth) break; + if (prev_state->start_depth == start_depth) { + if (prev_state->pattern_index < pattern->pattern_index) break; + if (prev_state->pattern_index == pattern->pattern_index) { + // Avoid inserting an unnecessary duplicate state, + // which would be immediately pruned by the longest-match criteria. + if (prev_state->step_index == pattern->step_index) return; + } + } + index--; + } + LOG( " start state. pattern:%u, step:%u\n", pattern->pattern_index, pattern->step_index ); - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - array_push(&self->states, ((QueryState) { + array_insert(&self->states, index, ((QueryState) { .capture_list_id = NONE, .step_index = pattern->step_index, .pattern_index = pattern->pattern_index, - .start_depth = self->depth - step->depth, + .start_depth = start_depth, .consumed_capture_count = 0, - .seeking_immediate_match = false, + .seeking_immediate_match = true, .has_in_progress_alternatives = false, .dead = false, })); - return true; } // Acquire a capture list for this state. If there are no capture lists left in the @@ -1682,7 +1732,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query_cursor__add_state(self, pattern)) break; + ts_query_cursor__add_state(self, pattern); } // Add new states for any patterns whose root node matches this node. @@ -1694,7 +1744,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. 
if (step->field && field_id != step->field) continue; - if (!ts_query_cursor__add_state(self, pattern)) break; + ts_query_cursor__add_state(self, pattern); // Advance to the next pattern whose root node matches this node. i++; @@ -1762,11 +1812,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // parent, then this query state cannot simply be updated in place. It must be // split into two states: one that matches this node, and one which skips over // this node, to preserve the possibility of matching later siblings. - if ( - later_sibling_can_match && - !step->is_pattern_start && - step->contains_captures - ) { + if (later_sibling_can_match && step->contains_captures) { if (ts_query_cursor__copy_state(self, &state)) { LOG( " split state for capture. pattern:%u, step:%u\n", @@ -1822,25 +1868,27 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { continue; } - QueryState *copy = ts_query_cursor__copy_state(self, &state); if (next_step->is_pass_through) { state->step_index++; j--; } + + QueryState *copy = ts_query_cursor__copy_state(self, &state); if (copy) { - copy_count++; + LOG( + " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", + copy->pattern_index, + copy->step_index, + next_step->alternative_index, + next_step->alternative_is_immediate, + capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size + ); end_index++; + copy_count++; copy->step_index = next_step->alternative_index; if (next_step->alternative_is_immediate) { copy->seeking_immediate_match = true; } - LOG( - " split state for branch. 
pattern:%u, step:%u, step:%u, immediate:%d\n", - copy->pattern_index, - state->step_index, - copy->step_index, - copy->seeking_immediate_match - ); } } } @@ -1860,13 +1908,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { QueryState *other_state = &self->states.contents[j]; - if (other_state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - // When query states are copied in order + // Query states are kept in ascending order of start_depth and pattern_index. + // Since the longest-match criteria is only used for deduping matches of the same + // pattern and root node, we only need to perform pairwise comparisons within a + // small slice of the states array. if ( other_state->start_depth != state->start_depth || other_state->pattern_index != state->pattern_index @@ -1914,6 +1960,13 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If there the state is at the end of its pattern, remove it from the list // of in-progress states and add it to the list of finished states. if (!did_remove) { + LOG( + " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", + state->pattern_index, + state->start_depth, + state->step_index, + capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size + ); QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->depth == PATTERN_DONE_MARKER) { if (state->has_in_progress_alternatives) {