fix(query): ensure immediate matches for any node when an anchor follows a wildcard node

This commit is contained in:
Amaan Qureshi 2024-12-28 23:54:55 -05:00
parent d87d4592e0
commit 22f67e2b67
2 changed files with 55 additions and 3 deletions

View file

@ -5405,3 +5405,40 @@ fn test_query_execution_with_points_causing_underflow() {
assert_eq!(matches, matches2);
}
/// Checks how a wildcard pattern behaves when an anchor (`.`) directly follows it.
///
/// Two queries are run against the same pair of tuples, one of which has a
/// trailing comma. The first uses the named wildcard `(_)`, the second the bare
/// wildcard `_`; the expected `@last` capture for the trailing-comma tuple
/// differs between them (`d` versus `,`).
#[test]
fn test_wildcard_behavior_before_anchor() {
    let python = get_language("python");
    let mut parser = Parser::new();
    parser.set_language(&python).unwrap();

    // Two tuples; only the second one ends with a trailing comma.
    let source = "\n(a, b)\n(c, d,)\n";

    // Each entry pairs a query pattern with the matches it is expected to yield.
    let cases = [
        // `(_)`: the last *named* node immediately before the closing
        // parenthesis, so the trailing comma is not captured.
        (
            r#"(tuple (_) @last . ")" .) @match"#,
            vec![
                (0, vec![("match", "(a, b)"), ("last", "b")]),
                (0, vec![("match", "(c, d,)"), ("last", "d")]),
            ],
        ),
        // `_`: *any* node immediately before the closing parenthesis,
        // anonymous ones included, so the trailing comma is captured.
        (
            r#"(tuple _ @last . ")" .) @match"#,
            vec![
                (0, vec![("match", "(a, b)"), ("last", "b")]),
                (0, vec![("match", "(c, d,)"), ("last", ",")]),
            ],
        ),
    ];

    for (pattern, expected) in &cases {
        let query = Query::new(&python, pattern).unwrap();
        assert_query_matches(&python, &query, source, expected);
    }
}

View file

@ -81,7 +81,7 @@ typedef struct {
* for the entire top-level pattern. When iterating through a query's
* captures using `ts_query_cursor_next_capture`, this field is used to
* detect that a capture can safely be returned from a match that has not
* even completed yet.
* even completed yet.
*/
typedef struct {
TSSymbol symbol;
@ -174,7 +174,8 @@ typedef struct {
* list of captures from the `CaptureListPool`.
* - `seeking_immediate_match` - A flag that indicates that the state's next
* step must be matched by the very next sibling. This is used when
* processing repetitions.
* processing repetitions, or when processing a wildcard node followed by
* an anchor.
* - `has_in_progress_alternatives` - A flag that indicates that there are
* other states that have the same captures as this state, but are at
* different steps in their pattern. This means that in order to obey the
@ -3919,7 +3920,6 @@ static inline bool ts_query_cursor__advance(
// Advance this state to the next step of its pattern.
state->step_index++;
state->seeking_immediate_match = false;
LOG(
" advance state. pattern:%u, step:%u\n",
state->pattern_index,
@ -3927,6 +3927,21 @@ static inline bool ts_query_cursor__advance(
);
QueryStep *next_step = &self->query->steps.contents[state->step_index];
// For a given step, if the current symbol is the wildcard symbol, `_`, and it is **not**
// named, meaning it should capture anonymous nodes, **and** the next step is immediate,
// we reuse the `seeking_immediate_match` flag to indicate that we are looking for an
// immediate match due to an unnamed wildcard symbol.
//
// The reason for this is that typically, anchors will not consider anonymous nodes,
// but we're special casing the wildcard symbol to allow for any immediate matches,
// regardless of whether they are named or not.
if (step->symbol == WILDCARD_SYMBOL && !step->is_named && next_step->is_immediate) {
state->seeking_immediate_match = true;
} else {
state->seeking_immediate_match = false;
}
if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
// If this state's next step has an alternative step, then copy the state in order