From 341665824c52b3c9259cfaea1fc179f615dccf3a Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Fri, 26 Sep 2025 16:58:10 -0400 Subject: [PATCH] fix(lib): validate subtypes in supertype queries --- crates/cli/src/tests/query_test.rs | 57 +++++++++++++++++-- docs/src/using-parsers/queries/1-syntax.md | 6 ++ .../queries/3-predicates-and-directives.md | 2 +- lib/src/query.c | 49 ++++++++++++++-- 4 files changed, 104 insertions(+), 10 deletions(-) diff --git a/crates/cli/src/tests/query_test.rs b/crates/cli/src/tests/query_test.rs index fdbdaab8..e70dd26b 100644 --- a/crates/cli/src/tests/query_test.rs +++ b/crates/cli/src/tests/query_test.rs @@ -416,11 +416,11 @@ fn test_query_errors_on_impossible_patterns() { Err(QueryError { kind: QueryErrorKind::Structure, row: 0, - offset: 51, - column: 51, + offset: 37, + column: 37, message: [ "(binary_expression left: (expression (identifier)) left: (expression (identifier)))", - " ^", + " ^", ] .join("\n"), }) @@ -5773,7 +5773,7 @@ fn test_query_assertion_on_unreachable_node_with_child() { // A query that tries to capture the `await` token in the `await_binding` rule // should not cause an assertion failure during query analysis. let grammar = r#" -module.exports = grammar({ +export default grammar({ name: "query_assertion_crash", rules: { @@ -5820,3 +5820,52 @@ module.exports = grammar({ } ); } + +#[test] +fn test_query_supertype_with_anonymous_node() { + let grammar = r#" +export default grammar({ + name: "supertype_anonymous_test", + + extras: $ => [/\s/, $.comment], + + supertypes: $ => [$.expression], + + word: $ => $.identifier, + + rules: { + source_file: $ => repeat($.expression), + + expression: $ => choice( + $.function_call, + '()' // an empty tuple, which should be queryable with the supertype syntax + ), + + function_call: $ => seq($.identifier, '()'), + + identifier: _ => /[a-zA-Z_][a-zA-Z0-9_]*/, + + comment: _ => token(seq('//', /.*/)), + } +}); + "#; + + let file = tempfile::NamedTempFile::with_suffix(".js").unwrap(); + std::fs::write(file.path(), grammar).unwrap(); + + let grammar_json = load_grammar_file(file.path(), None).unwrap(); + + let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap(); + + let language = get_test_language(&parser_name, &parser_code, None); + + let query_result = Query::new(&language, r#"(expression/"()") @tuple"#); + + assert!(query_result.is_ok()); + + let query = query_result.unwrap(); + + let source = "foo()\n()"; + + assert_query_matches(&language, &query, source, &[(0, vec![("tuple", "()")])]); +} diff --git a/docs/src/using-parsers/queries/1-syntax.md b/docs/src/using-parsers/queries/1-syntax.md index a12cec70..0f02be61 100644 --- a/docs/src/using-parsers/queries/1-syntax.md +++ b/docs/src/using-parsers/queries/1-syntax.md @@ -115,6 +115,12 @@ match a `binary_expression` only if it is a child of `expression`: (expression/binary_expression) @binary-expression ``` +This also applies to anonymous nodes. For example, this pattern would match `"()"` only if it is a child of `expression`: + +```query +(expression/"()") @empty-expression +``` + [grammar]: ../../creating-parsers/3-writing-the-grammar.md#structuring-rules-well [node-field-names]: ../2-basic-parsing.md#node-field-names [named-vs-anonymous-nodes]: ../2-basic-parsing.md#named-vs-anonymous-nodes diff --git a/docs/src/using-parsers/queries/3-predicates-and-directives.md b/docs/src/using-parsers/queries/3-predicates-and-directives.md index a059a3ba..88e01e01 100644 --- a/docs/src/using-parsers/queries/3-predicates-and-directives.md +++ b/docs/src/using-parsers/queries/3-predicates-and-directives.md @@ -140,7 +140,7 @@ see fit. ```query ((comment) @injection.content - (#lua-match? @injection.content "/[*\/][!*\/]steps.size; i++) { QueryStep *step = array_get(&self->steps, i); if (step->depth == PATTERN_DONE_MARKER) { @@ -1510,8 +1511,45 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { has_children = true; } - if (has_children && !is_wildcard) { - array_push(&parent_step_indices, i); + if (has_children) { + if (!is_wildcard) { + array_push(&parent_step_indices, i); + } else if (step->supertype_symbol && self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { + // Look at the child steps to see if any aren't valid subtypes for this supertype. + uint32_t subtype_length; + const TSSymbol *subtypes = ts_language_subtypes( + self->language, + step->supertype_symbol, + &subtype_length + ); + + for (unsigned j = i + 1; j < self->steps.size; j++) { + QueryStep *child_step = array_get(&self->steps, j); + if (child_step->depth == PATTERN_DONE_MARKER || child_step->depth <= step->depth) { + break; + } + if (child_step->depth == step->depth + 1 && child_step->symbol != WILDCARD_SYMBOL) { + bool is_valid_subtype = false; + for (uint32_t k = 0; k < subtype_length; k++) { + if (child_step->symbol == subtypes[k]) { + is_valid_subtype = true; + break; + } + } + + if (!is_valid_subtype) { + for (unsigned offset_idx = 0; offset_idx < self->step_offsets.size; offset_idx++) { + StepOffset *step_offset = array_get(&self->step_offsets, offset_idx); + if (step_offset->step_index >= j) { + *error_offset = step_offset->byte_offset; + all_patterns_are_valid = false; + goto supertype_cleanup; + } + } + } + } + } + } } } @@ -1684,7 +1722,6 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // For each non-terminal pattern, determine if the pattern can successfully match, // and identify all of the possible children within the pattern where matching could fail. - bool all_patterns_are_valid = true; QueryAnalysis analysis = query_analysis__new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { uint16_t parent_step_index = *array_get(&parent_step_indices, i); @@ -1962,11 +1999,13 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_delete(&subgraphs); query_analysis__delete(&analysis); array_delete(&next_nodes); - array_delete(&non_rooted_pattern_start_steps); - array_delete(&parent_step_indices); array_delete(&predicate_capture_ids); state_predecessor_map_delete(&predecessor_map); +supertype_cleanup: + array_delete(&non_rooted_pattern_start_steps); + array_delete(&parent_step_indices); + return all_patterns_are_valid; }