fix(lib): validate subtypes in supertype queries

This commit is contained in:
Amaan Qureshi 2025-09-26 16:58:10 -04:00 committed by Amaan Qureshi
parent bd02be25d5
commit 341665824c
4 changed files with 104 additions and 10 deletions

View file

@ -416,11 +416,11 @@ fn test_query_errors_on_impossible_patterns() {
Err(QueryError {
kind: QueryErrorKind::Structure,
row: 0,
offset: 51,
column: 51,
offset: 37,
column: 37,
message: [
"(binary_expression left: (expression (identifier)) left: (expression (identifier)))",
" ^",
" ^",
]
.join("\n"),
})
@ -5773,7 +5773,7 @@ fn test_query_assertion_on_unreachable_node_with_child() {
// A query that tries to capture the `await` token in the `await_binding` rule
// should not cause an assertion failure during query analysis.
let grammar = r#"
module.exports = grammar({
export default grammar({
name: "query_assertion_crash",
rules: {
@ -5820,3 +5820,52 @@ module.exports = grammar({
}
);
}
#[test]
fn test_query_supertype_with_anonymous_node() {
    // Regression test for supertype queries whose subtype is an anonymous node.
    //
    // The grammar below declares `expression` as a supertype with two
    // alternatives: the named `function_call` rule and the anonymous `'()'`
    // token. The `(expression/"()")` pattern must compile against that grammar
    // and produce the single match asserted at the end of the test.
    let grammar = r#"
export default grammar({
name: "supertype_anonymous_test",
extras: $ => [/\s/, $.comment],
supertypes: $ => [$.expression],
word: $ => $.identifier,
rules: {
source_file: $ => repeat($.expression),
expression: $ => choice(
$.function_call,
'()' // an empty tuple, which should be queryable with the supertype syntax
),
function_call: $ => seq($.identifier, '()'),
identifier: _ => /[a-zA-Z_][a-zA-Z0-9_]*/,
comment: _ => token(seq('//', /.*/)),
}
});
"#;

    // The grammar is written to a temporary `.js` file because it is loaded
    // from a path rather than from an in-memory string.
    let file = tempfile::NamedTempFile::with_suffix(".js").unwrap();
    std::fs::write(file.path(), grammar).unwrap();

    let grammar_json = load_grammar_file(file.path(), None).unwrap();
    let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
    let language = get_test_language(&parser_name, &parser_code, None);

    // `expect` (instead of `assert!(is_ok())` followed by `unwrap()`) makes a
    // failure report the underlying `QueryError` rather than a bare
    // "assertion failed" message.
    let query = Query::new(&language, r#"(expression/"()") @tuple"#)
        .expect("supertype query with an anonymous subtype should be accepted");

    // The source contains a function call followed by a standalone `()`;
    // exactly one match capturing "()" is expected.
    let source = "foo()\n()";
    assert_query_matches(&language, &query, source, &[(0, vec![("tuple", "()")])]);
}

View file

@ -115,6 +115,12 @@ match a `binary_expression` only if it is a child of `expression`:
(expression/binary_expression) @binary-expression
```
This also applies to anonymous nodes. For example, this pattern would match `"()"` only if it is a child of `expression`:
```query
(expression/"()") @empty-expression
```
[grammar]: ../../creating-parsers/3-writing-the-grammar.md#structuring-rules-well
[node-field-names]: ../2-basic-parsing.md#node-field-names
[named-vs-anonymous-nodes]: ../2-basic-parsing.md#named-vs-anonymous-nodes

View file

@ -140,7 +140,7 @@ see fit.
```query
((comment) @injection.content
(#lua-match? @injection.content "/[*\/][!*\/]<?[^a-zA-Z]")
(#match? @injection.content "/[*\/][!*\/]<?[^a-zA-Z]")
(#set! injection.language "doxygen"))
```

View file

@ -1483,6 +1483,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
// basic information about each step. Mark all of the steps that contain
// captures, and record the indices of all of the steps that have child steps.
Array(uint32_t) parent_step_indices = array_new();
bool all_patterns_are_valid = true;
for (unsigned i = 0; i < self->steps.size; i++) {
QueryStep *step = array_get(&self->steps, i);
if (step->depth == PATTERN_DONE_MARKER) {
@ -1510,8 +1511,45 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
has_children = true;
}
if (has_children && !is_wildcard) {
array_push(&parent_step_indices, i);
if (has_children) {
if (!is_wildcard) {
array_push(&parent_step_indices, i);
} else if (step->supertype_symbol && self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
// Look at the child steps to see if any aren't valid subtypes for this supertype.
uint32_t subtype_length;
const TSSymbol *subtypes = ts_language_subtypes(
self->language,
step->supertype_symbol,
&subtype_length
);
for (unsigned j = i + 1; j < self->steps.size; j++) {
QueryStep *child_step = array_get(&self->steps, j);
if (child_step->depth == PATTERN_DONE_MARKER || child_step->depth <= step->depth) {
break;
}
if (child_step->depth == step->depth + 1 && child_step->symbol != WILDCARD_SYMBOL) {
bool is_valid_subtype = false;
for (uint32_t k = 0; k < subtype_length; k++) {
if (child_step->symbol == subtypes[k]) {
is_valid_subtype = true;
break;
}
}
if (!is_valid_subtype) {
for (unsigned offset_idx = 0; offset_idx < self->step_offsets.size; offset_idx++) {
StepOffset *step_offset = array_get(&self->step_offsets, offset_idx);
if (step_offset->step_index >= j) {
*error_offset = step_offset->byte_offset;
all_patterns_are_valid = false;
goto supertype_cleanup;
}
}
}
}
}
}
}
}
@ -1684,7 +1722,6 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
// For each non-terminal pattern, determine if the pattern can successfully match,
// and identify all of the possible children within the pattern where matching could fail.
bool all_patterns_are_valid = true;
QueryAnalysis analysis = query_analysis__new();
for (unsigned i = 0; i < parent_step_indices.size; i++) {
uint16_t parent_step_index = *array_get(&parent_step_indices, i);
@ -1962,11 +1999,13 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
array_delete(&subgraphs);
query_analysis__delete(&analysis);
array_delete(&next_nodes);
array_delete(&non_rooted_pattern_start_steps);
array_delete(&parent_step_indices);
array_delete(&predicate_capture_ids);
state_predecessor_map_delete(&predecessor_map);
supertype_cleanup:
array_delete(&non_rooted_pattern_start_steps);
array_delete(&parent_step_indices);
return all_patterns_are_valid;
}