feat: support querying missing nodes

Co-authored-by: Amaan Qureshi <amaanq12@gmail.com>
This commit is contained in:
Riley Bruins 2024-12-14 11:57:36 -08:00 committed by GitHub
parent cd94dbd57f
commit 495fe2a6c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 190 additions and 7 deletions

View file

@ -99,8 +99,8 @@ test:
cargo xtask generate-fixtures
cargo xtask test
test_wasm:
cargo xtask generate-fixtures-wasm
test-wasm:
cargo xtask generate-fixtures --wasm
cargo xtask test-wasm
lint:
@ -115,4 +115,4 @@ format:
changelog:
@git-cliff --config .github/cliff.toml --prepend CHANGELOG.md --latest --github-token $(shell gh auth token)
.PHONY: test test_wasm lint format changelog
.PHONY: test test-wasm lint format changelog

View file

@ -193,6 +193,36 @@ fn test_query_errors_on_invalid_syntax() {
]
.join("\n")
);
// MISSING keyword with full pattern
assert_eq!(
Query::new(
&get_language("c"),
r"(MISSING (function_declarator (identifier))) "
)
.unwrap_err()
.message,
[
r"(MISSING (function_declarator (identifier))) ",
r" ^",
]
.join("\n")
);
// MISSING keyword with multiple identifiers
assert_eq!(
Query::new(
&get_language("c"),
r"(MISSING function_declarator function_declarator) "
)
.unwrap_err()
.message,
[
r"(MISSING function_declarator function_declarator) ",
r" ^",
]
.join("\n")
);
});
}
@ -767,6 +797,74 @@ fn test_query_matches_capturing_error_nodes() {
});
}
/// Checks that `(MISSING ...)` query patterns compile and produce the expected
/// matches, first for an anonymous token in JavaScript (`";"`) and then for
/// named nodes in C (`field_identifier`, `identifier`).
#[test]
fn test_query_matches_capturing_missing_nodes() {
// NOTE(review): `allocations::record` is defined outside this view; presumably
// it tracks allocations made inside the closure — confirm.
allocations::record(|| {
let language = get_language("javascript");
// Two patterns: a bare `(MISSING)` and `(MISSING ";")`. Both contain `;` query
// comments inside the parentheses, so the `unwrap()` below also checks that
// comments are accepted in that position.
let query = Query::new(
&language,
r#"
(MISSING
; Comments should be valid
) @missing
(MISSING
; Comments should be valid
";"
; Comments should be valid
) @missing-semicolon
"#,
)
.unwrap();
// Missing anonymous nodes
assert_query_matches(
&language,
&query,
"
x = function(a) { b; } function(c) { d; }
// ^ MISSING semicolon here
",
// Both patterns are expected to match once, each capturing the empty string.
// The first tuple element lines up with the pattern's position in the query
// (0 = bare `(MISSING)`, 1 = `(MISSING ";")`).
&[
(0, vec![("missing", "")]),
(1, vec![("missing-semicolon", "")]),
],
);
// Rebind `language` and `query` for the C half of the test.
let language = get_language("c");
// Three patterns, in order: a missing `field_identifier`, a missing
// `identifier`, and a bare `(MISSING)` with no node type.
let query = Query::new(
&language,
"(MISSING field_identifier) @missing-field-ident
(MISSING identifier) @missing-ident
(MISSING) @missing-anything",
)
.unwrap();
// Missing named nodes
assert_query_matches(
&language,
&query,
"
int main() {
if (a.) {
// ^ MISSING field_identifier here
b();
c();
if (*) d();
// ^ MISSING identifier here
}
}
",
// Each type-specific match is followed by a `missing-anything` match from the
// bare `(MISSING)` pattern (index 2); every expected capture text is empty.
&[
(0, vec![("missing-field-ident", "")]),
(2, vec![("missing-anything", "")]),
(1, vec![("missing-ident", "")]),
(2, vec![("missing-anything", "")]),
],
);
});
}
#[test]
fn test_query_matches_with_extra_children() {
allocations::record(|| {

View file

@ -617,6 +617,29 @@ For example, this pattern would match any node inside a call:
(call (_) @call.inner)
```
#### Special Nodes
When the parser encounters text it does not recognize, it represents that text
as an `(ERROR)` node in the syntax tree. These error nodes can be queried just like
normal nodes:
```scheme
(ERROR) @error-node
```
Similarly, if a parser is able to recover from erroneous text by inserting a missing token and then reducing, it will insert that missing node in the final tree so long as that tree has the lowest error cost. These missing nodes appear as seemingly normal nodes in the tree, but they are zero tokens wide; being missing is a property of the actual terminal node that was inserted, rather than a separate kind of node. These special missing nodes can be queried using `(MISSING)`:
```scheme
(MISSING) @missing-node
```
This is useful when attempting to detect all syntax errors in a given parse tree, since these missing nodes are not captured by `(ERROR)` queries. Specific missing node types can also be queried:
```scheme
(MISSING identifier) @missing-identifier
(MISSING ";") @missing-semicolon
```
#### Anchors
The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors depending on where it's placed inside a query.

View file

@ -100,6 +100,7 @@ typedef struct {
bool contains_captures: 1;
bool root_pattern_guaranteed: 1;
bool parent_pattern_guaranteed: 1;
bool is_missing: 1;
} QueryStep;
/*
@ -2313,6 +2314,7 @@ static TSQueryError ts_query__parse_pattern(
// Otherwise, this parenthesis is the start of a named node.
else {
TSSymbol symbol;
bool is_missing = false;
// Parse a normal node name
if (stream_is_ident_start(stream)) {
@ -2323,6 +2325,51 @@ static TSQueryError ts_query__parse_pattern(
// Parse the wildcard symbol
if (length == 1 && node_name[0] == '_') {
symbol = WILDCARD_SYMBOL;
} else if (!strncmp(node_name, "MISSING", length)) {
is_missing = true;
stream_skip_whitespace(stream);
if (stream_is_ident_start(stream)) {
const char *missing_node_name = stream->input;
stream_scan_identifier(stream);
uint32_t missing_node_length = (uint32_t)(stream->input - missing_node_name);
symbol = ts_language_symbol_for_name(
self->language,
missing_node_name,
missing_node_length,
true
);
if (!symbol) {
stream_reset(stream, missing_node_name);
return TSQueryErrorNodeType;
}
}
else if (stream->next == '"') {
const char *string_start = stream->input;
TSQueryError e = ts_query__parse_string_literal(self, stream);
if (e) return e;
symbol = ts_language_symbol_for_name(
self->language,
self->string_buffer.contents,
self->string_buffer.size,
false
);
if (!symbol) {
stream_reset(stream, string_start + 1);
return TSQueryErrorNodeType;
}
}
else if (stream->next == ')') {
symbol = WILDCARD_SYMBOL;
}
else {
stream_reset(stream, stream->input);
return TSQueryErrorSyntax;
}
}
else {
@ -2348,6 +2395,9 @@ static TSQueryError ts_query__parse_pattern(
step->supertype_symbol = step->symbol;
step->symbol = WILDCARD_SYMBOL;
}
if (is_missing) {
step->is_missing = true;
}
if (symbol == WILDCARD_SYMBOL) {
step->is_named = true;
}
@ -3641,6 +3691,7 @@ static inline bool ts_query_cursor__advance(
if (self->on_visible_node) {
TSSymbol symbol = ts_node_symbol(node);
bool is_named = ts_node_is_named(node);
bool is_missing = ts_node_is_missing(node);
bool has_later_siblings;
bool has_later_named_siblings;
bool can_have_later_siblings_with_this_field;
@ -3737,9 +3788,13 @@ static inline bool ts_query_cursor__advance(
// pattern.
bool node_does_match = false;
if (step->symbol == WILDCARD_SYMBOL) {
node_does_match = !node_is_error && (is_named || !step->is_named);
if (step->is_missing) {
node_does_match = is_missing;
} else {
node_does_match = !node_is_error && (is_named || !step->is_named);
}
} else {
node_does_match = symbol == step->symbol;
node_does_match = symbol == step->symbol && (!step->is_missing || is_missing);
}
bool later_sibling_can_match = has_later_siblings;
if ((step->is_immediate && is_named) || state->seeking_immediate_match) {

View file

@ -122,8 +122,15 @@ pub fn run_wasm() -> Result<()> {
bail_on_err(&output, "Failed to install test dependencies")?;
}
let output = Command::new(npm).arg("test").output()?;
bail_on_err(&output, &format!("Failed to run {npm} test"))?;
let child = Command::new(npm).arg("test").spawn()?;
let output = child.wait_with_output()?;
bail_on_err(&output, &format!("Failed to run `{npm} test`"))?;
// Display test results
let output = String::from_utf8_lossy(&output.stdout);
for line in output.lines() {
println!("{line}");
}
Ok(())
}