From 495fe2a6c54c0834b741f190d24bf174f783f106 Mon Sep 17 00:00:00 2001 From: Riley Bruins Date: Sat, 14 Dec 2024 11:57:36 -0800 Subject: [PATCH] feat: support querying missing nodes Co-authored-by: Amaan Qureshi --- Makefile | 6 +- cli/src/tests/query_test.rs | 98 +++++++++++++++++++++++++++++++++ docs/section-2-using-parsers.md | 23 ++++++++ lib/src/query.c | 59 +++++++++++++++++++- xtask/src/test.rs | 11 +++- 5 files changed, 190 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 7b3ae4a5..83c2f814 100644 --- a/Makefile +++ b/Makefile @@ -99,8 +99,8 @@ test: cargo xtask generate-fixtures cargo xtask test -test_wasm: - cargo xtask generate-fixtures-wasm +test-wasm: + cargo xtask generate-fixtures --wasm cargo xtask test-wasm lint: @@ -115,4 +115,4 @@ format: changelog: @git-cliff --config .github/cliff.toml --prepend CHANGELOG.md --latest --github-token $(shell gh auth token) -.PHONY: test test_wasm lint format changelog +.PHONY: test test-wasm lint format changelog diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index e1433c97..32b6136f 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -193,6 +193,36 @@ fn test_query_errors_on_invalid_syntax() { ] .join("\n") ); + + // MISSING keyword with full pattern + assert_eq!( + Query::new( + &get_language("c"), + r"(MISSING (function_declarator (identifier))) " + ) + .unwrap_err() + .message, + [ + r"(MISSING (function_declarator (identifier))) ", + r" ^", + ] + .join("\n") + ); + + // MISSING keyword with multiple identifiers + assert_eq!( + Query::new( + &get_language("c"), + r"(MISSING function_declarator function_declarator) " + ) + .unwrap_err() + .message, + [ + r"(MISSING function_declarator function_declarator) ", + r" ^", + ] + .join("\n") + ); }); } @@ -767,6 +797,74 @@ fn test_query_matches_capturing_error_nodes() { }); } +#[test] +fn test_query_matches_capturing_missing_nodes() { + allocations::record(|| { + let language = 
get_language("javascript"); + let query = Query::new( + &language, + r#" + (MISSING + ; Comments should be valid + ) @missing + (MISSING + ; Comments should be valid + ";" + ; Comments should be valid + ) @missing-semicolon + "#, + ) + .unwrap(); + + // Missing anonymous nodes + assert_query_matches( + &language, + &query, + " + x = function(a) { b; } function(c) { d; } + // ^ MISSING semicolon here + ", + &[ + (0, vec![("missing", "")]), + (1, vec![("missing-semicolon", "")]), + ], + ); + + let language = get_language("c"); + let query = Query::new( + &language, + "(MISSING field_identifier) @missing-field-ident + (MISSING identifier) @missing-ident + (MISSING) @missing-anything", + ) + .unwrap(); + + // Missing named nodes + assert_query_matches( + &language, + &query, + " + int main() { + if (a.) { + // ^ MISSING field_identifier here + b(); + c(); + + if (*) d(); + // ^ MISSING identifier here + } + } + ", + &[ + (0, vec![("missing-field-ident", "")]), + (2, vec![("missing-anything", "")]), + (1, vec![("missing-ident", "")]), + (2, vec![("missing-anything", "")]), + ], + ); + }); +} + #[test] fn test_query_matches_with_extra_children() { allocations::record(|| { diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 46c39616..7b54a44e 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -617,6 +617,29 @@ For example, this pattern would match any node inside a call: (call (_) @call.inner) ``` +#### Special Nodes + +When the parser encounters text it does not recognize, it represents this node +as `(ERROR)` in the syntax tree. These error nodes can be queried just like +normal nodes: + +```scheme +(ERROR) @error-node +``` + +Similarly, if a parser is able to recover from erroneous text by inserting a missing token and then reducing, it will insert that missing node in the final tree so long as that tree has the lowest error cost. 
These missing nodes appear as seemingly normal nodes in the tree, but they are zero tokens wide, and being missing is a property of the actual terminal node that was inserted rather than a separate kind of node. These special missing nodes can be queried using `(MISSING)`: + +```scheme +(MISSING) @missing-node +``` + +This is useful when attempting to detect all syntax errors in a given parse tree, since these missing nodes are not captured by `(ERROR)` queries. Specific missing node types can also be queried: + +```scheme +(MISSING identifier) @missing-identifier +(MISSING ";") @missing-semicolon +``` + #### Anchors The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors depending on where it's placed inside a query. diff --git a/lib/src/query.c b/lib/src/query.c index 9a83254e..8b8d5529 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -100,6 +100,7 @@ typedef struct { bool contains_captures: 1; bool root_pattern_guaranteed: 1; bool parent_pattern_guaranteed: 1; + bool is_missing: 1; } QueryStep; /* @@ -2313,6 +2314,7 @@ static TSQueryError ts_query__parse_pattern( // Otherwise, this parenthesis is the start of a named node. 
else { TSSymbol symbol; + bool is_missing = false; // Parse a normal node name if (stream_is_ident_start(stream)) { @@ -2323,6 +2325,51 @@ static TSQueryError ts_query__parse_pattern( // Parse the wildcard symbol if (length == 1 && node_name[0] == '_') { symbol = WILDCARD_SYMBOL; + } else if (length == strlen("MISSING") && !strncmp(node_name, "MISSING", length)) { /* the whole identifier must be "MISSING"; a bare strncmp over `length` bytes would also accept its prefixes ("M", "MISS", ...) */ + is_missing = true; + stream_skip_whitespace(stream); + + if (stream_is_ident_start(stream)) { + const char *missing_node_name = stream->input; + stream_scan_identifier(stream); + uint32_t missing_node_length = (uint32_t)(stream->input - missing_node_name); + symbol = ts_language_symbol_for_name( + self->language, + missing_node_name, + missing_node_length, + true + ); + if (!symbol) { + stream_reset(stream, missing_node_name); + return TSQueryErrorNodeType; + } + } + + else if (stream->next == '"') { + const char *string_start = stream->input; + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) return e; + + symbol = ts_language_symbol_for_name( + self->language, + self->string_buffer.contents, + self->string_buffer.size, + false + ); + if (!symbol) { + stream_reset(stream, string_start + 1); + return TSQueryErrorNodeType; + } + } + + else if (stream->next == ')') { + symbol = WILDCARD_SYMBOL; + } + + else { + stream_reset(stream, stream->input); + return TSQueryErrorSyntax; + } } else { @@ -2348,6 +2395,9 @@ static TSQueryError ts_query__parse_pattern( step->supertype_symbol = step->symbol; step->symbol = WILDCARD_SYMBOL; } + if (is_missing) { + step->is_missing = true; + } if (symbol == WILDCARD_SYMBOL) { step->is_named = true; } @@ -3641,6 +3691,7 @@ static inline bool ts_query_cursor__advance( if (self->on_visible_node) { TSSymbol symbol = ts_node_symbol(node); bool is_named = ts_node_is_named(node); + bool is_missing = ts_node_is_missing(node); bool has_later_siblings; bool has_later_named_siblings; bool can_have_later_siblings_with_this_field; @@ -3737,9 +3788,13 @@ static inline bool 
ts_query_cursor__advance( // pattern. bool node_does_match = false; if (step->symbol == WILDCARD_SYMBOL) { - node_does_match = !node_is_error && (is_named || !step->is_named); + if (step->is_missing) { + node_does_match = is_missing; + } else { + node_does_match = !node_is_error && (is_named || !step->is_named); + } } else { - node_does_match = symbol == step->symbol; + node_does_match = symbol == step->symbol && (!step->is_missing || is_missing); } bool later_sibling_can_match = has_later_siblings; if ((step->is_immediate && is_named) || state->seeking_immediate_match) { diff --git a/xtask/src/test.rs b/xtask/src/test.rs index 62abe37c..e8c4e86c 100644 --- a/xtask/src/test.rs +++ b/xtask/src/test.rs @@ -122,8 +122,15 @@ pub fn run_wasm() -> Result<()> { bail_on_err(&output, "Failed to install test dependencies")?; } - let output = Command::new(npm).arg("test").output()?; - bail_on_err(&output, &format!("Failed to run {npm} test"))?; + let child = Command::new(npm).arg("test").spawn()?; + let output = child.wait_with_output()?; + bail_on_err(&output, &format!("Failed to run `{npm} test`"))?; + + // Display test results + let output = String::from_utf8_lossy(&output.stdout); + for line in output.lines() { + println!("{line}"); + } Ok(()) }