Fix handling of alternations and optional nodes in query analysis
This commit is contained in:
parent
9daec9cb22
commit
456b1f6771
3 changed files with 190 additions and 52 deletions
|
|
@ -1,11 +1,17 @@
|
|||
use super::helpers::allocations;
|
||||
use super::helpers::fixtures::get_language;
|
||||
use lazy_static::lazy_static;
|
||||
use std::env;
|
||||
use std::fmt::Write;
|
||||
use tree_sitter::{
|
||||
Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryMatch,
|
||||
QueryPredicate, QueryPredicateArg, QueryProperty,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref EXAMPLE_FILTER: Option<String> = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_errors_on_invalid_syntax() {
|
||||
allocations::record(|| {
|
||||
|
|
@ -234,6 +240,34 @@ fn test_query_errors_on_impossible_patterns() {
|
|||
.join("\n")
|
||||
))
|
||||
);
|
||||
|
||||
Query::new(
|
||||
js_lang,
|
||||
"[
|
||||
(function (identifier))
|
||||
(function_declaration (identifier))
|
||||
(generator_function_declaration (identifier))
|
||||
]",
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
Query::new(
|
||||
js_lang,
|
||||
"[
|
||||
(function (identifier))
|
||||
(function_declaration (object))
|
||||
(generator_function_declaration (identifier))
|
||||
]",
|
||||
),
|
||||
Err(QueryError::Structure(
|
||||
3,
|
||||
[
|
||||
" (function_declaration (object))", //
|
||||
" ^",
|
||||
]
|
||||
.join("\n")
|
||||
))
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -2322,37 +2356,92 @@ fn test_query_alternative_predicate_prefix() {
|
|||
fn test_query_step_is_definite() {
|
||||
struct Row {
|
||||
language: Language,
|
||||
description: &'static str,
|
||||
pattern: &'static str,
|
||||
results_by_substring: &'static [(&'static str, bool)],
|
||||
}
|
||||
|
||||
let rows = &[
|
||||
Row {
|
||||
description: "no definite steps",
|
||||
language: get_language("python"),
|
||||
pattern: r#"(expression_statement (string))"#,
|
||||
results_by_substring: &[("expression_statement", false), ("string", false)],
|
||||
},
|
||||
Row {
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(expression_statement (string))"#,
|
||||
results_by_substring: &[("expression_statement", false), ("string", false)],
|
||||
},
|
||||
Row {
|
||||
description: "all definite steps",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(object "{" "}")"#,
|
||||
results_by_substring: &[("object", false), ("{", true), ("}", true)],
|
||||
},
|
||||
Row {
|
||||
description: "an indefinite step that is optional",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(object "{" (identifier)? @foo "}")"#,
|
||||
results_by_substring: &[
|
||||
("object", false),
|
||||
("{", true),
|
||||
("(identifier)?", false),
|
||||
("}", true),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "multiple indefinite steps that are optional",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(object "{" (identifier)? @id1 ("," (identifier) @id2)? "}")"#,
|
||||
results_by_substring: &[
|
||||
("object", false),
|
||||
("{", true),
|
||||
("(identifier)? @id1", false),
|
||||
("\",\"", false),
|
||||
("}", true),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "definite step after indefinite step",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(pair (property_identifier) ":")"#,
|
||||
results_by_substring: &[("pair", false), ("property_identifier", false), (":", true)],
|
||||
},
|
||||
Row {
|
||||
description: "indefinite step in between two definite steps",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(object "{" (_) "}")"#,
|
||||
results_by_substring: &[("object", false), ("{", false), ("", false), ("}", true)],
|
||||
pattern: r#"(ternary_expression
|
||||
condition: (_)
|
||||
"?"
|
||||
consequence: (call_expression)
|
||||
":"
|
||||
alternative: (_))"#,
|
||||
results_by_substring: &[
|
||||
("condition:", false),
|
||||
("\"?\"", false),
|
||||
("consequence:", false),
|
||||
("\":\"", true),
|
||||
("alternative:", true),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "one definite step after a repetition",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(object "{" (_) "}")"#,
|
||||
results_by_substring: &[("object", false), ("{", false), ("(_)", false), ("}", true)],
|
||||
},
|
||||
Row {
|
||||
description: "definite steps after multiple repetitions",
|
||||
language: get_language("json"),
|
||||
pattern: r#"(object "{" (pair) "," (pair) "," (_) "}")"#,
|
||||
results_by_substring: &[
|
||||
("object", false),
|
||||
("{", false),
|
||||
("(pair) \",\" (pair)", false),
|
||||
("(pair) \",\" (_)", false),
|
||||
("\",\" (_)", false),
|
||||
("(_)", true),
|
||||
("}", true),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "a definite with a field",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(binary_expression left: (identifier) right: (_))"#,
|
||||
results_by_substring: &[
|
||||
|
|
@ -2362,6 +2451,7 @@ fn test_query_step_is_definite() {
|
|||
],
|
||||
},
|
||||
Row {
|
||||
description: "multiple definite steps with fields",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#,
|
||||
results_by_substring: &[
|
||||
|
|
@ -2371,6 +2461,7 @@ fn test_query_step_is_definite() {
|
|||
],
|
||||
},
|
||||
Row {
|
||||
description: "nesting, one definite step",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(function_declaration
|
||||
|
|
@ -2386,6 +2477,7 @@ fn test_query_step_is_definite() {
|
|||
],
|
||||
},
|
||||
Row {
|
||||
description: "definite step after some deeply nested hidden nodes",
|
||||
language: get_language("ruby"),
|
||||
pattern: r#"
|
||||
(singleton_class
|
||||
|
|
@ -2399,6 +2491,7 @@ fn test_query_step_is_definite() {
|
|||
],
|
||||
},
|
||||
Row {
|
||||
description: "nesting, no definite steps",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(call_expression
|
||||
|
|
@ -2409,6 +2502,7 @@ fn test_query_step_is_definite() {
|
|||
results_by_substring: &[("property_identifier", false), ("template_string", false)],
|
||||
},
|
||||
Row {
|
||||
description: "a definite step after a nested node",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(subscript_expression
|
||||
|
|
@ -2424,6 +2518,7 @@ fn test_query_step_is_definite() {
|
|||
],
|
||||
},
|
||||
Row {
|
||||
description: "a step that is indefinite due to a predicate",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(subscript_expression
|
||||
|
|
@ -2439,17 +2534,45 @@ fn test_query_step_is_definite() {
|
|||
("[", true),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "alternation where one branch has definite steps",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
[
|
||||
(unary_expression (identifier))
|
||||
(call_expression
|
||||
function: (_)
|
||||
arguments: (_))
|
||||
(binary_expression right:(call_expression))
|
||||
]
|
||||
"#,
|
||||
results_by_substring: &[
|
||||
("identifier", false),
|
||||
("right:", false),
|
||||
("function:", true),
|
||||
("arguments:", true),
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
allocations::record(|| {
|
||||
eprintln!("");
|
||||
|
||||
for row in rows.iter() {
|
||||
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
|
||||
if !row.description.contains(filter.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
eprintln!(" query example: {:?}", row.description);
|
||||
let query = Query::new(row.language, row.pattern).unwrap();
|
||||
for (substring, is_definite) in row.results_by_substring {
|
||||
let offset = row.pattern.find(substring).unwrap();
|
||||
assert_eq!(
|
||||
query.step_is_definite(offset),
|
||||
*is_definite,
|
||||
"Pattern: {:?}, substring: {:?}, expected is_definite to be {}",
|
||||
"Description: {}, Pattern: {:?}, substring: {:?}, expected is_definite to be {}",
|
||||
row.description,
|
||||
row.pattern
|
||||
.split_ascii_whitespace()
|
||||
.collect::<Vec<_>>()
|
||||
|
|
|
|||
|
|
@ -1144,34 +1144,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
|
|||
next_states = _states;
|
||||
}
|
||||
|
||||
// A query step is definite if the containing pattern will definitely match
|
||||
// once the step is reached. In other words, a step is *not* definite if
|
||||
// it's possible to create a syntax node that matches up to until that step,
|
||||
// but does not match the entire pattern.
|
||||
uint32_t child_step_index = parent_step_index + 1;
|
||||
QueryStep *child_step = &self->steps.contents[child_step_index];
|
||||
while (child_step->depth == parent_depth + 1) {
|
||||
// Check if there is any way for the pattern to reach this step, but fail
|
||||
// to reach the end of the sub-pattern.
|
||||
for (unsigned k = 0; k < final_step_indices.size; k++) {
|
||||
uint32_t final_step_index = final_step_indices.contents[k];
|
||||
if (
|
||||
final_step_index >= child_step_index &&
|
||||
self->steps.contents[final_step_index].depth == child_step->depth
|
||||
) {
|
||||
child_step->is_definite = false;
|
||||
break;
|
||||
}
|
||||
// Mark as indefinite any step where a match terminated.
|
||||
// Later, this property will be propagated to all of the step's predecessors.
|
||||
for (unsigned j = 0; j < final_step_indices.size; j++) {
|
||||
uint32_t final_step_index = final_step_indices.contents[j];
|
||||
QueryStep *step = &self->steps.contents[final_step_index];
|
||||
if (
|
||||
step->depth != PATTERN_DONE_MARKER &&
|
||||
step->depth > parent_depth &&
|
||||
!step->is_dead_end
|
||||
) {
|
||||
step->is_definite = false;
|
||||
}
|
||||
|
||||
// Advance to the next child step in this sub-pattern.
|
||||
do {
|
||||
child_step_index++;
|
||||
child_step++;
|
||||
} while (
|
||||
child_step->depth != PATTERN_DONE_MARKER &&
|
||||
child_step->depth > parent_depth + 1
|
||||
);
|
||||
}
|
||||
|
||||
// If this pattern cannot match, store the pattern index so that it can be
|
||||
|
|
@ -1187,9 +1171,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
|
|||
}
|
||||
}
|
||||
|
||||
// In order for a step to be definite, all of its child steps must be definite,
|
||||
// and all of its later sibling steps must be definite. Propagate any indefiniteness
|
||||
// upward and backward through the pattern trees.
|
||||
// Mark as indefinite any step with captures that are used in predicates.
|
||||
Array(uint16_t) predicate_capture_ids = array_new();
|
||||
for (unsigned i = 0; i < self->patterns.size; i++) {
|
||||
QueryPattern *pattern = &self->patterns.contents[i];
|
||||
|
|
@ -1207,16 +1189,13 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
|
|||
}
|
||||
}
|
||||
|
||||
bool all_later_children_definite = true;
|
||||
// Find all of the steps that have these captures.
|
||||
for (
|
||||
unsigned start = pattern->steps.offset,
|
||||
end = start + pattern->steps.length,
|
||||
j = end - 1; j + 1 > start; j--
|
||||
j = start; j < end; j++
|
||||
) {
|
||||
QueryStep *step = &self->steps.contents[j];
|
||||
|
||||
// If this step has a capture that is used in a predicate,
|
||||
// then it is not definite.
|
||||
for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
|
||||
uint16_t capture_id = step->capture_ids[k];
|
||||
if (capture_id == NONE) break;
|
||||
|
|
@ -1227,10 +1206,41 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a step is not definite, then none of its predecessors can be definite.
|
||||
if (!all_later_children_definite) step->is_definite = false;
|
||||
if (!step->is_definite) all_later_children_definite = false;
|
||||
// Propagate indefiniteness backwards.
|
||||
bool done = self->steps.size == 0;
|
||||
while (!done) {
|
||||
done = true;
|
||||
for (unsigned i = self->steps.size - 1; i > 0; i--) {
|
||||
QueryStep *step = &self->steps.contents[i];
|
||||
|
||||
// Determine if this step is definite or has definite alternatives.
|
||||
bool is_definite = false;
|
||||
for (;;) {
|
||||
if (step->is_definite) {
|
||||
is_definite = true;
|
||||
break;
|
||||
}
|
||||
if (step->alternative_index == NONE || step->alternative_index < i) {
|
||||
break;
|
||||
}
|
||||
step = &self->steps.contents[step->alternative_index];
|
||||
}
|
||||
|
||||
// If not, mark its predecessor as indefinite.
|
||||
if (!is_definite) {
|
||||
QueryStep *prev_step = &self->steps.contents[i - 1];
|
||||
if (
|
||||
!prev_step->is_dead_end &&
|
||||
prev_step->depth != PATTERN_DONE_MARKER &&
|
||||
prev_step->is_definite
|
||||
) {
|
||||
prev_step->is_definite = false;
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1242,11 +1252,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
|
|||
printf(" %u: DONE\n", i);
|
||||
} else {
|
||||
printf(
|
||||
" %u: {symbol: %s, is_definite: %d}\n",
|
||||
" %u: {symbol: %s, field: %s, is_definite: %d}\n",
|
||||
i,
|
||||
(step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL)
|
||||
? "ANY"
|
||||
: ts_language_symbol_name(self->language, step->symbol),
|
||||
(step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
|
||||
step->is_definite
|
||||
);
|
||||
}
|
||||
|
|
@ -1979,7 +1990,7 @@ bool ts_query_step_is_definite(
|
|||
uint32_t step_index = UINT32_MAX;
|
||||
for (unsigned i = 0; i < self->step_offsets.size; i++) {
|
||||
StepOffset *step_offset = &self->step_offsets.contents[i];
|
||||
if (step_offset->byte_offset >= byte_offset) break;
|
||||
if (step_offset->byte_offset > byte_offset) break;
|
||||
step_index = step_offset->step_index;
|
||||
}
|
||||
if (step_index < self->steps.size) {
|
||||
|
|
|
|||
12
script/test
12
script/test
|
|
@ -83,10 +83,14 @@ done
|
|||
|
||||
shift $(expr $OPTIND - 1)
|
||||
|
||||
if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || -n $TREE_SITTER_TEST_EXAMPLE_FILTER || -n $TREE_SITTER_TEST_TRIAL_FILTER ]]; then
|
||||
top_level_filter=corpus
|
||||
else
|
||||
top_level_filter=$1
|
||||
top_level_filter=$1
|
||||
|
||||
if [[ \
|
||||
-n $TREE_SITTER_TEST_LANGUAGE_FILTER || \
|
||||
-n $TREE_SITTER_TEST_EXAMPLE_FILTER || \
|
||||
-n $TREE_SITTER_TEST_TRIAL_FILTER \
|
||||
]]; then
|
||||
echo ${top_level_filter:=corpus}
|
||||
fi
|
||||
|
||||
if [[ "${mode}" == "debug" ]]; then
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue