Fix handling of alternations and optional nodes in query analysis

This commit is contained in:
Max Brunsfeld 2020-08-20 16:28:54 -07:00
parent 9daec9cb22
commit 456b1f6771
3 changed files with 190 additions and 52 deletions

View file

@ -1,11 +1,17 @@
use super::helpers::allocations;
use super::helpers::fixtures::get_language;
use lazy_static::lazy_static;
use std::env;
use std::fmt::Write;
use tree_sitter::{
Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryMatch,
QueryPredicate, QueryPredicateArg, QueryProperty,
};
lazy_static! {
// Optional filter taken from the `TREE_SITTER_TEST_EXAMPLE_FILTER` environment
// variable. `env::var(..).ok()` yields `None` when the variable cannot be read
// (unset or not valid Unicode), in which case no filtering is requested.
static ref EXAMPLE_FILTER: Option<String> = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok();
}
#[test]
fn test_query_errors_on_invalid_syntax() {
allocations::record(|| {
@ -234,6 +240,34 @@ fn test_query_errors_on_impossible_patterns() {
.join("\n")
))
);
Query::new(
js_lang,
"[
(function (identifier))
(function_declaration (identifier))
(generator_function_declaration (identifier))
]",
)
.unwrap();
assert_eq!(
Query::new(
js_lang,
"[
(function (identifier))
(function_declaration (object))
(generator_function_declaration (identifier))
]",
),
Err(QueryError::Structure(
3,
[
" (function_declaration (object))", //
" ^",
]
.join("\n")
))
);
});
}
@ -2322,37 +2356,92 @@ fn test_query_alternative_predicate_prefix() {
fn test_query_step_is_definite() {
struct Row {
language: Language,
description: &'static str,
pattern: &'static str,
results_by_substring: &'static [(&'static str, bool)],
}
let rows = &[
Row {
description: "no definite steps",
language: get_language("python"),
pattern: r#"(expression_statement (string))"#,
results_by_substring: &[("expression_statement", false), ("string", false)],
},
Row {
language: get_language("javascript"),
pattern: r#"(expression_statement (string))"#,
results_by_substring: &[("expression_statement", false), ("string", false)],
},
Row {
description: "all definite steps",
language: get_language("javascript"),
pattern: r#"(object "{" "}")"#,
results_by_substring: &[("object", false), ("{", true), ("}", true)],
},
Row {
description: "an indefinite step that is optional",
language: get_language("javascript"),
pattern: r#"(object "{" (identifier)? @foo "}")"#,
results_by_substring: &[
("object", false),
("{", true),
("(identifier)?", false),
("}", true),
],
},
Row {
description: "multiple indefinite steps that are optional",
language: get_language("javascript"),
pattern: r#"(object "{" (identifier)? @id1 ("," (identifier) @id2)? "}")"#,
results_by_substring: &[
("object", false),
("{", true),
("(identifier)? @id1", false),
("\",\"", false),
("}", true),
],
},
Row {
description: "definite step after indefinite step",
language: get_language("javascript"),
pattern: r#"(pair (property_identifier) ":")"#,
results_by_substring: &[("pair", false), ("property_identifier", false), (":", true)],
},
Row {
description: "indefinite step in between two definite steps",
language: get_language("javascript"),
pattern: r#"(object "{" (_) "}")"#,
results_by_substring: &[("object", false), ("{", false), ("", false), ("}", true)],
pattern: r#"(ternary_expression
condition: (_)
"?"
consequence: (call_expression)
":"
alternative: (_))"#,
results_by_substring: &[
("condition:", false),
("\"?\"", false),
("consequence:", false),
("\":\"", true),
("alternative:", true),
],
},
Row {
description: "one definite step after a repetition",
language: get_language("javascript"),
pattern: r#"(object "{" (_) "}")"#,
results_by_substring: &[("object", false), ("{", false), ("(_)", false), ("}", true)],
},
Row {
description: "definite steps after multiple repetitions",
language: get_language("json"),
pattern: r#"(object "{" (pair) "," (pair) "," (_) "}")"#,
results_by_substring: &[
("object", false),
("{", false),
("(pair) \",\" (pair)", false),
("(pair) \",\" (_)", false),
("\",\" (_)", false),
("(_)", true),
("}", true),
],
},
Row {
description: "a definite with a field",
language: get_language("javascript"),
pattern: r#"(binary_expression left: (identifier) right: (_))"#,
results_by_substring: &[
@ -2362,6 +2451,7 @@ fn test_query_step_is_definite() {
],
},
Row {
description: "multiple definite steps with fields",
language: get_language("javascript"),
pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#,
results_by_substring: &[
@ -2371,6 +2461,7 @@ fn test_query_step_is_definite() {
],
},
Row {
description: "nesting, one definite step",
language: get_language("javascript"),
pattern: r#"
(function_declaration
@ -2386,6 +2477,7 @@ fn test_query_step_is_definite() {
],
},
Row {
description: "definite step after some deeply nested hidden nodes",
language: get_language("ruby"),
pattern: r#"
(singleton_class
@ -2399,6 +2491,7 @@ fn test_query_step_is_definite() {
],
},
Row {
description: "nesting, no definite steps",
language: get_language("javascript"),
pattern: r#"
(call_expression
@ -2409,6 +2502,7 @@ fn test_query_step_is_definite() {
results_by_substring: &[("property_identifier", false), ("template_string", false)],
},
Row {
description: "a definite step after a nested node",
language: get_language("javascript"),
pattern: r#"
(subscript_expression
@ -2424,6 +2518,7 @@ fn test_query_step_is_definite() {
],
},
Row {
description: "a step that is indefinite due to a predicate",
language: get_language("javascript"),
pattern: r#"
(subscript_expression
@ -2439,17 +2534,45 @@ fn test_query_step_is_definite() {
("[", true),
],
},
Row {
description: "alternation where one branch has definite steps",
language: get_language("javascript"),
pattern: r#"
[
(unary_expression (identifier))
(call_expression
function: (_)
arguments: (_))
(binary_expression right:(call_expression))
]
"#,
results_by_substring: &[
("identifier", false),
("right:", false),
("function:", true),
("arguments:", true),
],
},
];
allocations::record(|| {
eprintln!("");
for row in rows.iter() {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
if !row.description.contains(filter.as_str()) {
continue;
}
}
eprintln!(" query example: {:?}", row.description);
let query = Query::new(row.language, row.pattern).unwrap();
for (substring, is_definite) in row.results_by_substring {
let offset = row.pattern.find(substring).unwrap();
assert_eq!(
query.step_is_definite(offset),
*is_definite,
"Pattern: {:?}, substring: {:?}, expected is_definite to be {}",
"Description: {}, Pattern: {:?}, substring: {:?}, expected is_definite to be {}",
row.description,
row.pattern
.split_ascii_whitespace()
.collect::<Vec<_>>()

View file

@ -1144,34 +1144,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
next_states = _states;
}
// A query step is definite if the containing pattern will definitely match
// once the step is reached. In other words, a step is *not* definite if
// it's possible to create a syntax node that matches up to until that step,
// but does not match the entire pattern.
uint32_t child_step_index = parent_step_index + 1;
QueryStep *child_step = &self->steps.contents[child_step_index];
while (child_step->depth == parent_depth + 1) {
// Check if there is any way for the pattern to reach this step, but fail
// to reach the end of the sub-pattern.
for (unsigned k = 0; k < final_step_indices.size; k++) {
uint32_t final_step_index = final_step_indices.contents[k];
if (
final_step_index >= child_step_index &&
self->steps.contents[final_step_index].depth == child_step->depth
) {
child_step->is_definite = false;
break;
}
// Mark as indefinite any step where a match terminated.
// Later, this property will be propagated to all of the step's predecessors.
for (unsigned j = 0; j < final_step_indices.size; j++) {
uint32_t final_step_index = final_step_indices.contents[j];
QueryStep *step = &self->steps.contents[final_step_index];
if (
step->depth != PATTERN_DONE_MARKER &&
step->depth > parent_depth &&
!step->is_dead_end
) {
step->is_definite = false;
}
// Advance to the next child step in this sub-pattern.
do {
child_step_index++;
child_step++;
} while (
child_step->depth != PATTERN_DONE_MARKER &&
child_step->depth > parent_depth + 1
);
}
// If this pattern cannot match, store the pattern index so that it can be
@ -1187,9 +1171,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
}
}
// In order for a step to be definite, all of its child steps must be definite,
// and all of its later sibling steps must be definite. Propagate any indefiniteness
// upward and backward through the pattern trees.
// Mark as indefinite any step with captures that are used in predicates.
Array(uint16_t) predicate_capture_ids = array_new();
for (unsigned i = 0; i < self->patterns.size; i++) {
QueryPattern *pattern = &self->patterns.contents[i];
@ -1207,16 +1189,13 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
}
}
bool all_later_children_definite = true;
// Find all of the steps that have these captures.
for (
unsigned start = pattern->steps.offset,
end = start + pattern->steps.length,
j = end - 1; j + 1 > start; j--
j = start; j < end; j++
) {
QueryStep *step = &self->steps.contents[j];
// If this step has a capture that is used in a predicate,
// then it is not definite.
for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
uint16_t capture_id = step->capture_ids[k];
if (capture_id == NONE) break;
@ -1227,10 +1206,41 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
break;
}
}
}
}
// If a step is not definite, then none of its predecessors can be definite.
if (!all_later_children_definite) step->is_definite = false;
if (!step->is_definite) all_later_children_definite = false;
// Propagate indefiniteness backwards.
bool done = self->steps.size == 0;
while (!done) {
done = true;
for (unsigned i = self->steps.size - 1; i > 0; i--) {
QueryStep *step = &self->steps.contents[i];
// Determine if this step is definite or has definite alternatives.
bool is_definite = false;
for (;;) {
if (step->is_definite) {
is_definite = true;
break;
}
if (step->alternative_index == NONE || step->alternative_index < i) {
break;
}
step = &self->steps.contents[step->alternative_index];
}
// If not, mark its predecessor as indefinite.
if (!is_definite) {
QueryStep *prev_step = &self->steps.contents[i - 1];
if (
!prev_step->is_dead_end &&
prev_step->depth != PATTERN_DONE_MARKER &&
prev_step->is_definite
) {
prev_step->is_definite = false;
done = false;
}
}
}
}
@ -1242,11 +1252,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
printf(" %u: DONE\n", i);
} else {
printf(
" %u: {symbol: %s, is_definite: %d}\n",
" %u: {symbol: %s, field: %s, is_definite: %d}\n",
i,
(step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL)
? "ANY"
: ts_language_symbol_name(self->language, step->symbol),
(step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
step->is_definite
);
}
@ -1979,7 +1990,7 @@ bool ts_query_step_is_definite(
uint32_t step_index = UINT32_MAX;
for (unsigned i = 0; i < self->step_offsets.size; i++) {
StepOffset *step_offset = &self->step_offsets.contents[i];
if (step_offset->byte_offset >= byte_offset) break;
if (step_offset->byte_offset > byte_offset) break;
step_index = step_offset->step_index;
}
if (step_index < self->steps.size) {

View file

@ -83,10 +83,14 @@ done
shift $(expr $OPTIND - 1)
if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || -n $TREE_SITTER_TEST_EXAMPLE_FILTER || -n $TREE_SITTER_TEST_TRIAL_FILTER ]]; then
top_level_filter=corpus
else
top_level_filter=$1
top_level_filter=$1
if [[ \
-n $TREE_SITTER_TEST_LANGUAGE_FILTER || \
-n $TREE_SITTER_TEST_EXAMPLE_FILTER || \
-n $TREE_SITTER_TEST_TRIAL_FILTER \
]]; then
echo ${top_level_filter:=corpus}
fi
if [[ "${mode}" == "debug" ]]; then