Fix handling of repetitions in query analysis

Fixes #1007
This commit is contained in:
Max Brunsfeld 2021-03-19 11:00:31 -07:00
parent 56c7c6b39d
commit 89e1157a29
2 changed files with 66 additions and 35 deletions

View file

@ -3,8 +3,8 @@ use lazy_static::lazy_static;
use std::env;
use std::fmt::Write;
use tree_sitter::{
allocations, Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError,
QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, Point,
allocations, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError,
QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty,
};
lazy_static! {
@ -1110,15 +1110,7 @@ fn test_query_matches_with_top_level_repetitions() {
fn test_query_matches_with_non_terminal_repetitions_within_root() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
r#"
(_
(expression_statement
(identifier) @id)+)
"#,
)
.unwrap();
let query = Query::new(language, "(_ (expression_statement (identifier) @id)+)").unwrap();
assert_query_matches(
language,
@ -1216,6 +1208,34 @@ fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pat
});
}
#[test]
fn test_query_matches_with_trailing_repetitions_of_last_child() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
"
(unary_expression (primary_expression)+ @operand)
",
)
.unwrap();
assert_query_matches(
language,
&query,
"
a = typeof (!b && ~c);
",
&[
(0, vec![("operand", "b")]),
(0, vec![("operand", "c")]),
(0, vec![("operand", "(!b && ~c)")]),
],
);
});
}
#[test]
fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() {
allocations::record(|| {
@ -1800,10 +1820,9 @@ fn test_query_matches_within_point_range() {
let mut cursor = QueryCursor::new();
let matches =
cursor
.set_point_range(Point::new(0, 0), Point::new(1, 3))
.matches(&query, tree.root_node(), to_callback(source));
let matches = cursor
.set_point_range(Point::new(0, 0), Point::new(1, 3))
.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
@ -1814,10 +1833,9 @@ fn test_query_matches_within_point_range() {
]
);
let matches =
cursor
.set_point_range(Point::new(1, 0), Point::new(2, 3))
.matches(&query, tree.root_node(), to_callback(source));
let matches = cursor
.set_point_range(Point::new(1, 0), Point::new(2, 3))
.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
@ -1828,10 +1846,9 @@ fn test_query_matches_within_point_range() {
]
);
let matches =
cursor
.set_point_range(Point::new(2, 1), Point::new(0, 0))
.matches(&query, tree.root_node(), to_callback(source));
let matches = cursor
.set_point_range(Point::new(2, 1), Point::new(0, 0))
.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),

View file

@ -909,7 +909,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
// For each non-terminal pattern, determine if the pattern can successfully match,
// and identify all of the possible children within the pattern where matching could fail.
bool result = true;
bool all_patterns_are_valid = true;
AnalysisStateSet states = array_new();
AnalysisStateSet next_states = array_new();
AnalysisStateSet deeper_states = array_new();
@ -930,7 +930,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &i, &exists);
assert(exists);
*error_offset = self->step_offsets.contents[i].byte_offset;
result = false;
all_patterns_are_valid = false;
break;
}
@ -1097,6 +1097,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
}
}
// Create a new state that has advanced past this hypothetical subtree.
AnalysisState next_state = *state;
analysis_state__top(&next_state)->child_index++;
analysis_state__top(&next_state)->parse_state = successor.state;
@ -1165,8 +1166,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
}
for (;;) {
// If this state can make further progress, then add it to the states for the next iteration.
// Otherwise, record the fact that matching can fail at this step of the pattern.
// Skip pass-through states. Although these states have alternatives, they are only
// used to implement repetitions, and query analysis does not need to process
// repetitions in order to determine whether steps are possible and definite.
if (next_step->is_pass_through) {
next_state.step_index++;
next_step++;
continue;
}
// If the pattern is finished or hypothetical parent node is complete, then
// record that matching can terminate at this step of the pattern. Otherwise,
// add this state to the list of states to process on the next iteration.
if (!next_step->is_dead_end) {
bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1;
if (did_finish_pattern) can_finish_pattern = true;
@ -1177,8 +1188,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
}
}
// If the state has advanced to a step with an alternative step, then add another state at
// that alternative step to the next iteration.
// If the state has advanced to a step with an alternative step, then add another state
// at that alternative step. This process is simpler than the process of actually matching a
// pattern during query exection, because for the purposes of query analysis, there is no
// need to process repetitions.
if (
does_match &&
next_step->alternative_index != NONE &&
@ -1228,14 +1241,14 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
// If this pattern cannot match, store the pattern index so that it can be
// returned to the caller.
if (result && !can_finish_pattern && !did_exceed_max_depth) {
if (all_patterns_are_valid && !can_finish_pattern && !did_exceed_max_depth) {
assert(final_step_indices.size > 0);
uint16_t impossible_step_index = *array_back(&final_step_indices);
uint32_t i, exists;
array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists);
assert(exists);
if (i >= self->step_offsets.size) i = self->step_offsets.size - 1;
*error_offset = self->step_offsets.contents[i].byte_offset;
result = false;
all_patterns_are_valid = false;
break;
}
}
@ -1348,7 +1361,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
array_delete(&predicate_capture_ids);
state_predecessor_map_delete(&predecessor_map);
return result;
return all_patterns_are_valid;
}
static void ts_query__finalize_steps(TSQuery *self) {
@ -1941,8 +1954,6 @@ static TSQueryError ts_query__parse_pattern(
// Parse suffixes modifiers for this pattern
for (;;) {
QueryStep *step = &self->steps.contents[starting_step_index];
// Parse the one-or-more operator.
if (stream->next == '+') {
stream_advance(stream);
@ -1966,6 +1977,7 @@ static TSQueryError ts_query__parse_pattern(
repeat_step.alternative_is_immediate = true;
array_push(&self->steps, repeat_step);
QueryStep *step = &self->steps.contents[starting_step_index];
while (step->alternative_index != NONE) {
step = &self->steps.contents[step->alternative_index];
}
@ -1977,6 +1989,7 @@ static TSQueryError ts_query__parse_pattern(
stream_advance(stream);
stream_skip_whitespace(stream);
QueryStep *step = &self->steps.contents[starting_step_index];
while (step->alternative_index != NONE) {
step = &self->steps.contents[step->alternative_index];
}
@ -2001,6 +2014,7 @@ static TSQueryError ts_query__parse_pattern(
uint32_t step_index = starting_step_index;
for (;;) {
QueryStep *step = &self->steps.contents[step_index];
query_step__add_capture(step, capture_id);
if (
step->alternative_index != NONE &&