Use is_definite flag in ts_query_cursor_next_capture

This commit is contained in:
Max Brunsfeld 2020-08-17 16:50:59 -07:00
parent 228a9e28e1
commit 91fc9f5399
2 changed files with 158 additions and 87 deletions

View file

@ -1952,6 +1952,54 @@ fn test_query_captures_with_too_many_nested_results() {
});
}
#[test]
fn test_query_captures_with_definite_pattern_containing_many_nested_matches() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
r#"
(array
"[" @l-bracket
"]" @r-bracket)
"." @dot
"#,
)
.unwrap();
// The '[' node must be returned before all of the '.' nodes,
// even though its pattern does not finish until the ']' node
// at the end of the document. But because the '[' is definite,
// it can be returned before the pattern finishes matching.
let source = "
[
a.b.c.d.e.f.g.h.i,
a.b.c.d.e.f.g.h.i,
a.b.c.d.e.f.g.h.i,
a.b.c.d.e.f.g.h.i,
a.b.c.d.e.f.g.h.i,
]
";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let captures = cursor.captures(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_captures(captures, &query, source),
[("l-bracket", "[")]
.iter()
.chain([("dot", "."); 40].iter())
.chain([("r-bracket", "]")].iter())
.cloned()
.collect::<Vec<_>>(),
);
});
}
#[test]
fn test_query_captures_ordered_by_both_start_and_end_positions() {
allocations::record(|| {

View file

@ -2034,7 +2034,8 @@ static bool ts_query_cursor__first_in_progress_capture(
TSQueryCursor *self,
uint32_t *state_index,
uint32_t *byte_offset,
uint32_t *pattern_index
uint32_t *pattern_index,
bool *is_definite
) {
bool result = false;
*state_index = UINT32_MAX;
@ -2047,13 +2048,20 @@ static bool ts_query_cursor__first_in_progress_capture(
&self->capture_list_pool,
state->capture_list_id
);
if (captures->size > 0) {
uint32_t capture_byte = ts_node_start_byte(captures->contents[0].node);
if (captures->size > state->consumed_capture_count) {
uint32_t capture_byte = ts_node_start_byte(captures->contents[state->consumed_capture_count].node);
if (
!result ||
capture_byte < *byte_offset ||
(capture_byte == *byte_offset && state->pattern_index < *pattern_index)
) {
QueryStep *step = &self->query->steps.contents[state->step_index];
if (is_definite) {
*is_definite = step->is_definite;
} else if (step->is_definite) {
continue;
}
result = true;
*state_index = i;
*byte_offset = capture_byte;
@ -2216,7 +2224,8 @@ static CaptureList *ts_query_cursor__prepare_to_capture(
self,
&state_index,
&byte_offset,
&pattern_index
&pattern_index,
NULL
) &&
state_index != state_index_to_preserve
) {
@ -2275,7 +2284,10 @@ static QueryState *ts_query_cursor__copy_state(
// If one or more patterns finish, return `true` and store their states in the
// `finished_states` array. Multiple patterns can finish on the same node. If
// there are no more matches, return `false`.
static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
static inline bool ts_query_cursor__advance(
TSQueryCursor *self,
bool stop_on_definite_step
) {
bool did_match = false;
for (;;) {
if (self->halted) {
@ -2290,6 +2302,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
if (did_match || self->halted) return did_match;
// Exit the current node.
if (self->ascending) {
LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
@ -2342,7 +2355,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
}
}
self->states.size -= deleted_count;
} else {
}
// Enter a new node.
else {
// If this node is before the selected range, then avoid descending into it.
TSNode node = ts_tree_cursor_current_node(&self->cursor);
if (
@ -2516,6 +2532,9 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
state->step_index
);
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (stop_on_definite_step && next_step->is_definite) did_match = true;
// If this state's next step has an alternative step, then copy the state in order
// to pursue both alternatives. The alternative step itself may have an alternative,
// so this is an interative process.
@ -2660,7 +2679,7 @@ bool ts_query_cursor_next_match(
TSQueryMatch *match
) {
if (self->finished_states.size == 0) {
if (!ts_query_cursor__advance(self)) {
if (!ts_query_cursor__advance(self, false)) {
return false;
}
}
@ -2701,99 +2720,103 @@ bool ts_query_cursor_next_capture(
TSQueryMatch *match,
uint32_t *capture_index
) {
// The goal here is to return captures in order, even though they may not
// be discovered in order, because patterns can overlap. Search for matches
// until there is a finished capture that is before any unfinished capture.
for (;;) {
// The goal here is to return captures in order, even though they may not
// be discovered in order, because patterns can overlap. If there are any
// finished patterns, then try to find one that contains a capture that
// is *definitely* before any capture in an *unfinished* pattern.
if (self->finished_states.size > 0) {
// First, identify the position of the earliest capture in an unfinished
// match. For a finished capture to be returned, it must be *before*
// this position.
uint32_t first_unfinished_capture_byte;
uint32_t first_unfinished_pattern_index;
uint32_t first_unfinished_state_index;
ts_query_cursor__first_in_progress_capture(
self,
&first_unfinished_state_index,
&first_unfinished_capture_byte,
&first_unfinished_pattern_index
// First, find the earliest capture in an unfinished match.
uint32_t first_unfinished_capture_byte;
uint32_t first_unfinished_pattern_index;
uint32_t first_unfinished_state_index;
bool first_unfinished_state_is_definite = false;
ts_query_cursor__first_in_progress_capture(
self,
&first_unfinished_state_index,
&first_unfinished_capture_byte,
&first_unfinished_pattern_index,
&first_unfinished_state_is_definite
);
// Then find the earliest capture in a finished match. It must occur
// before the first capture in an *unfinished* match.
QueryState *first_finished_state = NULL;
uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
for (unsigned i = 0; i < self->finished_states.size; i++) {
QueryState *state = &self->finished_states.contents[i];
const CaptureList *captures = capture_list_pool_get(
&self->capture_list_pool,
state->capture_list_id
);
// Find the earliest capture in a finished match.
int first_finished_state_index = -1;
uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
for (unsigned i = 0; i < self->finished_states.size; i++) {
const QueryState *state = &self->finished_states.contents[i];
const CaptureList *captures = capture_list_pool_get(
&self->capture_list_pool,
state->capture_list_id
if (captures->size > state->consumed_capture_count) {
uint32_t capture_byte = ts_node_start_byte(
captures->contents[state->consumed_capture_count].node
);
if (captures->size > state->consumed_capture_count) {
uint32_t capture_byte = ts_node_start_byte(
captures->contents[state->consumed_capture_count].node
);
if (
capture_byte < first_finished_capture_byte ||
(
capture_byte == first_finished_capture_byte &&
state->pattern_index < first_finished_pattern_index
)
) {
first_finished_state_index = i;
first_finished_capture_byte = capture_byte;
first_finished_pattern_index = state->pattern_index;
}
} else {
capture_list_pool_release(
&self->capture_list_pool,
state->capture_list_id
);
array_erase(&self->finished_states, i);
i--;
if (
capture_byte < first_finished_capture_byte ||
(
capture_byte == first_finished_capture_byte &&
state->pattern_index < first_finished_pattern_index
)
) {
first_finished_state = state;
first_finished_capture_byte = capture_byte;
first_finished_pattern_index = state->pattern_index;
}
}
// If there is finished capture that is clearly before any unfinished
// capture, then return its match, and its capture index. Internally
// record the fact that the capture has been 'consumed'.
if (first_finished_state_index != -1) {
QueryState *state = &self->finished_states.contents[
first_finished_state_index
];
match->id = state->id;
match->pattern_index = state->pattern_index;
const CaptureList *captures = capture_list_pool_get(
&self->capture_list_pool,
state->capture_list_id
);
match->captures = captures->contents;
match->capture_count = captures->size;
*capture_index = state->consumed_capture_count;
state->consumed_capture_count++;
return true;
}
if (capture_list_pool_is_empty(&self->capture_list_pool)) {
LOG(
" abandon state. index:%u, pattern:%u, offset:%u.\n",
first_unfinished_state_index,
first_unfinished_pattern_index,
first_unfinished_capture_byte
);
} else {
capture_list_pool_release(
&self->capture_list_pool,
self->states.contents[first_unfinished_state_index].capture_list_id
state->capture_list_id
);
array_erase(&self->states, first_unfinished_state_index);
array_erase(&self->finished_states, i);
i--;
}
}
// If there is finished capture that is clearly before any unfinished
// capture, then return its match, and its capture index. Internally
// record the fact that the capture has been 'consumed'.
QueryState *state;
if (first_finished_state) {
state = first_finished_state;
} else if (first_unfinished_state_is_definite) {
state = &self->states.contents[first_unfinished_state_index];
} else {
state = NULL;
}
if (state) {
match->id = state->id;
match->pattern_index = state->pattern_index;
const CaptureList *captures = capture_list_pool_get(
&self->capture_list_pool,
state->capture_list_id
);
match->captures = captures->contents;
match->capture_count = captures->size;
*capture_index = state->consumed_capture_count;
state->consumed_capture_count++;
return true;
}
if (capture_list_pool_is_empty(&self->capture_list_pool)) {
LOG(
" abandon state. index:%u, pattern:%u, offset:%u.\n",
first_unfinished_state_index,
first_unfinished_pattern_index,
first_unfinished_capture_byte
);
capture_list_pool_release(
&self->capture_list_pool,
self->states.contents[first_unfinished_state_index].capture_list_id
);
array_erase(&self->states, first_unfinished_state_index);
}
// If there are no finished matches that are ready to be returned, then
// continue finding more matches.
if (
!ts_query_cursor__advance(self) &&
!ts_query_cursor__advance(self, true) &&
self->finished_states.size == 0
) return false;
}