Terminate failed query matches before descending whenever possible
When iterating over captures, this prevents reasonable queries from forcing the tree cursor to buffer matches unnecessarily.
This commit is contained in:
parent
374a7ac81e
commit
186b08381c
4 changed files with 203 additions and 96 deletions
|
|
@ -1,6 +1,7 @@
|
|||
use super::helpers::allocations;
|
||||
use super::helpers::fixtures::get_language;
|
||||
use tree_sitter::{Node, Parser, Query, QueryCursor, QueryError, QueryMatch};
|
||||
use std::fmt::Write;
|
||||
|
||||
#[test]
|
||||
fn test_query_errors_on_invalid_syntax() {
|
||||
|
|
@ -632,6 +633,66 @@ fn test_query_captures_with_duplicates() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_captures_with_many_results() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("javascript");
|
||||
|
||||
// Search for key-value pairs whose values are anonymous functions.
|
||||
let query = Query::new(
|
||||
language,
|
||||
r#"
|
||||
(pair
|
||||
key: * @method-def
|
||||
value: (arrow_function))
|
||||
|
||||
":" @colon
|
||||
"," @comma
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// The `pair` node for key `y` does not match any pattern, but inside of
|
||||
// its value, it contains many other `pair` nodes that do match the pattern.
|
||||
// The match for the *outer* pair should be terminated *before* descending into
|
||||
// the object value, so that we can avoid needing to buffer all of the inner
|
||||
// matches.
|
||||
let method_count = 50;
|
||||
let mut source = "x = { y: {\n".to_owned();
|
||||
for i in 0..method_count {
|
||||
writeln!(&mut source, " method{}: $ => null,", i).unwrap();
|
||||
}
|
||||
source.push_str("}};\n");
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&source, None).unwrap();
|
||||
let mut cursor = QueryCursor::new();
|
||||
|
||||
let captures = cursor.captures(&query, tree.root_node(), to_callback(&source));
|
||||
let captures = collect_captures(captures, &query, &source);
|
||||
|
||||
assert_eq!(&captures[0..13], &[
|
||||
("colon", ":"),
|
||||
("method-def", "method0"),
|
||||
("colon", ":"),
|
||||
("comma", ","),
|
||||
("method-def", "method1"),
|
||||
("colon", ":"),
|
||||
("comma", ","),
|
||||
("method-def", "method2"),
|
||||
("colon", ":"),
|
||||
("comma", ","),
|
||||
("method-def", "method3"),
|
||||
("colon", ":"),
|
||||
("comma", ","),
|
||||
]);
|
||||
|
||||
// Ensure that we don't drop matches because of needing to buffer too many.
|
||||
assert_eq!(captures.len(), 1 + 3 * method_count);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_pattern_after_source_byte() {
|
||||
let language = get_language("javascript");
|
||||
|
|
|
|||
123
lib/src/query.c
123
lib/src/query.c
|
|
@ -138,11 +138,8 @@ static const uint16_t NONE = UINT16_MAX;
|
|||
static const TSSymbol WILDCARD_SYMBOL = 0;
|
||||
static const uint16_t MAX_STATE_COUNT = 32;
|
||||
|
||||
#ifdef DEBUG_QUERY
|
||||
#define LOG printf
|
||||
#else
|
||||
// #define LOG printf
|
||||
#define LOG(...)
|
||||
#endif
|
||||
|
||||
/**********
|
||||
* Stream
|
||||
|
|
@ -939,6 +936,8 @@ static QueryState *ts_query_cursor_copy_state(
|
|||
static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
||||
do {
|
||||
if (self->ascending) {
|
||||
LOG("leave node %s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
|
||||
|
||||
// When leaving a node, remove any unfinished states whose next step
|
||||
// needed to match something within that node.
|
||||
uint32_t deleted_count = 0;
|
||||
|
|
@ -948,7 +947,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
|
||||
if (state->start_depth + step->depth > self->depth) {
|
||||
LOG(
|
||||
"fail state with pattern: %u, step: %u\n",
|
||||
" failed to match. pattern:%u, step:%u\n",
|
||||
state->pattern_index,
|
||||
state->step_index
|
||||
);
|
||||
|
|
@ -963,10 +962,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
}
|
||||
}
|
||||
|
||||
if (deleted_count) {
|
||||
LOG("failed %u of %u states\n", deleted_count, self->states.size);
|
||||
self->states.size -= deleted_count;
|
||||
}
|
||||
self->states.size -= deleted_count;
|
||||
|
||||
if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
|
||||
self->ascending = false;
|
||||
|
|
@ -976,8 +972,13 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
return false;
|
||||
}
|
||||
} else {
|
||||
TSFieldId field_id = NONE;
|
||||
bool field_occurs_in_later_sibling = false;
|
||||
bool can_have_later_siblings;
|
||||
bool can_have_later_siblings_with_this_field;
|
||||
TSFieldId field_id = ts_tree_cursor_current_status(
|
||||
&self->cursor,
|
||||
&can_have_later_siblings,
|
||||
&can_have_later_siblings_with_this_field
|
||||
);
|
||||
TSNode node = ts_tree_cursor_current_node(&self->cursor);
|
||||
TSSymbol symbol = ts_node_symbol(node);
|
||||
|
||||
|
|
@ -999,27 +1000,22 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
point_lte(self->end_point, ts_node_start_point(node))
|
||||
) return false;
|
||||
|
||||
LOG("enter node %s\n", ts_node_type(node));
|
||||
LOG(
|
||||
"enter node %s. row:%u state_count:%u, finished_state_count: %u\n",
|
||||
ts_node_type(node),
|
||||
ts_node_start_point(node).row,
|
||||
self->states.size,
|
||||
self->finished_states.size
|
||||
);
|
||||
|
||||
// Add new states for any patterns whose root node is a wildcard.
|
||||
for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
|
||||
PatternEntry *slice = &self->query->pattern_map.contents[i];
|
||||
QueryStep *step = &self->query->steps.contents[slice->step_index];
|
||||
|
||||
if (step->field) {
|
||||
// Compute the current field id if it is needed and has not yet
|
||||
// been computed.
|
||||
if (field_id == NONE) {
|
||||
field_id = ts_tree_cursor_current_field_id_ext(
|
||||
&self->cursor,
|
||||
&field_occurs_in_later_sibling
|
||||
);
|
||||
}
|
||||
if (field_id != step->field) continue;
|
||||
}
|
||||
|
||||
// If this node matches the first step of the pattern, then add a new
|
||||
// state at the start of this pattern.
|
||||
if (step->field && field_id != step->field) continue;
|
||||
uint32_t capture_list_id = capture_list_pool_acquire(
|
||||
&self->capture_list_pool
|
||||
);
|
||||
|
|
@ -1039,19 +1035,9 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
PatternEntry *slice = &self->query->pattern_map.contents[i];
|
||||
QueryStep *step = &self->query->steps.contents[slice->step_index];
|
||||
do {
|
||||
if (step->field) {
|
||||
// Compute the current field id if it is needed and has not yet
|
||||
// been computed.
|
||||
if (field_id == NONE) {
|
||||
field_id = ts_tree_cursor_current_field_id_ext(
|
||||
&self->cursor,
|
||||
&field_occurs_in_later_sibling
|
||||
);
|
||||
}
|
||||
if (field_id != step->field) continue;
|
||||
}
|
||||
if (step->field && field_id != step->field) continue;
|
||||
|
||||
LOG("start pattern %u\n", slice->pattern_index);
|
||||
LOG(" start state. pattern:%u\n", slice->pattern_index);
|
||||
|
||||
// If this node matches the first step of the pattern, then add a
|
||||
// new in-progress state. First, acquire a list to hold the pattern's
|
||||
|
|
@ -1059,7 +1045,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
uint32_t capture_list_id = capture_list_pool_acquire(
|
||||
&self->capture_list_pool
|
||||
);
|
||||
if (capture_list_id == NONE) break;
|
||||
if (capture_list_id == NONE) {
|
||||
LOG(" too many states.");
|
||||
break;
|
||||
}
|
||||
|
||||
array_push(&self->states, ((QueryState) {
|
||||
.pattern_index = slice->pattern_index,
|
||||
|
|
@ -1084,19 +1073,40 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
QueryStep *step = &self->query->steps.contents[state->step_index];
|
||||
|
||||
// Check that the node matches all of the criteria for the next
|
||||
// step of the pattern.
|
||||
// step of the pattern.if (
|
||||
if (state->start_depth + step->depth != self->depth) continue;
|
||||
if (step->symbol && step->symbol != symbol) continue;
|
||||
|
||||
// Determine if this node matches this step of the pattern, and also
|
||||
// if this node can have later siblings that match this step of the
|
||||
// pattern.
|
||||
bool node_does_match = !step->symbol || step->symbol == symbol;
|
||||
bool later_sibling_can_match = can_have_later_siblings;
|
||||
if (step->field) {
|
||||
// Compute the current field id if it is needed and has not yet
|
||||
// been computed.
|
||||
if (field_id == NONE) {
|
||||
field_id = ts_tree_cursor_current_field_id_ext(
|
||||
&self->cursor,
|
||||
&field_occurs_in_later_sibling
|
||||
);
|
||||
if (step->field == field_id) {
|
||||
if (!node_does_match && !can_have_later_siblings_with_this_field) {
|
||||
later_sibling_can_match = false;
|
||||
}
|
||||
} else {
|
||||
node_does_match = false;
|
||||
}
|
||||
if (field_id != step->field) continue;
|
||||
}
|
||||
|
||||
if (!node_does_match) {
|
||||
if (!later_sibling_can_match) {
|
||||
LOG(
|
||||
" discard state. pattern:%u, step:%u\n",
|
||||
state->pattern_index,
|
||||
state->step_index
|
||||
);
|
||||
capture_list_pool_release(
|
||||
&self->capture_list_pool,
|
||||
state->capture_list_id
|
||||
);
|
||||
array_erase(&self->states, i);
|
||||
i--;
|
||||
n--;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Some patterns can match their root node in multiple ways,
|
||||
|
|
@ -1107,17 +1117,30 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
// this node, to preserve the possibility of capturing later
|
||||
// siblings.
|
||||
QueryState *next_state = state;
|
||||
if (step->depth > 0 && (!step->field || field_occurs_in_later_sibling)) {
|
||||
if (step->depth > 0 && later_sibling_can_match) {
|
||||
LOG(
|
||||
" split state. pattern:%u, step:%u\n",
|
||||
state->pattern_index,
|
||||
state->step_index
|
||||
);
|
||||
QueryState *copy = ts_query_cursor_copy_state(self, state);
|
||||
if (copy) next_state = copy;
|
||||
}
|
||||
|
||||
LOG("advance state for pattern %u\n", next_state->pattern_index);
|
||||
LOG(
|
||||
" advance state. pattern:%u, step:%u\n",
|
||||
next_state->pattern_index,
|
||||
next_state->step_index
|
||||
);
|
||||
|
||||
// If the current node is captured in this pattern, add it to the
|
||||
// capture list.
|
||||
if (step->capture_id != NONE) {
|
||||
LOG("capture id %u\n", step->capture_id);
|
||||
LOG(
|
||||
" capture node. pattern:%u, capture_id:%u\n",
|
||||
next_state->pattern_index,
|
||||
step->capture_id
|
||||
);
|
||||
TSQueryCapture *capture_list = capture_list_pool_get(
|
||||
&self->capture_list_pool,
|
||||
next_state->capture_list_id
|
||||
|
|
|
|||
|
|
@ -244,13 +244,74 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
|
|||
);
|
||||
}
|
||||
|
||||
static inline TSFieldId ts_tree_cursor__current_field_info(
|
||||
TSFieldId ts_tree_cursor_current_status(
|
||||
const TSTreeCursor *_self,
|
||||
const TSFieldMapEntry **field_map,
|
||||
const TSFieldMapEntry **field_map_end,
|
||||
uint32_t *child_index
|
||||
bool *can_have_later_siblings,
|
||||
bool *can_have_later_siblings_with_this_field
|
||||
) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
TSFieldId result = 0;
|
||||
*can_have_later_siblings = false;
|
||||
*can_have_later_siblings_with_this_field = false;
|
||||
|
||||
// Walk up the tree, visiting the current node and its invisible ancestors,
|
||||
// because fields can refer to nodes through invisible *wrapper* nodes,
|
||||
for (unsigned i = self->stack.size - 1; i > 0; i--) {
|
||||
TreeCursorEntry *entry = &self->stack.contents[i];
|
||||
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
|
||||
|
||||
// Stop walking up when a visible ancestor is found.
|
||||
if (i != self->stack.size - 1) {
|
||||
if (ts_subtree_visible(*entry->subtree)) break;
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id
|
||||
);
|
||||
if (alias_sequence && alias_sequence[entry->structural_child_index]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ts_subtree_child_count(*parent_entry->subtree) > entry->child_index) {
|
||||
*can_have_later_siblings = true;
|
||||
}
|
||||
|
||||
if (ts_subtree_extra(*entry->subtree)) break;
|
||||
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
ts_language_field_map(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id,
|
||||
&field_map, &field_map_end
|
||||
);
|
||||
|
||||
// Look for a field name associated with the current node.
|
||||
if (!result) {
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (!i->inherited && i->child_index == entry->structural_child_index) {
|
||||
result = i->field_id;
|
||||
*can_have_later_siblings_with_this_field = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if there other later siblings with the same field name.
|
||||
if (result) {
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (i->field_id == result && i->child_index > entry->structural_child_index) {
|
||||
*can_have_later_siblings_with_this_field = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
|
||||
// Walk up the tree, visiting the current node and its invisible ancestors.
|
||||
for (unsigned i = self->stack.size - 1; i > 0; i--) {
|
||||
|
|
@ -271,14 +332,14 @@ static inline TSFieldId ts_tree_cursor__current_field_info(
|
|||
|
||||
if (ts_subtree_extra(*entry->subtree)) break;
|
||||
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
ts_language_field_map(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id,
|
||||
field_map, field_map_end
|
||||
&field_map, &field_map_end
|
||||
);
|
||||
for (const TSFieldMapEntry *i = *field_map; i < *field_map_end; i++) {
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (!i->inherited && i->child_index == entry->structural_child_index) {
|
||||
*child_index = entry->structural_child_index;
|
||||
return i->field_id;
|
||||
}
|
||||
}
|
||||
|
|
@ -286,44 +347,6 @@ static inline TSFieldId ts_tree_cursor__current_field_info(
|
|||
return 0;
|
||||
}
|
||||
|
||||
TSFieldId ts_tree_cursor_current_field_id_ext(
|
||||
const TSTreeCursor *self,
|
||||
bool *field_has_additional
|
||||
) {
|
||||
uint32_t child_index;
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
TSFieldId field_id = ts_tree_cursor__current_field_info(
|
||||
self,
|
||||
&field_map,
|
||||
&field_map_end,
|
||||
&child_index
|
||||
);
|
||||
|
||||
// After finding the field, check if any other later children have
|
||||
// the same field name.
|
||||
if (field_id) {
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (i->field_id == field_id && i->child_index > child_index) {
|
||||
*field_has_additional = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return field_id;
|
||||
}
|
||||
|
||||
|
||||
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self) {
|
||||
uint32_t child_index;
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
return ts_tree_cursor__current_field_info(
|
||||
self,
|
||||
&field_map,
|
||||
&field_map_end,
|
||||
&child_index
|
||||
);
|
||||
}
|
||||
|
||||
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
|
||||
TSFieldId id = ts_tree_cursor_current_field_id(_self);
|
||||
if (id) {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,6 @@ typedef struct {
|
|||
} TreeCursor;
|
||||
|
||||
void ts_tree_cursor_init(TreeCursor *, TSNode);
|
||||
TSFieldId ts_tree_cursor_current_field_id_ext(const TSTreeCursor *, bool *);
|
||||
TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *);
|
||||
|
||||
#endif // TREE_SITTER_TREE_CURSOR_H_
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue