query: Allow configurable match limit

The default is now a whopping 64K matches, which "should be enough for
everyone".  You can use the new `ts_query_cursor_set_match_limit`
function to set this to a lower limit, such as the previous default of
32.
This commit is contained in:
Douglas Creager 2021-06-02 11:24:58 -04:00
parent 78010722a4
commit cd96552448
5 changed files with 106 additions and 23 deletions

View file

@ -1625,7 +1625,6 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() {
});
}
/*
#[test]
fn test_query_matches_with_too_many_permutations_to_track() {
allocations::record(|| {
@ -1646,6 +1645,7 @@ fn test_query_matches_with_too_many_permutations_to_track() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
// For this pathological query, some match permutations will be dropped.
@ -1687,6 +1687,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
assert_eq!(
@ -1696,7 +1697,6 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
assert_eq!(cursor.did_exceed_match_limit(), true);
});
}
*/
#[test]
fn test_query_matches_with_anonymous_tokens() {
@ -2704,7 +2704,6 @@ fn test_query_captures_with_many_nested_results_with_fields() {
});
}
/*
#[test]
fn test_query_captures_with_too_many_nested_results() {
allocations::record(|| {
@ -2768,6 +2767,7 @@ fn test_query_captures_with_too_many_nested_results() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let captures = cursor.captures(&query, tree.root_node(), to_callback(&source));
let captures = collect_captures(captures, &query, &source);
@ -2795,7 +2795,6 @@ fn test_query_captures_with_too_many_nested_results() {
);
});
}
*/
#[test]
fn test_query_captures_with_definite_pattern_containing_many_nested_matches() {

View file

@ -726,15 +726,27 @@ extern "C" {
pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode);
}
extern "C" {
#[doc = " Check if this cursor has exceeded its maximum number of in-progress"]
#[doc = " matches."]
#[doc = " Manage the maximum number of in-progress matches allowed by this query"]
#[doc = " cursor."]
#[doc = ""]
#[doc = " Currently, query cursors have a fixed capacity for storing lists"]
#[doc = " of in-progress captures. If this capacity is exceeded, then the"]
#[doc = " earliest-starting match will silently be dropped to make room for"]
#[doc = " further matches."]
#[doc = " Query cursors have a maximum capacity for storing lists of in-progress"]
#[doc = " captures. If this capacity is exceeded, then the earliest-starting match will"]
#[doc = " silently be dropped to make room for further matches."]
#[doc = ""]
#[doc = " By default, this limit is 65,536 pending matches, which is effectively"]
#[doc = " unlimited for most queries and syntax trees. You can optionally set this to a"]
#[doc = " lower number if you want to have (and check) a tighter bound on query"]
#[doc = " complexity."]
#[doc = ""]
#[doc = " If you update the match limit, it must be > 0 and <= 65536."]
pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool;
}
extern "C" {
pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32;
}
extern "C" {
pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32);
}
extern "C" {
#[doc = " Set the range of bytes or (row, column) positions in which the query"]
#[doc = " will be executed."]

View file

@ -1598,6 +1598,19 @@ impl<'a> QueryCursor {
QueryCursor(unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) })
}
/// Return the maximum number of in-progress matches for this cursor.
pub fn match_limit(&self) -> u32 {
unsafe { ffi::ts_query_cursor_match_limit(self.0.as_ptr()) }
}
/// Set the maximum number of in-progress matches for this cursor. The limit must be > 0 and
/// <= 65536.
pub fn set_match_limit(&mut self, limit: u32) {
unsafe {
ffi::ts_query_cursor_set_match_limit(self.0.as_ptr(), limit);
}
}
/// Check if, on its last execution, this cursor exceeded its maximum number of
/// in-progress matches.
pub fn did_exceed_match_limit(&self) -> bool {

View file

@ -798,15 +798,23 @@ void ts_query_cursor_delete(TSQueryCursor *);
void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
/**
* Check if this cursor has exceeded its maximum number of in-progress
* matches.
* Manage the maximum number of in-progress matches allowed by this query
* cursor.
*
* Currently, query cursors have a fixed capacity for storing lists
* of in-progress captures. If this capacity is exceeded, then the
* earliest-starting match will silently be dropped to make room for
* further matches.
* Query cursors have a maximum capacity for storing lists of in-progress
* captures. If this capacity is exceeded, then the earliest-starting match will
* silently be dropped to make room for further matches.
*
* By default, this limit is 65,536 pending matches, which is effectively
* unlimited for most queries and syntax trees. You can optionally set this to a
* lower number if you want to have (and check) a tighter bound on query
* complexity.
*
* If you update the match limit, it must be > 0 and <= 65536.
*/
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *);
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *);
void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t);
/**
* Set the range of bytes or (row, column) positions in which the query

View file

@ -175,6 +175,15 @@ typedef Array(CaptureList) CaptureListPoolEntry;
typedef struct {
CaptureListPoolEntry list;
CaptureList empty_list;
// The maximum number of capture lists that we are allowed to allocate. We
// never allow `list` to allocate more entries than this, dropping pending
// matches if needed to stay under the limit.
uint32_t max_capture_list_count;
// The number of capture lists allocated in `list` that are not currently in
// use. We reuse those existing-but-unused capture lists before trying to
// allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
// list's length to indicate that it's not in use.
uint16_t free_capture_list_count;
} CaptureListPool;
/*
@ -358,6 +367,10 @@ static CaptureListPool capture_list_pool_new(void) {
return (CaptureListPool) {
.list = array_new(),
.empty_list = array_new(),
// The maximum maxmimum is 64K, since we use `uint16_t` as our capture list
// index type.
.max_capture_list_count = 65536,
.free_capture_list_count = 0,
};
}
@ -366,6 +379,7 @@ static void capture_list_pool_reset(CaptureListPool *self) {
// This invalid size means that the list is not in use.
self->list.contents[i].size = UINT32_MAX;
}
self->free_capture_list_count = self->list.size;
}
static void capture_list_pool_delete(CaptureListPool *self) {
@ -385,17 +399,30 @@ static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id
return &self->list.contents[id];
}
static bool capture_list_pool_is_empty(const CaptureListPool *self) {
// The capture list pool is empty if all allocated lists are in use, and we
// have reached the maximum allowed number of allocated lists.
return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
}
static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
// First see if any already allocated capture lists are currently unused.
for (uint16_t i = 0; i < self->list.size; i++) {
if (self->list.contents[i].size == UINT32_MAX) {
array_clear(&self->list.contents[i]);
return i;
// First see if any already allocated capture list is currently unused.
if (self->free_capture_list_count > 0) {
for (uint16_t i = 0; i < self->list.size; i++) {
if (self->list.contents[i].size == UINT32_MAX) {
array_clear(&self->list.contents[i]);
self->free_capture_list_count--;
return i;
}
}
}
// Otherwise allocate and initialize a new capture list.
uint16_t i = self->list.size;
// Otherwise allocate and initialize a new capture list, as long as that
// doesn't put us over the requested maximum.
uint32_t i = self->list.size;
if (i >= self->max_capture_list_count) {
return NONE;
}
CaptureList list;
array_init(&list);
array_push(&self->list, list);
@ -405,6 +432,7 @@ static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
if (id >= self->list.size) return;
self->list.contents[id].size = UINT32_MAX;
self->free_capture_list_count++;
}
/**************
@ -2285,6 +2313,15 @@ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
return self->did_exceed_match_limit;
}
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
return self->capture_list_pool.max_capture_list_count;
}
void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
assert(limit > 0 && limit <= 65536);
self->capture_list_pool.max_capture_list_count = limit;
}
void ts_query_cursor_exec(
TSQueryCursor *self,
const TSQuery *query,
@ -3186,6 +3223,20 @@ bool ts_query_cursor_next_capture(
return true;
}
if (capture_list_pool_is_empty(&self->capture_list_pool)) {
LOG(
" abandon state. index:%u, pattern:%u, offset:%u.\n",
first_unfinished_state_index,
first_unfinished_pattern_index,
first_unfinished_capture_byte
);
capture_list_pool_release(
&self->capture_list_pool,
self->states.contents[first_unfinished_state_index].capture_list_id
);
array_erase(&self->states, first_unfinished_state_index);
}
// If there are no finished matches that are ready to be returned, then
// continue finding more matches.
if (