diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index b47dd69a..5a19eead 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1625,7 +1625,6 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() { }); } -/* #[test] fn test_query_matches_with_too_many_permutations_to_track() { allocations::record(|| { @@ -1646,6 +1645,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); + cursor.set_match_limit(32); let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); // For this pathological query, some match permutations will be dropped. @@ -1687,6 +1687,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); + cursor.set_match_limit(32); let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); assert_eq!( @@ -1696,7 +1697,6 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { assert_eq!(cursor.did_exceed_match_limit(), true); }); } -*/ #[test] fn test_query_matches_with_anonymous_tokens() { @@ -2704,7 +2704,6 @@ fn test_query_captures_with_many_nested_results_with_fields() { }); } -/* #[test] fn test_query_captures_with_too_many_nested_results() { allocations::record(|| { @@ -2768,6 +2767,7 @@ fn test_query_captures_with_too_many_nested_results() { parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); + cursor.set_match_limit(32); let captures = cursor.captures(&query, tree.root_node(), to_callback(&source)); let captures = collect_captures(captures, &query, &source); @@ -2795,7 +2795,6 @@ fn test_query_captures_with_too_many_nested_results() { ); }); } -*/ #[test] fn 
test_query_captures_with_definite_pattern_containing_many_nested_matches() { diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 50da12fc..9b8c0f65 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -726,15 +726,27 @@ extern "C" { pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode); } extern "C" { - #[doc = " Check if this cursor has exceeded its maximum number of in-progress"] - #[doc = " matches."] + #[doc = " Manage the maximum number of in-progress matches allowed by this query"] + #[doc = " cursor."] #[doc = ""] - #[doc = " Currently, query cursors have a fixed capacity for storing lists"] - #[doc = " of in-progress captures. If this capacity is exceeded, then the"] - #[doc = " earliest-starting match will silently be dropped to make room for"] - #[doc = " further matches."] + #[doc = " Query cursors have a maximum capacity for storing lists of in-progress"] + #[doc = " captures. If this capacity is exceeded, then the earliest-starting match will"] + #[doc = " silently be dropped to make room for further matches."] + #[doc = ""] + #[doc = " By default, this limit is 65,536 pending matches, which is effectively"] + #[doc = " unlimited for most queries and syntax trees. 
You can optionally set this to a"] + #[doc = " lower number if you want to have (and check) a tighter bound on query"] + #[doc = " complexity."] + #[doc = ""] + #[doc = " If you update the match limit, it must be > 0 and <= 65536."] pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool; } +extern "C" { + pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32; +} +extern "C" { + pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32); +} extern "C" { #[doc = " Set the range of bytes or (row, column) positions in which the query"] #[doc = " will be executed."] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 801f773f..bfdc843c 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1598,6 +1598,19 @@ impl<'a> QueryCursor { QueryCursor(unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) }) } + /// Return the maximum number of in-progress matches for this cursor. + pub fn match_limit(&self) -> u32 { + unsafe { ffi::ts_query_cursor_match_limit(self.0.as_ptr()) } + } + + /// Set the maximum number of in-progress matches for this cursor. The limit must be > 0 and + /// <= 65536. + pub fn set_match_limit(&mut self, limit: u32) { + unsafe { + ffi::ts_query_cursor_set_match_limit(self.0.as_ptr(), limit); + } + } + /// Check if, on its last execution, this cursor exceeded its maximum number of /// in-progress matches. pub fn did_exceed_match_limit(&self) -> bool { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 43315415..fad7a589 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -798,15 +798,23 @@ void ts_query_cursor_delete(TSQueryCursor *); void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode); /** - * Check if this cursor has exceeded its maximum number of in-progress - * matches. + * Manage the maximum number of in-progress matches allowed by this query + * cursor. 
* - * Currently, query cursors have a fixed capacity for storing lists - * of in-progress captures. If this capacity is exceeded, then the - * earliest-starting match will silently be dropped to make room for - * further matches. + * Query cursors have a maximum capacity for storing lists of in-progress + * captures. If this capacity is exceeded, then the earliest-starting match will + * silently be dropped to make room for further matches. + * + * By default, this limit is 65,536 pending matches, which is effectively + * unlimited for most queries and syntax trees. You can optionally set this to a + * lower number if you want to have (and check) a tighter bound on query + * complexity. + * + * If you update the match limit, it must be > 0 and <= 65536. */ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *); +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *); +void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t); /** * Set the range of bytes or (row, column) positions in which the query diff --git a/lib/src/query.c b/lib/src/query.c index 5043554f..2c8e7193 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -175,6 +175,15 @@ typedef Array(CaptureList) CaptureListPoolEntry; typedef struct { CaptureListPoolEntry list; CaptureList empty_list; + // The maximum number of capture lists that we are allowed to allocate. We + // never allow `list` to allocate more entries than this, dropping pending + // matches if needed to stay under the limit. + uint32_t max_capture_list_count; + // The number of capture lists allocated in `list` that are not currently in + // use. We reuse those existing-but-unused capture lists before trying to + // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture + // list's length to indicate that it's not in use. 
+ uint16_t free_capture_list_count; } CaptureListPool; /* @@ -358,6 +367,10 @@ static CaptureListPool capture_list_pool_new(void) { return (CaptureListPool) { .list = array_new(), .empty_list = array_new(), + // The largest allowed maximum is 64K, since we use `uint16_t` as our capture list + // index type. + .max_capture_list_count = 65536, + .free_capture_list_count = 0, }; } @@ -366,6 +379,7 @@ static void capture_list_pool_reset(CaptureListPool *self) { // This invalid size means that the list is not in use. self->list.contents[i].size = UINT32_MAX; } + self->free_capture_list_count = self->list.size; } static void capture_list_pool_delete(CaptureListPool *self) { @@ -385,17 +399,30 @@ static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id return &self->list.contents[id]; } +static bool capture_list_pool_is_empty(const CaptureListPool *self) { + // The capture list pool is empty if all allocated lists are in use, and we + // have reached the maximum allowed number of allocated lists. + return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; +} + static uint16_t capture_list_pool_acquire(CaptureListPool *self) { - // First see if any already allocated capture lists are currently unused. - for (uint16_t i = 0; i < self->list.size; i++) { - if (self->list.contents[i].size == UINT32_MAX) { - array_clear(&self->list.contents[i]); - return i; + // First see if any already allocated capture list is currently unused. + if (self->free_capture_list_count > 0) { + for (uint16_t i = 0; i < self->list.size; i++) { + if (self->list.contents[i].size == UINT32_MAX) { + array_clear(&self->list.contents[i]); + self->free_capture_list_count--; + return i; + } } } - // Otherwise allocate and initialize a new capture list. - uint16_t i = self->list.size; + // Otherwise allocate and initialize a new capture list, as long as that + // doesn't put us over the requested maximum. 
+ uint32_t i = self->list.size; + if (i >= self->max_capture_list_count) { + return NONE; + } CaptureList list; array_init(&list); array_push(&self->list, list); @@ -405,6 +432,7 @@ static uint16_t capture_list_pool_acquire(CaptureListPool *self) { static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { if (id >= self->list.size) return; self->list.contents[id].size = UINT32_MAX; + self->free_capture_list_count++; } /************** @@ -2285,6 +2313,15 @@ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { return self->did_exceed_match_limit; } +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { + return self->capture_list_pool.max_capture_list_count; +} + +void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { + assert(limit > 0 && limit <= 65536); + self->capture_list_pool.max_capture_list_count = limit; +} + void ts_query_cursor_exec( TSQueryCursor *self, const TSQuery *query, @@ -3186,6 +3223,20 @@ bool ts_query_cursor_next_capture( return true; } + if (capture_list_pool_is_empty(&self->capture_list_pool)) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + first_unfinished_state_index, + first_unfinished_pattern_index, + first_unfinished_capture_byte + ); + capture_list_pool_release( + &self->capture_list_pool, + self->states.contents[first_unfinished_state_index].capture_list_id + ); + array_erase(&self->states, first_unfinished_state_index); + } + // If there are no finished matches that are ready to be returned, then // continue finding more matches. if (