diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index d6153dd4..5a19eead 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1645,6 +1645,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); + cursor.set_match_limit(32); let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); // For this pathological query, some match permutations will be dropped. @@ -1686,6 +1687,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); + cursor.set_match_limit(32); let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); assert_eq!( @@ -2765,6 +2767,7 @@ fn test_query_captures_with_too_many_nested_results() { parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); + cursor.set_match_limit(32); let captures = cursor.captures(&query, tree.root_node(), to_callback(&source)); let captures = collect_captures(captures, &query, &source); diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 50da12fc..9b8c0f65 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -726,15 +726,27 @@ extern "C" { pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode); } extern "C" { - #[doc = " Check if this cursor has exceeded its maximum number of in-progress"] - #[doc = " matches."] + #[doc = " Manage the maximum number of in-progress matches allowed by this query"] + #[doc = " cursor."] #[doc = ""] - #[doc = " Currently, query cursors have a fixed capacity for storing lists"] - #[doc = " of in-progress captures. 
If this capacity is exceeded, then the"] - #[doc = " earliest-starting match will silently be dropped to make room for"] - #[doc = " further matches."] + #[doc = " Query cursors have a maximum capacity for storing lists of in-progress"] + #[doc = " captures. If this capacity is exceeded, then the earliest-starting match will"] + #[doc = " silently be dropped to make room for further matches."] + #[doc = ""] + #[doc = " By default, this limit is 65,536 pending matches, which is effectively"] + #[doc = " unlimited for most queries and syntax trees. You can optionally set this to a"] + #[doc = " lower number if you want to have (and check) a tighter bound on query"] + #[doc = " complexity."] + #[doc = ""] + #[doc = " If you update the match limit, it must be > 0 and <= 65536."] pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool; } +extern "C" { + pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32; +} +extern "C" { + pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32); +} extern "C" { #[doc = " Set the range of bytes or (row, column) positions in which the query"] #[doc = " will be executed."] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 801f773f..bfdc843c 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1598,6 +1598,19 @@ impl<'a> QueryCursor { QueryCursor(unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) }) } + /// Return the maximum number of in-progress matches for this cursor. + pub fn match_limit(&self) -> u32 { + unsafe { ffi::ts_query_cursor_match_limit(self.0.as_ptr()) } + } + + /// Set the maximum number of in-progress matches for this cursor. The limit must be > 0 and + /// <= 65536. + pub fn set_match_limit(&mut self, limit: u32) { + unsafe { + ffi::ts_query_cursor_set_match_limit(self.0.as_ptr(), limit); + } + } + /// Check if, on its last execution, this cursor exceeded its maximum number of /// in-progress matches. 
pub fn did_exceed_match_limit(&self) -> bool { diff --git a/lib/binding_web/binding.c b/lib/binding_web/binding.c index 8adaec75..27292911 100644 --- a/lib/binding_web/binding.c +++ b/lib/binding_web/binding.c @@ -594,9 +594,15 @@ void ts_query_matches_wasm( uint32_t start_row, uint32_t start_column, uint32_t end_row, - uint32_t end_column + uint32_t end_column, + uint32_t match_limit ) { if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new(); + if (match_limit == 0) { + ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX); + } else { + ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit); + } TSNode node = unmarshal_node(tree); TSPoint start_point = {start_row, code_unit_to_byte(start_column)}; @@ -635,9 +641,15 @@ void ts_query_captures_wasm( uint32_t start_row, uint32_t start_column, uint32_t end_row, - uint32_t end_column + uint32_t end_column, + uint32_t match_limit ) { if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new(); + if (match_limit == 0) { + ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX); + } else { + ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit); + } TSNode node = unmarshal_node(tree); TSPoint start_point = {start_row, code_unit_to_byte(start_column)}; diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 1f9ef412..bf0a91ce 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -953,9 +953,17 @@ class Query { this[0] = 0; } - matches(node, startPosition, endPosition) { + matches(node, startPosition, endPosition, options) { if (!startPosition) startPosition = ZERO_POINT; if (!endPosition) endPosition = ZERO_POINT; + if (!options) options = {}; + + let matchLimit = options.matchLimit; + if (typeof matchLimit === 'undefined') { + matchLimit = 0; + } else if (typeof matchLimit !== 'number') { + throw new Error('Arguments must be numbers'); + } marshalNode(node); @@ -965,7 +973,8 @@ class Query { 
startPosition.row, startPosition.column, endPosition.row, - endPosition.column + endPosition.column, + matchLimit ); const rawCount = getValue(TRANSFER_BUFFER, 'i32'); @@ -1000,9 +1009,17 @@ class Query { return result; } - captures(node, startPosition, endPosition) { + captures(node, startPosition, endPosition, options) { if (!startPosition) startPosition = ZERO_POINT; if (!endPosition) endPosition = ZERO_POINT; + if (!options) options = {}; + + let matchLimit = options.matchLimit; + if (typeof matchLimit === 'undefined') { + matchLimit = 0; + } else if (typeof matchLimit !== 'number') { + throw new Error('Arguments must be numbers'); + } marshalNode(node); @@ -1012,7 +1029,8 @@ class Query { startPosition.row, startPosition.column, endPosition.row, - endPosition.column + endPosition.column, + matchLimit ); const count = getValue(TRANSFER_BUFFER, 'i32'); diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index b7b2e053..2b2aebe0 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -256,7 +256,7 @@ describe("Query", () => { (array (identifier) @pre (identifier) @post) `); - const captures = query.captures(tree.rootNode); + const captures = query.captures(tree.rootNode, null, null, {matchLimit: 32}); assert.ok(query.didExceedMatchLimit()); }); }); diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 43315415..6f826604 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -798,15 +798,19 @@ void ts_query_cursor_delete(TSQueryCursor *); void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode); /** - * Check if this cursor has exceeded its maximum number of in-progress - * matches. + * Manage the maximum number of in-progress matches allowed by this query + * cursor. * - * Currently, query cursors have a fixed capacity for storing lists - * of in-progress captures. 
If this capacity is exceeded, then the - * earliest-starting match will silently be dropped to make room for - * further matches. + * Query cursors have an optional maximum capacity for storing lists of + * in-progress captures. If this capacity is exceeded, then the + * earliest-starting match will silently be dropped to make room for further + * matches. This maximum capacity is optional — by default, query cursors allow + * any number of pending matches, dynamically allocating new space for them as + * needed as the query is executed. */ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *); +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *); +void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t); /** * Set the range of bytes or (row, column) positions in which the query diff --git a/lib/src/bits.h b/lib/src/bits.h deleted file mode 100644 index ca8caf30..00000000 --- a/lib/src/bits.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef TREE_SITTER_BITS_H_ -#define TREE_SITTER_BITS_H_ - -#include <stdint.h> - -static inline uint32_t bitmask_for_index(uint16_t id) { - return (1u << (31 - id)); -} - -#ifdef __TINYC__ - -// Algorithm taken from the Hacker's Delight book -// See also https://graphics.stanford.edu/~seander/bithacks.html -static inline uint32_t count_leading_zeros(uint32_t x) { - int count = 0; - if (x == 0) return 32; - x = x - ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - count = (((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24; - return count; -} - -#elif defined _WIN32 && !defined __GNUC__ - -#include <intrin.h> - -static inline uint32_t count_leading_zeros(uint32_t x) { - if (x == 0) return 32; - uint32_t result; - _BitScanReverse(&result, x); - return 31 - result; -} - -#else - -static inline uint32_t count_leading_zeros(uint32_t x) { - if (x == 0) return 32; - return __builtin_clz(x); -} - -#endif -#endif // TREE_SITTER_BITS_H_ diff --git a/lib/src/query.c b/lib/src/query.c index 65dbe1fe..13149eb8 100644 ---
a/lib/src/query.c +++ b/lib/src/query.c @@ -1,7 +1,6 @@ #include "tree_sitter/api.h" #include "./alloc.h" #include "./array.h" -#include "./bits.h" #include "./language.h" #include "./point.h" #include "./tree_cursor.h" @@ -12,7 +11,6 @@ // #define LOG(...) fprintf(stderr, __VA_ARGS__) #define LOG(...) -#define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 #define MAX_STATE_PREDECESSOR_COUNT 100 #define MAX_ANALYSIS_STATE_DEPTH 12 @@ -153,10 +151,10 @@ typedef struct { */ typedef struct { uint32_t id; + uint32_t capture_list_id; uint16_t start_depth; uint16_t step_index; uint16_t pattern_index; - uint16_t capture_list_id; uint16_t consumed_capture_count: 12; bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; @@ -173,9 +171,17 @@ typedef Array(TSQueryCapture) CaptureList; * currently in use by a query state. */ typedef struct { - CaptureList list[MAX_CAPTURE_LIST_COUNT]; + Array(CaptureList) list; CaptureList empty_list; - uint32_t usage_map; + // The maximum number of capture lists that we are allowed to allocate. We + // never allow `list` to allocate more entries than this, dropping pending + // matches if needed to stay under the limit. + uint32_t max_capture_list_count; + // The number of capture lists allocated in `list` that are not currently in + // use. We reuse those existing-but-unused capture lists before trying to + // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture + // list's length to indicate that it's not in use. 
+ uint32_t free_capture_list_count; } CaptureListPool; /* @@ -357,54 +363,72 @@ static uint32_t stream_offset(Stream *self) { static CaptureListPool capture_list_pool_new(void) { return (CaptureListPool) { + .list = array_new(), .empty_list = array_new(), - .usage_map = UINT32_MAX, + .max_capture_list_count = UINT32_MAX, + .free_capture_list_count = 0, }; } static void capture_list_pool_reset(CaptureListPool *self) { - self->usage_map = UINT32_MAX; - for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) { - array_clear(&self->list[i]); + for (uint16_t i = 0; i < self->list.size; i++) { + // This invalid size means that the list is not in use. + self->list.contents[i].size = UINT32_MAX; } + self->free_capture_list_count = self->list.size; } static void capture_list_pool_delete(CaptureListPool *self) { - for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) { - array_delete(&self->list[i]); + for (uint16_t i = 0; i < self->list.size; i++) { + array_delete(&self->list.contents[i]); } + array_delete(&self->list); } static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { - if (id >= MAX_CAPTURE_LIST_COUNT) return &self->empty_list; - return &self->list[id]; + if (id >= self->list.size) return &self->empty_list; + return &self->list.contents[id]; } static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { - assert(id < MAX_CAPTURE_LIST_COUNT); - return &self->list[id]; + assert(id < self->list.size); + return &self->list.contents[id]; } static bool capture_list_pool_is_empty(const CaptureListPool *self) { - return self->usage_map == 0; + // The capture list pool is empty if all allocated lists are in use, and we + // have reached the maximum allowed number of allocated lists. 
+ return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; } static uint16_t capture_list_pool_acquire(CaptureListPool *self) { - // In the usage_map bitmask, ones represent free lists, and zeros represent - // lists that are in use. A free list id can quickly be found by counting - // the leading zeros in the usage map. An id of zero corresponds to the - // highest-order bit in the bitmask. - uint16_t id = count_leading_zeros(self->usage_map); - if (id >= MAX_CAPTURE_LIST_COUNT) return NONE; - self->usage_map &= ~bitmask_for_index(id); - array_clear(&self->list[id]); - return id; + // First see if any already allocated capture list is currently unused. + if (self->free_capture_list_count > 0) { + for (uint16_t i = 0; i < self->list.size; i++) { + if (self->list.contents[i].size == UINT32_MAX) { + array_clear(&self->list.contents[i]); + self->free_capture_list_count--; + return i; + } + } + } + + // Otherwise allocate and initialize a new capture list, as long as that + // doesn't put us over the requested maximum. 
+ uint32_t i = self->list.size; + if (i >= self->max_capture_list_count) { + return NONE; + } + CaptureList list; + array_init(&list); + array_push(&self->list, list); + return i; } static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { - if (id >= MAX_CAPTURE_LIST_COUNT) return; - array_clear(&self->list[id]); - self->usage_map |= bitmask_for_index(id); + if (id >= self->list.size) return; + self->list.contents[id].size = UINT32_MAX; + self->free_capture_list_count++; } /************** @@ -2285,6 +2309,14 @@ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { return self->did_exceed_match_limit; } +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { + return self->capture_list_pool.max_capture_list_count; +} + +void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { + self->capture_list_pool.max_capture_list_count = limit; +} + void ts_query_cursor_exec( TSQueryCursor *self, const TSQuery *query,