Merge pull request #1127 from dcreager/query-mempool
query: Allow unlimited pending matches
This commit is contained in:
commit
82f3d3232b
9 changed files with 141 additions and 89 deletions
|
|
@ -1645,6 +1645,7 @@ fn test_query_matches_with_too_many_permutations_to_track() {
|
|||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&source, None).unwrap();
|
||||
let mut cursor = QueryCursor::new();
|
||||
cursor.set_match_limit(32);
|
||||
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
|
||||
|
||||
// For this pathological query, some match permutations will be dropped.
|
||||
|
|
@ -1686,6 +1687,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
|
|||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&source, None).unwrap();
|
||||
let mut cursor = QueryCursor::new();
|
||||
cursor.set_match_limit(32);
|
||||
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
|
||||
|
||||
assert_eq!(
|
||||
|
|
@ -2765,6 +2767,7 @@ fn test_query_captures_with_too_many_nested_results() {
|
|||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&source, None).unwrap();
|
||||
let mut cursor = QueryCursor::new();
|
||||
cursor.set_match_limit(32);
|
||||
let captures = cursor.captures(&query, tree.root_node(), to_callback(&source));
|
||||
let captures = collect_captures(captures, &query, &source);
|
||||
|
||||
|
|
|
|||
|
|
@ -726,15 +726,27 @@ extern "C" {
|
|||
pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Check if this cursor has exceeded its maximum number of in-progress"]
|
||||
#[doc = " matches."]
|
||||
#[doc = " Manage the maximum number of in-progress matches allowed by this query"]
|
||||
#[doc = " cursor."]
|
||||
#[doc = ""]
|
||||
#[doc = " Currently, query cursors have a fixed capacity for storing lists"]
|
||||
#[doc = " of in-progress captures. If this capacity is exceeded, then the"]
|
||||
#[doc = " earliest-starting match will silently be dropped to make room for"]
|
||||
#[doc = " further matches."]
|
||||
#[doc = " Query cursors have a maximum capacity for storing lists of in-progress"]
|
||||
#[doc = " captures. If this capacity is exceeded, then the earliest-starting match will"]
|
||||
#[doc = " silently be dropped to make room for further matches."]
|
||||
#[doc = ""]
|
||||
#[doc = " By default, this limit is 65,536 pending matches, which is effectively"]
|
||||
#[doc = " unlimited for most queries and syntax trees. You can optionally set this to a"]
|
||||
#[doc = " lower number if you want to have (and check) a tighter bound on query"]
|
||||
#[doc = " complexity."]
|
||||
#[doc = ""]
|
||||
#[doc = " If you update the match limit, it must be > 0 and <= 65536."]
|
||||
pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set the range of bytes or (row, column) positions in which the query"]
|
||||
#[doc = " will be executed."]
|
||||
|
|
|
|||
|
|
@ -1598,6 +1598,19 @@ impl<'a> QueryCursor {
|
|||
QueryCursor(unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) })
|
||||
}
|
||||
|
||||
/// Return the maximum number of in-progress matches for this cursor.
///
/// The value is read from the underlying C cursor by calling
/// `ts_query_cursor_match_limit`; nothing is cached on the Rust side.
|
||||
pub fn match_limit(&self) -> u32 {
|
||||
// `self.0` is the cursor pointer held by this wrapper; it is only read here.
unsafe { ffi::ts_query_cursor_match_limit(self.0.as_ptr()) }
|
||||
}
|
||||
|
||||
/// Set the maximum number of in-progress matches for this cursor. The limit must be > 0 and
|
||||
/// <= 65536.
///
/// The value is forwarded unchanged to `ts_query_cursor_set_match_limit`; this wrapper
/// does not check the documented range itself.
|
||||
pub fn set_match_limit(&mut self, limit: u32) {
|
||||
// NOTE(review): the C header comment in this change describes the limit as optional
// and unbounded by default, and the wasm binding passes UINT32_MAX — confirm that the
// "> 0 and <= 65536" bound documented above is still the intended contract.
unsafe {
|
||||
ffi::ts_query_cursor_set_match_limit(self.0.as_ptr(), limit);
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if, on its last execution, this cursor exceeded its maximum number of
|
||||
/// in-progress matches.
|
||||
pub fn did_exceed_match_limit(&self) -> bool {
|
||||
|
|
|
|||
|
|
@ -594,9 +594,15 @@ void ts_query_matches_wasm(
|
|||
uint32_t start_row,
|
||||
uint32_t start_column,
|
||||
uint32_t end_row,
|
||||
uint32_t end_column
|
||||
uint32_t end_column,
|
||||
uint32_t match_limit
|
||||
) {
|
||||
if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
|
||||
if (match_limit == 0) {
|
||||
ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX);
|
||||
} else {
|
||||
ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
|
||||
}
|
||||
|
||||
TSNode node = unmarshal_node(tree);
|
||||
TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
|
||||
|
|
@ -635,9 +641,15 @@ void ts_query_captures_wasm(
|
|||
uint32_t start_row,
|
||||
uint32_t start_column,
|
||||
uint32_t end_row,
|
||||
uint32_t end_column
|
||||
uint32_t end_column,
|
||||
uint32_t match_limit
|
||||
) {
|
||||
if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
|
||||
if (match_limit == 0) {
|
||||
ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX);
|
||||
} else {
|
||||
ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
|
||||
}
|
||||
|
||||
TSNode node = unmarshal_node(tree);
|
||||
TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
|
||||
|
|
|
|||
|
|
@ -953,9 +953,17 @@ class Query {
|
|||
this[0] = 0;
|
||||
}
|
||||
|
||||
matches(node, startPosition, endPosition) {
|
||||
matches(node, startPosition, endPosition, options) {
|
||||
if (!startPosition) startPosition = ZERO_POINT;
|
||||
if (!endPosition) endPosition = ZERO_POINT;
|
||||
if (!options) options = {};
|
||||
|
||||
let matchLimit = options.matchLimit;
|
||||
if (typeof matchLimit === 'undefined') {
|
||||
matchLimit = 0;
|
||||
} else if (typeof matchLimit !== 'number') {
|
||||
throw new Error('Arguments must be numbers');
|
||||
}
|
||||
|
||||
marshalNode(node);
|
||||
|
||||
|
|
@ -965,7 +973,8 @@ class Query {
|
|||
startPosition.row,
|
||||
startPosition.column,
|
||||
endPosition.row,
|
||||
endPosition.column
|
||||
endPosition.column,
|
||||
matchLimit
|
||||
);
|
||||
|
||||
const rawCount = getValue(TRANSFER_BUFFER, 'i32');
|
||||
|
|
@ -1000,9 +1009,17 @@ class Query {
|
|||
return result;
|
||||
}
|
||||
|
||||
captures(node, startPosition, endPosition) {
|
||||
captures(node, startPosition, endPosition, options) {
|
||||
if (!startPosition) startPosition = ZERO_POINT;
|
||||
if (!endPosition) endPosition = ZERO_POINT;
|
||||
if (!options) options = {};
|
||||
|
||||
let matchLimit = options.matchLimit;
|
||||
if (typeof matchLimit === 'undefined') {
|
||||
matchLimit = 0;
|
||||
} else if (typeof matchLimit !== 'number') {
|
||||
throw new Error('Arguments must be numbers');
|
||||
}
|
||||
|
||||
marshalNode(node);
|
||||
|
||||
|
|
@ -1012,7 +1029,8 @@ class Query {
|
|||
startPosition.row,
|
||||
startPosition.column,
|
||||
endPosition.row,
|
||||
endPosition.column
|
||||
endPosition.column,
|
||||
matchLimit
|
||||
);
|
||||
|
||||
const count = getValue(TRANSFER_BUFFER, 'i32');
|
||||
|
|
|
|||
|
|
@ -256,7 +256,7 @@ describe("Query", () => {
|
|||
(array (identifier) @pre (identifier) @post)
|
||||
`);
|
||||
|
||||
const captures = query.captures(tree.rootNode);
|
||||
const captures = query.captures(tree.rootNode, null, null, {matchLimit: 32});
|
||||
assert.ok(query.didExceedMatchLimit());
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -798,15 +798,19 @@ void ts_query_cursor_delete(TSQueryCursor *);
|
|||
void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
|
||||
|
||||
/**
|
||||
* Check if this cursor has exceeded its maximum number of in-progress
|
||||
* matches.
|
||||
* Manage the maximum number of in-progress matches allowed by this query
|
||||
* cursor.
|
||||
*
|
||||
* Currently, query cursors have a fixed capacity for storing lists
|
||||
* of in-progress captures. If this capacity is exceeded, then the
|
||||
* earliest-starting match will silently be dropped to make room for
|
||||
* further matches.
|
||||
* Query cursors have an optional maximum capacity for storing lists of
|
||||
* in-progress captures. If this capacity is exceeded, then the
|
||||
* earliest-starting match will silently be dropped to make room for further
|
||||
* matches. No limit is set by default — query cursors allow
|
||||
* any number of pending matches, dynamically allocating new space for them as
|
||||
* needed while the query is executed.
|
||||
*/
|
||||
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *);
|
||||
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *);
|
||||
void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t);
|
||||
|
||||
/**
|
||||
* Set the range of bytes or (row, column) positions in which the query
|
||||
|
|
|
|||
|
|
@ -1,42 +0,0 @@
|
|||
#ifndef TREE_SITTER_BITS_H_
|
||||
#define TREE_SITTER_BITS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
static inline uint32_t bitmask_for_index(uint16_t id) {
|
||||
return (1u << (31 - id));
|
||||
}
|
||||
|
||||
#ifdef __TINYC__
|
||||
|
||||
// Algorithm taken from the Hacker's Delight book
|
||||
// See also https://graphics.stanford.edu/~seander/bithacks.html
|
||||
static inline uint32_t count_leading_zeros(uint32_t x) {
|
||||
int count = 0;
|
||||
if (x == 0) return 32;
|
||||
x = x - ((x >> 1) & 0x55555555);
|
||||
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
|
||||
count = (((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;
|
||||
return count;
|
||||
}
|
||||
|
||||
#elif defined _WIN32 && !defined __GNUC__
|
||||
|
||||
#include <intrin.h>
|
||||
|
||||
static inline uint32_t count_leading_zeros(uint32_t x) {
|
||||
if (x == 0) return 32;
|
||||
uint32_t result;
|
||||
_BitScanReverse(&result, x);
|
||||
return 31 - result;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline uint32_t count_leading_zeros(uint32_t x) {
|
||||
if (x == 0) return 32;
|
||||
return __builtin_clz(x);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // TREE_SITTER_BITS_H_
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
#include "tree_sitter/api.h"
|
||||
#include "./alloc.h"
|
||||
#include "./array.h"
|
||||
#include "./bits.h"
|
||||
#include "./language.h"
|
||||
#include "./point.h"
|
||||
#include "./tree_cursor.h"
|
||||
|
|
@ -12,7 +11,6 @@
|
|||
// #define LOG(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define LOG(...)
|
||||
|
||||
#define MAX_CAPTURE_LIST_COUNT 32
|
||||
#define MAX_STEP_CAPTURE_COUNT 3
|
||||
#define MAX_STATE_PREDECESSOR_COUNT 100
|
||||
#define MAX_ANALYSIS_STATE_DEPTH 12
|
||||
|
|
@ -153,10 +151,10 @@ typedef struct {
|
|||
*/
|
||||
typedef struct {
|
||||
uint32_t id;
|
||||
uint32_t capture_list_id;
|
||||
uint16_t start_depth;
|
||||
uint16_t step_index;
|
||||
uint16_t pattern_index;
|
||||
uint16_t capture_list_id;
|
||||
uint16_t consumed_capture_count: 12;
|
||||
bool seeking_immediate_match: 1;
|
||||
bool has_in_progress_alternatives: 1;
|
||||
|
|
@ -173,9 +171,17 @@ typedef Array(TSQueryCapture) CaptureList;
|
|||
* currently in use by a query state.
|
||||
*/
|
||||
typedef struct {
|
||||
CaptureList list[MAX_CAPTURE_LIST_COUNT];
|
||||
Array(CaptureList) list;
|
||||
CaptureList empty_list;
|
||||
uint32_t usage_map;
|
||||
// The maximum number of capture lists that we are allowed to allocate. We
|
||||
// never allow `list` to allocate more entries than this, dropping pending
|
||||
// matches if needed to stay under the limit.
|
||||
uint32_t max_capture_list_count;
|
||||
// The number of capture lists allocated in `list` that are not currently in
|
||||
// use. We reuse those existing-but-unused capture lists before trying to
|
||||
// allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
|
||||
// list's length to indicate that it's not in use.
|
||||
uint32_t free_capture_list_count;
|
||||
} CaptureListPool;
|
||||
|
||||
/*
|
||||
|
|
@ -357,54 +363,72 @@ static uint32_t stream_offset(Stream *self) {
|
|||
|
||||
static CaptureListPool capture_list_pool_new(void) {
|
||||
return (CaptureListPool) {
|
||||
.list = array_new(),
|
||||
.empty_list = array_new(),
|
||||
.usage_map = UINT32_MAX,
|
||||
.max_capture_list_count = UINT32_MAX,
|
||||
.free_capture_list_count = 0,
|
||||
};
|
||||
}
|
||||
|
||||
static void capture_list_pool_reset(CaptureListPool *self) {
|
||||
self->usage_map = UINT32_MAX;
|
||||
for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) {
|
||||
array_clear(&self->list[i]);
|
||||
for (uint16_t i = 0; i < self->list.size; i++) {
|
||||
// This invalid size means that the list is not in use.
|
||||
self->list.contents[i].size = UINT32_MAX;
|
||||
}
|
||||
self->free_capture_list_count = self->list.size;
|
||||
}
|
||||
|
||||
static void capture_list_pool_delete(CaptureListPool *self) {
|
||||
for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) {
|
||||
array_delete(&self->list[i]);
|
||||
for (uint16_t i = 0; i < self->list.size; i++) {
|
||||
array_delete(&self->list.contents[i]);
|
||||
}
|
||||
array_delete(&self->list);
|
||||
}
|
||||
|
||||
static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
|
||||
if (id >= MAX_CAPTURE_LIST_COUNT) return &self->empty_list;
|
||||
return &self->list[id];
|
||||
if (id >= self->list.size) return &self->empty_list;
|
||||
return &self->list.contents[id];
|
||||
}
|
||||
|
||||
static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
|
||||
assert(id < MAX_CAPTURE_LIST_COUNT);
|
||||
return &self->list[id];
|
||||
assert(id < self->list.size);
|
||||
return &self->list.contents[id];
|
||||
}
|
||||
|
||||
static bool capture_list_pool_is_empty(const CaptureListPool *self) {
|
||||
return self->usage_map == 0;
|
||||
// The capture list pool is empty if all allocated lists are in use, and we
|
||||
// have reached the maximum allowed number of allocated lists.
|
||||
return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
|
||||
}
|
||||
|
||||
static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
|
||||
// In the usage_map bitmask, ones represent free lists, and zeros represent
|
||||
// lists that are in use. A free list id can quickly be found by counting
|
||||
// the leading zeros in the usage map. An id of zero corresponds to the
|
||||
// highest-order bit in the bitmask.
|
||||
uint16_t id = count_leading_zeros(self->usage_map);
|
||||
if (id >= MAX_CAPTURE_LIST_COUNT) return NONE;
|
||||
self->usage_map &= ~bitmask_for_index(id);
|
||||
array_clear(&self->list[id]);
|
||||
return id;
|
||||
// First see if any already allocated capture list is currently unused.
|
||||
if (self->free_capture_list_count > 0) {
|
||||
for (uint16_t i = 0; i < self->list.size; i++) {
|
||||
if (self->list.contents[i].size == UINT32_MAX) {
|
||||
array_clear(&self->list.contents[i]);
|
||||
self->free_capture_list_count--;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise allocate and initialize a new capture list, as long as that
|
||||
// doesn't put us over the requested maximum.
|
||||
uint32_t i = self->list.size;
|
||||
if (i >= self->max_capture_list_count) {
|
||||
return NONE;
|
||||
}
|
||||
CaptureList list;
|
||||
array_init(&list);
|
||||
array_push(&self->list, list);
|
||||
return i;
|
||||
}
|
||||
|
||||
static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
|
||||
if (id >= MAX_CAPTURE_LIST_COUNT) return;
|
||||
array_clear(&self->list[id]);
|
||||
self->usage_map |= bitmask_for_index(id);
|
||||
if (id >= self->list.size) return;
|
||||
self->list.contents[id].size = UINT32_MAX;
|
||||
self->free_capture_list_count++;
|
||||
}
|
||||
|
||||
/**************
|
||||
|
|
@ -2285,6 +2309,14 @@ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
|
|||
return self->did_exceed_match_limit;
|
||||
}
|
||||
|
||||
// Return the cursor's current match limit, i.e. the `max_capture_list_count`
// stored in its capture list pool.
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
|
||||
return self->capture_list_pool.max_capture_list_count;
|
||||
}
|
||||
|
||||
// Set the cursor's match limit by overwriting the capture list pool's
// `max_capture_list_count`. The value is stored as given: no validation or
// clamping is performed here, and lists that are already allocated are not
// touched by this call.
//
// NOTE(review): the capture list pool code in this file walks its lists with
// `uint16_t` indices and returns `uint16_t` ids, while this limit is a
// `uint32_t` that defaults to UINT32_MAX — confirm that limits above 65535
// behave as intended.
void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
|
||||
self->capture_list_pool.max_capture_list_count = limit;
|
||||
}
|
||||
|
||||
void ts_query_cursor_exec(
|
||||
TSQueryCursor *self,
|
||||
const TSQuery *query,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue