Merge pull request #1127 from dcreager/query-mempool

query: Allow unlimited pending matches
This commit is contained in:
Max Brunsfeld 2021-06-02 11:39:17 -07:00 committed by GitHub
commit 82f3d3232b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 141 additions and 89 deletions

View file

@ -1645,6 +1645,7 @@ fn test_query_matches_with_too_many_permutations_to_track() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
// For this pathological query, some match permutations will be dropped.
@ -1686,6 +1687,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
assert_eq!(
@ -2765,6 +2767,7 @@ fn test_query_captures_with_too_many_nested_results() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let captures = cursor.captures(&query, tree.root_node(), to_callback(&source));
let captures = collect_captures(captures, &query, &source);

View file

@ -726,15 +726,27 @@ extern "C" {
pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode);
}
extern "C" {
#[doc = " Check if this cursor has exceeded its maximum number of in-progress"]
#[doc = " matches."]
#[doc = " Manage the maximum number of in-progress matches allowed by this query"]
#[doc = " cursor."]
#[doc = ""]
#[doc = " Currently, query cursors have a fixed capacity for storing lists"]
#[doc = " of in-progress captures. If this capacity is exceeded, then the"]
#[doc = " earliest-starting match will silently be dropped to make room for"]
#[doc = " further matches."]
#[doc = " Query cursors have a maximum capacity for storing lists of in-progress"]
#[doc = " captures. If this capacity is exceeded, then the earliest-starting match will"]
#[doc = " silently be dropped to make room for further matches."]
#[doc = ""]
#[doc = " By default, this limit is 65,536 pending matches, which is effectively"]
#[doc = " unlimited for most queries and syntax trees. You can optionally set this to a"]
#[doc = " lower number if you want to have (and check) a tighter bound on query"]
#[doc = " complexity."]
#[doc = ""]
#[doc = " If you update the match limit, it must be > 0 and <= 65536."]
pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool;
}
extern "C" {
/// Get the maximum number of in-progress matches allowed by this query cursor.
pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32;
}
extern "C" {
/// Set the maximum number of in-progress matches allowed by this query cursor.
/// `arg2` is the new limit.
pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32);
}
extern "C" {
#[doc = " Set the range of bytes or (row, column) positions in which the query"]
#[doc = " will be executed."]

View file

@ -1598,6 +1598,19 @@ impl<'a> QueryCursor {
QueryCursor(unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) })
}
/// Report how many in-progress matches this cursor is allowed to track at once.
pub fn match_limit(&self) -> u32 {
    let cursor = self.0.as_ptr();
    // SAFETY: `cursor` is taken from the `NonNull` wrapped by this `QueryCursor`, so it is
    // never null.
    unsafe { ffi::ts_query_cursor_match_limit(cursor) }
}
/// Change the cap on in-progress matches for this cursor. `limit` must be > 0 and
/// <= 65536.
pub fn set_match_limit(&mut self, limit: u32) {
    let cursor = self.0.as_ptr();
    // SAFETY: `cursor` is taken from the `NonNull` wrapped by this `QueryCursor`, so it is
    // never null, and `&mut self` gives exclusive access for the write.
    unsafe { ffi::ts_query_cursor_set_match_limit(cursor, limit) }
}
/// Check if, on its last execution, this cursor exceeded its maximum number of
/// in-progress matches.
pub fn did_exceed_match_limit(&self) -> bool {

View file

@ -594,9 +594,15 @@ void ts_query_matches_wasm(
uint32_t start_row,
uint32_t start_column,
uint32_t end_row,
uint32_t end_column
uint32_t end_column,
uint32_t match_limit
) {
if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
if (match_limit == 0) {
ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX);
} else {
ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
}
TSNode node = unmarshal_node(tree);
TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
@ -635,9 +641,15 @@ void ts_query_captures_wasm(
uint32_t start_row,
uint32_t start_column,
uint32_t end_row,
uint32_t end_column
uint32_t end_column,
uint32_t match_limit
) {
if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
if (match_limit == 0) {
ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX);
} else {
ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
}
TSNode node = unmarshal_node(tree);
TSPoint start_point = {start_row, code_unit_to_byte(start_column)};

View file

@ -953,9 +953,17 @@ class Query {
this[0] = 0;
}
matches(node, startPosition, endPosition) {
matches(node, startPosition, endPosition, options) {
if (!startPosition) startPosition = ZERO_POINT;
if (!endPosition) endPosition = ZERO_POINT;
if (!options) options = {};
let matchLimit = options.matchLimit;
if (typeof matchLimit === 'undefined') {
matchLimit = 0;
} else if (typeof matchLimit !== 'number') {
throw new Error('Arguments must be numbers');
}
marshalNode(node);
@ -965,7 +973,8 @@ class Query {
startPosition.row,
startPosition.column,
endPosition.row,
endPosition.column
endPosition.column,
matchLimit
);
const rawCount = getValue(TRANSFER_BUFFER, 'i32');
@ -1000,9 +1009,17 @@ class Query {
return result;
}
captures(node, startPosition, endPosition) {
captures(node, startPosition, endPosition, options) {
if (!startPosition) startPosition = ZERO_POINT;
if (!endPosition) endPosition = ZERO_POINT;
if (!options) options = {};
let matchLimit = options.matchLimit;
if (typeof matchLimit === 'undefined') {
matchLimit = 0;
} else if (typeof matchLimit !== 'number') {
throw new Error('Arguments must be numbers');
}
marshalNode(node);
@ -1012,7 +1029,8 @@ class Query {
startPosition.row,
startPosition.column,
endPosition.row,
endPosition.column
endPosition.column,
matchLimit
);
const count = getValue(TRANSFER_BUFFER, 'i32');

View file

@ -256,7 +256,7 @@ describe("Query", () => {
(array (identifier) @pre (identifier) @post)
`);
const captures = query.captures(tree.rootNode);
const captures = query.captures(tree.rootNode, null, null, {matchLimit: 32});
assert.ok(query.didExceedMatchLimit());
});
});

View file

@ -798,15 +798,19 @@ void ts_query_cursor_delete(TSQueryCursor *);
void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
/**
* Check if this cursor has exceeded its maximum number of in-progress
* matches.
* Manage the maximum number of in-progress matches allowed by this query
* cursor.
*
* Currently, query cursors have a fixed capacity for storing lists
* of in-progress captures. If this capacity is exceeded, then the
* earliest-starting match will silently be dropped to make room for
* further matches.
* Query cursors have an optional maximum capacity for storing lists of
* in-progress captures. If this capacity is exceeded, then the
* earliest-starting match will silently be dropped to make room for further
* matches. This maximum capacity is optional: by default, query cursors allow
* any number of pending matches, dynamically allocating new space for them as
* needed as the query is executed.
*/
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *);
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *);
void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t);
/**
* Set the range of bytes or (row, column) positions in which the query

View file

@ -1,42 +0,0 @@
#ifndef TREE_SITTER_BITS_H_
#define TREE_SITTER_BITS_H_
#include <stdint.h>
// Return the usage-map bit for the capture list with the given index. Index 0
// maps to the highest-order bit and index 31 to the lowest-order bit.
//
// Indices above 31 have no bit in a 32-bit map, so they yield an empty mask
// rather than shifting by a negative amount, which is undefined behavior.
static inline uint32_t bitmask_for_index(uint16_t id) {
  if (id > 31) return 0;
  return 1u << (31 - id);
}
#ifdef __TINYC__
// Portable fallback for compilers without a count-leading-zeros builtin.
// Bit tricks taken from the Hacker's Delight book
// See also https://graphics.stanford.edu/~seander/bithacks.html
//
// Returns the number of zero bits above the highest set bit of `x`, or 32
// when `x` is zero.
static inline uint32_t count_leading_zeros(uint32_t x) {
  if (x == 0) return 32;

  // Smear the highest set bit into every lower position, so that the number
  // of set bits equals the index of the highest set bit plus one.
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;

  // Population count of the smeared value.
  x = x - ((x >> 1) & 0x55555555);
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
  uint32_t set_bits = (((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;

  // Every bit that is not set after smearing is a leading zero.
  return 32 - set_bits;
}
#elif defined _WIN32 && !defined __GNUC__
#include <intrin.h>
// Count the zero bits above the highest set bit of `x`; returns 32 when `x`
// is zero.
static inline uint32_t count_leading_zeros(uint32_t x) {
  if (x == 0) return 32;
  // _BitScanReverse writes the bit index through an `unsigned long *`, so the
  // index is received in a variable of exactly that type, not a uint32_t.
  unsigned long highest_set_bit;
  _BitScanReverse(&highest_set_bit, x);
  return 31 - (uint32_t)highest_set_bit;
}
#else
// Number of zero bits above the most significant set bit of `x`. Zero is
// answered directly with 32 and never passed to __builtin_clz.
static inline uint32_t count_leading_zeros(uint32_t x) {
  return x != 0 ? (uint32_t)__builtin_clz(x) : 32;
}
#endif
#endif // TREE_SITTER_BITS_H_

View file

@ -1,7 +1,6 @@
#include "tree_sitter/api.h"
#include "./alloc.h"
#include "./array.h"
#include "./bits.h"
#include "./language.h"
#include "./point.h"
#include "./tree_cursor.h"
@ -12,7 +11,6 @@
// #define LOG(...) fprintf(stderr, __VA_ARGS__)
#define LOG(...)
#define MAX_CAPTURE_LIST_COUNT 32
#define MAX_STEP_CAPTURE_COUNT 3
#define MAX_STATE_PREDECESSOR_COUNT 100
#define MAX_ANALYSIS_STATE_DEPTH 12
@ -153,10 +151,10 @@ typedef struct {
*/
typedef struct {
uint32_t id;
uint32_t capture_list_id;
uint16_t start_depth;
uint16_t step_index;
uint16_t pattern_index;
uint16_t capture_list_id;
uint16_t consumed_capture_count: 12;
bool seeking_immediate_match: 1;
bool has_in_progress_alternatives: 1;
@ -173,9 +171,17 @@ typedef Array(TSQueryCapture) CaptureList;
* currently in use by a query state.
*/
typedef struct {
CaptureList list[MAX_CAPTURE_LIST_COUNT];
Array(CaptureList) list;
CaptureList empty_list;
uint32_t usage_map;
// The maximum number of capture lists that we are allowed to allocate. We
// never allow `list` to allocate more entries than this, dropping pending
// matches if needed to stay under the limit.
uint32_t max_capture_list_count;
// The number of capture lists allocated in `list` that are not currently in
// use. We reuse those existing-but-unused capture lists before trying to
// allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
// list's length to indicate that it's not in use.
uint32_t free_capture_list_count;
} CaptureListPool;
/*
@ -357,54 +363,72 @@ static uint32_t stream_offset(Stream *self) {
static CaptureListPool capture_list_pool_new(void) {
return (CaptureListPool) {
.list = array_new(),
.empty_list = array_new(),
.usage_map = UINT32_MAX,
.max_capture_list_count = UINT32_MAX,
.free_capture_list_count = 0,
};
}
static void capture_list_pool_reset(CaptureListPool *self) {
self->usage_map = UINT32_MAX;
for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) {
array_clear(&self->list[i]);
for (uint16_t i = 0; i < self->list.size; i++) {
// This invalid size means that the list is not in use.
self->list.contents[i].size = UINT32_MAX;
}
self->free_capture_list_count = self->list.size;
}
static void capture_list_pool_delete(CaptureListPool *self) {
for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) {
array_delete(&self->list[i]);
for (uint16_t i = 0; i < self->list.size; i++) {
array_delete(&self->list.contents[i]);
}
array_delete(&self->list);
}
static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
if (id >= MAX_CAPTURE_LIST_COUNT) return &self->empty_list;
return &self->list[id];
if (id >= self->list.size) return &self->empty_list;
return &self->list.contents[id];
}
static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
assert(id < MAX_CAPTURE_LIST_COUNT);
return &self->list[id];
assert(id < self->list.size);
return &self->list.contents[id];
}
static bool capture_list_pool_is_empty(const CaptureListPool *self) {
return self->usage_map == 0;
// The capture list pool is empty if all allocated lists are in use, and we
// have reached the maximum allowed number of allocated lists.
return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
}
// Acquire an unused capture list from the pool and return its id, or NONE
// when no list is available.
static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
// In the usage_map bitmask, ones represent free lists, and zeros represent
// lists that are in use. A free list id can quickly be found by counting
// the leading zeros in the usage map. An id of zero corresponds to the
// highest-order bit in the bitmask.
uint16_t id = count_leading_zeros(self->usage_map);
if (id >= MAX_CAPTURE_LIST_COUNT) return NONE;
self->usage_map &= ~bitmask_for_index(id);
array_clear(&self->list[id]);
return id;
// First see if any already allocated capture list is currently unused.
if (self->free_capture_list_count > 0) {
for (uint16_t i = 0; i < self->list.size; i++) {
if (self->list.contents[i].size == UINT32_MAX) {
array_clear(&self->list.contents[i]);
self->free_capture_list_count--;
return i;
}
}
}
// Otherwise allocate and initialize a new capture list, as long as that
// doesn't put us over the requested maximum.
uint32_t i = self->list.size;
if (i >= self->max_capture_list_count) {
return NONE;
}
CaptureList list;
array_init(&list);
array_push(&self->list, list);
// NOTE(review): `i` is a uint32_t but this function returns uint16_t, so an
// index above UINT16_MAX would be truncated here. Confirm that
// max_capture_list_count is kept small enough for ids to fit and to stay
// distinct from NONE.
return i;
}
static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
if (id >= MAX_CAPTURE_LIST_COUNT) return;
array_clear(&self->list[id]);
self->usage_map |= bitmask_for_index(id);
if (id >= self->list.size) return;
self->list.contents[id].size = UINT32_MAX;
self->free_capture_list_count++;
}
/**************
@ -2285,6 +2309,14 @@ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
return self->did_exceed_match_limit;
}
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
return self->capture_list_pool.max_capture_list_count;
}
// Replace the cap on how many capture lists this cursor's pool may allocate.
void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
  CaptureListPool *pool = &self->capture_list_pool;
  pool->max_capture_list_count = limit;
}
void ts_query_cursor_exec(
TSQueryCursor *self,
const TSQuery *query,