feat: add an API to time out query executions
Currently, if a predicate is hard to match on the Rust side, a sizable query against a very large file can take forever and end up hanging. This commit adds an API function `ts_query_cursor_set_timeout_micros` to limit how long query execution is allowed to take, thereby eliminating the possibility of a hang.
This commit is contained in:
parent
a748488596
commit
3f424c0121
11 changed files with 132 additions and 8 deletions
|
|
@ -5146,3 +5146,28 @@ fn test_query_on_empty_source_code() {
|
|||
&[(0, vec![("program", "")])],
|
||||
);
|
||||
}
|
||||
|
||||
// Runs the same query twice over a source containing many function declarations:
// first with a short time budget, which is expected to cut execution short, and
// then with the budget cleared, which is expected to report every declaration.
#[test]
fn test_query_execution_with_timeout() {
    // Number of function declarations in the generated source, and therefore the
    // number of matches an unrestricted run must report.
    const FUNCTION_COUNT: usize = 1000;

    let language = get_language("javascript");
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    let source_code = "function foo() { while (true) { } }\n".repeat(FUNCTION_COUNT);
    let tree = parser.parse(&source_code, None).unwrap();
    let query = Query::new(&language, "(function_declaration) @function").unwrap();
    let mut cursor = QueryCursor::new();

    // With a 1000 microsecond budget the cursor should give up before it has
    // visited every declaration.
    cursor.set_timeout_micros(1000);
    let limited_count = cursor
        .matches(&query, tree.root_node(), source_code.as_bytes())
        .count();
    assert!(limited_count < FUNCTION_COUNT);

    // A timeout of zero clears the budget, so every declaration is matched.
    cursor.set_timeout_micros(0);
    let full_count = cursor
        .matches(&query, tree.root_node(), source_code.as_bytes())
        .count();
    assert_eq!(full_count, FUNCTION_COUNT);
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
/* automatically generated by rust-bindgen 0.69.4 */
|
||||
/* automatically generated by rust-bindgen 0.70.0 */
|
||||
|
||||
pub const TREE_SITTER_LANGUAGE_VERSION: u32 = 14;
|
||||
pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: u32 = 13;
|
||||
|
|
@ -462,7 +462,7 @@ extern "C" {
|
|||
pub fn ts_tree_cursor_delete(self_: *mut TSTreeCursor);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Re-initialize a tree cursor to start at a different node."]
|
||||
#[doc = " Re-initialize a tree cursor to start at the original node that the cursor was\n constructed with."]
|
||||
pub fn ts_tree_cursor_reset(self_: *mut TSTreeCursor, node: TSNode);
|
||||
}
|
||||
extern "C" {
|
||||
|
|
@ -637,6 +637,14 @@ extern "C" {
|
|||
extern "C" {
|
||||
pub fn ts_query_cursor_set_match_limit(self_: *mut TSQueryCursor, limit: u32);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set the maximum duration in microseconds that query execution should be allowed to\n take before halting.\n\n If query execution takes longer than this, it will halt early, returning NULL.\n See [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] for more information."]
|
||||
pub fn ts_query_cursor_set_timeout_micros(self_: *mut TSQueryCursor, timeout_micros: u64);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Get the duration in microseconds that query execution is allowed to take."]
|
||||
pub fn ts_query_cursor_timeout_micros(self_: *const TSQueryCursor) -> u64;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set the range of bytes or (row, column) positions in which the query\n will be executed."]
|
||||
pub fn ts_query_cursor_set_byte_range(
|
||||
|
|
|
|||
|
|
@ -2362,6 +2362,26 @@ impl QueryCursor {
|
|||
}
|
||||
}
|
||||
|
||||
/// Set the maximum duration in microseconds that query execution should be allowed to
|
||||
/// take before halting.
|
||||
///
|
||||
/// If query execution takes longer than this, it will halt early, returning None.
|
||||
#[doc(alias = "ts_query_cursor_set_timeout_micros")]
|
||||
pub fn set_timeout_micros(&mut self, timeout: u64) {
|
||||
unsafe {
|
||||
ffi::ts_query_cursor_set_timeout_micros(self.ptr.as_ptr(), timeout);
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the duration in microseconds that query execution is allowed to take.
|
||||
///
|
||||
/// This is set via [`set_timeout_micros`](QueryCursor::set_timeout_micros).
|
||||
#[doc(alias = "ts_query_cursor_timeout_micros")]
|
||||
#[must_use]
|
||||
pub fn timeout_micros(&self) -> u64 {
|
||||
unsafe { ffi::ts_query_cursor_timeout_micros(self.ptr.as_ptr()) }
|
||||
}
|
||||
|
||||
/// Check if, on its last execution, this cursor exceeded its maximum number
|
||||
/// of in-progress matches.
|
||||
#[doc(alias = "ts_query_cursor_did_exceed_match_limit")]
|
||||
|
|
|
|||
|
|
@ -792,7 +792,8 @@ void ts_query_matches_wasm(
|
|||
uint32_t start_index,
|
||||
uint32_t end_index,
|
||||
uint32_t match_limit,
|
||||
uint32_t max_start_depth
|
||||
uint32_t max_start_depth,
|
||||
uint32_t timeout_micros
|
||||
) {
|
||||
if (!scratch_query_cursor) {
|
||||
scratch_query_cursor = ts_query_cursor_new();
|
||||
|
|
@ -810,6 +811,7 @@ void ts_query_matches_wasm(
|
|||
ts_query_cursor_set_byte_range(scratch_query_cursor, start_index, end_index);
|
||||
ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
|
||||
ts_query_cursor_set_max_start_depth(scratch_query_cursor, max_start_depth);
|
||||
ts_query_cursor_set_timeout_micros(scratch_query_cursor, timeout_micros);
|
||||
ts_query_cursor_exec(scratch_query_cursor, self, node);
|
||||
|
||||
uint32_t index = 0;
|
||||
|
|
@ -847,7 +849,8 @@ void ts_query_captures_wasm(
|
|||
uint32_t start_index,
|
||||
uint32_t end_index,
|
||||
uint32_t match_limit,
|
||||
uint32_t max_start_depth
|
||||
uint32_t max_start_depth,
|
||||
uint32_t timeout_micros
|
||||
) {
|
||||
if (!scratch_query_cursor) {
|
||||
scratch_query_cursor = ts_query_cursor_new();
|
||||
|
|
@ -862,6 +865,7 @@ void ts_query_captures_wasm(
|
|||
ts_query_cursor_set_byte_range(scratch_query_cursor, start_index, end_index);
|
||||
ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
|
||||
ts_query_cursor_set_max_start_depth(scratch_query_cursor, max_start_depth);
|
||||
ts_query_cursor_set_timeout_micros(scratch_query_cursor, timeout_micros);
|
||||
ts_query_cursor_exec(scratch_query_cursor, self, node);
|
||||
|
||||
unsigned index = 0;
|
||||
|
|
|
|||
|
|
@ -1279,6 +1279,7 @@ class Query {
|
|||
endIndex = 0,
|
||||
matchLimit = 0xFFFFFFFF,
|
||||
maxStartDepth = 0xFFFFFFFF,
|
||||
timeoutMicros = 0,
|
||||
} = {},
|
||||
) {
|
||||
if (typeof matchLimit !== 'number') {
|
||||
|
|
@ -1298,6 +1299,7 @@ class Query {
|
|||
endIndex,
|
||||
matchLimit,
|
||||
maxStartDepth,
|
||||
timeoutMicros,
|
||||
);
|
||||
|
||||
const rawCount = getValue(TRANSFER_BUFFER, 'i32');
|
||||
|
|
@ -1342,6 +1344,7 @@ class Query {
|
|||
endIndex = 0,
|
||||
matchLimit = 0xFFFFFFFF,
|
||||
maxStartDepth = 0xFFFFFFFF,
|
||||
timeoutMicros = 0,
|
||||
} = {},
|
||||
) {
|
||||
if (typeof matchLimit !== 'number') {
|
||||
|
|
@ -1361,6 +1364,7 @@ class Query {
|
|||
endIndex,
|
||||
matchLimit,
|
||||
maxStartDepth,
|
||||
timeoutMicros,
|
||||
);
|
||||
|
||||
const count = getValue(TRANSFER_BUFFER, 'i32');
|
||||
|
|
|
|||
|
|
@ -451,6 +451,17 @@ describe('Query', () => {
|
|||
]);
|
||||
});
|
||||
});
|
||||
|
||||
// Checks that the `timeoutMicros` option cuts query execution short, and that a
// value of 0 leaves execution unrestricted.
describe('Set a timeout', () =>
  it('returns less than the expected matches', () => {
    // 1000 copies of a well-formed function declaration. The function body must
    // open with `{` so the fixture is valid JavaScript and the expected match
    // count does not depend on the parser's error recovery.
    tree = parser.parse('function foo() { while (true) { } }\n'.repeat(1000));
    query = JavaScript.query('(function_declaration name: (identifier) @function)');

    // A 1000 microsecond budget is expected to halt before all matches are found.
    const matches = query.matches(tree.rootNode, { timeoutMicros: 1000 });
    assert.isBelow(matches.length, 1000);

    // With no timeout every declaration is reported.
    const matches2 = query.matches(tree.rootNode, { timeoutMicros: 0 });
    assert.equal(matches2.length, 1000);
  })
);
|
||||
});
|
||||
|
||||
function formatMatches(matches) {
|
||||
|
|
|
|||
1
lib/binding_web/tree-sitter-web.d.ts
vendored
1
lib/binding_web/tree-sitter-web.d.ts
vendored
|
|
@ -179,6 +179,7 @@ declare module 'web-tree-sitter' {
|
|||
endIndex?: number;
|
||||
matchLimit?: number;
|
||||
maxStartDepth?: number;
|
||||
timeoutMicros?: number;
|
||||
};
|
||||
|
||||
export interface PredicateResult {
|
||||
|
|
|
|||
|
|
@ -983,6 +983,22 @@ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self);
|
|||
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self);
|
||||
void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit);
|
||||
|
||||
/**
|
||||
* Set the maximum duration in microseconds that query execution should be allowed to
|
||||
* take before halting.
|
||||
*
|
||||
* If query execution takes longer than this, it will halt early, returning NULL.
|
||||
* See [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] for more information.
|
||||
*/
|
||||
void ts_query_cursor_set_timeout_micros(TSQueryCursor *self, uint64_t timeout_micros);
|
||||
|
||||
/**
|
||||
* Get the duration in microseconds that query execution is allowed to take.
|
||||
*
|
||||
* This is set via [`ts_query_cursor_set_timeout_micros`].
|
||||
*/
|
||||
uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self);
|
||||
|
||||
/**
|
||||
* Set the range of bytes or (row, column) positions in which the query
|
||||
* will be executed.
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ static const unsigned MAX_VERSION_COUNT = 6;
|
|||
static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
|
||||
static const unsigned MAX_SUMMARY_DEPTH = 16;
|
||||
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
|
||||
static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
|
||||
static const unsigned OP_COUNT_PER_PARSER_TIMEOUT_CHECK = 100;
|
||||
|
||||
typedef struct {
|
||||
Subtree token;
|
||||
|
|
@ -1565,7 +1565,7 @@ static bool ts_parser__advance(
|
|||
|
||||
// If a cancellation flag or a timeout was provided, then check every
|
||||
// time a fixed number of parse actions has been processed.
|
||||
if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
|
||||
if (++self->operation_count == OP_COUNT_PER_PARSER_TIMEOUT_CHECK) {
|
||||
self->operation_count = 0;
|
||||
}
|
||||
if (
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "tree_sitter/api.h"
|
||||
#include "./alloc.h"
|
||||
#include "./array.h"
|
||||
#include "./clock.h"
|
||||
#include "./language.h"
|
||||
#include "./point.h"
|
||||
#include "./tree_cursor.h"
|
||||
|
|
@ -312,6 +313,9 @@ struct TSQueryCursor {
|
|||
TSPoint start_point;
|
||||
TSPoint end_point;
|
||||
uint32_t next_state_id;
|
||||
TSClock end_clock;
|
||||
TSDuration timeout_duration;
|
||||
unsigned operation_count;
|
||||
bool on_visible_node;
|
||||
bool ascending;
|
||||
bool halted;
|
||||
|
|
@ -322,6 +326,7 @@ static const TSQueryError PARENT_DONE = -1;
|
|||
static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
|
||||
static const uint16_t NONE = UINT16_MAX;
|
||||
static const TSSymbol WILDCARD_SYMBOL = 0;
|
||||
static const unsigned OP_COUNT_PER_QUERY_TIMEOUT_CHECK = 100;
|
||||
|
||||
/**********
|
||||
* Stream
|
||||
|
|
@ -2986,6 +2991,9 @@ TSQueryCursor *ts_query_cursor_new(void) {
|
|||
.start_point = {0, 0},
|
||||
.end_point = POINT_MAX,
|
||||
.max_start_depth = UINT32_MAX,
|
||||
.timeout_duration = 0,
|
||||
.end_clock = clock_null(),
|
||||
.operation_count = 0,
|
||||
};
|
||||
array_reserve(&self->states, 8);
|
||||
array_reserve(&self->finished_states, 8);
|
||||
|
|
@ -3012,6 +3020,14 @@ void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
|
|||
self->capture_list_pool.max_capture_list_count = limit;
|
||||
}
|
||||
|
||||
uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self) {
|
||||
return duration_to_micros(self->timeout_duration);
|
||||
}
|
||||
|
||||
// Store the timeout on the cursor as a duration. Only the duration is recorded
// here; no deadline is computed by this function.
void ts_query_cursor_set_timeout_micros(TSQueryCursor *self, uint64_t timeout_micros) {
  TSDuration converted = duration_from_micros(timeout_micros);
  self->timeout_duration = converted;
}
|
||||
|
||||
#ifdef DEBUG_EXECUTE_QUERY
|
||||
#define LOG(...) fprintf(stderr, __VA_ARGS__)
|
||||
#else
|
||||
|
|
@ -3023,7 +3039,7 @@ void ts_query_cursor_exec(
|
|||
const TSQuery *query,
|
||||
TSNode node
|
||||
) {
|
||||
if (query) {
|
||||
if (query) {
|
||||
LOG("query steps:\n");
|
||||
for (unsigned i = 0; i < query->steps.size; i++) {
|
||||
QueryStep *step = &query->steps.contents[i];
|
||||
|
|
@ -3060,6 +3076,12 @@ void ts_query_cursor_exec(
|
|||
self->halted = false;
|
||||
self->query = query;
|
||||
self->did_exceed_match_limit = false;
|
||||
self->operation_count = 0;
|
||||
if (self->timeout_duration) {
|
||||
self->end_clock = clock_after(clock_now(), self->timeout_duration);
|
||||
} else {
|
||||
self->end_clock = clock_null();
|
||||
}
|
||||
}
|
||||
|
||||
void ts_query_cursor_set_byte_range(
|
||||
|
|
@ -3456,7 +3478,19 @@ static inline bool ts_query_cursor__advance(
|
|||
}
|
||||
}
|
||||
|
||||
if (did_match || self->halted) return did_match;
|
||||
if (++self->operation_count == OP_COUNT_PER_QUERY_TIMEOUT_CHECK) {
|
||||
self->operation_count = 0;
|
||||
}
|
||||
if (
|
||||
did_match ||
|
||||
self->halted ||
|
||||
(
|
||||
self->operation_count == 0 &&
|
||||
!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)
|
||||
)
|
||||
) {
|
||||
return did_match;
|
||||
}
|
||||
|
||||
// Exit the current node.
|
||||
if (self->ascending) {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ bindgen \
|
|||
--blocklist-type '^__.*' \
|
||||
--no-prepend-enum-name \
|
||||
--no-copy "$no_copy" \
|
||||
--use-core \
|
||||
"$header_path" \
|
||||
-- \
|
||||
-D TREE_SITTER_FEATURE_WASM \
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue