feat: add an API to time out query executions

Currently, if a predicate is hard to match on the Rust side, a sizable
query against a very large file can take forever and end up hanging.
This commit adds an API function, `ts_query_cursor_set_timeout_micros`, to
limit how long query execution is allowed to take, thereby eliminating the
chance of a hang.
This commit is contained in:
Amaan Qureshi 2024-08-29 17:21:52 -04:00
parent a748488596
commit 3f424c0121
11 changed files with 132 additions and 8 deletions

View file

@ -83,7 +83,7 @@ static const unsigned MAX_VERSION_COUNT = 6;
static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
static const unsigned MAX_SUMMARY_DEPTH = 16;
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
static const unsigned OP_COUNT_PER_PARSER_TIMEOUT_CHECK = 100;
typedef struct {
Subtree token;
@ -1565,7 +1565,7 @@ static bool ts_parser__advance(
// If a cancellation flag or a timeout was provided, then check every
// time a fixed number of parse actions has been processed.
if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
if (++self->operation_count == OP_COUNT_PER_PARSER_TIMEOUT_CHECK) {
self->operation_count = 0;
}
if (

View file

@ -1,6 +1,7 @@
#include "tree_sitter/api.h"
#include "./alloc.h"
#include "./array.h"
#include "./clock.h"
#include "./language.h"
#include "./point.h"
#include "./tree_cursor.h"
@ -312,6 +313,9 @@ struct TSQueryCursor {
TSPoint start_point;
TSPoint end_point;
uint32_t next_state_id;
TSClock end_clock;
TSDuration timeout_duration;
unsigned operation_count;
bool on_visible_node;
bool ascending;
bool halted;
@ -322,6 +326,7 @@ static const TSQueryError PARENT_DONE = -1;
static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
static const uint16_t NONE = UINT16_MAX;
static const TSSymbol WILDCARD_SYMBOL = 0;
static const unsigned OP_COUNT_PER_QUERY_TIMEOUT_CHECK = 100;
/**********
* Stream
@ -2986,6 +2991,9 @@ TSQueryCursor *ts_query_cursor_new(void) {
.start_point = {0, 0},
.end_point = POINT_MAX,
.max_start_depth = UINT32_MAX,
.timeout_duration = 0,
.end_clock = clock_null(),
.operation_count = 0,
};
array_reserve(&self->states, 8);
array_reserve(&self->finished_states, 8);
@ -3012,6 +3020,14 @@ void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
self->capture_list_pool.max_capture_list_count = limit;
}
/**
 * Report the timeout currently configured on this query cursor.
 *
 * The cursor stores the timeout as a duration value; this accessor converts
 * that stored value back to microseconds with `duration_to_micros`.
 *
 * @param self  The query cursor to inspect.
 * @return      The stored timeout, expressed in microseconds.
 */
uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self) {
  const uint64_t micros = duration_to_micros(self->timeout_duration);
  return micros;
}
/**
 * Configure how long a query execution on this cursor may run.
 *
 * The microsecond value is converted with `duration_from_micros` and stored
 * on the cursor. Only the duration is recorded here; the deadline itself is
 * computed from it in `ts_query_cursor_exec`, which leaves the deadline
 * unset (no timeout) when the stored duration is zero.
 *
 * @param self            The query cursor to configure.
 * @param timeout_micros  Timeout in microseconds; zero means no timeout.
 */
void ts_query_cursor_set_timeout_micros(
  TSQueryCursor *self,
  uint64_t timeout_micros
) {
  self->timeout_duration = duration_from_micros(timeout_micros);
}
#ifdef DEBUG_EXECUTE_QUERY
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#else
@ -3023,7 +3039,7 @@ void ts_query_cursor_exec(
const TSQuery *query,
TSNode node
) {
if (query) {
if (query) {
LOG("query steps:\n");
for (unsigned i = 0; i < query->steps.size; i++) {
QueryStep *step = &query->steps.contents[i];
@ -3060,6 +3076,12 @@ void ts_query_cursor_exec(
self->halted = false;
self->query = query;
self->did_exceed_match_limit = false;
self->operation_count = 0;
if (self->timeout_duration) {
self->end_clock = clock_after(clock_now(), self->timeout_duration);
} else {
self->end_clock = clock_null();
}
}
void ts_query_cursor_set_byte_range(
@ -3456,7 +3478,19 @@ static inline bool ts_query_cursor__advance(
}
}
if (did_match || self->halted) return did_match;
if (++self->operation_count == OP_COUNT_PER_QUERY_TIMEOUT_CHECK) {
self->operation_count = 0;
}
if (
did_match ||
self->halted ||
(
self->operation_count == 0 &&
!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)
)
) {
return did_match;
}
// Exit the current node.
if (self->ascending) {