Query - If too many states, kill the one w/ the earliest capture

This commit is contained in:
Max Brunsfeld 2019-11-21 17:20:34 -08:00
parent 71998ef3c1
commit e3f6b1a1af

View file

@ -140,7 +140,7 @@ static const uint16_t NONE = UINT16_MAX;
static const TSSymbol WILDCARD_SYMBOL = 0;
static const uint16_t MAX_STATE_COUNT = 32;
// #define LOG printf
// #define LOG(...) fprintf(stderr, __VA_ARGS__)
#define LOG(...)
/**********
@ -244,6 +244,10 @@ static TSQueryCapture *capture_list_pool_get(CaptureListPool *self, uint16_t id)
return &self->list.contents[id * (self->list.size / MAX_STATE_COUNT)];
}
// Report whether the pool has no free capture list left. Set bits in the
// usage_map bitmask stand for free lists, so the pool is exhausted exactly
// when no bit is set.
static bool capture_list_pool_is_empty(const CaptureListPool *self) {
  bool has_free_list = self->usage_map != 0;
  return !has_free_list;
}
static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
// In the usage_map bitmask, ones represent free lists, and zeros represent
// lists that are in use. A free list id can quickly be found by counting
@ -412,7 +416,7 @@ static void ts_query__finalize_steps(TSQuery *self) {
// a higher level of abstraction, such as the Rust/JavaScript bindings. They
// can contain '@'-prefixed capture names, double-quoted strings, and bare
// symbols, which also represent strings.
static TSQueryError ts_query_parse_predicate(
static TSQueryError ts_query__parse_predicate(
TSQuery *self,
Stream *stream
) {
@ -523,7 +527,7 @@ static TSQueryError ts_query_parse_predicate(
// Read one S-expression pattern from the stream, and incorporate it into
// the query's internal state machine representation. For nested patterns,
// this function calls itself recursively.
static TSQueryError ts_query_parse_pattern(
static TSQueryError ts_query__parse_pattern(
TSQuery *self,
Stream *stream,
uint32_t depth,
@ -546,13 +550,13 @@ static TSQueryError ts_query_parse_pattern(
// Parse a nested list, which represents a pattern followed by
// zero-or-more predicates.
if (stream->next == '(' && depth == 0) {
TSQueryError e = ts_query_parse_pattern(self, stream, 0, capture_count);
TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count);
if (e) return e;
// Parse the predicates.
stream_skip_whitespace(stream);
for (;;) {
TSQueryError e = ts_query_parse_predicate(self, stream);
TSQueryError e = ts_query__parse_predicate(self, stream);
if (e == PARENT_DONE) {
stream_advance(stream);
stream_skip_whitespace(stream);
@ -602,7 +606,7 @@ static TSQueryError ts_query_parse_pattern(
// Parse the child patterns
stream_skip_whitespace(stream);
for (;;) {
TSQueryError e = ts_query_parse_pattern(self, stream, depth + 1, capture_count);
TSQueryError e = ts_query__parse_pattern(self, stream, depth + 1, capture_count);
if (e == PARENT_DONE) {
stream_advance(stream);
break;
@ -666,7 +670,7 @@ static TSQueryError ts_query_parse_pattern(
// Parse the pattern
uint32_t step_index = self->steps.size;
TSQueryError e = ts_query_parse_pattern(self, stream, depth, capture_count);
TSQueryError e = ts_query__parse_pattern(self, stream, depth, capture_count);
if (e == PARENT_DONE) return TSQueryErrorSyntax;
if (e) return e;
@ -782,7 +786,7 @@ TSQuery *ts_query_new(
.offset = self->predicate_steps.size,
.length = 0,
}));
*error_type = ts_query_parse_pattern(self, &stream, 0, &capture_count);
*error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count);
array_push(&self->steps, ((QueryStep) { .depth = PATTERN_DONE_MARKER }));
// If any pattern could not be parsed, then report the error information
@ -961,7 +965,83 @@ void ts_query_cursor_set_point_range(
self->end_point = end_point;
}
static QueryState *ts_query_cursor_copy_state(
// Scan every in-progress state and locate the one whose first captured node
// starts at the lowest byte offset in the document. When two states tie on
// byte offset, the one with the lower pattern index wins.
//
// On success, returns true and writes the winning state's position within
// `self->states`, the start byte of its first capture, and its pattern index
// through the three out-parameters. When no in-progress state holds any
// capture, returns false and leaves the out-parameters untouched.
static bool ts_query_cursor__first_in_progress_capture(
  TSQueryCursor *self,
  uint32_t *state_index,
  uint32_t *byte_offset,
  uint32_t *pattern_index
) {
  bool found = false;

  for (unsigned index = 0; index < self->states.size; index++) {
    const QueryState *candidate = &self->states.contents[index];

    // A state without captures has no position to rank it by.
    if (!(candidate->capture_count > 0)) continue;

    const TSQueryCapture *capture_list = capture_list_pool_get(
      &self->capture_list_pool,
      candidate->capture_list_id
    );
    uint32_t start_byte = ts_node_start_byte(capture_list[0].node);

    // The out-parameters are only read once an earlier candidate has written
    // them; the very first candidate is accepted unconditionally.
    if (found) {
      bool starts_earlier = start_byte < *byte_offset;
      bool wins_tie =
        start_byte == *byte_offset &&
        candidate->pattern_index < *pattern_index;
      if (!starts_earlier && !wins_tie) continue;
    }

    found = true;
    *state_index = index;
    *byte_offset = start_byte;
    *pattern_index = candidate->pattern_index;
  }

  return found;
}
// Begin tracking a new in-progress state for the pattern described by
// `slice`, positioned at that pattern's first step and anchored at the
// cursor's current depth.
//
// Returns true when the state was added. Returns false when no capture list
// could be obtained: the pool is exhausted and no in-progress state holds a
// capture whose list could be taken over.
static bool ts_query__cursor_add_state(
  TSQueryCursor *self,
  const PatternEntry *slice
) {
  uint32_t list_id = capture_list_pool_acquire(&self->capture_list_pool);

  if (list_id == NONE) {
    // The pool is exhausted. Sacrifice the in-progress state whose first
    // capture is earliest in the document and reuse its capture list.
    uint32_t victim_index, victim_byte, victim_pattern;
    bool have_victim = ts_query_cursor__first_in_progress_capture(
      self,
      &victim_index,
      &victim_byte,
      &victim_pattern
    );

    if (!have_victim) {
      LOG(" too many finished states.\n");
      return false;
    }

    LOG(
      " abandon state. index:%u, pattern:%u, offset:%u.\n",
      victim_index, victim_pattern, victim_byte
    );
    list_id = self->states.contents[victim_index].capture_list_id;
    array_erase(&self->states, victim_index);
  }

  LOG(" start state. pattern:%u\n", slice->pattern_index);

  // The new state starts with no captures recorded and none consumed.
  QueryState fresh_state = {
    .pattern_index = slice->pattern_index,
    .step_index = slice->step_index,
    .start_depth = self->depth,
    .capture_list_id = list_id,
    .capture_count = 0,
    .consumed_capture_count = 0,
  };
  array_push(&self->states, fresh_state);
  return true;
}
static QueryState *ts_query__cursor_copy_state(
TSQueryCursor *self,
const QueryState *state
) {
@ -989,7 +1069,7 @@ static QueryState *ts_query_cursor_copy_state(
static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
do {
if (self->ascending) {
LOG("leave node %s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
// When leaving a node, remove any unfinished states whose next step
// needed to match something within that node.
@ -1057,11 +1137,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
) return false;
LOG(
"enter node %s. row:%u state_count:%u, finished_state_count: %u\n",
"enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u, can_have_later_siblings:%d, can_have_later_siblings_with_this_field:%d\n",
ts_node_type(node),
ts_language_field_name_for_id(self->query->language, field_id),
ts_node_start_point(node).row,
self->states.size,
self->finished_states.size
self->finished_states.size,
can_have_later_siblings,
can_have_later_siblings_with_this_field
);
// Add new states for any patterns whose root node is a wildcard.
@ -1072,17 +1155,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (step->field && field_id != step->field) continue;
uint32_t capture_list_id = capture_list_pool_acquire(
&self->capture_list_pool
);
if (capture_list_id == NONE) break;
array_push(&self->states, ((QueryState) {
.step_index = slice->step_index,
.pattern_index = slice->pattern_index,
.capture_list_id = capture_list_id,
.capture_count = 0,
.consumed_capture_count = 0,
}));
if (!ts_query__cursor_add_state(self, slice)) break;
}
// Add new states for any patterns whose root node matches this node.
@ -1091,29 +1164,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
PatternEntry *slice = &self->query->pattern_map.contents[i];
QueryStep *step = &self->query->steps.contents[slice->step_index];
do {
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (step->field && field_id != step->field) continue;
LOG(" start state. pattern:%u\n", slice->pattern_index);
// If this node matches the first step of the pattern, then add a
// new in-progress state. First, acquire a list to hold the pattern's
// captures.
uint32_t capture_list_id = capture_list_pool_acquire(
&self->capture_list_pool
);
if (capture_list_id == NONE) {
LOG(" too many states.");
break;
}
array_push(&self->states, ((QueryState) {
.pattern_index = slice->pattern_index,
.step_index = slice->step_index,
.start_depth = self->depth,
.capture_list_id = capture_list_id,
.capture_count = 0,
.consumed_capture_count = 0,
}));
if (!ts_query__cursor_add_state(self, slice)) break;
// Advance to the next pattern whose root node matches this node.
i++;
@ -1178,13 +1232,17 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
step->contains_captures &&
later_sibling_can_match
) {
LOG(
" split state. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
QueryState *copy = ts_query_cursor_copy_state(self, state);
if (copy) next_state = copy;
// Try to duplicate the current state. When a copy is obtained, it becomes
// the state that is advanced (`next_state`); otherwise `next_state` is left
// as it was and the failure is only logged.
QueryState *copy = ts_query__cursor_copy_state(self, state);
if (copy) {
LOG(
" split state. pattern:%u, step:%u\n",
copy->pattern_index,
copy->step_index
);
next_state = copy;
} else {
LOG(" cannot split state.\n");
}
}
LOG(
@ -1298,26 +1356,13 @@ bool ts_query_cursor_next_capture(
// this position.
uint32_t first_unfinished_capture_byte = UINT32_MAX;
uint32_t first_unfinished_pattern_index = UINT32_MAX;
for (unsigned i = 0; i < self->states.size; i++) {
const QueryState *state = &self->states.contents[i];
if (state->capture_count > 0) {
const TSQueryCapture *captures = capture_list_pool_get(
&self->capture_list_pool,
state->capture_list_id
);
uint32_t capture_byte = ts_node_start_byte(captures[0].node);
if (
capture_byte < first_unfinished_capture_byte ||
(
capture_byte == first_unfinished_capture_byte &&
state->pattern_index < first_unfinished_pattern_index
)
) {
first_unfinished_capture_byte = capture_byte;
first_unfinished_pattern_index = state->pattern_index;
}
}
}
uint32_t first_unfinished_state_index;
ts_query_cursor__first_in_progress_capture(
self,
&first_unfinished_state_index,
&first_unfinished_capture_byte,
&first_unfinished_pattern_index
);
// Find the earliest capture in a finished match.
int first_finished_state_index = -1;
@ -1372,6 +1417,20 @@ bool ts_query_cursor_next_capture(
state->consumed_capture_count++;
return true;
}
// When every capture list is in use, make room by abandoning the in-progress
// state whose first capture is earliest in the document.
//
// The victim is looked up here and only abandoned when the lookup reports
// success. If no in-progress state holds any capture, the lookup leaves its
// out-parameters unset, and indexing `self->states` with such a value would
// read an indeterminate index.
if (capture_list_pool_is_empty(&self->capture_list_pool)) {
uint32_t abandoned_state_index, abandoned_capture_byte, abandoned_pattern_index;
if (ts_query_cursor__first_in_progress_capture(
self,
&abandoned_state_index,
&abandoned_capture_byte,
&abandoned_pattern_index
)) {
LOG(
" abandon state. index:%u, pattern:%u, offset:%u.\n",
abandoned_state_index,
abandoned_pattern_index,
abandoned_capture_byte
);
capture_list_pool_release(
&self->capture_list_pool,
self->states.contents[abandoned_state_index].capture_list_id
);
array_erase(&self->states, abandoned_state_index);
}
}
}
// If there are no finished matches that are ready to be returned, then