diff --git a/lib/src/query.c b/lib/src/query.c index a933c7f0..23fd4edc 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -140,7 +140,7 @@ static const uint16_t NONE = UINT16_MAX; static const TSSymbol WILDCARD_SYMBOL = 0; static const uint16_t MAX_STATE_COUNT = 32; -// #define LOG printf +// #define LOG(...) fprintf(stderr, __VA_ARGS__) #define LOG(...) /********** @@ -244,6 +244,10 @@ static TSQueryCapture *capture_list_pool_get(CaptureListPool *self, uint16_t id) return &self->list.contents[id * (self->list.size / MAX_STATE_COUNT)]; } +static bool capture_list_pool_is_empty(const CaptureListPool *self) { + return self->usage_map == 0; +} + static uint16_t capture_list_pool_acquire(CaptureListPool *self) { // In the usage_map bitmask, ones represent free lists, and zeros represent // lists that are in use. A free list id can quickly be found by counting @@ -412,7 +416,7 @@ static void ts_query__finalize_steps(TSQuery *self) { // a higher level of abstraction, such as the Rust/JavaScript bindings. They // can contain '@'-prefixed capture names, double-quoted strings, and bare // symbols, which also represent strings. -static TSQueryError ts_query_parse_predicate( +static TSQueryError ts_query__parse_predicate( TSQuery *self, Stream *stream ) { @@ -523,7 +527,7 @@ static TSQueryError ts_query_parse_predicate( // Read one S-expression pattern from the stream, and incorporate it into // the query's internal state machine representation. For nested patterns, // this function calls itself recursively. -static TSQueryError ts_query_parse_pattern( +static TSQueryError ts_query__parse_pattern( TSQuery *self, Stream *stream, uint32_t depth, @@ -546,13 +550,13 @@ static TSQueryError ts_query_parse_pattern( // Parse a nested list, which represents a pattern followed by // zero-or-more predicates. if (stream->next == '(' && depth == 0) { - TSQueryError e = ts_query_parse_pattern(self, stream, 0, capture_count); + TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count); if (e) return e; // Parse the predicates. stream_skip_whitespace(stream); for (;;) { - TSQueryError e = ts_query_parse_predicate(self, stream); + TSQueryError e = ts_query__parse_predicate(self, stream); if (e == PARENT_DONE) { stream_advance(stream); stream_skip_whitespace(stream); @@ -602,7 +606,7 @@ static TSQueryError ts_query_parse_pattern( // Parse the child patterns stream_skip_whitespace(stream); for (;;) { - TSQueryError e = ts_query_parse_pattern(self, stream, depth + 1, capture_count); + TSQueryError e = ts_query__parse_pattern(self, stream, depth + 1, capture_count); if (e == PARENT_DONE) { stream_advance(stream); break; @@ -666,7 +670,7 @@ static TSQueryError ts_query_parse_pattern( // Parse the pattern uint32_t step_index = self->steps.size; - TSQueryError e = ts_query_parse_pattern(self, stream, depth, capture_count); + TSQueryError e = ts_query__parse_pattern(self, stream, depth, capture_count); if (e == PARENT_DONE) return TSQueryErrorSyntax; if (e) return e; @@ -782,7 +786,7 @@ TSQuery *ts_query_new( .offset = self->predicate_steps.size, .length = 0, })); - *error_type = ts_query_parse_pattern(self, &stream, 0, &capture_count); + *error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count); array_push(&self->steps, ((QueryStep) { .depth = PATTERN_DONE_MARKER })); // If any pattern could not be parsed, then report the error information @@ -961,7 +965,83 @@ void ts_query_cursor_set_point_range( self->end_point = end_point; } -static QueryState *ts_query_cursor_copy_state( +// Search through all of the in-progress states, and find the captured +// node that occurs earliest in the document. +static bool ts_query_cursor__first_in_progress_capture( + TSQueryCursor *self, + uint32_t *state_index, + uint32_t *byte_offset, + uint32_t *pattern_index +) { + bool result = false; + for (unsigned i = 0; i < self->states.size; i++) { + const QueryState *state = &self->states.contents[i]; + if (state->capture_count > 0) { + const TSQueryCapture *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + uint32_t capture_byte = ts_node_start_byte(captures[0].node); + if ( + !result || + capture_byte < *byte_offset || + ( + capture_byte == *byte_offset && + state->pattern_index < *pattern_index + ) + ) { + result = true; + *state_index = i; + *byte_offset = capture_byte; + *pattern_index = state->pattern_index; + } + } + } + return result; +} + +static bool ts_query__cursor_add_state( + TSQueryCursor *self, + const PatternEntry *slice +) { + uint32_t list_id = capture_list_pool_acquire(&self->capture_list_pool); + + // If there are no capture lists left in the pool, then terminate whichever + // state has captured the earliest node in the document, and steal its + // capture list. + if (list_id == NONE) { + uint32_t state_index, byte_offset, pattern_index; + if (ts_query_cursor__first_in_progress_capture( + self, + &state_index, + &byte_offset, + &pattern_index + )) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + state_index, pattern_index, byte_offset + ); + list_id = self->states.contents[state_index].capture_list_id; + array_erase(&self->states, state_index); + } else { + LOG(" too many finished states.\n"); + return false; + } + } + + LOG(" start state. pattern:%u\n", slice->pattern_index); + array_push(&self->states, ((QueryState) { + .capture_list_id = list_id, + .step_index = slice->step_index, + .pattern_index = slice->pattern_index, + .start_depth = self->depth, + .capture_count = 0, + .consumed_capture_count = 0, + })); + return true; +} + +static QueryState *ts_query__cursor_copy_state( TSQueryCursor *self, const QueryState *state ) { @@ -989,7 +1069,7 @@ static QueryState *ts_query_cursor_copy_state( static inline bool ts_query_cursor__advance(TSQueryCursor *self) { do { if (self->ascending) { - LOG("leave node %s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor))); + LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor))); // When leaving a node, remove any unfinished states whose next step // needed to match something within that node. @@ -1057,11 +1137,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { ) return false; LOG( - "enter node %s. row:%u state_count:%u, finished_state_count: %u\n", + "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u, can_have_later_siblings:%d, can_have_later_siblings_with_this_field:%d\n", ts_node_type(node), + ts_language_field_name_for_id(self->query->language, field_id), ts_node_start_point(node).row, self->states.size, - self->finished_states.size + self->finished_states.size, + can_have_later_siblings, + can_have_later_siblings_with_this_field ); // Add new states for any patterns whose root node is a wildcard. @@ -1072,17 +1155,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - uint32_t capture_list_id = capture_list_pool_acquire( - &self->capture_list_pool - ); - if (capture_list_id == NONE) break; - array_push(&self->states, ((QueryState) { - .step_index = slice->step_index, - .pattern_index = slice->pattern_index, - .capture_list_id = capture_list_id, - .capture_count = 0, - .consumed_capture_count = 0, - })); + if (!ts_query__cursor_add_state(self, slice)) break; } // Add new states for any patterns whose root node matches this node. @@ -1091,29 +1164,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { PatternEntry *slice = &self->query->pattern_map.contents[i]; QueryStep *step = &self->query->steps.contents[slice->step_index]; do { + // If this node matches the first step of the pattern, then add a new + // state at the start of this pattern. if (step->field && field_id != step->field) continue; - - LOG(" start state. pattern:%u\n", slice->pattern_index); - - // If this node matches the first step of the pattern, then add a - // new in-progress state. First, acquire a list to hold the pattern's - // captures. - uint32_t capture_list_id = capture_list_pool_acquire( - &self->capture_list_pool - ); - if (capture_list_id == NONE) { - LOG(" too many states."); - break; - } - - array_push(&self->states, ((QueryState) { - .pattern_index = slice->pattern_index, - .step_index = slice->step_index, - .start_depth = self->depth, - .capture_list_id = capture_list_id, - .capture_count = 0, - .consumed_capture_count = 0, - })); + if (!ts_query__cursor_add_state(self, slice)) break; // Advance to the next pattern whose root node matches this node. i++; @@ -1178,13 +1232,17 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { step->contains_captures && later_sibling_can_match ) { - LOG( - " split state. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - QueryState *copy = ts_query_cursor_copy_state(self, state); - if (copy) next_state = copy; + QueryState *copy = ts_query__cursor_copy_state(self, state); + if (copy) { + LOG( + " split state. pattern:%u, step:%u\n", + copy->pattern_index, + copy->step_index + ); + next_state = copy; + } else { + LOG(" canot split state.\n"); + } } LOG( @@ -1298,26 +1356,13 @@ bool ts_query_cursor_next_capture( // this position. uint32_t first_unfinished_capture_byte = UINT32_MAX; uint32_t first_unfinished_pattern_index = UINT32_MAX; - for (unsigned i = 0; i < self->states.size; i++) { - const QueryState *state = &self->states.contents[i]; - if (state->capture_count > 0) { - const TSQueryCapture *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - uint32_t capture_byte = ts_node_start_byte(captures[0].node); - if ( - capture_byte < first_unfinished_capture_byte || - ( - capture_byte == first_unfinished_capture_byte && - state->pattern_index < first_unfinished_pattern_index - ) - ) { - first_unfinished_capture_byte = capture_byte; - first_unfinished_pattern_index = state->pattern_index; - } - } - } + uint32_t first_unfinished_state_index; + ts_query_cursor__first_in_progress_capture( + self, + &first_unfinished_state_index, + &first_unfinished_capture_byte, + &first_unfinished_pattern_index + ); // Find the earliest capture in a finished match. int first_finished_state_index = -1; @@ -1372,6 +1417,20 @@ bool ts_query_cursor_next_capture( state->consumed_capture_count++; return true; } + + if (capture_list_pool_is_empty(&self->capture_list_pool)) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + first_unfinished_state_index, + first_unfinished_pattern_index, + first_unfinished_capture_byte + ); + capture_list_pool_release( + &self->capture_list_pool, + self->states.contents[first_unfinished_state_index].capture_list_id + ); + array_erase(&self->states, first_unfinished_state_index); + } } // If there are no finished matches that are ready to be returned, then