Mark steps as definite on query construction
* Add a ts_query_pattern_is_definite API, just for debugging this
* Store state_count on TSLanguage structs, to allow for scanning parse tables
This commit is contained in:
parent
6a46dff89a
commit
4c2f36a07b
10 changed files with 755 additions and 76 deletions
632
lib/src/query.c
632
lib/src/query.c
|
|
@ -14,6 +14,8 @@
|
|||
#define MAX_STATE_COUNT 256
|
||||
#define MAX_CAPTURE_LIST_COUNT 32
|
||||
#define MAX_STEP_CAPTURE_COUNT 3
|
||||
#define MAX_STATE_PREDECESSOR_COUNT 100
|
||||
#define MAX_WALK_STATE_DEPTH 4
|
||||
|
||||
/*
|
||||
* Stream - A sequence of unicode characters derived from a UTF8 string.
|
||||
|
|
@ -55,6 +57,7 @@ typedef struct {
|
|||
bool is_pass_through: 1;
|
||||
bool is_dead_end: 1;
|
||||
bool alternative_is_immediate: 1;
|
||||
bool is_definite: 1;
|
||||
} QueryStep;
|
||||
|
||||
/*
|
||||
|
|
@ -89,6 +92,12 @@ typedef struct {
|
|||
uint16_t pattern_index;
|
||||
} PatternEntry;
|
||||
|
||||
typedef struct {
|
||||
Slice predicate_steps;
|
||||
uint32_t start_byte;
|
||||
uint32_t start_step;
|
||||
} QueryPattern;
|
||||
|
||||
/*
|
||||
* QueryState - The state of an in-progress match of a particular pattern
|
||||
* in a query. While executing, a `TSQueryCursor` must keep track of a number
|
||||
|
|
@ -138,6 +147,31 @@ typedef struct {
|
|||
uint32_t usage_map;
|
||||
} CaptureListPool;
|
||||
|
||||
/*
|
||||
* WalkState - The state needed for walking the parse table when analyzing
|
||||
* a query pattern, to determine the steps where the pattern could fail
|
||||
* to match.
|
||||
*/
|
||||
typedef struct {
|
||||
TSStateId state;
|
||||
TSSymbol parent_symbol;
|
||||
uint16_t child_index;
|
||||
TSFieldId field;
|
||||
} WalkStateEntry;
|
||||
|
||||
typedef struct {
|
||||
WalkStateEntry stack[MAX_WALK_STATE_DEPTH];
|
||||
uint16_t depth;
|
||||
uint16_t step_index;
|
||||
} WalkState;
|
||||
|
||||
/*
|
||||
* StatePredecessorMap - A map that stores the predecessors of each parse state.
|
||||
*/
|
||||
typedef struct {
|
||||
TSStateId *contents;
|
||||
} StatePredecessorMap;
|
||||
|
||||
/*
|
||||
* TSQuery - A tree query, compiled from a string of S-expressions. The query
|
||||
* itself is immutable. The mutable state used in the process of executing the
|
||||
|
|
@ -149,8 +183,7 @@ struct TSQuery {
|
|||
Array(QueryStep) steps;
|
||||
Array(PatternEntry) pattern_map;
|
||||
Array(TSQueryPredicateStep) predicate_steps;
|
||||
Array(Slice) predicates_by_pattern;
|
||||
Array(uint32_t) start_bytes_by_pattern;
|
||||
Array(QueryPattern) patterns;
|
||||
const TSLanguage *language;
|
||||
uint16_t wildcard_root_pattern_count;
|
||||
TSSymbol *symbol_map;
|
||||
|
|
@ -451,6 +484,7 @@ static QueryStep query_step__new(
|
|||
.is_pattern_start = false,
|
||||
.is_pass_through = false,
|
||||
.is_dead_end = false,
|
||||
.is_definite = false,
|
||||
.is_immediate = is_immediate,
|
||||
.alternative_is_immediate = false,
|
||||
};
|
||||
|
|
@ -480,6 +514,67 @@ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
|
|||
}
|
||||
}
|
||||
|
||||
/**********************
|
||||
* StatePredecessorMap
|
||||
**********************/
|
||||
|
||||
static inline StatePredecessorMap state_predecessor_map_new(const TSLanguage *language) {
|
||||
return (StatePredecessorMap) {
|
||||
.contents = ts_calloc(language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), sizeof(TSStateId)),
|
||||
};
|
||||
}
|
||||
|
||||
// Release the table owned by the map. The map struct itself is not freed.
static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
  TSStateId *table = self->contents;
  ts_free(table);
}
|
||||
|
||||
static inline void state_predecessor_map_add(
|
||||
StatePredecessorMap *self,
|
||||
TSStateId state,
|
||||
TSStateId predecessor
|
||||
) {
|
||||
unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
|
||||
TSStateId *count = &self->contents[index];
|
||||
if (*count == 0 || (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)) {
|
||||
(*count)++;
|
||||
self->contents[index + *count] = predecessor;
|
||||
}
|
||||
}
|
||||
|
||||
static inline const TSStateId *state_predecessor_map_get(
|
||||
const StatePredecessorMap *self,
|
||||
TSStateId state,
|
||||
unsigned *count
|
||||
) {
|
||||
unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
|
||||
*count = self->contents[index];
|
||||
return &self->contents[index + 1];
|
||||
}
|
||||
|
||||
/************
|
||||
* WalkState
|
||||
************/
|
||||
|
||||
// Three-way comparison of two walk states, returning -1, 0 or 1.
//
// States are ordered by depth, then by step index, and then entry by entry
// (from the bottom of the stack) by parse state, parent symbol and child
// index. The `field` of each entry does not take part in the comparison.
static inline int walk_state__compare(WalkState *self, WalkState *other) {
  if (self->depth != other->depth) {
    return self->depth < other->depth ? -1 : 1;
  }
  if (self->step_index != other->step_index) {
    return self->step_index < other->step_index ? -1 : 1;
  }

  for (unsigned level = 0; level < self->depth; level++) {
    const WalkStateEntry *a = &self->stack[level];
    const WalkStateEntry *b = &other->stack[level];
    if (a->state != b->state) {
      return a->state < b->state ? -1 : 1;
    }
    if (a->parent_symbol != b->parent_symbol) {
      return a->parent_symbol < b->parent_symbol ? -1 : 1;
    }
    if (a->child_index != b->child_index) {
      return a->child_index < b->child_index ? -1 : 1;
    }
  }

  return 0;
}
|
||||
|
||||
// Get a pointer to the entry at index `depth - 1` of the stack, i.e. the
// entry that was pushed most recently.
static inline WalkStateEntry *walk_state__top(WalkState *self) {
  return self->stack + (self->depth - 1);
}
|
||||
|
||||
/*********
|
||||
* Query
|
||||
*********/
|
||||
|
|
@ -552,6 +647,466 @@ static inline void ts_query__pattern_map_insert(
|
|||
}));
|
||||
}
|
||||
|
||||
// Analyze all of the patterns in the query and set the `is_definite` flag on
// their steps.
//
// A step is definite if the containing pattern will definitely match once
// that step has been reached. The analysis works in several phases:
//   1. Collect every step that has child steps ("parent patterns").
//   2. Build one "subgraph" of parse states for every parent symbol and for
//      every hidden symbol, by scanning the language's parse table.
//   3. Walk those subgraphs for each parent pattern, recording the positions
//      within the pattern at which a walk can finish.
//   4. Clear `is_definite` on the child steps where the pattern could still
//      fail, and propagate non-definiteness from children up to parents.
//
// All of the temporary storage allocated here is released before returning.
static void ts_query__analyze_patterns(TSQuery *self) {
  typedef struct {
    TSSymbol parent_symbol;
    uint32_t parent_step_index;
    Array(uint32_t) child_step_indices;
  } ParentPattern;

  typedef struct {
    TSStateId state;
    uint8_t child_index;
    uint8_t production_id;
    bool done;
  } SubgraphNode;

  typedef struct {
    TSSymbol symbol;
    Array(TSStateId) start_states;
    Array(SubgraphNode) nodes;
  } SymbolSubgraph;

  typedef Array(WalkState) WalkStateList;

  // Identify all of the patterns in the query that have child patterns. This
  // includes both top-level patterns and patterns that are nested within some
  // larger pattern. For each of these, record the parent symbol, the step index
  // and all of the immediate child step indices, in ascending order. Every
  // step that has a parent starts out marked as definite; the flag is cleared
  // again further down wherever the analysis finds that matching could fail.
  Array(ParentPattern) parent_patterns = array_new();
  Array(uint32_t) stack = array_new();
  for (unsigned i = 0; i < self->steps.size; i++) {
    QueryStep *step = &self->steps.contents[i];
    if (step->depth == PATTERN_DONE_MARKER) {
      array_clear(&stack);
    } else {
      // Pop every entry that is not an ancestor of this step.
      uint32_t parent_pattern_index = 0;
      while (stack.size > 0) {
        parent_pattern_index = *array_back(&stack);
        ParentPattern *parent_pattern = &parent_patterns.contents[parent_pattern_index];
        QueryStep *parent_step = &self->steps.contents[parent_pattern->parent_step_index];
        if (parent_step->depth >= step->depth) {
          stack.size--;
        } else {
          break;
        }
      }

      if (stack.size > 0) {
        ParentPattern *parent_pattern = &parent_patterns.contents[parent_pattern_index];
        step->is_definite = true;
        array_push(&parent_pattern->child_step_indices, i);
      }

      array_push(&stack, parent_patterns.size);
      array_push(&parent_patterns, ((ParentPattern) {
        .parent_symbol = step->symbol,
        .parent_step_index = i,
      }));
    }
  }

  // Discard the entries that turned out to have no children. When `i` is zero,
  // the decrement wraps around and the loop increment brings it back to zero.
  for (unsigned i = 0; i < parent_patterns.size; i++) {
    ParentPattern *parent_pattern = &parent_patterns.contents[i];
    if (parent_pattern->child_step_indices.size == 0) {
      array_erase(&parent_patterns, i);
      i--;
    }
  }

  // Initialize a set of subgraphs, with one subgraph for each parent symbol,
  // in the query, and one subgraph for each hidden symbol. The subgraphs are
  // kept sorted by symbol so that they can be found with a sorted search.
  unsigned subgraph_index = 0, exists;
  Array(SymbolSubgraph) subgraphs = array_new();
  for (unsigned i = 0; i < parent_patterns.size; i++) {
    TSSymbol parent_symbol = parent_patterns.contents[i].parent_symbol;
    array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists);
    if (!exists) {
      array_insert(&subgraphs, subgraph_index, ((SymbolSubgraph) { .symbol = parent_symbol, }));
    }
  }

  // The symbols are visited in increasing order here, so each search can
  // resume from the position where the previous one ended.
  subgraph_index = 0;
  for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) {
    if (!ts_language_symbol_metadata(self->language, sym).visible) {
      array_search_sorted_by(
        &subgraphs, subgraph_index,
        .symbol, sym,
        &subgraph_index, &exists
      );
      if (!exists) {
        array_insert(&subgraphs, subgraph_index, ((SymbolSubgraph) { .symbol = sym, }));
        subgraph_index++;
      }
    }
  }

  // Scan the parse table to find the data needed for these subgraphs.
  // Collect three things during this scan:
  //   1) All of the parse states where one of these symbols can start.
  //   2) All of the parse states where one of these symbols can end, along
  //      with information about the node that would be created.
  //   3) A list of predecessor states for each state.
  //
  // The subgraph lookups in this scan always search the whole array. The
  // symbols being reduced are not encountered in increasing order, so resuming
  // from the position of the previous lookup could skip over the subgraph that
  // is being looked for.
  StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
  for (TSStateId state = 1; state < self->language->state_count; state++) {
    for (TSSymbol sym = 0; sym < self->language->token_count; sym++) {
      unsigned count;
      const TSParseAction *actions = ts_language_actions(self->language, state, sym, &count);
      for (unsigned i = 0; i < count; i++) {
        const TSParseAction *action = &actions[i];
        if (action->type == TSParseActionTypeReduce) {
          array_search_sorted_by(
            &subgraphs,
            0,
            .symbol,
            action->params.reduce.symbol,
            &subgraph_index,
            &exists
          );
          if (exists) {
            SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index];
            // States are visited in increasing order, so checking the last
            // node is enough to avoid adding the same end state twice.
            if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
              array_push(&subgraph->nodes, ((SubgraphNode) {
                .state = state,
                .production_id = action->params.reduce.production_id,
                .child_index = action->params.reduce.child_count,
                .done = true,
              }));
            }
          }
        } else if (
          action->type == TSParseActionTypeShift &&
          !action->params.shift.extra
        ) {
          TSStateId next_state = action->params.shift.state;
          state_predecessor_map_add(&predecessor_map, next_state, state);
        }
      }
    }
    for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) {
      TSStateId next_state = ts_language_next_state(self->language, state, sym);
      if (next_state != 0) {
        state_predecessor_map_add(&predecessor_map, next_state, state);
        array_search_sorted_by(
          &subgraphs,
          0,
          .symbol,
          sym,
          &subgraph_index,
          &exists
        );
        if (exists) {
          SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index];
          array_push(&subgraph->start_states, state);
        }
      }
    }
  }

  // For each subgraph, compute the remainder of the nodes by walking backward
  // from the end states using the predecessor map. Subgraphs without any end
  // state are removed.
  Array(SubgraphNode) next_nodes = array_new();
  for (unsigned i = 0; i < subgraphs.size; i++) {
    SymbolSubgraph *subgraph = &subgraphs.contents[i];
    if (subgraph->nodes.size == 0) {
      array_delete(&subgraph->start_states);
      array_erase(&subgraphs, i);
      i--;
      continue;
    }
    array_assign(&next_nodes, &subgraph->nodes);
    while (next_nodes.size > 0) {
      SubgraphNode node = array_pop(&next_nodes);
      if (node.child_index > 1) {
        unsigned predecessor_count;
        const TSStateId *predecessors = state_predecessor_map_get(
          &predecessor_map,
          node.state,
          &predecessor_count
        );
        for (unsigned j = 0; j < predecessor_count; j++) {
          SubgraphNode predecessor_node = {
            .state = predecessors[j],
            .child_index = node.child_index - 1,
            .production_id = node.production_id,
            .done = false,
          };
          unsigned node_index, node_exists;
          array_search_sorted_by(
            &subgraph->nodes, 0,
            .state, predecessor_node.state,
            &node_index, &node_exists
          );
          if (!node_exists) {
            array_insert(&subgraph->nodes, node_index, predecessor_node);
            array_push(&next_nodes, predecessor_node);
          }
        }
      }
    }
  }

  // For each non-terminal pattern, determine if the pattern can successfully match,
  // and all of the possible children within the pattern where matching could fail.
  WalkStateList walk_states = array_new();
  WalkStateList next_walk_states = array_new();
  Array(uint16_t) finished_step_indices = array_new();
  for (unsigned i = 0; i < parent_patterns.size; i++) {
    ParentPattern *parent_pattern = &parent_patterns.contents[i];
    array_search_sorted_by(&subgraphs, 0, .symbol, parent_pattern->parent_symbol, &subgraph_index, &exists);
    if (!exists) {
      // TODO - what to do for ERROR patterns
      continue;
    }
    SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index];

    // Initialize a walk at every possible parse state where this non-terminal
    // symbol can start.
    array_clear(&walk_states);
    for (unsigned j = 0; j < subgraph->start_states.size; j++) {
      TSStateId state = subgraph->start_states.contents[j];
      array_push(&walk_states, ((WalkState) {
        .step_index = 0,
        .stack = {
          [0] = {
            .state = state,
            .child_index = 0,
            .parent_symbol = subgraph->symbol,
            .field = 0,
          },
        },
        .depth = 1,
      }));
    }

    // Walk the subgraph for this non-terminal, tracking all of the possible
    // sequences of progress within the pattern.
    array_clear(&finished_step_indices);
    while (walk_states.size > 0) {
      array_clear(&next_walk_states);
      for (unsigned j = 0; j < walk_states.size; j++) {
        WalkState *walk_state = &walk_states.contents[j];
        TSStateId state = walk_state->stack[walk_state->depth - 1].state;
        unsigned child_index = walk_state->stack[walk_state->depth - 1].child_index;
        TSSymbol parent_symbol = walk_state->stack[walk_state->depth - 1].parent_symbol;

        // Find the subgraph for the symbol at the top of this walk's stack.
        unsigned current_subgraph_index, current_subgraph_exists;
        array_search_sorted_by(
          &subgraphs, 0,
          .symbol, parent_symbol,
          &current_subgraph_index, &current_subgraph_exists
        );
        if (!current_subgraph_exists) continue;
        SymbolSubgraph *current_subgraph = &subgraphs.contents[current_subgraph_index];

        for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) {
          TSStateId successor_state = ts_language_next_state(self->language, state, sym);
          if (!successor_state || successor_state == state) continue;

          // Only follow transitions that stay within the current subgraph
          // and that advance by exactly one child.
          unsigned node_index, node_exists;
          array_search_sorted_by(
            &current_subgraph->nodes, 0,
            .state, successor_state,
            &node_index, &node_exists
          );
          if (!node_exists) continue;
          SubgraphNode *node = &current_subgraph->nodes.contents[node_index];
          if (node->child_index != child_index + 1) continue;

          WalkState next_walk_state = *walk_state;
          walk_state__top(&next_walk_state)->child_index++;
          walk_state__top(&next_walk_state)->state = successor_state;

          bool does_match = true;
          unsigned step_index = parent_pattern->child_step_indices.contents[walk_state->step_index];
          QueryStep *step = &self->steps.contents[step_index];
          TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
          TSSymbol visible_symbol = alias
            ? alias
            : self->language->symbol_metadata[sym].visible
              ? self->language->public_symbol_map[sym]
              : 0;
          if (visible_symbol) {
            // A visible child either matches the current step or it doesn't.
            if (step->symbol == NAMED_WILDCARD_SYMBOL) {
              if (!ts_language_symbol_metadata(self->language, visible_symbol).named) does_match = false;
            } else if (step->symbol != WILDCARD_SYMBOL) {
              if (step->symbol != visible_symbol) does_match = false;
            }
          } else if (next_walk_state.depth < MAX_WALK_STATE_DEPTH) {
            // A hidden child never matches a step itself. Push a new entry
            // so that the walk continues inside of the hidden symbol.
            does_match = false;
            next_walk_state.depth++;
            walk_state__top(&next_walk_state)->state = state;
            walk_state__top(&next_walk_state)->child_index = 0;
            walk_state__top(&next_walk_state)->parent_symbol = sym;
          } else {
            // The stack is full, so this path is not explored any further.
            continue;
          }

          TSFieldId field_id = 0;
          const TSFieldMapEntry *field_map, *field_map_end;
          ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
          for (; field_map != field_map_end; field_map++) {
            if (field_map->child_index == child_index) {
              field_id = field_map->field_id;
              break;
            }
          }
          // NOTE(review): the field id is looked up but nothing consumes it
          // yet; it does not influence the result of the analysis.
          (void)field_id;

          if (does_match) {
            next_walk_state.step_index++;
          }

          if (node->done) {
            next_walk_state.depth--;
          }

          // A walk is finished once its stack is empty or once every child
          // step has been matched. Record how far into the pattern it got.
          if (
            next_walk_state.depth == 0 ||
            next_walk_state.step_index == parent_pattern->child_step_indices.size
          ) {
            unsigned finished_index, finished_exists;
            array_search_sorted_by(
              &finished_step_indices, 0,
              , next_walk_state.step_index,
              &finished_index, &finished_exists
            );
            if (!finished_exists) {
              array_insert(&finished_step_indices, finished_index, next_walk_state.step_index);
            }
            continue;
          }

          // Otherwise, queue the walk for the next round, unless an
          // identical walk state is already queued.
          unsigned walk_index, walk_exists;
          array_search_sorted_with(
            &next_walk_states,
            0,
            walk_state__compare,
            &next_walk_state,
            &walk_index,
            &walk_exists
          );
          if (!walk_exists) {
            array_insert(&next_walk_states, walk_index, next_walk_state);
          }
        }
      }

      // Swap the two lists, so that the storage of both gets reused.
      WalkStateList previous_walk_states = walk_states;
      walk_states = next_walk_states;
      next_walk_states = previous_walk_states;
    }

    // A query step is definite if the containing pattern will definitely match
    // once the step is reached. In other words, a step is *not* definite if
    // it's possible to create a syntax node that matches up to until that step,
    // but does not match the entire pattern.
    for (unsigned j = 0, n = parent_pattern->child_step_indices.size; j < n; j++) {
      uint32_t step_index = parent_pattern->child_step_indices.contents[j];
      for (unsigned k = 0; k < finished_step_indices.size; k++) {
        uint32_t finished_step_index = finished_step_indices.contents[k];
        if (finished_step_index >= j && finished_step_index < n) {
          QueryStep *step = &self->steps.contents[step_index];
          step->is_definite = false;
          break;
        }
      }
    }
  }

  // In order for a parent step to be definite, all of its child steps must
  // be definite. Propagate the definiteness up the pattern trees by walking
  // the query's steps in reverse. The loop condition relies on unsigned
  // wrap-around in order to stop after index zero.
  for (unsigned i = self->steps.size - 1; i + 1 > 0; i--) {
    QueryStep *step = &self->steps.contents[i];
    for (unsigned j = i + 1; j < self->steps.size; j++) {
      QueryStep *child_step = &self->steps.contents[j];
      if (child_step->depth <= step->depth) break;
      if (child_step->depth == step->depth + 1 && !child_step->is_definite) {
        step->is_definite = false;
        break;
      }
    }
  }

  // Cleanup
  for (unsigned i = 0; i < parent_patterns.size; i++) {
    array_delete(&parent_patterns.contents[i].child_step_indices);
  }
  for (unsigned i = 0; i < subgraphs.size; i++) {
    array_delete(&subgraphs.contents[i].start_states);
    array_delete(&subgraphs.contents[i].nodes);
  }
  array_delete(&stack);
  array_delete(&subgraphs);
  array_delete(&next_nodes);
  array_delete(&walk_states);
  array_delete(&parent_patterns);
  array_delete(&next_walk_states);
  array_delete(&finished_step_indices);
  state_predecessor_map_delete(&predecessor_map);
}
|
||||
|
||||
static void ts_query__finalize_steps(TSQuery *self) {
|
||||
for (unsigned i = 0; i < self->steps.size; i++) {
|
||||
QueryStep *step = &self->steps.contents[i];
|
||||
|
|
@ -588,7 +1143,7 @@ static TSQueryError ts_query__parse_predicate(
|
|||
predicate_name,
|
||||
length
|
||||
);
|
||||
array_back(&self->predicates_by_pattern)->length++;
|
||||
array_back(&self->patterns)->predicate_steps.length++;
|
||||
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
||||
.type = TSQueryPredicateStepTypeString,
|
||||
.value_id = id,
|
||||
|
|
@ -599,7 +1154,7 @@ static TSQueryError ts_query__parse_predicate(
|
|||
if (stream->next == ')') {
|
||||
stream_advance(stream);
|
||||
stream_skip_whitespace(stream);
|
||||
array_back(&self->predicates_by_pattern)->length++;
|
||||
array_back(&self->patterns)->predicate_steps.length++;
|
||||
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
||||
.type = TSQueryPredicateStepTypeDone,
|
||||
.value_id = 0,
|
||||
|
|
@ -628,7 +1183,7 @@ static TSQueryError ts_query__parse_predicate(
|
|||
return TSQueryErrorCapture;
|
||||
}
|
||||
|
||||
array_back(&self->predicates_by_pattern)->length++;
|
||||
array_back(&self->patterns)->predicate_steps.length++;
|
||||
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
||||
.type = TSQueryPredicateStepTypeCapture,
|
||||
.value_id = capture_id,
|
||||
|
|
@ -668,7 +1223,7 @@ static TSQueryError ts_query__parse_predicate(
|
|||
string_content,
|
||||
length
|
||||
);
|
||||
array_back(&self->predicates_by_pattern)->length++;
|
||||
array_back(&self->patterns)->predicate_steps.length++;
|
||||
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
||||
.type = TSQueryPredicateStepTypeString,
|
||||
.value_id = id,
|
||||
|
|
@ -688,7 +1243,7 @@ static TSQueryError ts_query__parse_predicate(
|
|||
symbol_start,
|
||||
length
|
||||
);
|
||||
array_back(&self->predicates_by_pattern)->length++;
|
||||
array_back(&self->patterns)->predicate_steps.length++;
|
||||
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
||||
.type = TSQueryPredicateStepTypeString,
|
||||
.value_id = id,
|
||||
|
|
@ -712,7 +1267,6 @@ static TSQueryError ts_query__parse_pattern(
|
|||
TSQuery *self,
|
||||
Stream *stream,
|
||||
uint32_t depth,
|
||||
uint32_t *capture_count,
|
||||
bool is_immediate
|
||||
) {
|
||||
const uint32_t starting_step_index = self->steps.size;
|
||||
|
|
@ -737,7 +1291,6 @@ static TSQueryError ts_query__parse_pattern(
|
|||
self,
|
||||
stream,
|
||||
depth,
|
||||
capture_count,
|
||||
is_immediate
|
||||
);
|
||||
|
||||
|
|
@ -790,7 +1343,6 @@ static TSQueryError ts_query__parse_pattern(
|
|||
self,
|
||||
stream,
|
||||
depth,
|
||||
capture_count,
|
||||
child_is_immediate
|
||||
);
|
||||
if (e == PARENT_DONE && stream->next == ')') {
|
||||
|
|
@ -871,7 +1423,6 @@ static TSQueryError ts_query__parse_pattern(
|
|||
self,
|
||||
stream,
|
||||
depth + 1,
|
||||
capture_count,
|
||||
child_is_immediate
|
||||
);
|
||||
if (e == PARENT_DONE && stream->next == ')') {
|
||||
|
|
@ -955,7 +1506,6 @@ static TSQueryError ts_query__parse_pattern(
|
|||
self,
|
||||
stream,
|
||||
depth,
|
||||
capture_count,
|
||||
is_immediate
|
||||
);
|
||||
if (e == PARENT_DONE) return TSQueryErrorSyntax;
|
||||
|
|
@ -1069,8 +1619,6 @@ static TSQueryError ts_query__parse_pattern(
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
(*capture_count)++;
|
||||
}
|
||||
|
||||
// No more suffix modifiers
|
||||
|
|
@ -1123,7 +1671,7 @@ TSQuery *ts_query_new(
|
|||
.captures = symbol_table_new(),
|
||||
.predicate_values = symbol_table_new(),
|
||||
.predicate_steps = array_new(),
|
||||
.predicates_by_pattern = array_new(),
|
||||
.patterns = array_new(),
|
||||
.symbol_map = symbol_map,
|
||||
.wildcard_root_pattern_count = 0,
|
||||
.language = language,
|
||||
|
|
@ -1133,15 +1681,14 @@ TSQuery *ts_query_new(
|
|||
Stream stream = stream_new(source, source_len);
|
||||
stream_skip_whitespace(&stream);
|
||||
while (stream.input < stream.end) {
|
||||
uint32_t pattern_index = self->predicates_by_pattern.size;
|
||||
uint32_t pattern_index = self->patterns.size;
|
||||
uint32_t start_step_index = self->steps.size;
|
||||
uint32_t capture_count = 0;
|
||||
array_push(&self->start_bytes_by_pattern, stream.input - source);
|
||||
array_push(&self->predicates_by_pattern, ((Slice) {
|
||||
.offset = self->predicate_steps.size,
|
||||
.length = 0,
|
||||
array_push(&self->patterns, ((QueryPattern) {
|
||||
.predicate_steps = (Slice) {.offset = self->predicate_steps.size, .length = 0},
|
||||
.start_byte = stream.input - source,
|
||||
.start_step = self->steps.size,
|
||||
}));
|
||||
*error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count, false);
|
||||
*error_type = ts_query__parse_pattern(self, &stream, 0, false);
|
||||
array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
|
||||
|
||||
// If any pattern could not be parsed, then report the error information
|
||||
|
|
@ -1183,6 +1730,10 @@ TSQuery *ts_query_new(
|
|||
}
|
||||
}
|
||||
|
||||
if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) {
|
||||
ts_query__analyze_patterns(self);
|
||||
}
|
||||
|
||||
ts_query__finalize_steps(self);
|
||||
return self;
|
||||
}
|
||||
|
|
@ -1192,8 +1743,7 @@ void ts_query_delete(TSQuery *self) {
|
|||
array_delete(&self->steps);
|
||||
array_delete(&self->pattern_map);
|
||||
array_delete(&self->predicate_steps);
|
||||
array_delete(&self->predicates_by_pattern);
|
||||
array_delete(&self->start_bytes_by_pattern);
|
||||
array_delete(&self->patterns);
|
||||
symbol_table_delete(&self->captures);
|
||||
symbol_table_delete(&self->predicate_values);
|
||||
ts_free(self->symbol_map);
|
||||
|
|
@ -1202,7 +1752,7 @@ void ts_query_delete(TSQuery *self) {
|
|||
}
|
||||
|
||||
uint32_t ts_query_pattern_count(const TSQuery *self) {
|
||||
return self->predicates_by_pattern.size;
|
||||
return self->patterns.size;
|
||||
}
|
||||
|
||||
uint32_t ts_query_capture_count(const TSQuery *self) {
|
||||
|
|
@ -1234,7 +1784,7 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern(
|
|||
uint32_t pattern_index,
|
||||
uint32_t *step_count
|
||||
) {
|
||||
Slice slice = self->predicates_by_pattern.contents[pattern_index];
|
||||
Slice slice = self->patterns.contents[pattern_index].predicate_steps;
|
||||
*step_count = slice.length;
|
||||
return &self->predicate_steps.contents[slice.offset];
|
||||
}
|
||||
|
|
@ -1243,7 +1793,35 @@ uint32_t ts_query_start_byte_for_pattern(
|
|||
const TSQuery *self,
|
||||
uint32_t pattern_index
|
||||
) {
|
||||
return self->start_bytes_by_pattern.contents[pattern_index];
|
||||
return self->patterns.contents[pattern_index].start_byte;
|
||||
}
|
||||
|
||||
bool ts_query_pattern_is_definite(
|
||||
const TSQuery *self,
|
||||
uint32_t pattern_index,
|
||||
uint32_t step_count
|
||||
) {
|
||||
uint32_t step_index = self->patterns.contents[pattern_index].start_step;
|
||||
for (;;) {
|
||||
QueryStep *start_step = &self->steps.contents[step_index];
|
||||
if (step_index + step_count < self->steps.size) {
|
||||
QueryStep *step = start_step;
|
||||
for (unsigned i = 0; i < step_count; i++) {
|
||||
if (step->depth == PATTERN_DONE_MARKER) {
|
||||
step = NULL;
|
||||
break;
|
||||
}
|
||||
step++;
|
||||
}
|
||||
if (step && !step->is_definite) return false;
|
||||
}
|
||||
if (start_step->alternative_index != NONE && start_step->alternative_index > step_index) {
|
||||
step_index = start_step->alternative_index;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ts_query_disable_capture(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue