Place tighter limits on the work done during query analysis

This commit is contained in:
Max Brunsfeld 2021-12-09 22:18:21 -08:00
parent 8de8c64c95
commit 25f64e1eb6

View file

@ -11,9 +11,10 @@
// #define DEBUG_EXECUTE_QUERY
#define MAX_STEP_CAPTURE_COUNT 3
#define MAX_STATE_PREDECESSOR_COUNT 100
#define MAX_ANALYSIS_STATE_DEPTH 8
#define MAX_NEGATED_FIELD_COUNT 8
#define MAX_STATE_PREDECESSOR_COUNT 256
#define MAX_ANALYSIS_STATE_DEPTH 8
#define MAX_ANALYSIS_ITERATION_COUNT 256
/*
* Stream - A sequence of unicode characters derived from a UTF8 string.
@ -571,7 +572,7 @@ static inline StatePredecessorMap state_predecessor_map_new(
) {
return (StatePredecessorMap) {
.contents = ts_calloc(
language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
(size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
sizeof(TSStateId)
),
};
@ -586,7 +587,7 @@ static inline void state_predecessor_map_add(
TSStateId state,
TSStateId predecessor
) {
unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
TSStateId *count = &self->contents[index];
if (
*count == 0 ||
@ -602,7 +603,7 @@ static inline const TSStateId *state_predecessor_map_get(
TSStateId state,
unsigned *count
) {
unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
*count = self->contents[index];
return &self->contents[index + 1];
}
@ -635,6 +636,8 @@ static inline int analysis_state__compare_position(
if (self->stack[i].child_index > other->stack[i].child_index) return 1;
}
if (self->depth < other->depth) return 1;
if (self->step_index < other->step_index) return -1;
if (self->step_index > other->step_index) return 1;
return 0;
}
@ -652,8 +655,6 @@ static inline int analysis_state__compare(
if (self->stack[i].field_id < other->stack[i].field_id) return -1;
if (self->stack[i].field_id > other->stack[i].field_id) return 1;
}
if (self->step_index < other->step_index) return -1;
if (self->step_index > other->step_index) return 1;
return 0;
}
@ -1023,13 +1024,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
// Walk the subgraph for this non-terminal, tracking all of the possible
// sequences of progress within the pattern.
bool can_finish_pattern = false;
bool did_exceed_max_depth = false;
bool did_abort_analysis = false;
unsigned recursion_depth_limit = 0;
unsigned prev_final_step_count = 0;
array_clear(&final_step_indices);
for (;;) {
for (unsigned iteration = 0;; iteration++) {
if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
did_abort_analysis = true;
break;
}
#ifdef DEBUG_ANALYZE_QUERY
printf("Final step indices:");
printf("Iteration: %u. Final step indices:", iteration);
for (unsigned j = 0; j < final_step_indices.size; j++) {
printf(" %4u", final_step_indices.contents[j]);
}
@ -1085,9 +1091,15 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
if (next_states.size > 0) {
int comparison = analysis_state__compare_position(state, array_back(&next_states));
if (comparison == 0) {
#ifdef DEBUG_ANALYZE_QUERY
printf("Skip iteration for state %u\n", j);
#endif
array_insert_sorted_with(&next_states, analysis_state__compare, *state);
continue;
} else if (comparison > 0) {
#ifdef DEBUG_ANALYZE_QUERY
printf("Terminate iteration at state %u\n", j);
#endif
while (j < states.size) {
array_push(&next_states, states.contents[j]);
j++;
@ -1203,7 +1215,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
printf("Exceeded depth limit for state %u\n", j);
#endif
did_exceed_max_depth = true;
did_abort_analysis = true;
continue;
}
@ -1295,7 +1307,37 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
next_states = _states;
}
// Mark as indefinite any step where a match terminated.
// If this pattern could not be fully analyzed, then every step should
// be considered fallible.
if (did_abort_analysis) {
for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
QueryStep *step = &self->steps.contents[j];
if (
step->depth <= parent_depth ||
step->depth == PATTERN_DONE_MARKER
) break;
if (!step->is_dead_end) {
step->parent_pattern_guaranteed = false;
step->root_pattern_guaranteed = false;
}
}
continue;
}
// If this pattern cannot match, store the pattern index so that it can be
// returned to the caller.
if (!can_finish_pattern) {
assert(final_step_indices.size > 0);
uint16_t impossible_step_index = *array_back(&final_step_indices);
uint32_t i, exists;
array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists);
if (i >= self->step_offsets.size) i = self->step_offsets.size - 1;
*error_offset = self->step_offsets.contents[i].byte_offset;
all_patterns_are_valid = false;
break;
}
// Mark as fallible any step where a match terminated.
// Later, this property will be propagated to all of the step's predecessors.
for (unsigned j = 0; j < final_step_indices.size; j++) {
uint32_t final_step_index = final_step_indices.contents[j];
@ -1309,33 +1351,6 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
step->root_pattern_guaranteed = false;
}
}
if (did_exceed_max_depth) {
for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
QueryStep *step = &self->steps.contents[j];
if (
step->depth <= parent_depth ||
step->depth == PATTERN_DONE_MARKER
) break;
if (!step->is_dead_end) {
step->parent_pattern_guaranteed = false;
step->root_pattern_guaranteed = false;
}
}
}
// If this pattern cannot match, store the pattern index so that it can be
// returned to the caller.
if (all_patterns_are_valid && !can_finish_pattern && !did_exceed_max_depth) {
assert(final_step_indices.size > 0);
uint16_t impossible_step_index = *array_back(&final_step_indices);
uint32_t i, exists;
array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists);
if (i >= self->step_offsets.size) i = self->step_offsets.size - 1;
*error_offset = self->step_offsets.contents[i].byte_offset;
all_patterns_are_valid = false;
break;
}
}
// Mark as indefinite any step with captures that are used in predicates.