From 25f64e1eb66bb1ab3eccd4f0b7da543005f3ba79 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Dec 2021 22:18:21 -0800 Subject: [PATCH] Place tighter limits on the work done during query analysis --- lib/src/query.c | 93 ++++++++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 5feb6245..51cc6c17 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -11,9 +11,10 @@ // #define DEBUG_EXECUTE_QUERY #define MAX_STEP_CAPTURE_COUNT 3 -#define MAX_STATE_PREDECESSOR_COUNT 100 -#define MAX_ANALYSIS_STATE_DEPTH 8 #define MAX_NEGATED_FIELD_COUNT 8 +#define MAX_STATE_PREDECESSOR_COUNT 256 +#define MAX_ANALYSIS_STATE_DEPTH 8 +#define MAX_ANALYSIS_ITERATION_COUNT 256 /* * Stream - A sequence of unicode characters derived from a UTF8 string. @@ -571,7 +572,7 @@ static inline StatePredecessorMap state_predecessor_map_new( ) { return (StatePredecessorMap) { .contents = ts_calloc( - language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), + (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), sizeof(TSStateId) ), }; @@ -586,7 +587,7 @@ static inline void state_predecessor_map_add( TSStateId state, TSStateId predecessor ) { - unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1); + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); TSStateId *count = &self->contents[index]; if ( *count == 0 || @@ -602,7 +603,7 @@ static inline const TSStateId *state_predecessor_map_get( TSStateId state, unsigned *count ) { - unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1); + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); *count = self->contents[index]; return &self->contents[index + 1]; } @@ -635,6 +636,8 @@ static inline int analysis_state__compare_position( if (self->stack[i].child_index > other->stack[i].child_index) return 1; } if (self->depth < other->depth) return 1; + if (self->step_index < other->step_index) return -1; + if (self->step_index > other->step_index) return 1; return 0; } @@ -652,8 +655,6 @@ static inline int analysis_state__compare( if (self->stack[i].field_id < other->stack[i].field_id) return -1; if (self->stack[i].field_id > other->stack[i].field_id) return 1; } - if (self->step_index < other->step_index) return -1; - if (self->step_index > other->step_index) return 1; return 0; } @@ -1023,13 +1024,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Walk the subgraph for this non-terminal, tracking all of the possible // sequences of progress within the pattern. bool can_finish_pattern = false; - bool did_exceed_max_depth = false; + bool did_abort_analysis = false; unsigned recursion_depth_limit = 0; unsigned prev_final_step_count = 0; array_clear(&final_step_indices); - for (;;) { + for (unsigned iteration = 0;; iteration++) { + if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { + did_abort_analysis = true; + break; + } + #ifdef DEBUG_ANALYZE_QUERY - printf("Final step indices:"); + printf("Iteration: %u. Final step indices:", iteration); for (unsigned j = 0; j < final_step_indices.size; j++) { printf(" %4u", final_step_indices.contents[j]); } @@ -1085,9 +1091,15 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (next_states.size > 0) { int comparison = analysis_state__compare_position(state, array_back(&next_states)); if (comparison == 0) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Skip iteration for state %u\n", j); + #endif array_insert_sorted_with(&next_states, analysis_state__compare, *state); continue; } else if (comparison > 0) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Terminate iteration at state %u\n", j); + #endif while (j < states.size) { array_push(&next_states, states.contents[j]); j++; @@ -1203,7 +1215,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { printf("Exceeded depth limit for state %u\n", j); #endif - did_exceed_max_depth = true; + did_abort_analysis = true; continue; } @@ -1295,7 +1307,37 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { next_states = _states; } - // Mark as indefinite any step where a match terminated. + // If this pattern could not be fully analyzed, then every step should + // be considered fallible. + if (did_abort_analysis) { + for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { + QueryStep *step = &self->steps.contents[j]; + if ( + step->depth <= parent_depth || + step->depth == PATTERN_DONE_MARKER + ) break; + if (!step->is_dead_end) { + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; + } + } + continue; + } + + // If this pattern cannot match, store the pattern index so that it can be + // returned to the caller. + if (!can_finish_pattern) { + assert(final_step_indices.size > 0); + uint16_t impossible_step_index = *array_back(&final_step_indices); + uint32_t i, exists; + array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists); + if (i >= self->step_offsets.size) i = self->step_offsets.size - 1; + *error_offset = self->step_offsets.contents[i].byte_offset; + all_patterns_are_valid = false; + break; + } + + // Mark as fallible any step where a match terminated. // Later, this property will be propagated to all of the step's predecessors. for (unsigned j = 0; j < final_step_indices.size; j++) { uint32_t final_step_index = final_step_indices.contents[j]; @@ -1309,33 +1351,6 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { step->root_pattern_guaranteed = false; } } - - if (did_exceed_max_depth) { - for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { - QueryStep *step = &self->steps.contents[j]; - if ( - step->depth <= parent_depth || - step->depth == PATTERN_DONE_MARKER - ) break; - if (!step->is_dead_end) { - step->parent_pattern_guaranteed = false; - step->root_pattern_guaranteed = false; - } - } - } - - // If this pattern cannot match, store the pattern index so that it can be - // returned to the caller. - if (all_patterns_are_valid && !can_finish_pattern && !did_exceed_max_depth) { - assert(final_step_indices.size > 0); - uint16_t impossible_step_index = *array_back(&final_step_indices); - uint32_t i, exists; - array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists); - if (i >= self->step_offsets.size) i = self->step_offsets.size - 1; - *error_offset = self->step_offsets.contents[i].byte_offset; - all_patterns_are_valid = false; - break; - } } // Mark as indefinite any step with captures that are used in predicates.