From 645aacb1e7b8a02cf7badaf90e08d77350daa74f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 26 Jun 2020 15:40:34 -0700 Subject: [PATCH] Optimize query analysis using binary search --- lib/src/array.h | 47 +++++++++++++++++++++++++---------------------- lib/src/query.c | 24 +++++++++++++++--------- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/lib/src/array.h b/lib/src/array.h index e95867cf..7fae7a40 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -66,43 +66,46 @@ extern "C" { #define array_assign(self, other) \ array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self)) -#define array_search_sorted_by(self, start, field, needle, out_index, out_exists) \ +#define array__search_sorted(self, start, compare, suffix, needle, index, exists) \ do { \ - *(out_exists) = false; \ - for (*(out_index) = start; *(out_index) < (self)->size; (*(out_index))++) { \ - int _comparison = (int)((self)->contents[*(out_index)] field) - (int)(needle); \ - if (_comparison >= 0) { \ - if (_comparison == 0) *(out_exists) = true; \ - break; \ - } \ + *(index) = start; \ + *(exists) = false; \ + uint32_t size = (self)->size - *(index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(index) = mid_index; \ + size -= half_size; \ } \ - } while (0); + comparison = compare(&((self)->contents[*(index)] suffix), (needle)); \ + if (comparison == 0) *(exists) = true; \ + else if (comparison < 0) *(index) += 1; \ + } while (0) -#define array_search_sorted_with(self, start, compare, needle, out_index, out_exists) \ - do { \ - *(out_exists) = false; \ - for (*(out_index) = start; *(out_index) < (self)->size; (*(out_index))++) { \ - int _comparison = compare(&(self)->contents[*(out_index)], (needle)); \ - if (_comparison >= 0) { \ - if (_comparison == 0) *(out_exists) = true; \ - break; \ - } \ - } \ - } while (0); +#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#define array_search_sorted_by(self, start, field, needle, index, exists) \ + array__search_sorted(self, start, _compare_int, field, needle, index, exists) + +#define array_search_sorted_with(self, start, compare, needle, index, exists) \ + array__search_sorted(self, start, compare, , needle, index, exists) #define array_insert_sorted_by(self, start, field, value) \ do { \ unsigned index, exists; \ array_search_sorted_by(self, start, field, (value) field, &index, &exists); \ if (!exists) array_insert(self, index, value); \ - } while (0); + } while (0) #define array_insert_sorted_with(self, start, compare, value) \ do { \ unsigned index, exists; \ array_search_sorted_with(self, start, compare, &(value), &index, &exists); \ if (!exists) array_insert(self, index, value); \ - } while (0); + } while (0) // Private diff --git a/lib/src/query.c b/lib/src/query.c index bf781204..64a1b8a0 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -612,10 +612,10 @@ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { if (self->state < other->state) return -1; if (self->state > other->state) return 1; - if (self->done && !other->done) return -1; - if (!self->done && other->done) return 1; if (self->child_index < other->child_index) return -1; if (self->child_index > other->child_index) return 1; + if (self->done < other->done) return -1; + if (self->done > other->done) return 1; if (self->production_id < other->production_id) return -1; if (self->production_id > other->production_id) return 1; return 0; @@ -961,14 +961,20 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // Follow every possible path in the parse table, but only visit states that // are part of the subgraph for the current symbol. for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) { - TSStateId successor_state = ts_language_next_state(self->language, parse_state, sym); - if (successor_state && successor_state != parse_state) { + AnalysisSubgraphNode successor = { + .state = ts_language_next_state(self->language, parse_state, sym), + .child_index = child_index + 1, + }; + if (successor.state && successor.state != parse_state) { unsigned node_index; - array_search_sorted_by(&subgraph->nodes, 0, .state, successor_state, &node_index, &exists); - while (exists && node_index < subgraph->nodes.size) { + array_search_sorted_with( + &subgraph->nodes, 0, + analysis_subgraph_node__compare, &successor, + &node_index, &exists + ); + while (node_index < subgraph->nodes.size) { AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; - if (node->state != successor_state) break; - if (node->child_index != child_index + 1) continue; + if (node->state != successor.state || node->child_index != successor.child_index) break; // Use the subgraph to determine what alias and field will eventually be applied // to this child node. @@ -992,7 +998,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index AnalysisState next_state = *state; analysis_state__top(&next_state)->child_index++; - analysis_state__top(&next_state)->parse_state = successor_state; + analysis_state__top(&next_state)->parse_state = successor.state; if (node->done) analysis_state__top(&next_state)->done = true; // Determine if this hypothetical child node would match the current step