From d674bc139a0550422193163c253fe23d00cf7278 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH] Fix more bugs in binary search used in tree queries This binary search implementation differs from Rust's `slice::binary_search_by` method in how they deal with ties. In Rust's implementation: > If there are multiple matches, then any one of the matches > could be returned. This implementation needs to return the index of the *first* match. --- cli/src/tests/query_test.rs | 43 ++++++++++++++++++++++++++++++++++++- lib/src/query.c | 42 ++++++++++++++++++++++++------------ 2 files changed, 70 insertions(+), 15 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 5bf781b4..6ab77d1f 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -143,7 +143,7 @@ fn test_query_exec_with_multiple_matches_same_root() { } #[test] -fn test_query_exec_multiple_patterns() { +fn test_query_exec_multiple_patterns_different_roots() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( @@ -178,6 +178,47 @@ fn test_query_exec_multiple_patterns() { }); } +#[test] +fn test_query_exec_multiple_patterns_same_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (pair + key: (property_identifier) @method-def + value: (function)) + + (pair + key: (property_identifier) @method-def + value: (arrow_function)) + ", + ) + .unwrap(); + + let source = " + a = { + b: () => { return c; }, + d: function() { return d; } + }; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (1, vec![("method-def", "b")]), + (0, vec![("method-def", "d")]), + ], + ); + }); +} + #[test] fn test_query_exec_nested_matches_without_fields() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index fe8d2eec..d728238d 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -260,6 +260,12 @@ static uint16_t ts_query_intern_capture_name( return self->capture_names.size - 1; } +// The `pattern_map` contains a mapping from TSSymbol values to indices in the +// `steps` array. For a given syntax node, the `pattern_map` makes it possible +// to quickly find the starting steps of all of the patterns whose root matches +// that node. It is represented as an array of `(symbol, step index)` pairs, +// sorted by symbol. Lookups use a binary search so that their cost scales +// logarithmically with the number of patterns in the query. static inline bool ts_query__pattern_map_search( const TSQuery *self, TSSymbol needle, @@ -277,24 +283,29 @@ static inline bool ts_query__pattern_map_search( TSSymbol mid_symbol = self->steps.contents[ self->pattern_map.contents[mid_index].step_index ].symbol; - if (needle >= mid_symbol) base_index = mid_index; + if (needle > mid_symbol) base_index = mid_index; size -= half_size; } + TSSymbol symbol = self->steps.contents[ self->pattern_map.contents[base_index].step_index ].symbol; + if (needle > symbol) { - *result = base_index + 1; - return false; - } else if (needle == symbol) { - *result = base_index; - return true; - } else { - *result = base_index; - return false; + base_index++; + if (base_index < self->pattern_map.size) { + symbol = self->steps.contents[ + self->pattern_map.contents[base_index].step_index + ].symbol; + } } + + *result = base_index; + return needle == symbol; } +// Insert a new pattern's start index into the pattern map, maintaining +// the pattern map's ordering invariant. static inline void ts_query__pattern_map_insert( TSQuery *self, TSSymbol symbol, @@ -308,6 +319,9 @@ static inline void ts_query__pattern_map_insert( })); } +// Read one S-expression pattern from the stream, and incorporate it into +// the query's internal state machine representation. For nested patterns, +// this function calls itself recursively. static TSQueryError ts_query_parse_pattern( TSQuery *self, Stream *stream, @@ -459,7 +473,7 @@ static TSQueryError ts_query_parse_pattern( stream_skip_whitespace(stream); - // Parse a '@'-suffixed capture pattern + // Parse an '@'-suffixed capture pattern if (stream->next == '@') { stream_advance(stream); stream_skip_whitespace(stream); @@ -615,18 +629,18 @@ static QueryState *ts_query_context_copy_state( TSQueryContext *self, QueryState *state ) { - uint32_t capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - if (capture_list_id == NONE) return NULL; + uint32_t new_list_id = capture_list_pool_acquire(&self->capture_list_pool); + if (new_list_id == NONE) return NULL; array_push(&self->states, *state); QueryState *new_state = array_back(&self->states); - new_state->capture_list_id = capture_list_id; + new_state->capture_list_id = new_list_id; TSQueryCapture *old_captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); TSQueryCapture *new_captures = capture_list_pool_get( &self->capture_list_pool, - capture_list_id + new_list_id ); memcpy(new_captures, old_captures, state->capture_count * sizeof(TSQueryCapture)); return new_state;