From ae7869d1a613bc0d6928a413b20bfc2303670b4d Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 23 Nov 2021 19:22:24 +0100 Subject: [PATCH 01/16] Expose capture suffixes in queries --- lib/binding_rust/bindings.rs | 9 ++++++ lib/binding_rust/lib.rs | 30 ++++++++++++++++++ lib/include/tree_sitter/api.h | 4 +++ lib/src/query.c | 59 +++++++++++++++++++++++++++++------ 4 files changed, 92 insertions(+), 10 deletions(-) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 5bcbac42..9d398f58 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -665,6 +665,15 @@ extern "C" { length: *mut u32, ) -> *const ::std::os::raw::c_char; } +extern "C" { + #[doc = " Get the suffix of one of the query's captures, or one of the query's"] + #[doc = " string literals. Each capture and string is associated with a numeric"] + #[doc = " id based on the order that it appeared in the query's source."] + pub fn ts_query_capture_suffix_for_id( + arg1: *const TSQuery, + id: u32, + ) -> ::std::os::raw::c_char; +} extern "C" { pub fn ts_query_string_value_for_id( arg1: *const TSQuery, diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index cf8437b8..429cd47c 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -98,12 +98,34 @@ pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); pub struct Query { ptr: NonNull, capture_names: Vec, + capture_suffixes: Vec, text_predicates: Vec>, property_settings: Vec>, property_predicates: Vec>, general_predicates: Vec>, } +/// A suffix indicating the multiplicity of the capture value +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum QueryCaptureSuffix { + One, + OneOrMore, + ZeroOrMore, + ZeroOrOne, +} + +impl From for QueryCaptureSuffix { + fn from(value: u8) -> QueryCaptureSuffix { + match value { + b'\0' => QueryCaptureSuffix::One, + b'+' => QueryCaptureSuffix::OneOrMore, + b'*' => QueryCaptureSuffix::ZeroOrMore, + b'?' 
=> QueryCaptureSuffix::ZeroOrOne, + _ => panic!("Unrecognized suffix: {}", value as char), + } + } +} + /// A stateful object for executing a `Query` on a syntax `Tree`. pub struct QueryCursor { ptr: NonNull, @@ -1306,6 +1328,7 @@ impl Query { let mut result = Query { ptr: unsafe { NonNull::new_unchecked(ptr) }, capture_names: Vec::with_capacity(capture_count as usize), + capture_suffixes: Vec::with_capacity(capture_count as usize), text_predicates: Vec::with_capacity(pattern_count), property_predicates: Vec::with_capacity(pattern_count), property_settings: Vec::with_capacity(pattern_count), @@ -1321,6 +1344,8 @@ impl Query { let name = slice::from_raw_parts(name, length as usize); let name = str::from_utf8_unchecked(name); result.capture_names.push(name.to_string()); + let suffix = ffi::ts_query_capture_suffix_for_id(ptr, i) as u8; + result.capture_suffixes.push(suffix.into()); } } @@ -1524,6 +1549,11 @@ impl Query { &self.capture_names } + /// Get the suffixes of the captures used in the query. + pub fn capture_suffixes(&self) -> &[QueryCaptureSuffix] { + &self.capture_suffixes + } + /// Get the index for a given capture name. pub fn capture_index_for_name(&self, name: &str) -> Option { self.capture_names diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 33b8c8f4..68b67668 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -740,6 +740,10 @@ const char *ts_query_capture_name_for_id( uint32_t id, uint32_t *length ); +char ts_query_capture_suffix_for_id( + const TSQuery *, + uint32_t id +); const char *ts_query_string_value_for_id( const TSQuery *, uint32_t id, diff --git a/lib/src/query.c b/lib/src/query.c index 51cc6c17..734d9bcf 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -39,6 +39,9 @@ typedef struct { * was not specified. * - `capture_ids` - An array of integers representing the names of captures * associated with this node in the pattern, terminated by a `NONE` value. 
+ * - `capture_suffixes` - An array of capture suffixes ('\0\, '+', '*', or '?') + * corresponding to the elements in `capture_ids`, terminated by a `NONE` + * value. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. * - `negated_field_list_id` - An id representing a set of fields that must @@ -72,7 +75,7 @@ typedef struct { * Steps also store some derived state that summarizes how they relate to other * steps within the same pattern. This is used to optimize the matching process: * - `contains_captures` - Indicates that this step or one of its child steps - * has a non-empty `capture_ids` list. + * has non-empty `capture_ids` and `capture_suffixes` lists. * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then * it and all of its subsequent sibling steps within the same parent pattern * are guaranteed to match. @@ -87,6 +90,7 @@ typedef struct { TSSymbol supertype_symbol; TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; + uint16_t capture_suffixes[MAX_STEP_CAPTURE_COUNT]; uint16_t depth; uint16_t alternative_index; uint16_t negated_field_list_id; @@ -118,6 +122,7 @@ typedef struct { typedef struct { Array(char) characters; Array(Slice) slices; + Array(char) suffixes; } SymbolTable; /* @@ -461,12 +466,14 @@ static SymbolTable symbol_table_new(void) { return (SymbolTable) { .characters = array_new(), .slices = array_new(), + .suffixes = array_new(), }; } static void symbol_table_delete(SymbolTable *self) { array_delete(&self->characters); array_delete(&self->slices); + array_delete(&self->suffixes); } static int symbol_table_id_for_name( @@ -494,10 +501,18 @@ static const char *symbol_table_name_for_id( return &self->characters.contents[slice.offset]; } +static char symbol_table_suffix_for_id( + const SymbolTable *self, + uint16_t id +) { + return self->suffixes.contents[id]; +} + static uint16_t symbol_table_insert_name( SymbolTable *self, const char *name, - uint32_t 
length + uint32_t length, + char suffix ) { int id = symbol_table_id_for_name(self, name, length); if (id >= 0) return (uint16_t)id; @@ -509,6 +524,7 @@ static uint16_t symbol_table_insert_name( memcpy(&self->characters.contents[slice.offset], name, length); self->characters.contents[self->characters.size - 1] = 0; array_push(&self->slices, slice); + array_push(&self->suffixes, suffix); return self->slices.size - 1; } @@ -526,6 +542,7 @@ static QueryStep query_step__new( .depth = depth, .field = 0, .capture_ids = {NONE, NONE, NONE}, + .capture_suffixes = {NONE, NONE, NONE}, .alternative_index = NONE, .negated_field_list_id = 0, .contains_captures = false, @@ -539,10 +556,11 @@ static QueryStep query_step__new( }; } -static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { +static void query_step__add_capture(QueryStep *self, uint16_t capture_id, char capture_suffix) { for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { if (self->capture_ids[i] == NONE) { self->capture_ids[i] = capture_id; + self->capture_suffixes[i] = capture_suffix; break; } } @@ -552,10 +570,13 @@ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { if (self->capture_ids[i] == capture_id) { self->capture_ids[i] = NONE; + self->capture_suffixes[i] = NONE; while (i + 1 < MAX_STEP_CAPTURE_COUNT) { if (self->capture_ids[i + 1] == NONE) break; self->capture_ids[i] = self->capture_ids[i + 1]; + self->capture_suffixes[i] = self->capture_suffixes[i + 1]; self->capture_ids[i + 1] = NONE; + self->capture_suffixes[i + 1] = NONE; i++; } break; @@ -1354,7 +1375,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } // Mark as indefinite any step with captures that are used in predicates. 
- Array(uint16_t) predicate_capture_ids = array_new(); + Array(uint16_t) predicate_capture_ids = array_new(); // FIXME for (unsigned i = 0; i < self->patterns.size; i++) { QueryPattern *pattern = &self->patterns.contents[i]; @@ -1462,7 +1483,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_delete(&deeper_states); array_delete(&final_step_indices); array_delete(&parent_step_indices); - array_delete(&predicate_capture_ids); + array_delete(&predicate_capture_ids); // FIXME state_predecessor_map_delete(&predecessor_map); return all_patterns_are_valid; @@ -1590,7 +1611,8 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, predicate_name, - length + length, + '\0' ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -1643,7 +1665,8 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, self->string_buffer.contents, - self->string_buffer.size + self->string_buffer.size, + '\0' ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -1659,7 +1682,8 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, symbol_start, - length + length, + '\0' ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -2034,9 +2058,12 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Parse suffixes modifiers for this pattern + char suffix = '\0'; for (;;) { // Parse the one-or-more operator. if (stream->next == '+') { + suffix = '+'; + stream_advance(stream); stream_skip_whitespace(stream); @@ -2049,6 +2076,8 @@ static TSQueryError ts_query__parse_pattern( // Parse the zero-or-more repetition operator. 
else if (stream->next == '*') { + suffix = '*'; + stream_advance(stream); stream_skip_whitespace(stream); @@ -2067,6 +2096,8 @@ static TSQueryError ts_query__parse_pattern( // Parse the optional operator. else if (stream->next == '?') { + suffix = '?'; + stream_advance(stream); stream_skip_whitespace(stream); @@ -2090,13 +2121,14 @@ static TSQueryError ts_query__parse_pattern( uint16_t capture_id = symbol_table_insert_name( &self->captures, capture_name, - length + length, + suffix ); uint32_t step_index = starting_step_index; for (;;) { QueryStep *step = &self->steps.contents[step_index]; - query_step__add_capture(step, capture_id); + query_step__add_capture(step, capture_id, suffix); if ( step->alternative_index != NONE && step->alternative_index > step_index && @@ -2279,6 +2311,13 @@ const char *ts_query_capture_name_for_id( return symbol_table_name_for_id(&self->captures, index, length); } +char ts_query_capture_suffix_for_id( + const TSQuery *self, + uint32_t index +) { + return symbol_table_suffix_for_id(&self->captures, index); +} + const char *ts_query_string_value_for_id( const TSQuery *self, uint32_t index, From 9bac0663301d667dac0552ed77cdcda2c3056a51 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Wed, 1 Dec 2021 19:05:41 +0100 Subject: [PATCH 02/16] Deal with quantifiers appearing on capture's enclosing patterns - Use a proper enum type for quantifiers. - Drop quantifiers from `TSQueryStep`, which was not used. - Keep track of the captures introduced during a pattern parse, and apply the quantifier for the pattern to the captures that were introduced by the pattern or any sub patterns. - Use 'quantifier' instead of 'suffix'. 
--- lib/binding_rust/bindings.rs | 16 +++-- lib/binding_rust/lib.rs | 34 ++++----- lib/include/tree_sitter/api.h | 15 +++- lib/src/query.c | 125 +++++++++++++++++++++++++--------- 4 files changed, 131 insertions(+), 59 deletions(-) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 9d398f58..5da90d92 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -107,6 +107,11 @@ pub struct TSQueryCapture { pub node: TSNode, pub index: u32, } +pub const TSQuantifier_One: TSQuantifier = 0; +pub const TSQuantifier_OneOrMore: TSQuantifier = 1; +pub const TSQuantifier_ZeroOrOne: TSQuantifier = 2; +pub const TSQuantifier_ZeroOrMore: TSQuantifier = 3; +pub type TSQuantifier = u32; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQueryMatch { @@ -666,13 +671,10 @@ extern "C" { ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the suffix of one of the query's captures, or one of the query's"] - #[doc = " string literals. Each capture and string is associated with a numeric"] - #[doc = " id based on the order that it appeared in the query's source."] - pub fn ts_query_capture_suffix_for_id( - arg1: *const TSQuery, - id: u32, - ) -> ::std::os::raw::c_char; + #[doc = " Get the quantifier of the query's captures, or one of the query's string"] + #[doc = " literals. 
Each capture and string is associated with a numeric id based"] + #[doc = " on the order that it appeared in the query's source."] + pub fn ts_query_capture_quantifier_for_id(arg1: *const TSQuery, id: u32) -> TSQuantifier; } extern "C" { pub fn ts_query_string_value_for_id( diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 429cd47c..952d8864 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -98,30 +98,30 @@ pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); pub struct Query { ptr: NonNull, capture_names: Vec, - capture_suffixes: Vec, + capture_quantifiers: Vec, text_predicates: Vec>, property_settings: Vec>, property_predicates: Vec>, general_predicates: Vec>, } -/// A suffix indicating the multiplicity of the capture value +/// A quantifier for captures #[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum QueryCaptureSuffix { +pub enum Quantifier { One, OneOrMore, - ZeroOrMore, ZeroOrOne, + ZeroOrMore, } -impl From for QueryCaptureSuffix { - fn from(value: u8) -> QueryCaptureSuffix { +impl From for Quantifier { + fn from(value: ffi::TSQuantifier) -> Self { match value { - b'\0' => QueryCaptureSuffix::One, - b'+' => QueryCaptureSuffix::OneOrMore, - b'*' => QueryCaptureSuffix::ZeroOrMore, - b'?' 
=> QueryCaptureSuffix::ZeroOrOne, - _ => panic!("Unrecognized suffix: {}", value as char), + ffi::TSQuantifier_One => Quantifier::One, + ffi::TSQuantifier_OneOrMore => Quantifier::OneOrMore, + ffi::TSQuantifier_ZeroOrOne => Quantifier::ZeroOrOne, + ffi::TSQuantifier_ZeroOrMore => Quantifier::ZeroOrMore, + _ => panic!("Unrecognized quantifier: {}", value), } } } @@ -1328,7 +1328,7 @@ impl Query { let mut result = Query { ptr: unsafe { NonNull::new_unchecked(ptr) }, capture_names: Vec::with_capacity(capture_count as usize), - capture_suffixes: Vec::with_capacity(capture_count as usize), + capture_quantifiers: Vec::with_capacity(capture_count as usize), text_predicates: Vec::with_capacity(pattern_count), property_predicates: Vec::with_capacity(pattern_count), property_settings: Vec::with_capacity(pattern_count), @@ -1344,8 +1344,8 @@ impl Query { let name = slice::from_raw_parts(name, length as usize); let name = str::from_utf8_unchecked(name); result.capture_names.push(name.to_string()); - let suffix = ffi::ts_query_capture_suffix_for_id(ptr, i) as u8; - result.capture_suffixes.push(suffix.into()); + let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i); + result.capture_quantifiers.push(quantifier.into()); } } @@ -1549,9 +1549,9 @@ impl Query { &self.capture_names } - /// Get the suffixes of the captures used in the query. - pub fn capture_suffixes(&self) -> &[QueryCaptureSuffix] { - &self.capture_suffixes + /// Get the quantifiers of the captures used in the query. + pub fn capture_quantifiers(&self) -> &[Quantifier] { + &self.capture_quantifiers } /// Get the index for a given capture name. 
diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 68b67668..b4f77f46 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -106,6 +106,13 @@ typedef struct { uint32_t index; } TSQueryCapture; +typedef enum { + One, + OneOrMore, + ZeroOrOne, + ZeroOrMore, +} TSQuantifier; + typedef struct { uint32_t id; uint16_t pattern_index; @@ -740,7 +747,13 @@ const char *ts_query_capture_name_for_id( uint32_t id, uint32_t *length ); -char ts_query_capture_suffix_for_id( + +/** + * Get the quantifier of the query's captures, or one of the query's string + * literals. Each capture and string is associated with a numeric id based + * on the order that it appeared in the query's source. + */ +TSQuantifier ts_query_capture_quantifier_for_id( const TSQuery *, uint32_t id ); diff --git a/lib/src/query.c b/lib/src/query.c index 734d9bcf..83c6b297 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -39,9 +39,6 @@ typedef struct { * was not specified. * - `capture_ids` - An array of integers representing the names of captures * associated with this node in the pattern, terminated by a `NONE` value. - * - `capture_suffixes` - An array of capture suffixes ('\0\, '+', '*', or '?') - * corresponding to the elements in `capture_ids`, terminated by a `NONE` - * value. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. * - `negated_field_list_id` - An id representing a set of fields that must @@ -75,7 +72,7 @@ typedef struct { * Steps also store some derived state that summarizes how they relate to other * steps within the same pattern. This is used to optimize the matching process: * - `contains_captures` - Indicates that this step or one of its child steps - * has non-empty `capture_ids` and `capture_suffixes` lists. + * has a non-empty `capture_ids` list. 
* - `parent_pattern_guaranteed` - Indicates that if this step is reached, then * it and all of its subsequent sibling steps within the same parent pattern * are guaranteed to match. @@ -90,7 +87,6 @@ typedef struct { TSSymbol supertype_symbol; TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t capture_suffixes[MAX_STEP_CAPTURE_COUNT]; uint16_t depth; uint16_t alternative_index; uint16_t negated_field_list_id; @@ -122,7 +118,7 @@ typedef struct { typedef struct { Array(char) characters; Array(Slice) slices; - Array(char) suffixes; + Array(TSQuantifier) quantifiers; } SymbolTable; /* @@ -458,6 +454,43 @@ static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { self->free_capture_list_count++; } +/************** + * Quantifiers + **************/ + +static TSQuantifier quantifier_join( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case One: + return right; + case OneOrMore: + switch (right) { + case One: + case OneOrMore: + return OneOrMore; + case ZeroOrOne: + case ZeroOrMore: + return ZeroOrMore; + }; + break; + case ZeroOrOne: + switch (right) { + case One: + case ZeroOrOne: + return ZeroOrOne; + case OneOrMore: + case ZeroOrMore: + return ZeroOrMore; + }; + break; + case ZeroOrMore: + return ZeroOrMore; + } +} + /************** * SymbolTable **************/ @@ -466,14 +499,14 @@ static SymbolTable symbol_table_new(void) { return (SymbolTable) { .characters = array_new(), .slices = array_new(), - .suffixes = array_new(), + .quantifiers = array_new(), }; } static void symbol_table_delete(SymbolTable *self) { array_delete(&self->characters); array_delete(&self->slices); - array_delete(&self->suffixes); + array_delete(&self->quantifiers); } static int symbol_table_id_for_name( @@ -501,18 +534,17 @@ static const char *symbol_table_name_for_id( return &self->characters.contents[slice.offset]; } -static char symbol_table_suffix_for_id( +static TSQuantifier symbol_table_quantifier_for_id( const SymbolTable 
*self, uint16_t id ) { - return self->suffixes.contents[id]; + return self->quantifiers.contents[id]; } static uint16_t symbol_table_insert_name( SymbolTable *self, const char *name, - uint32_t length, - char suffix + uint32_t length ) { int id = symbol_table_id_for_name(self, name, length); if (id >= 0) return (uint16_t)id; @@ -524,10 +556,22 @@ static uint16_t symbol_table_insert_name( memcpy(&self->characters.contents[slice.offset], name, length); self->characters.contents[self->characters.size - 1] = 0; array_push(&self->slices, slice); - array_push(&self->suffixes, suffix); + array_push(&self->quantifiers, One); return self->slices.size - 1; } +static void symbol_table_quantifiers_join( + SymbolTable *self, + TSQuantifier quantifier, + uint32_t start_index, + uint32_t end_index +) { + for (uint32_t index = start_index; index < end_index; index++) { + TSQuantifier *joined_quantifier = &self->quantifiers.contents[index]; + *joined_quantifier = quantifier_join(quantifier, *joined_quantifier); + } +} + /************ * QueryStep ************/ @@ -542,7 +586,6 @@ static QueryStep query_step__new( .depth = depth, .field = 0, .capture_ids = {NONE, NONE, NONE}, - .capture_suffixes = {NONE, NONE, NONE}, .alternative_index = NONE, .negated_field_list_id = 0, .contains_captures = false, @@ -556,11 +599,10 @@ static QueryStep query_step__new( }; } -static void query_step__add_capture(QueryStep *self, uint16_t capture_id, char capture_suffix) { +static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { if (self->capture_ids[i] == NONE) { self->capture_ids[i] = capture_id; - self->capture_suffixes[i] = capture_suffix; break; } } @@ -570,13 +612,10 @@ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { if (self->capture_ids[i] == capture_id) { self->capture_ids[i] = NONE; - self->capture_suffixes[i] = NONE; while (i + 1 < 
MAX_STEP_CAPTURE_COUNT) { if (self->capture_ids[i + 1] == NONE) break; self->capture_ids[i] = self->capture_ids[i + 1]; - self->capture_suffixes[i] = self->capture_suffixes[i + 1]; self->capture_ids[i + 1] = NONE; - self->capture_suffixes[i + 1] = NONE; i++; } break; @@ -1611,8 +1650,7 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, predicate_name, - length, - '\0' + length ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -1665,8 +1703,7 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, self->string_buffer.contents, - self->string_buffer.size, - '\0' + self->string_buffer.size ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -1682,8 +1719,7 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, symbol_start, - length, - '\0' + length ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -1714,6 +1750,7 @@ static TSQueryError ts_query__parse_pattern( if (stream->next == ')' || stream->next == ']') return PARENT_DONE; const uint32_t starting_step_index = self->steps.size; + const uint32_t starting_quantifier_index = self->captures.quantifiers.size; // Store the byte offset of each step in the query. 
if ( @@ -1771,6 +1808,16 @@ static TSQueryError ts_query__parse_pattern( end_step->is_dead_end = true; } + if (branch_step_indices.size > 1) { + const uint32_t ending_quantifier_index = self->captures.quantifiers.size; + symbol_table_quantifiers_join( + &self->captures, + ZeroOrOne, + starting_quantifier_index, + ending_quantifier_index + ); + } + array_delete(&branch_step_indices); } @@ -2058,11 +2105,11 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Parse suffixes modifiers for this pattern - char suffix = '\0'; + TSQuantifier quantifier = One; for (;;) { // Parse the one-or-more operator. if (stream->next == '+') { - suffix = '+'; + quantifier = quantifier_join(OneOrMore, quantifier); stream_advance(stream); stream_skip_whitespace(stream); @@ -2076,7 +2123,7 @@ static TSQueryError ts_query__parse_pattern( // Parse the zero-or-more repetition operator. else if (stream->next == '*') { - suffix = '*'; + quantifier = quantifier_join(ZeroOrMore, quantifier); stream_advance(stream); stream_skip_whitespace(stream); @@ -2096,7 +2143,7 @@ static TSQueryError ts_query__parse_pattern( // Parse the optional operator. 
else if (stream->next == '?') { - suffix = '?'; + quantifier = quantifier_join(ZeroOrOne, quantifier); stream_advance(stream); stream_skip_whitespace(stream); @@ -2121,14 +2168,13 @@ static TSQueryError ts_query__parse_pattern( uint16_t capture_id = symbol_table_insert_name( &self->captures, capture_name, - length, - suffix + length ); uint32_t step_index = starting_step_index; for (;;) { QueryStep *step = &self->steps.contents[step_index]; - query_step__add_capture(step, capture_id, suffix); + query_step__add_capture(step, capture_id); if ( step->alternative_index != NONE && step->alternative_index > step_index && @@ -2148,6 +2194,17 @@ static TSQueryError ts_query__parse_pattern( } } + // Patch capture quantifiers + if (quantifier != One) { + const uint32_t ending_quantifier_index = self->captures.quantifiers.size; + symbol_table_quantifiers_join( + &self->captures, + quantifier, + starting_quantifier_index, + ending_quantifier_index + ); + } + return 0; } @@ -2311,11 +2368,11 @@ const char *ts_query_capture_name_for_id( return symbol_table_name_for_id(&self->captures, index, length); } -char ts_query_capture_suffix_for_id( +TSQuantifier ts_query_capture_quantifier_for_id( const TSQuery *self, uint32_t index ) { - return symbol_table_suffix_for_id(&self->captures, index); + return symbol_table_quantifier_for_id(&self->captures, index); } const char *ts_query_string_value_for_id( From 1f1a449c7639bb33c9a68bae340721913e44f34b Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Thu, 2 Dec 2021 19:04:49 +0100 Subject: [PATCH 03/16] Improve capture quantifier computation Compute quantifiers in a bottom-up manner, which allows more precise results for alternations, where the quantifiers are now precisely joined. 
--- cli/src/tests/query_test.rs | 195 +++++++++++++++++- lib/binding_rust/bindings.rs | 9 +- lib/binding_rust/lib.rs | 16 +- lib/include/tree_sitter/api.h | 5 +- lib/src/query.c | 359 ++++++++++++++++++++++++++++------ 5 files changed, 506 insertions(+), 78 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 1703e610..fcb4179e 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -7,8 +7,8 @@ use lazy_static::lazy_static; use rand::{prelude::StdRng, SeedableRng}; use std::{env, fmt::Write}; use tree_sitter::{ - Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, QueryErrorKind, - QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, + CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, + QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, }; lazy_static! { @@ -3818,6 +3818,197 @@ fn test_query_is_pattern_guaranteed_at_step() { }); } +#[test] +fn test_capture_quantifiers() { + struct Row { + description: &'static str, + language: Language, + pattern: &'static str, + capture_quantifiers: &'static [(&'static str, CaptureQuantifier)], + } + + let rows = &[ + // Simple quantifiers + Row { + description: "Top level capture", + language: get_language("python"), + pattern: r#" + (module) @mod + "#, + capture_quantifiers: &[("mod", CaptureQuantifier::One)], + }, + Row { + description: "Nested list capture capture", + language: get_language("javascript"), + pattern: r#" + (array (_)* @elems) @array + "#, + capture_quantifiers: &[ + ("array", CaptureQuantifier::One), + ("elems", CaptureQuantifier::ZeroOrMore), + ], + }, + Row { + description: "Nested non-empty list capture capture", + language: get_language("javascript"), + pattern: r#" + (array (_)+ @elems) @array + "#, + capture_quantifiers: &[ + ("array", CaptureQuantifier::One), + ("elems", CaptureQuantifier::OneOrMore), + ], + }, + // Nested quantifiers + Row 
{ + description: "capture nested in optional pattern", + language: get_language("javascript"), + pattern: r#" + (array (call_expression (arguments (_) @arg))? @call) @array + "#, + capture_quantifiers: &[ + ("array", CaptureQuantifier::One), + ("call", CaptureQuantifier::ZeroOrOne), + ("arg", CaptureQuantifier::ZeroOrOne), + ], + }, + Row { + description: "optional capture nested in non-empty list pattern", + language: get_language("javascript"), + pattern: r#" + (array (call_expression (arguments (_)? @arg))+ @call) @array + "#, + capture_quantifiers: &[ + ("array", CaptureQuantifier::One), + ("call", CaptureQuantifier::OneOrMore), + ("arg", CaptureQuantifier::ZeroOrMore), + ], + }, + Row { + description: "non-empty list capture nested in optional pattern", + language: get_language("javascript"), + pattern: r#" + (array (call_expression (arguments (_)+ @args))? @call) @array + "#, + capture_quantifiers: &[ + ("array", CaptureQuantifier::One), + ("call", CaptureQuantifier::ZeroOrOne), + ("args", CaptureQuantifier::ZeroOrMore), + ], + }, + // Quantifiers in alternations + Row { + description: "capture is the same in all alternatives", + language: get_language("javascript"), + pattern: r#"[ + (function_declaration name:(identifier) @name) + (call_expression function:(identifier) @name) + ]"#, + capture_quantifiers: &[("name", CaptureQuantifier::One)], + }, + Row { + description: "capture appears in some alternatives", + language: get_language("javascript"), + pattern: r#"[ + (function_declaration name:(identifier) @name) + (function) + ] @fun"#, + capture_quantifiers: &[ + ("fun", CaptureQuantifier::One), + ("name", CaptureQuantifier::ZeroOrOne), + ], + }, + Row { + description: "capture has different quantifiers in alternatives", + language: get_language("javascript"), + pattern: r#"[ + (call_expression arguments:(arguments (_)+ @args)) + (new_expression arguments:(arguments (_)? 
@args)) + ] @call"#, + capture_quantifiers: &[ + ("call", CaptureQuantifier::One), + ("args", CaptureQuantifier::ZeroOrMore), + ], + }, + // Quantifiers in siblings + Row { + description: "siblings have different captures with different quantifiers", + language: get_language("javascript"), + pattern: r#" + (call_expression (arguments (identifier)? @self (_)* @args)) @call + "#, + capture_quantifiers: &[ + ("call", CaptureQuantifier::One), + ("self", CaptureQuantifier::ZeroOrOne), + ("args", CaptureQuantifier::ZeroOrMore), + ], + }, + Row { + description: "siblings have same capture with different quantifiers", + language: get_language("javascript"), + pattern: r#" + (call_expression (arguments (identifier) @args (_)* @args)) @call + "#, + capture_quantifiers: &[ + ("call", CaptureQuantifier::One), + ("args", CaptureQuantifier::OneOrMore), + ], + }, + // Combined nesting, + Row { + description: "combined nesting, alterantives, and siblings", + language: get_language("javascript"), + pattern: r#" + (array + (call_expression + (arguments [ + (identifier) @self + (_)+ @args + ]) + )+ @call + ) @array + "#, + capture_quantifiers: &[ + ("array", CaptureQuantifier::One), + ("call", CaptureQuantifier::OneOrMore), + ("self", CaptureQuantifier::ZeroOrMore), + ("args", CaptureQuantifier::ZeroOrMore), + ], + }, + ]; + + allocations::record(|| { + eprintln!(""); + + for row in rows.iter() { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !row.description.contains(filter.as_str()) { + continue; + } + } + eprintln!(" query example: {:?}", row.description); + let query = Query::new(row.language, row.pattern).unwrap(); + for (capture, expected_quantifier) in row.capture_quantifiers { + let index = query.capture_index_for_name(capture).unwrap(); + let actual_quantifier = query.capture_quantifiers()[index as usize]; + assert_eq!( + actual_quantifier, + *expected_quantifier, + "Description: {}, Pattern: {:?}, expected quantifier of @{} to be {:?} instead of {:?}", + 
row.description, + row.pattern + .split_ascii_whitespace() + .collect::>() + .join(" "), + capture, + *expected_quantifier, + actual_quantifier, + ) + } + } + }); +} + fn assert_query_matches( language: Language, query: &Query, diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 5da90d92..7d5a6b6b 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -107,10 +107,11 @@ pub struct TSQueryCapture { pub node: TSNode, pub index: u32, } -pub const TSQuantifier_One: TSQuantifier = 0; -pub const TSQuantifier_OneOrMore: TSQuantifier = 1; -pub const TSQuantifier_ZeroOrOne: TSQuantifier = 2; -pub const TSQuantifier_ZeroOrMore: TSQuantifier = 3; +pub const TSQuantifier_Zero: TSQuantifier = 0; +pub const TSQuantifier_ZeroOrOne: TSQuantifier = 1; +pub const TSQuantifier_ZeroOrMore: TSQuantifier = 2; +pub const TSQuantifier_One: TSQuantifier = 3; +pub const TSQuantifier_OneOrMore: TSQuantifier = 4; pub type TSQuantifier = u32; #[repr(C)] #[derive(Debug, Copy, Clone)] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 952d8864..a66cba2f 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -98,7 +98,7 @@ pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); pub struct Query { ptr: NonNull, capture_names: Vec, - capture_quantifiers: Vec, + capture_quantifiers: Vec, text_predicates: Vec>, property_settings: Vec>, property_predicates: Vec>, @@ -107,20 +107,20 @@ pub struct Query { /// A quantifier for captures #[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum Quantifier { +pub enum CaptureQuantifier { One, OneOrMore, ZeroOrOne, ZeroOrMore, } -impl From for Quantifier { +impl From for CaptureQuantifier { fn from(value: ffi::TSQuantifier) -> Self { match value { - ffi::TSQuantifier_One => Quantifier::One, - ffi::TSQuantifier_OneOrMore => Quantifier::OneOrMore, - ffi::TSQuantifier_ZeroOrOne => Quantifier::ZeroOrOne, - ffi::TSQuantifier_ZeroOrMore => Quantifier::ZeroOrMore, + 
ffi::TSQuantifier_One => CaptureQuantifier::One, + ffi::TSQuantifier_OneOrMore => CaptureQuantifier::OneOrMore, + ffi::TSQuantifier_ZeroOrOne => CaptureQuantifier::ZeroOrOne, + ffi::TSQuantifier_ZeroOrMore => CaptureQuantifier::ZeroOrMore, _ => panic!("Unrecognized quantifier: {}", value), } } @@ -1550,7 +1550,7 @@ impl Query { } /// Get the quantifiers of the captures used in the query. - pub fn capture_quantifiers(&self) -> &[Quantifier] { + pub fn capture_quantifiers(&self) -> &[CaptureQuantifier] { &self.capture_quantifiers } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index b4f77f46..275350c9 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -107,10 +107,11 @@ typedef struct { } TSQueryCapture; typedef enum { - One, - OneOrMore, + Zero = 0, // must match the array initialization value ZeroOrOne, ZeroOrMore, + One, + OneOrMore, } TSQuantifier; typedef struct { diff --git a/lib/src/query.c b/lib/src/query.c index 83c6b297..2ea9eb02 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -118,9 +118,13 @@ typedef struct { typedef struct { Array(char) characters; Array(Slice) slices; - Array(TSQuantifier) quantifiers; } SymbolTable; +/** + * CaptureQuantififers - a data structure holding the quantifiers of pattern captures. + */ +typedef Array(TSQuantifier) CaptureQuantifiers; + /* * PatternEntry - Information about the starting point for matching a particular * pattern. 
These entries are stored in a 'pattern map' - a sorted array that @@ -263,6 +267,7 @@ typedef struct { */ struct TSQuery { SymbolTable captures; + CaptureQuantifiers capture_quantifiers; SymbolTable predicate_values; Array(QueryStep) steps; Array(PatternEntry) pattern_map; @@ -458,31 +463,104 @@ static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { * Quantifiers **************/ +static TSQuantifier quantifier_mul( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case Zero: + return Zero; + case One: + return right; + case OneOrMore: + switch (right) { + case Zero: + return Zero; + case ZeroOrOne: + case ZeroOrMore: + return ZeroOrMore; + case One: + case OneOrMore: + return OneOrMore; + }; + break; + case ZeroOrOne: + switch (right) { + case Zero: + return Zero; + case ZeroOrOne: + case One: + return ZeroOrOne; + case ZeroOrMore: + case OneOrMore: + return ZeroOrMore; + }; + break; + case ZeroOrMore: + switch (right) { + case Zero: + return Zero; + case ZeroOrOne: + case ZeroOrMore: + case One: + case OneOrMore: + return ZeroOrMore; + }; + return ZeroOrMore; + } +} + static TSQuantifier quantifier_join( TSQuantifier left, TSQuantifier right ) { switch (left) { - case One: - return right; - case OneOrMore: + case Zero: switch (right) { + case Zero: + return Zero; + case ZeroOrOne: case One: + return ZeroOrOne; + case ZeroOrMore: + case OneOrMore: + return ZeroOrMore; + }; + break; + case One: + switch (right) { + case Zero: + case ZeroOrOne: + return ZeroOrOne; + case ZeroOrMore: + return ZeroOrMore; + case One: + return One; case OneOrMore: return OneOrMore; + }; + break; + case OneOrMore: + switch (right) { + case Zero: case ZeroOrOne: case ZeroOrMore: return ZeroOrMore; + case One: + case OneOrMore: + return OneOrMore; }; break; case ZeroOrOne: switch (right) { - case One: + case Zero: case ZeroOrOne: + case One: return ZeroOrOne; - case OneOrMore: case ZeroOrMore: + case OneOrMore: return ZeroOrMore; }; break; @@ -491,6 
+569,148 @@ static TSQuantifier quantifier_join( } } +static TSQuantifier quantifier_add( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case Zero: + return right; + case One: + switch (right) { + case Zero: + return One; + case ZeroOrOne: + case ZeroOrMore: + case One: + case OneOrMore: + return OneOrMore; + }; + break; + case OneOrMore: + return OneOrMore; + case ZeroOrOne: + switch (right) { + case Zero: + return ZeroOrOne; + case ZeroOrOne: + case ZeroOrMore: + return ZeroOrMore; + case One: + case OneOrMore: + return OneOrMore; + }; + break; + case ZeroOrMore: + switch (right) { + case Zero: + return ZeroOrMore; + case ZeroOrOne: + case ZeroOrMore: + return ZeroOrMore; + case One: + case OneOrMore: + return OneOrMore; + }; + break; + } +} + +// Create new capture quantifiers structure +static CaptureQuantifiers capture_quantifiers_new(void) { + return (CaptureQuantifiers) array_new(); +} + +// Delete capture quantifiers structure +static void capture_quantifiers_delete( + CaptureQuantifiers *self +) { + array_delete(self); +} + +// Clear capture quantifiers structure +static void capture_quantifiers_clear( + CaptureQuantifiers *self +) { + array_clear(self); +} + +// Replace capture quantifiers with the given quantifiers +static void capture_quantifiers_replace( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + array_clear(self); + array_push_all(self, quantifiers); +} + +// Return capture quantifier for the given capture id +static TSQuantifier capture_quantifier_for_id( + const CaptureQuantifiers *self, + uint16_t id +) { + return (self->size <= id) ? 
Zero : *array_get(self, id); +} + +// Add the given quantifier to the current value for id +static void capture_quantifiers_add_for_id( + CaptureQuantifiers *self, + uint16_t id, + TSQuantifier quantifier +) { + if (self->size <= id) { + array_grow_by(self, id + 1 - self->size); + } + TSQuantifier *own_quantifier = array_get(self, id); + *own_quantifier = quantifier_add(*own_quantifier, quantifier); +} + +// Point-wise add the given quantifiers to the current values +static void capture_quantifiers_add_all( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + if (self->size < quantifiers->size) { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint16_t id = 0; id < quantifiers->size; id++) { + TSQuantifier *quantifier = array_get(quantifiers, id); + TSQuantifier *own_quantifier = array_get(self, id); + *own_quantifier = quantifier_add(*own_quantifier, *quantifier); + } +} + +// Join the given quantifier with the current values +static void capture_quantifiers_mul( + CaptureQuantifiers *self, + TSQuantifier quantifier +) { + for (uint16_t id = 0; id < self->size; id++) { + TSQuantifier *own_quantifier = array_get(self, id); + *own_quantifier = quantifier_mul(*own_quantifier, quantifier); + } +} + +// Point-wise join the quantifiers from a list of alternatives with the current values +static void capture_quantifiers_join_all( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + if (self->size < quantifiers->size) { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint32_t id = 0; id < quantifiers->size; id++) { + TSQuantifier *quantifier = array_get(quantifiers, id); + TSQuantifier *own_quantifier = array_get(self, id); + *own_quantifier = quantifier_join(*own_quantifier, *quantifier); + } + for (uint32_t id = quantifiers->size; id < self->size; id++) { + TSQuantifier *own_quantifier = array_get(self, id); + *own_quantifier = quantifier_join(*own_quantifier, Zero); + } +} + /************** * 
SymbolTable **************/ @@ -499,14 +719,12 @@ static SymbolTable symbol_table_new(void) { return (SymbolTable) { .characters = array_new(), .slices = array_new(), - .quantifiers = array_new(), }; } static void symbol_table_delete(SymbolTable *self) { array_delete(&self->characters); array_delete(&self->slices); - array_delete(&self->quantifiers); } static int symbol_table_id_for_name( @@ -534,13 +752,6 @@ static const char *symbol_table_name_for_id( return &self->characters.contents[slice.offset]; } -static TSQuantifier symbol_table_quantifier_for_id( - const SymbolTable *self, - uint16_t id -) { - return self->quantifiers.contents[id]; -} - static uint16_t symbol_table_insert_name( SymbolTable *self, const char *name, @@ -556,22 +767,9 @@ static uint16_t symbol_table_insert_name( memcpy(&self->characters.contents[slice.offset], name, length); self->characters.contents[self->characters.size - 1] = 0; array_push(&self->slices, slice); - array_push(&self->quantifiers, One); return self->slices.size - 1; } -static void symbol_table_quantifiers_join( - SymbolTable *self, - TSQuantifier quantifier, - uint32_t start_index, - uint32_t end_index -) { - for (uint32_t index = start_index; index < end_index; index++) { - TSQuantifier *joined_quantifier = &self->quantifiers.contents[index]; - *joined_quantifier = quantifier_join(quantifier, *joined_quantifier); - } -} - /************ * QueryStep ************/ @@ -1740,17 +1938,20 @@ static TSQueryError ts_query__parse_predicate( // Read one S-expression pattern from the stream, and incorporate it into // the query's internal state machine representation. For nested patterns, // this function calls itself recursively. +// +// The caller is repsonsible for passing in a dedicated CaptureQuantifiers. +// These should not be shared between different calls to ts_query__parse_pattern! 
static TSQueryError ts_query__parse_pattern( TSQuery *self, Stream *stream, uint32_t depth, - bool is_immediate + bool is_immediate, + CaptureQuantifiers *capture_quantifiers ) { if (stream->next == 0) return TSQueryErrorSyntax; if (stream->next == ')' || stream->next == ']') return PARENT_DONE; const uint32_t starting_step_index = self->steps.size; - const uint32_t starting_quantifier_index = self->captures.quantifiers.size; // Store the byte offset of each step in the query. if ( @@ -1770,13 +1971,15 @@ static TSQueryError ts_query__parse_pattern( // Parse each branch, and add a placeholder step in between the branches. Array(uint32_t) branch_step_indices = array_new(); + CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); for (;;) { uint32_t start_index = self->steps.size; TSQueryError e = ts_query__parse_pattern( self, stream, depth, - is_immediate + is_immediate, + &branch_capture_quantifiers ); if (e == PARENT_DONE) { @@ -1787,12 +1990,20 @@ static TSQueryError ts_query__parse_pattern( e = TSQueryErrorSyntax; } if (e) { + capture_quantifiers_delete(&branch_capture_quantifiers); array_delete(&branch_step_indices); return e; } + if(start_index == 0) { + capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); + } else { + capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); + } + array_push(&branch_step_indices, start_index); array_push(&self->steps, query_step__new(0, depth, false)); + capture_quantifiers_clear(&branch_capture_quantifiers); } (void)array_pop(&self->steps); @@ -1808,16 +2019,7 @@ static TSQueryError ts_query__parse_pattern( end_step->is_dead_end = true; } - if (branch_step_indices.size > 1) { - const uint32_t ending_quantifier_index = self->captures.quantifiers.size; - symbol_table_quantifiers_join( - &self->captures, - ZeroOrOne, - starting_quantifier_index, - ending_quantifier_index - ); - } - + capture_quantifiers_delete(&branch_capture_quantifiers); 
array_delete(&branch_step_indices); } @@ -1832,6 +2034,7 @@ static TSQueryError ts_query__parse_pattern( // If this parenthesis is followed by a node, then it represents a grouped sequence. if (stream->next == '(' || stream->next == '"' || stream->next == '[') { bool child_is_immediate = false; + CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); for (;;) { if (stream->next == '.') { child_is_immediate = true; @@ -1842,7 +2045,8 @@ static TSQueryError ts_query__parse_pattern( self, stream, depth, - child_is_immediate + child_is_immediate, + &child_capture_quantifiers ); if (e == PARENT_DONE) { if (stream->next == ')') { @@ -1851,10 +2055,17 @@ static TSQueryError ts_query__parse_pattern( } e = TSQueryErrorSyntax; } - if (e) return e; + if (e) { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); child_is_immediate = false; + capture_quantifiers_clear(&child_capture_quantifiers); } + capture_quantifiers_delete(&child_capture_quantifiers); } // A dot/pound character indicates the start of a predicate. 
@@ -1943,12 +2154,16 @@ static TSQueryError ts_query__parse_pattern( uint16_t last_child_step_index = 0; uint16_t negated_field_count = 0; TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; + CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); for (;;) { // Parse a negated field assertion if (stream->next == '!') { stream_advance(stream); stream_skip_whitespace(stream); - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + if (!stream_is_ident_start(stream)) { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } const char *field_name = stream->input; stream_scan_identifier(stream); uint32_t length = stream->input - field_name; @@ -1961,6 +2176,7 @@ static TSQueryError ts_query__parse_pattern( ); if (!field_id) { stream->input = field_name; + capture_quantifiers_delete(&child_capture_quantifiers); return TSQueryErrorField; } @@ -1985,12 +2201,16 @@ static TSQueryError ts_query__parse_pattern( self, stream, depth + 1, - child_is_immediate + child_is_immediate, + &child_capture_quantifiers ); if (e == PARENT_DONE) { if (stream->next == ')') { if (child_is_immediate) { - if (last_child_step_index == 0) return TSQueryErrorSyntax; + if (last_child_step_index == 0) { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } self->steps.contents[last_child_step_index].is_last_child = true; } @@ -2008,11 +2228,18 @@ static TSQueryError ts_query__parse_pattern( } e = TSQueryErrorSyntax; } - if (e) return e; + if (e) { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); last_child_step_index = step_index; child_is_immediate = false; + capture_quantifiers_clear(&child_capture_quantifiers); } + capture_quantifiers_delete(&child_capture_quantifiers); } } @@ -2061,14 +2288,22 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Parse the 
pattern + CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); TSQueryError e = ts_query__parse_pattern( self, stream, depth, - is_immediate + is_immediate, + &field_capture_quantifiers ); - if (e == PARENT_DONE) return TSQueryErrorSyntax; - if (e) return e; + if (e == PARENT_DONE) { + capture_quantifiers_delete(&field_capture_quantifiers); + return TSQueryErrorSyntax; + } + if (e) { + capture_quantifiers_delete(&field_capture_quantifiers); + return e; + } // Add the field name to the first step of the pattern TSFieldId field_id = ts_language_field_id_for_name( @@ -2096,6 +2331,9 @@ static TSQueryError ts_query__parse_pattern( break; } } + + capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); + capture_quantifiers_delete(&field_capture_quantifiers); } else { @@ -2175,6 +2413,10 @@ static TSQueryError ts_query__parse_pattern( for (;;) { QueryStep *step = &self->steps.contents[step_index]; query_step__add_capture(step, capture_id); + // Add only once, not for every branch, lest the quantifier will be '+' instead of '1' + if (step_index == starting_step_index) { + capture_quantifiers_add_for_id(capture_quantifiers, capture_id, One); + } if ( step->alternative_index != NONE && step->alternative_index > step_index && @@ -2194,16 +2436,7 @@ static TSQueryError ts_query__parse_pattern( } } - // Patch capture quantifiers - if (quantifier != One) { - const uint32_t ending_quantifier_index = self->captures.quantifiers.size; - symbol_table_quantifiers_join( - &self->captures, - quantifier, - starting_quantifier_index, - ending_quantifier_index - ); - } + capture_quantifiers_mul(capture_quantifiers, quantifier); return 0; } @@ -2229,6 +2462,7 @@ TSQuery *ts_query_new( .steps = array_new(), .pattern_map = array_new(), .captures = symbol_table_new(), + .capture_quantifiers = capture_quantifiers_new(), .predicate_values = symbol_table_new(), .predicate_steps = array_new(), .patterns = array_new(), @@ -2253,7 +2487,7 @@ TSQuery 
*ts_query_new( .predicate_steps = (Slice) {.offset = start_predicate_step_index}, .start_byte = stream_offset(&stream), })); - *error_type = ts_query__parse_pattern(self, &stream, 0, false); + *error_type = ts_query__parse_pattern(self, &stream, 0, false, &self->capture_quantifiers); array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); QueryPattern *pattern = array_back(&self->patterns); @@ -2344,6 +2578,7 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->negated_fields); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); + capture_quantifiers_delete(&self->capture_quantifiers); ts_free(self); } } @@ -2372,7 +2607,7 @@ TSQuantifier ts_query_capture_quantifier_for_id( const TSQuery *self, uint32_t index ) { - return symbol_table_quantifier_for_id(&self->captures, index); + return capture_quantifier_for_id(&self->capture_quantifiers, index); } const char *ts_query_string_value_for_id( From 1d513bcf674bef7db8a8973e33ddce2f9cc7d044 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 7 Dec 2021 14:35:08 +0100 Subject: [PATCH 04/16] Rewrite quantifier operations - Simplify control flow by having a single return at the end of the function. - Follow enum order for case order.
--- lib/src/query.c | 176 +++++++++++++++++++++++++++++------------------- 1 file changed, 107 insertions(+), 69 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 2ea9eb02..a8d2436c 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -467,90 +467,82 @@ static TSQuantifier quantifier_mul( TSQuantifier left, TSQuantifier right ) { + TSQuantifier result; switch (left) { case Zero: - return Zero; - case One: - return right; - case OneOrMore: - switch (right) { - case Zero: - return Zero; - case ZeroOrOne: - case ZeroOrMore: - return ZeroOrMore; - case One: - case OneOrMore: - return OneOrMore; - }; + result = Zero; break; case ZeroOrOne: switch (right) { case Zero: - return Zero; + result = Zero; + break; case ZeroOrOne: case One: - return ZeroOrOne; + result = ZeroOrOne; + break; case ZeroOrMore: case OneOrMore: - return ZeroOrMore; + result = ZeroOrMore; + break; }; break; case ZeroOrMore: switch (right) { case Zero: - return Zero; + result = Zero; + break; case ZeroOrOne: case ZeroOrMore: case One: case OneOrMore: - return ZeroOrMore; + result = ZeroOrMore; + break; }; - return ZeroOrMore; + break; + case One: + result = right; + break; + case OneOrMore: + switch (right) { + case Zero: + result = Zero; + break; + case ZeroOrOne: + case ZeroOrMore: + result = ZeroOrMore; + break; + case One: + case OneOrMore: + result = OneOrMore; + break; + }; + break; } + return result; } static TSQuantifier quantifier_join( TSQuantifier left, TSQuantifier right ) { + TSQuantifier result; switch (left) { case Zero: switch (right) { case Zero: - return Zero; + result = Zero; + break; case ZeroOrOne: case One: - return ZeroOrOne; + result = ZeroOrOne; + break; case ZeroOrMore: case OneOrMore: - return ZeroOrMore; - }; - break; - case One: - switch (right) { - case Zero: - case ZeroOrOne: - return ZeroOrOne; - case ZeroOrMore: - return ZeroOrMore; - case One: - return One; - case OneOrMore: - return OneOrMore; - }; - break; - case OneOrMore: - switch (right) { - case 
Zero: - case ZeroOrOne: - case ZeroOrMore: - return ZeroOrMore; - case One: - case OneOrMore: - return OneOrMore; + result = ZeroOrMore; + break; }; break; case ZeroOrOne: @@ -558,63 +550,109 @@ static TSQuantifier quantifier_join( case Zero: case ZeroOrOne: case One: - return ZeroOrOne; + result = ZeroOrOne; + break; case ZeroOrMore: case OneOrMore: - return ZeroOrMore; + result = ZeroOrMore; + break; }; break; case ZeroOrMore: - return ZeroOrMore; + result = ZeroOrMore; + break; + case One: + switch (right) { + case Zero: + case ZeroOrOne: + result = ZeroOrOne; + break; + case ZeroOrMore: + result = ZeroOrMore; + break; + case One: + result = One; + break; + case OneOrMore: + result = OneOrMore; + break; + }; + break; + case OneOrMore: + switch (right) { + case Zero: + case ZeroOrOne: + case ZeroOrMore: + result = ZeroOrMore; + break; + case One: + case OneOrMore: + result = OneOrMore; + break; + }; + break; } + return result; } static TSQuantifier quantifier_add( TSQuantifier left, TSQuantifier right ) { + TSQuantifier result; switch (left) { case Zero: - return right; - case One: - switch (right) { - case Zero: - return One; - case ZeroOrOne: - case ZeroOrMore: - case One: - case OneOrMore: - return OneOrMore; - }; + result = right; break; - case OneOrMore: - return OneOrMore; case ZeroOrOne: switch (right) { case Zero: - return ZeroOrOne; + result = ZeroOrOne; + break; case ZeroOrOne: case ZeroOrMore: - return ZeroOrMore; + result = ZeroOrMore; + break; case One: case OneOrMore: - return OneOrMore; + result = OneOrMore; + break; }; break; case ZeroOrMore: switch (right) { case Zero: - return ZeroOrMore; + result = ZeroOrMore; + break; case ZeroOrOne: case ZeroOrMore: - return ZeroOrMore; + result = ZeroOrMore; + break; case One: case OneOrMore: - return OneOrMore; + result = OneOrMore; + break; }; break; + case One: + switch (right) { + case Zero: + result = One; + break; + case ZeroOrOne: + case ZeroOrMore: + case One: + case OneOrMore: + result = OneOrMore; 
+ break; + }; + break; + case OneOrMore: + result = OneOrMore; + break; } + return result; } // Create new capture quantifiers structure From a1a241b01328c03b48eb61af61ea4e5bf83bf251 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 7 Dec 2021 16:17:22 +0100 Subject: [PATCH 05/16] Expose quantifiers per pattern, instead of merging for all patterns in a query --- cli/src/tests/query_test.rs | 94 +++++++++++++++++++++++------------ lib/binding_rust/bindings.rs | 6 ++- lib/binding_rust/lib.rs | 32 ++++++++---- lib/include/tree_sitter/api.h | 4 +- lib/src/query.c | 23 ++++++--- 5 files changed, 108 insertions(+), 51 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index fcb4179e..0513f3a1 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -3824,7 +3824,7 @@ fn test_capture_quantifiers() { description: &'static str, language: Language, pattern: &'static str, - capture_quantifiers: &'static [(&'static str, CaptureQuantifier)], + capture_quantifiers: &'static [(usize, &'static str, CaptureQuantifier)], } let rows = &[ @@ -3835,7 +3835,7 @@ fn test_capture_quantifiers() { pattern: r#" (module) @mod "#, - capture_quantifiers: &[("mod", CaptureQuantifier::One)], + capture_quantifiers: &[(0, "mod", CaptureQuantifier::One)], }, Row { description: "Nested list capture capture", @@ -3844,8 +3844,8 @@ fn test_capture_quantifiers() { (array (_)* @elems) @array "#, capture_quantifiers: &[ - ("array", CaptureQuantifier::One), - ("elems", CaptureQuantifier::ZeroOrMore), + (0, "array", CaptureQuantifier::One), + (0, "elems", CaptureQuantifier::ZeroOrMore), ], }, Row { @@ -3855,8 +3855,8 @@ fn test_capture_quantifiers() { (array (_)+ @elems) @array "#, capture_quantifiers: &[ - ("array", CaptureQuantifier::One), - ("elems", CaptureQuantifier::OneOrMore), + (0, "array", CaptureQuantifier::One), + (0, "elems", CaptureQuantifier::OneOrMore), ], }, // Nested quantifiers @@ -3867,9 +3867,9 @@ fn 
test_capture_quantifiers() { (array (call_expression (arguments (_) @arg))? @call) @array "#, capture_quantifiers: &[ - ("array", CaptureQuantifier::One), - ("call", CaptureQuantifier::ZeroOrOne), - ("arg", CaptureQuantifier::ZeroOrOne), + (0, "array", CaptureQuantifier::One), + (0, "call", CaptureQuantifier::ZeroOrOne), + (0, "arg", CaptureQuantifier::ZeroOrOne), ], }, Row { @@ -3879,9 +3879,9 @@ fn test_capture_quantifiers() { (array (call_expression (arguments (_)? @arg))+ @call) @array "#, capture_quantifiers: &[ - ("array", CaptureQuantifier::One), - ("call", CaptureQuantifier::OneOrMore), - ("arg", CaptureQuantifier::ZeroOrMore), + (0, "array", CaptureQuantifier::One), + (0, "call", CaptureQuantifier::OneOrMore), + (0, "arg", CaptureQuantifier::ZeroOrMore), ], }, Row { @@ -3891,9 +3891,9 @@ fn test_capture_quantifiers() { (array (call_expression (arguments (_)+ @args))? @call) @array "#, capture_quantifiers: &[ - ("array", CaptureQuantifier::One), - ("call", CaptureQuantifier::ZeroOrOne), - ("args", CaptureQuantifier::ZeroOrMore), + (0, "array", CaptureQuantifier::One), + (0, "call", CaptureQuantifier::ZeroOrOne), + (0, "args", CaptureQuantifier::ZeroOrMore), ], }, // Quantifiers in alternations @@ -3904,7 +3904,7 @@ fn test_capture_quantifiers() { (function_declaration name:(identifier) @name) (call_expression function:(identifier) @name) ]"#, - capture_quantifiers: &[("name", CaptureQuantifier::One)], + capture_quantifiers: &[(0, "name", CaptureQuantifier::One)], }, Row { description: "capture appears in some alternatives", @@ -3914,8 +3914,8 @@ fn test_capture_quantifiers() { (function) ] @fun"#, capture_quantifiers: &[ - ("fun", CaptureQuantifier::One), - ("name", CaptureQuantifier::ZeroOrOne), + (0, "fun", CaptureQuantifier::One), + (0, "name", CaptureQuantifier::ZeroOrOne), ], }, Row { @@ -3926,8 +3926,8 @@ fn test_capture_quantifiers() { (new_expression arguments:(arguments (_)? 
@args)) ] @call"#, capture_quantifiers: &[ - ("call", CaptureQuantifier::One), - ("args", CaptureQuantifier::ZeroOrMore), + (0, "call", CaptureQuantifier::One), + (0, "args", CaptureQuantifier::ZeroOrMore), ], }, // Quantifiers in siblings @@ -3938,9 +3938,9 @@ fn test_capture_quantifiers() { (call_expression (arguments (identifier)? @self (_)* @args)) @call "#, capture_quantifiers: &[ - ("call", CaptureQuantifier::One), - ("self", CaptureQuantifier::ZeroOrOne), - ("args", CaptureQuantifier::ZeroOrMore), + (0, "call", CaptureQuantifier::One), + (0, "self", CaptureQuantifier::ZeroOrOne), + (0, "args", CaptureQuantifier::ZeroOrMore), ], }, Row { @@ -3950,13 +3950,13 @@ fn test_capture_quantifiers() { (call_expression (arguments (identifier) @args (_)* @args)) @call "#, capture_quantifiers: &[ - ("call", CaptureQuantifier::One), - ("args", CaptureQuantifier::OneOrMore), + (0, "call", CaptureQuantifier::One), + (0, "args", CaptureQuantifier::OneOrMore), ], }, - // Combined nesting, + // Combined scenarios Row { - description: "combined nesting, alterantives, and siblings", + description: "combined nesting, alternatives, and siblings", language: get_language("javascript"), pattern: r#" (array @@ -3969,10 +3969,38 @@ fn test_capture_quantifiers() { ) @array "#, capture_quantifiers: &[ - ("array", CaptureQuantifier::One), - ("call", CaptureQuantifier::OneOrMore), - ("self", CaptureQuantifier::ZeroOrMore), - ("args", CaptureQuantifier::ZeroOrMore), + (0, "array", CaptureQuantifier::One), + (0, "call", CaptureQuantifier::OneOrMore), + (0, "self", CaptureQuantifier::ZeroOrMore), + (0, "args", CaptureQuantifier::ZeroOrMore), + ], + }, + // Multiple patterns + Row { + description: "multiple patterns", + language: get_language("javascript"), + pattern: r#" + (function_declaration name: (identifier) @x) + (statement_identifier) @y + (property_identifier)+ @z + (array (identifier)* @x) + "#, + capture_quantifiers: &[ + // x + (0, "x", CaptureQuantifier::One), + (1, "x", 
CaptureQuantifier::Zero), + (2, "x", CaptureQuantifier::Zero), + (3, "x", CaptureQuantifier::ZeroOrMore), + // y + (0, "y", CaptureQuantifier::Zero), + (1, "y", CaptureQuantifier::One), + (2, "y", CaptureQuantifier::Zero), + (3, "y", CaptureQuantifier::Zero), + // z + (0, "z", CaptureQuantifier::Zero), + (1, "z", CaptureQuantifier::Zero), + (2, "z", CaptureQuantifier::OneOrMore), + (3, "z", CaptureQuantifier::Zero), ], }, ]; @@ -3988,9 +4016,9 @@ fn test_capture_quantifiers() { } eprintln!(" query example: {:?}", row.description); let query = Query::new(row.language, row.pattern).unwrap(); - for (capture, expected_quantifier) in row.capture_quantifiers { + for (pattern, capture, expected_quantifier) in row.capture_quantifiers { let index = query.capture_index_for_name(capture).unwrap(); - let actual_quantifier = query.capture_quantifiers()[index as usize]; + let actual_quantifier = query.capture_quantifiers(*pattern)[index as usize]; assert_eq!( actual_quantifier, *expected_quantifier, diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 7d5a6b6b..686fec06 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -675,7 +675,11 @@ extern "C" { #[doc = " Get the quantifier of the query's captures, or one of the query's string"] #[doc = " literals. 
Each capture and string is associated with a numeric id based"] #[doc = " on the order that it appeared in the query's source."] - pub fn ts_query_capture_quantifier_for_id(arg1: *const TSQuery, id: u32) -> TSQuantifier; + pub fn ts_query_capture_quantifier_for_id( + arg1: *const TSQuery, + pattern_id: u32, + capture_id: u32, + ) -> TSQuantifier; } extern "C" { pub fn ts_query_string_value_for_id( diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index a66cba2f..cada66c6 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -98,7 +98,7 @@ pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); pub struct Query { ptr: NonNull, capture_names: Vec, - capture_quantifiers: Vec, + capture_quantifiers: Vec>, text_predicates: Vec>, property_settings: Vec>, property_predicates: Vec>, @@ -108,19 +108,21 @@ pub struct Query { /// A quantifier for captures #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum CaptureQuantifier { - One, - OneOrMore, + Zero, ZeroOrOne, ZeroOrMore, + One, + OneOrMore, } impl From for CaptureQuantifier { fn from(value: ffi::TSQuantifier) -> Self { match value { - ffi::TSQuantifier_One => CaptureQuantifier::One, - ffi::TSQuantifier_OneOrMore => CaptureQuantifier::OneOrMore, + ffi::TSQuantifier_Zero => CaptureQuantifier::Zero, ffi::TSQuantifier_ZeroOrOne => CaptureQuantifier::ZeroOrOne, ffi::TSQuantifier_ZeroOrMore => CaptureQuantifier::ZeroOrMore, + ffi::TSQuantifier_One => CaptureQuantifier::One, + ffi::TSQuantifier_OneOrMore => CaptureQuantifier::OneOrMore, _ => panic!("Unrecognized quantifier: {}", value), } } @@ -1328,7 +1330,7 @@ impl Query { let mut result = Query { ptr: unsafe { NonNull::new_unchecked(ptr) }, capture_names: Vec::with_capacity(capture_count as usize), - capture_quantifiers: Vec::with_capacity(capture_count as usize), + capture_quantifiers: Vec::with_capacity(pattern_count as usize), text_predicates: Vec::with_capacity(pattern_count), property_predicates: 
Vec::with_capacity(pattern_count), property_settings: Vec::with_capacity(pattern_count), @@ -1344,11 +1346,21 @@ impl Query { let name = slice::from_raw_parts(name, length as usize); let name = str::from_utf8_unchecked(name); result.capture_names.push(name.to_string()); - let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i); - result.capture_quantifiers.push(quantifier.into()); } } + // Build + for i in 0..pattern_count { + let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); + for j in 0..capture_count { + unsafe { + let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i as u32, j); + capture_quantifiers.push(quantifier.into()); + } + } + result.capture_quantifiers.push(capture_quantifiers); + } + // Build a vector of strings to represent literal values used in predicates. let string_values = (0..string_count) .map(|i| unsafe { @@ -1550,8 +1562,8 @@ impl Query { } /// Get the quantifiers of the captures used in the query. - pub fn capture_quantifiers(&self) -> &[CaptureQuantifier] { - &self.capture_quantifiers + pub fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] { + &self.capture_quantifiers[index] } /// Get the index for a given capture name. 
diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 275350c9..0f68530f 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -756,8 +756,10 @@ const char *ts_query_capture_name_for_id( */ TSQuantifier ts_query_capture_quantifier_for_id( const TSQuery *, - uint32_t id + uint32_t pattern_id, + uint32_t capture_id ); + const char *ts_query_string_value_for_id( const TSQuery *, uint32_t id, diff --git a/lib/src/query.c b/lib/src/query.c index a8d2436c..14ab91e4 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -267,7 +267,7 @@ typedef struct { */ struct TSQuery { SymbolTable captures; - CaptureQuantifiers capture_quantifiers; + Array(CaptureQuantifiers) capture_quantifiers; SymbolTable predicate_values; Array(QueryStep) steps; Array(PatternEntry) pattern_map; @@ -2500,7 +2500,7 @@ TSQuery *ts_query_new( .steps = array_new(), .pattern_map = array_new(), .captures = symbol_table_new(), - .capture_quantifiers = capture_quantifiers_new(), + .capture_quantifiers = array_new(), .predicate_values = symbol_table_new(), .predicate_steps = array_new(), .patterns = array_new(), @@ -2525,7 +2525,8 @@ TSQuery *ts_query_new( .predicate_steps = (Slice) {.offset = start_predicate_step_index}, .start_byte = stream_offset(&stream), })); - *error_type = ts_query__parse_pattern(self, &stream, 0, false, &self->capture_quantifiers); + CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); + *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); QueryPattern *pattern = array_back(&self->patterns); @@ -2537,10 +2538,14 @@ TSQuery *ts_query_new( if (*error_type) { if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; *error_offset = stream_offset(&stream); + capture_quantifiers_delete(&capture_quantifiers); ts_query_delete(self); return NULL; } + // Maintain a list of capture quantifiers for each pattern + 
array_push(&self->capture_quantifiers, capture_quantifiers); + // Maintain a map that can look up patterns for a given root symbol. uint16_t wildcard_root_alternative_index = NONE; for (;;) { @@ -2616,7 +2621,11 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->negated_fields); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); - capture_quantifiers_delete(&self->capture_quantifiers); + for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { + CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); + capture_quantifiers_delete(capture_quantifiers); + } + array_delete(&self->capture_quantifiers); ts_free(self); } } @@ -2643,9 +2652,11 @@ const char *ts_query_capture_name_for_id( TSQuantifier ts_query_capture_quantifier_for_id( const TSQuery *self, - uint32_t index + uint32_t pattern_index, + uint32_t capture_index ) { - return capture_quantifier_for_id(&self->capture_quantifiers, index); + CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); + return capture_quantifier_for_id(capture_quantifiers, capture_index); } const char *ts_query_string_value_for_id( From ae2ac3c0dbb5d32ebbcb267f6c47662d66ea410e Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 7 Dec 2021 18:30:37 +0100 Subject: [PATCH 06/16] Initialize variable to silence compiler warnings --- lib/src/query.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 14ab91e4..8f56918a 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -467,7 +467,7 @@ static TSQuantifier quantifier_mul( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result; + TSQuantifier result = Zero; // initialized to make compiler happy, but all cases should be covered below! 
switch (left) { case Zero: @@ -527,7 +527,7 @@ static TSQuantifier quantifier_join( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result; + TSQuantifier result = Zero; // initialized to make compiler happy, but all cases should be covered below! switch (left) { case Zero: @@ -599,7 +599,7 @@ static TSQuantifier quantifier_add( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result; + TSQuantifier result = Zero; // initialized to make compiler happy, but all cases should be covered below! switch (left) { case Zero: From 36f2440369a4e74b7e780f15a5e0314c250e57a1 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 17:16:47 +0100 Subject: [PATCH 07/16] Complete comment --- lib/binding_rust/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index cada66c6..a1199470 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1349,7 +1349,7 @@ impl Query { } } - // Build + // Build a vector to store capture quantifiers.
for i in 0..pattern_count { let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); for j in 0..capture_count { From e338726cde5ec3fddf20213ebc63e0f624e2e75e Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 17:44:50 +0100 Subject: [PATCH 08/16] Prefix globally visible TSquantifier values --- lib/binding_rust/bindings.rs | 12 +- lib/binding_rust/lib.rs | 10 +- lib/include/tree_sitter/api.h | 10 +- lib/src/query.c | 210 +++++++++++++++++----------------- 4 files changed, 121 insertions(+), 121 deletions(-) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 686fec06..bd4e1501 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -107,12 +107,12 @@ pub struct TSQueryCapture { pub node: TSNode, pub index: u32, } -pub const TSQuantifier_Zero: TSQuantifier = 0; -pub const TSQuantifier_ZeroOrOne: TSQuantifier = 1; -pub const TSQuantifier_ZeroOrMore: TSQuantifier = 2; -pub const TSQuantifier_One: TSQuantifier = 3; -pub const TSQuantifier_OneOrMore: TSQuantifier = 4; -pub type TSQuantifier = u32; +pub const TSQuantifier_TSQuantifierZero: TSQuantifier = 0; +pub const TSQuantifier_TSQuantifierZeroOrOne: TSQuantifier = 1; +pub const TSQuantifier_TSQuantifierZeroOrMore: TSQuantifier = 2; +pub const TSQuantifier_TSQuantifierOne: TSQuantifier = 3; +pub const TSQuantifier_TSQuantifierOneOrMore: TSQuantifier = 4; +pub type TSQuantifier = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQueryMatch { diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index a1199470..e88a411c 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -118,11 +118,11 @@ pub enum CaptureQuantifier { impl From for CaptureQuantifier { fn from(value: ffi::TSQuantifier) -> Self { match value { - ffi::TSQuantifier_Zero => CaptureQuantifier::Zero, - ffi::TSQuantifier_ZeroOrOne => CaptureQuantifier::ZeroOrOne, - ffi::TSQuantifier_ZeroOrMore => 
CaptureQuantifier::ZeroOrMore, - ffi::TSQuantifier_One => CaptureQuantifier::One, - ffi::TSQuantifier_OneOrMore => CaptureQuantifier::OneOrMore, + ffi::TSQuantifier_TSQuantifierZero => CaptureQuantifier::Zero, + ffi::TSQuantifier_TSQuantifierZeroOrOne => CaptureQuantifier::ZeroOrOne, + ffi::TSQuantifier_TSQuantifierZeroOrMore => CaptureQuantifier::ZeroOrMore, + ffi::TSQuantifier_TSQuantifierOne => CaptureQuantifier::One, + ffi::TSQuantifier_TSQuantifierOneOrMore => CaptureQuantifier::OneOrMore, _ => panic!("Unrecognized quantifier: {}", value), } } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 0f68530f..4a150511 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -107,11 +107,11 @@ typedef struct { } TSQueryCapture; typedef enum { - Zero = 0, // must match the array initialization value - ZeroOrOne, - ZeroOrMore, - One, - OneOrMore, + TSQuantifierZero = 0, // must match the array initialization value + TSQuantifierZeroOrOne, + TSQuantifierZeroOrMore, + TSQuantifierOne, + TSQuantifierOneOrMore, } TSQuantifier; typedef struct { diff --git a/lib/src/query.c b/lib/src/query.c index 8f56918a..f35ebd23 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -467,55 +467,55 @@ static TSQuantifier quantifier_mul( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result = Zero; // initialized to make compiler happy, but all cases should be covered below! + TSQuantifier result = TSQuantifierZero; // initialized to make compiler happy, but all cases should be covered below! 
switch (left) { - case Zero: - result = Zero; + case TSQuantifierZero: + result = TSQuantifierZero; break; - case ZeroOrOne: + case TSQuantifierZeroOrOne: switch (right) { - case Zero: - result = Zero; + case TSQuantifierZero: + result = TSQuantifierZero; break; - case ZeroOrOne: - case One: - result = ZeroOrOne; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + result = TSQuantifierZeroOrOne; break; - case ZeroOrMore: - case OneOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + result = TSQuantifierZeroOrMore; break; }; break; - case ZeroOrMore: + case TSQuantifierZeroOrMore: switch (right) { - case Zero: - result = Zero; + case TSQuantifierZero: + result = TSQuantifierZero; break; - case ZeroOrOne: - case ZeroOrMore: - case One: - case OneOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + result = TSQuantifierZeroOrMore; break; }; break; - case One: + case TSQuantifierOne: result = right; break; - case OneOrMore: + case TSQuantifierOneOrMore: switch (right) { - case Zero: - result = Zero; + case TSQuantifierZero: + result = TSQuantifierZero; break; - case ZeroOrOne: - case ZeroOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + result = TSQuantifierZeroOrMore; break; - case One: - case OneOrMore: - result = OneOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + result = TSQuantifierOneOrMore; break; }; break; @@ -527,67 +527,67 @@ static TSQuantifier quantifier_join( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result = Zero; // initialized to make compiler happy, but all cases should be covered below! + TSQuantifier result = TSQuantifierZero; // initialized to make compiler happy, but all cases should be covered below! 
switch (left) { - case Zero: + case TSQuantifierZero: switch (right) { - case Zero: - result = Zero; + case TSQuantifierZero: + result = TSQuantifierZero; break; - case ZeroOrOne: - case One: - result = ZeroOrOne; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + result = TSQuantifierZeroOrOne; break; - case ZeroOrMore: - case OneOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + result = TSQuantifierZeroOrMore; break; }; break; - case ZeroOrOne: + case TSQuantifierZeroOrOne: switch (right) { - case Zero: - case ZeroOrOne: - case One: - result = ZeroOrOne; + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + result = TSQuantifierZeroOrOne; break; - case ZeroOrMore: - case OneOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + result = TSQuantifierZeroOrMore; break; }; break; - case ZeroOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrMore: + result = TSQuantifierZeroOrMore; break; - case One: + case TSQuantifierOne: switch (right) { - case Zero: - case ZeroOrOne: - result = ZeroOrOne; + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + result = TSQuantifierZeroOrOne; break; - case ZeroOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrMore: + result = TSQuantifierZeroOrMore; break; - case One: - result = One; + case TSQuantifierOne: + result = TSQuantifierOne; break; - case OneOrMore: - result = OneOrMore; + case TSQuantifierOneOrMore: + result = TSQuantifierOneOrMore; break; }; break; - case OneOrMore: + case TSQuantifierOneOrMore: switch (right) { - case Zero: - case ZeroOrOne: - case ZeroOrMore: - result = ZeroOrMore; + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + result = TSQuantifierZeroOrMore; break; - case One: - case OneOrMore: - result = OneOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + result = TSQuantifierOneOrMore; break; }; break; @@ -599,57 +599,57 @@ static 
TSQuantifier quantifier_add( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result = Zero; // initialized to make compiler happy, but all cases should be covered below! + TSQuantifier result = TSQuantifierZero; // initialized to make compiler happy, but all cases should be covered below! switch (left) { - case Zero: + case TSQuantifierZero: result = right; break; - case ZeroOrOne: + case TSQuantifierZeroOrOne: switch (right) { - case Zero: - result = ZeroOrOne; + case TSQuantifierZero: + result = TSQuantifierZeroOrOne; break; - case ZeroOrOne: - case ZeroOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + result = TSQuantifierZeroOrMore; break; - case One: - case OneOrMore: - result = OneOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + result = TSQuantifierOneOrMore; break; }; break; - case ZeroOrMore: + case TSQuantifierZeroOrMore: switch (right) { - case Zero: - result = ZeroOrMore; + case TSQuantifierZero: + result = TSQuantifierZeroOrMore; break; - case ZeroOrOne: - case ZeroOrMore: - result = ZeroOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + result = TSQuantifierZeroOrMore; break; - case One: - case OneOrMore: - result = OneOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + result = TSQuantifierOneOrMore; break; }; break; - case One: + case TSQuantifierOne: switch (right) { - case Zero: - result = One; + case TSQuantifierZero: + result = TSQuantifierOne; break; - case ZeroOrOne: - case ZeroOrMore: - case One: - case OneOrMore: - result = OneOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + result = TSQuantifierOneOrMore; break; }; break; - case OneOrMore: - result = OneOrMore; + case TSQuantifierOneOrMore: + result = TSQuantifierOneOrMore; break; } return result; @@ -688,7 +688,7 @@ static TSQuantifier capture_quantifier_for_id( const CaptureQuantifiers *self, uint16_t id ) { - 
return (self->size <= id) ? Zero : *array_get(self, id); + return (self->size <= id) ? TSQuantifierZero : *array_get(self, id); } // Add the given quantifier to the current value for id @@ -745,7 +745,7 @@ static void capture_quantifiers_join_all( } for (uint32_t id = quantifiers->size; id < self->size; id++) { TSQuantifier *own_quantifier = array_get(self, id); - *own_quantifier = quantifier_join(*own_quantifier, Zero); + *own_quantifier = quantifier_join(*own_quantifier, TSQuantifierZero); } } @@ -2381,11 +2381,11 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Parse suffixes modifiers for this pattern - TSQuantifier quantifier = One; + TSQuantifier quantifier = TSQuantifierOne; for (;;) { // Parse the one-or-more operator. if (stream->next == '+') { - quantifier = quantifier_join(OneOrMore, quantifier); + quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); stream_advance(stream); stream_skip_whitespace(stream); @@ -2399,7 +2399,7 @@ static TSQueryError ts_query__parse_pattern( // Parse the zero-or-more repetition operator. else if (stream->next == '*') { - quantifier = quantifier_join(ZeroOrMore, quantifier); + quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); stream_advance(stream); stream_skip_whitespace(stream); @@ -2419,7 +2419,7 @@ static TSQueryError ts_query__parse_pattern( // Parse the optional operator. 
else if (stream->next == '?') { - quantifier = quantifier_join(ZeroOrOne, quantifier); + quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); stream_advance(stream); stream_skip_whitespace(stream); @@ -2453,7 +2453,7 @@ static TSQueryError ts_query__parse_pattern( query_step__add_capture(step, capture_id); // Add only once, not for every branch, lest the quantifier will be '+' instead of '1' if (step_index == starting_step_index) { - capture_quantifiers_add_for_id(capture_quantifiers, capture_id, One); + capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); } if ( step->alternative_index != NONE && From 8b28f3a8c4507c366d40a81e586f17dba4eebdd1 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 17:50:42 +0100 Subject: [PATCH 09/16] Shorten quantifier operations by using early returns --- lib/src/query.c | 113 +++++++++++++++--------------------------------- 1 file changed, 35 insertions(+), 78 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index f35ebd23..7d2bd8cf 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -467,192 +467,149 @@ static TSQuantifier quantifier_mul( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result = TSQuantifierZero; // initialized to make compiler happy, but all cases should be covered below! 
switch (left) { case TSQuantifierZero: - result = TSQuantifierZero; - break; + return TSQuantifierZero; case TSQuantifierZeroOrOne: switch (right) { case TSQuantifierZero: - result = TSQuantifierZero; - break; + return TSQuantifierZero; case TSQuantifierZeroOrOne: case TSQuantifierOne: - result = TSQuantifierZeroOrOne; - break; + return TSQuantifierZeroOrOne; case TSQuantifierZeroOrMore: case TSQuantifierOneOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; }; - break; case TSQuantifierZeroOrMore: switch (right) { case TSQuantifierZero: - result = TSQuantifierZero; - break; + return TSQuantifierZero; case TSQuantifierZeroOrOne: case TSQuantifierZeroOrMore: case TSQuantifierOne: case TSQuantifierOneOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; }; - break; case TSQuantifierOne: - result = right; - break; + return right; case TSQuantifierOneOrMore: switch (right) { case TSQuantifierZero: - result = TSQuantifierZero; - break; + return TSQuantifierZero; case TSQuantifierZeroOrOne: case TSQuantifierZeroOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; case TSQuantifierOne: case TSQuantifierOneOrMore: - result = TSQuantifierOneOrMore; - break; + return TSQuantifierOneOrMore; }; - break; } - return result; + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! } static TSQuantifier quantifier_join( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result = TSQuantifierZero; // initialized to make compiler happy, but all cases should be covered below! 
switch (left) { case TSQuantifierZero: switch (right) { case TSQuantifierZero: - result = TSQuantifierZero; - break; + return TSQuantifierZero; case TSQuantifierZeroOrOne: case TSQuantifierOne: - result = TSQuantifierZeroOrOne; - break; + return TSQuantifierZeroOrOne; case TSQuantifierZeroOrMore: case TSQuantifierOneOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; }; - break; case TSQuantifierZeroOrOne: switch (right) { case TSQuantifierZero: case TSQuantifierZeroOrOne: case TSQuantifierOne: - result = TSQuantifierZeroOrOne; + return TSQuantifierZeroOrOne; break; case TSQuantifierZeroOrMore: case TSQuantifierOneOrMore: - result = TSQuantifierZeroOrMore; + return TSQuantifierZeroOrMore; break; }; - break; case TSQuantifierZeroOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; case TSQuantifierOne: switch (right) { case TSQuantifierZero: case TSQuantifierZeroOrOne: - result = TSQuantifierZeroOrOne; - break; + return TSQuantifierZeroOrOne; case TSQuantifierZeroOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; case TSQuantifierOne: - result = TSQuantifierOne; - break; + return TSQuantifierOne; case TSQuantifierOneOrMore: - result = TSQuantifierOneOrMore; - break; + return TSQuantifierOneOrMore; }; - break; case TSQuantifierOneOrMore: switch (right) { case TSQuantifierZero: case TSQuantifierZeroOrOne: case TSQuantifierZeroOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; case TSQuantifierOne: case TSQuantifierOneOrMore: - result = TSQuantifierOneOrMore; - break; + return TSQuantifierOneOrMore; }; - break; } - return result; + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! } static TSQuantifier quantifier_add( TSQuantifier left, TSQuantifier right ) { - TSQuantifier result = TSQuantifierZero; // initialized to make compiler happy, but all cases should be covered below! 
switch (left) { case TSQuantifierZero: - result = right; - break; + return right; case TSQuantifierZeroOrOne: switch (right) { case TSQuantifierZero: - result = TSQuantifierZeroOrOne; - break; + return TSQuantifierZeroOrOne; case TSQuantifierZeroOrOne: case TSQuantifierZeroOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; case TSQuantifierOne: case TSQuantifierOneOrMore: - result = TSQuantifierOneOrMore; - break; + return TSQuantifierOneOrMore; }; - break; case TSQuantifierZeroOrMore: switch (right) { case TSQuantifierZero: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; case TSQuantifierZeroOrOne: case TSQuantifierZeroOrMore: - result = TSQuantifierZeroOrMore; - break; + return TSQuantifierZeroOrMore; case TSQuantifierOne: case TSQuantifierOneOrMore: - result = TSQuantifierOneOrMore; - break; + return TSQuantifierOneOrMore; }; - break; case TSQuantifierOne: switch (right) { case TSQuantifierZero: - result = TSQuantifierOne; - break; + return TSQuantifierOne; case TSQuantifierZeroOrOne: case TSQuantifierZeroOrMore: case TSQuantifierOne: case TSQuantifierOneOrMore: - result = TSQuantifierOneOrMore; - break; + return TSQuantifierOneOrMore; }; - break; case TSQuantifierOneOrMore: - result = TSQuantifierOneOrMore; - break; + return TSQuantifierOneOrMore; } - return result; + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
} // Create new capture quantifiers structure From ec9b00e5c684bbfc6d107769a6da403db830dd54 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 18:06:28 +0100 Subject: [PATCH 10/16] Handle multiple top-level alternations correctly --- cli/src/tests/query_test.rs | 18 ++++++++++++++++++ lib/src/query.c | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 0513f3a1..6634c62f 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -4003,6 +4003,24 @@ fn test_capture_quantifiers() { (3, "z", CaptureQuantifier::Zero), ], }, + Row { + description: "multiple alternatives", + language: get_language("javascript"), + pattern: r#" + [ + (array (identifier) @x) + (function_declaration name: (identifier)+ @x) + ] + [ + (array (identifier) @x) + (function_declaration name: (identifier)+ @x) + ] + "#, + capture_quantifiers: &[ + (0, "x", CaptureQuantifier::OneOrMore), + (1, "x", CaptureQuantifier::OneOrMore), + ], + }, ]; allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 7d2bd8cf..af393572 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1990,7 +1990,7 @@ static TSQueryError ts_query__parse_pattern( return e; } - if(start_index == 0) { + if(start_index == starting_step_index) { capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); } else { capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); From 93db8637296e858f857014a6817d6969c19f4986 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 18:09:57 +0100 Subject: [PATCH 11/16] Remove obsolete FIXMEs --- lib/src/query.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index af393572..b43a92c0 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1607,7 +1607,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } // 
Mark as indefinite any step with captures that are used in predicates. - Array(uint16_t) predicate_capture_ids = array_new(); // FIXME + Array(uint16_t) predicate_capture_ids = array_new(); for (unsigned i = 0; i < self->patterns.size; i++) { QueryPattern *pattern = &self->patterns.contents[i]; @@ -1715,7 +1715,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_delete(&deeper_states); array_delete(&final_step_indices); array_delete(&parent_step_indices); - array_delete(&predicate_capture_ids); // FIXME + array_delete(&predicate_capture_ids); state_predecessor_map_delete(&predecessor_map); return all_patterns_are_valid; From 99e74fa0f5cd4c5b18fde5e2c97d04afa522b350 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 18:16:52 +0100 Subject: [PATCH 12/16] Move quantifier addition out of loop and drop conditional --- lib/src/query.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index b43a92c0..f0f02822 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -2404,14 +2404,13 @@ static TSQueryError ts_query__parse_pattern( length ); + // Add the capture quantifier + capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); + uint32_t step_index = starting_step_index; for (;;) { QueryStep *step = &self->steps.contents[step_index]; query_step__add_capture(step, capture_id); - // Add only once, not for every branch, lest the quantifier will be '+' instead of '1' - if (step_index == starting_step_index) { - capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); - } if ( step->alternative_index != NONE && step->alternative_index > step_index && From acd3d32c36bb8438c7f4c6fcf9340e1bf59cddb3 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 18:18:21 +0100 Subject: [PATCH 13/16] Remove reference to strings from quantifier-only function --- lib/binding_rust/bindings.rs | 5 ++--- 
lib/include/tree_sitter/api.h | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index bd4e1501..a79f432e 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -672,9 +672,8 @@ extern "C" { ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the quantifier of the query's captures, or one of the query's string"] - #[doc = " literals. Each capture and string is associated with a numeric id based"] - #[doc = " on the order that it appeared in the query's source."] + #[doc = " Get the quantifier of the query's captures. Each capture is * associated"] + #[doc = " with a numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_quantifier_for_id( arg1: *const TSQuery, pattern_id: u32, diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 4a150511..7266fba7 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -750,9 +750,8 @@ const char *ts_query_capture_name_for_id( ); /** - * Get the quantifier of the query's captures, or one of the query's string - * literals. Each capture and string is associated with a numeric id based - * on the order that it appeared in the query's source. + * Get the quantifier of the query's captures. Each capture is * associated + * with a numeric id based on the order that it appeared in the query's source. 
*/ TSQuantifier ts_query_capture_quantifier_for_id( const TSQuery *, From 70aee901ac78c3071271aafca37ba80072b243c9 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 18:20:24 +0100 Subject: [PATCH 14/16] Reduce error handling logic --- lib/src/query.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index f0f02822..f42e6c77 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -2291,12 +2291,9 @@ static TSQueryError ts_query__parse_pattern( is_immediate, &field_capture_quantifiers ); - if (e == PARENT_DONE) { - capture_quantifiers_delete(&field_capture_quantifiers); - return TSQueryErrorSyntax; - } if (e) { capture_quantifiers_delete(&field_capture_quantifiers); + if (e == PARENT_DONE) e = TSQueryErrorSyntax; return e; } From c76d8ee0761a9c7a3c93ff370a2adea20399ed70 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 18:41:33 +0100 Subject: [PATCH 15/16] Represent quantifiers using bytes instead of ints --- lib/src/query.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index f42e6c77..7611e954 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -123,7 +123,7 @@ typedef struct { /** * CaptureQuantififers - a data structure holding the quantifiers of pattern captures. */ -typedef Array(TSQuantifier) CaptureQuantifiers; +typedef Array(uint8_t) CaptureQuantifiers; /* * PatternEntry - Information about the starting point for matching a particular @@ -645,7 +645,7 @@ static TSQuantifier capture_quantifier_for_id( const CaptureQuantifiers *self, uint16_t id ) { - return (self->size <= id) ? TSQuantifierZero : *array_get(self, id); + return (self->size <= id) ? 
TSQuantifierZero : (TSQuantifier) *array_get(self, id); } // Add the given quantifier to the current value for id @@ -657,8 +657,8 @@ static void capture_quantifiers_add_for_id( if (self->size <= id) { array_grow_by(self, id + 1 - self->size); } - TSQuantifier *own_quantifier = array_get(self, id); - *own_quantifier = quantifier_add(*own_quantifier, quantifier); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier); } // Point-wise add the given quantifiers to the current values @@ -670,9 +670,9 @@ static void capture_quantifiers_add_all( array_grow_by(self, quantifiers->size - self->size); } for (uint16_t id = 0; id < quantifiers->size; id++) { - TSQuantifier *quantifier = array_get(quantifiers, id); - TSQuantifier *own_quantifier = array_get(self, id); - *own_quantifier = quantifier_add(*own_quantifier, *quantifier); + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); } } @@ -682,8 +682,8 @@ static void capture_quantifiers_mul( TSQuantifier quantifier ) { for (uint16_t id = 0; id < self->size; id++) { - TSQuantifier *own_quantifier = array_get(self, id); - *own_quantifier = quantifier_mul(*own_quantifier, quantifier); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); } } @@ -696,13 +696,13 @@ static void capture_quantifiers_join_all( array_grow_by(self, quantifiers->size - self->size); } for (uint32_t id = 0; id < quantifiers->size; id++) { - TSQuantifier *quantifier = array_get(quantifiers, id); - TSQuantifier *own_quantifier = array_get(self, id); - *own_quantifier = quantifier_join(*own_quantifier, *quantifier); + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = 
(uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); } for (uint32_t id = quantifiers->size; id < self->size; id++) { - TSQuantifier *own_quantifier = array_get(self, id); - *own_quantifier = quantifier_join(*own_quantifier, TSQuantifierZero); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero); } } From 9dace8f9fe5629c9638265d9f928774b66b88a94 Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 11 Jan 2022 19:08:32 +0100 Subject: [PATCH 16/16] Add explicit breaks to prevent fall through errors --- lib/src/query.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/src/query.c b/lib/src/query.c index 7611e954..1b3da7d3 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -482,6 +482,7 @@ static TSQuantifier quantifier_mul( case TSQuantifierOneOrMore: return TSQuantifierZeroOrMore; }; + break; case TSQuantifierZeroOrMore: switch (right) { case TSQuantifierZero: @@ -492,6 +493,7 @@ static TSQuantifier quantifier_mul( case TSQuantifierOneOrMore: return TSQuantifierZeroOrMore; }; + break; case TSQuantifierOne: return right; case TSQuantifierOneOrMore: @@ -505,6 +507,7 @@ static TSQuantifier quantifier_mul( case TSQuantifierOneOrMore: return TSQuantifierOneOrMore; }; + break; } return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
} @@ -526,6 +529,7 @@ static TSQuantifier quantifier_join( case TSQuantifierOneOrMore: return TSQuantifierZeroOrMore; }; + break; case TSQuantifierZeroOrOne: switch (right) { case TSQuantifierZero: @@ -538,6 +542,7 @@ static TSQuantifier quantifier_join( return TSQuantifierZeroOrMore; break; }; + break; case TSQuantifierZeroOrMore: return TSQuantifierZeroOrMore; case TSQuantifierOne: @@ -552,6 +557,7 @@ static TSQuantifier quantifier_join( case TSQuantifierOneOrMore: return TSQuantifierOneOrMore; }; + break; case TSQuantifierOneOrMore: switch (right) { case TSQuantifierZero: @@ -562,6 +568,7 @@ static TSQuantifier quantifier_join( case TSQuantifierOneOrMore: return TSQuantifierOneOrMore; }; + break; } return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! } @@ -585,6 +592,7 @@ static TSQuantifier quantifier_add( case TSQuantifierOneOrMore: return TSQuantifierOneOrMore; }; + break; case TSQuantifierZeroOrMore: switch (right) { case TSQuantifierZero: @@ -596,6 +604,7 @@ static TSQuantifier quantifier_add( case TSQuantifierOneOrMore: return TSQuantifierOneOrMore; }; + break; case TSQuantifierOne: switch (right) { case TSQuantifierZero: @@ -606,6 +615,7 @@ static TSQuantifier quantifier_add( case TSQuantifierOneOrMore: return TSQuantifierOneOrMore; }; + break; case TSQuantifierOneOrMore: return TSQuantifierOneOrMore; }