diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 5bcbac42..9d398f58 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -665,6 +665,15 @@ extern "C" { length: *mut u32, ) -> *const ::std::os::raw::c_char; } +extern "C" { + #[doc = " Get the suffix of one of the query's captures, or one of the query's"] + #[doc = " string literals. Each capture and string is associated with a numeric"] + #[doc = " id based on the order that it appeared in the query's source."] + pub fn ts_query_capture_suffix_for_id( + arg1: *const TSQuery, + id: u32, + ) -> ::std::os::raw::c_char; +} extern "C" { pub fn ts_query_string_value_for_id( arg1: *const TSQuery, diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index cf8437b8..429cd47c 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -98,12 +98,34 @@ pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); pub struct Query { ptr: NonNull, capture_names: Vec, + capture_suffixes: Vec, text_predicates: Vec>, property_settings: Vec>, property_predicates: Vec>, general_predicates: Vec>, } +/// A suffix indicating the multiplicity of the capture value +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum QueryCaptureSuffix { + One, + OneOrMore, + ZeroOrMore, + ZeroOrOne, +} + +impl From for QueryCaptureSuffix { + fn from(value: u8) -> QueryCaptureSuffix { + match value { + b'\0' => QueryCaptureSuffix::One, + b'+' => QueryCaptureSuffix::OneOrMore, + b'*' => QueryCaptureSuffix::ZeroOrMore, + b'?' => QueryCaptureSuffix::ZeroOrOne, + _ => panic!("Unrecognized suffix: {}", value as char), + } + } +} + /// A stateful object for executing a `Query` on a syntax `Tree`. pub struct QueryCursor { ptr: NonNull, @@ -1306,6 +1328,7 @@ impl Query { let mut result = Query { ptr: unsafe { NonNull::new_unchecked(ptr) }, capture_names: Vec::with_capacity(capture_count as usize), + capture_suffixes: Vec::with_capacity(capture_count as usize), text_predicates: Vec::with_capacity(pattern_count), property_predicates: Vec::with_capacity(pattern_count), property_settings: Vec::with_capacity(pattern_count), @@ -1321,6 +1344,8 @@ impl Query { let name = slice::from_raw_parts(name, length as usize); let name = str::from_utf8_unchecked(name); result.capture_names.push(name.to_string()); + let suffix = ffi::ts_query_capture_suffix_for_id(ptr, i) as u8; + result.capture_suffixes.push(suffix.into()); } } @@ -1524,6 +1549,11 @@ impl Query { &self.capture_names } + /// Get the suffixes of the captures used in the query. + pub fn capture_suffixes(&self) -> &[QueryCaptureSuffix] { + &self.capture_suffixes + } + /// Get the index for a given capture name. pub fn capture_index_for_name(&self, name: &str) -> Option { self.capture_names diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 33b8c8f4..68b67668 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -740,6 +740,10 @@ const char *ts_query_capture_name_for_id( uint32_t id, uint32_t *length ); +char ts_query_capture_suffix_for_id( + const TSQuery *, + uint32_t id +); const char *ts_query_string_value_for_id( const TSQuery *, uint32_t id, diff --git a/lib/src/query.c b/lib/src/query.c index 51cc6c17..734d9bcf 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -39,6 +39,9 @@ typedef struct { * was not specified. * - `capture_ids` - An array of integers representing the names of captures * associated with this node in the pattern, terminated by a `NONE` value. + * - `capture_suffixes` - An array of capture suffixes ('\0\, '+', '*', or '?') + * corresponding to the elements in `capture_ids`, terminated by a `NONE` + * value. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. * - `negated_field_list_id` - An id representing a set of fields that must @@ -72,7 +75,7 @@ typedef struct { * Steps also store some derived state that summarizes how they relate to other * steps within the same pattern. This is used to optimize the matching process: * - `contains_captures` - Indicates that this step or one of its child steps - * has a non-empty `capture_ids` list. + * has non-empty `capture_ids` and `capture_suffixes` lists. * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then * it and all of its subsequent sibling steps within the same parent pattern * are guaranteed to match. @@ -87,6 +90,7 @@ typedef struct { TSSymbol supertype_symbol; TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; + uint16_t capture_suffixes[MAX_STEP_CAPTURE_COUNT]; uint16_t depth; uint16_t alternative_index; uint16_t negated_field_list_id; @@ -118,6 +122,7 @@ typedef struct { typedef struct { Array(char) characters; Array(Slice) slices; + Array(char) suffixes; } SymbolTable; /* @@ -461,12 +466,14 @@ static SymbolTable symbol_table_new(void) { return (SymbolTable) { .characters = array_new(), .slices = array_new(), + .suffixes = array_new(), }; } static void symbol_table_delete(SymbolTable *self) { array_delete(&self->characters); array_delete(&self->slices); + array_delete(&self->suffixes); } static int symbol_table_id_for_name( @@ -494,10 +501,18 @@ static const char *symbol_table_name_for_id( return &self->characters.contents[slice.offset]; } +static char symbol_table_suffix_for_id( + const SymbolTable *self, + uint16_t id +) { + return self->suffixes.contents[id]; +} + static uint16_t symbol_table_insert_name( SymbolTable *self, const char *name, - uint32_t length + uint32_t length, + char suffix ) { int id = symbol_table_id_for_name(self, name, length); if (id >= 0) return (uint16_t)id; @@ -509,6 +524,7 @@ static uint16_t symbol_table_insert_name( memcpy(&self->characters.contents[slice.offset], name, length); self->characters.contents[self->characters.size - 1] = 0; array_push(&self->slices, slice); + array_push(&self->suffixes, suffix); return self->slices.size - 1; } @@ -526,6 +542,7 @@ static QueryStep query_step__new( .depth = depth, .field = 0, .capture_ids = {NONE, NONE, NONE}, + .capture_suffixes = {NONE, NONE, NONE}, .alternative_index = NONE, .negated_field_list_id = 0, .contains_captures = false, @@ -539,10 +556,11 @@ static QueryStep query_step__new( }; } -static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { +static void query_step__add_capture(QueryStep *self, uint16_t capture_id, char capture_suffix) { for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { if (self->capture_ids[i] == NONE) { self->capture_ids[i] = capture_id; + self->capture_suffixes[i] = capture_suffix; break; } } @@ -552,10 +570,13 @@ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { if (self->capture_ids[i] == capture_id) { self->capture_ids[i] = NONE; + self->capture_suffixes[i] = NONE; while (i + 1 < MAX_STEP_CAPTURE_COUNT) { if (self->capture_ids[i + 1] == NONE) break; self->capture_ids[i] = self->capture_ids[i + 1]; + self->capture_suffixes[i] = self->capture_suffixes[i + 1]; self->capture_ids[i + 1] = NONE; + self->capture_suffixes[i + 1] = NONE; i++; } break; @@ -1354,7 +1375,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } // Mark as indefinite any step with captures that are used in predicates. - Array(uint16_t) predicate_capture_ids = array_new(); + Array(uint16_t) predicate_capture_ids = array_new(); // FIXME for (unsigned i = 0; i < self->patterns.size; i++) { QueryPattern *pattern = &self->patterns.contents[i]; @@ -1462,7 +1483,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_delete(&deeper_states); array_delete(&final_step_indices); array_delete(&parent_step_indices); - array_delete(&predicate_capture_ids); + array_delete(&predicate_capture_ids); // FIXME state_predecessor_map_delete(&predecessor_map); return all_patterns_are_valid; @@ -1590,7 +1611,8 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, predicate_name, - length + length, + '\0' ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -1643,7 +1665,8 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, self->string_buffer.contents, - self->string_buffer.size + self->string_buffer.size, + '\0' ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -1659,7 +1682,8 @@ static TSQueryError ts_query__parse_predicate( uint16_t id = symbol_table_insert_name( &self->predicate_values, symbol_start, - length + length, + '\0' ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, @@ -2034,9 +2058,12 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Parse suffixes modifiers for this pattern + char suffix = '\0'; for (;;) { // Parse the one-or-more operator. if (stream->next == '+') { + suffix = '+'; + stream_advance(stream); stream_skip_whitespace(stream); @@ -2049,6 +2076,8 @@ static TSQueryError ts_query__parse_pattern( // Parse the zero-or-more repetition operator. else if (stream->next == '*') { + suffix = '*'; + stream_advance(stream); stream_skip_whitespace(stream); @@ -2067,6 +2096,8 @@ static TSQueryError ts_query__parse_pattern( // Parse the optional operator. else if (stream->next == '?') { + suffix = '?'; + stream_advance(stream); stream_skip_whitespace(stream); @@ -2090,13 +2121,14 @@ static TSQueryError ts_query__parse_pattern( uint16_t capture_id = symbol_table_insert_name( &self->captures, capture_name, - length + length, + suffix ); uint32_t step_index = starting_step_index; for (;;) { QueryStep *step = &self->steps.contents[step_index]; - query_step__add_capture(step, capture_id); + query_step__add_capture(step, capture_id, suffix); if ( step->alternative_index != NONE && step->alternative_index > step_index && @@ -2279,6 +2311,13 @@ const char *ts_query_capture_name_for_id( return symbol_table_name_for_id(&self->captures, index, length); } +char ts_query_capture_suffix_for_id( + const TSQuery *self, + uint32_t index +) { + return symbol_table_suffix_for_id(&self->captures, index); +} + const char *ts_query_string_value_for_id( const TSQuery *self, uint32_t index,