From 25c7189180849be27b1e552d27f0488e3bd5900d Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 4 Jul 2024 20:51:27 -0400 Subject: [PATCH] feat(lib): add `ts_query_end_byte_for_pattern` --- cli/src/tests/query_test.rs | 26 ++++++++++++++++---------- lib/binding_rust/bindings.rs | 4 ++++ lib/binding_rust/lib.rs | 15 +++++++++++++++ lib/include/tree_sitter/api.h | 8 ++++++++ lib/src/query.c | 9 +++++++++ 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 34e59209..90d940c8 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -3640,30 +3640,27 @@ fn test_query_text_callback_returns_chunks() { } #[test] -fn test_query_start_byte_for_pattern() { +fn test_query_start_end_byte_for_pattern() { let language = get_language("javascript"); - let patterns_1 = r#" + let patterns_1 = indoc! {r#" "+" @operator "-" @operator "*" @operator "=" @operator "=>" @operator - "# - .trim_start(); + "#}; - let patterns_2 = " + let patterns_2 = indoc! {" (identifier) @a (string) @b - " - .trim_start(); + "}; - let patterns_3 = " + let patterns_3 = indoc! {" ((identifier) @b (#match? @b i)) (function_declaration name: (identifier) @c) (method_definition name: (property_identifier) @d) - " - .trim_start(); + "}; let mut source = String::new(); source += patterns_1; @@ -3673,11 +3670,20 @@ fn test_query_start_byte_for_pattern() { let query = Query::new(&language, &source).unwrap(); assert_eq!(query.start_byte_for_pattern(0), 0); + assert_eq!(query.end_byte_for_pattern(0), "\"+\" @operator\n".len()); assert_eq!(query.start_byte_for_pattern(5), patterns_1.len()); + assert_eq!( + query.end_byte_for_pattern(5), + patterns_1.len() + "(identifier) @a\n".len() + ); assert_eq!( query.start_byte_for_pattern(7), patterns_1.len() + patterns_2.len() ); + assert_eq!( + query.end_byte_for_pattern(7), + patterns_1.len() + patterns_2.len() + "((identifier) @b (#match? @b i))\n".len() + ); } #[test] diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 85201987..dce2a21c 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -559,6 +559,10 @@ extern "C" { #[doc = " Get the byte offset where the given pattern starts in the query's source.\n\n This can be useful when combining queries by concatenating their source\n code strings."] pub fn ts_query_start_byte_for_pattern(self_: *const TSQuery, pattern_index: u32) -> u32; } +extern "C" { + #[doc = " Get the byte offset where the given pattern ends in the query's source.\n\n This can be useful when combining queries by concatenating their source\n code strings."] + pub fn ts_query_end_byte_for_pattern(self_: *const TSQuery, pattern_index: u32) -> u32; +} extern "C" { #[doc = " Get all of the predicates for the given pattern in the query.\n\n The predicates are represented as a single array of steps. There are three\n types of steps in this array, which correspond to the three legal values for\n the `type` field:\n - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names\n of captures. Their `value_id` can be used with the\n [`ts_query_capture_name_for_id`] function to obtain the name of the capture.\n - `TSQueryPredicateStepTypeString` - Steps with this type represent literal\n strings. Their `value_id` can be used with the\n [`ts_query_string_value_for_id`] function to obtain their string value.\n - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*\n that represent the end of an individual predicate. If a pattern has two\n predicates, then there will be two steps with this `type` in the array."] pub fn ts_query_predicates_for_pattern( diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index afbe029e..c1d78c1d 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -2152,6 +2152,21 @@ impl Query { } } + /// Get the byte offset where the given pattern ends in the query's + /// source. + #[doc(alias = "ts_query_end_byte_for_pattern")] + #[must_use] + pub fn end_byte_for_pattern(&self, pattern_index: usize) -> usize { + assert!( + pattern_index < self.text_predicates.len(), + "Pattern index is {pattern_index} but the pattern count is {}", + self.text_predicates.len(), + ); + unsafe { + ffi::ts_query_end_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize + } + } + /// Get the number of patterns in the query. #[doc(alias = "ts_query_pattern_count")] #[must_use] diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index deb2364e..d7cd31aa 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -838,6 +838,14 @@ uint32_t ts_query_string_count(const TSQuery *self); */ uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index); +/** + * Get the byte offset where the given pattern ends in the query's source. + * + * This can be useful when combining queries by concatenating their source + * code strings. + */ +uint32_t ts_query_end_byte_for_pattern(const TSQuery *self, uint32_t pattern_index); + /** * Get all of the predicates for the given pattern in the query. * diff --git a/lib/src/query.c b/lib/src/query.c index 10587669..f93a688f 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -146,6 +146,7 @@ typedef struct { Slice steps; Slice predicate_steps; uint32_t start_byte; + uint32_t end_byte; bool is_non_local; } QueryPattern; @@ -2715,6 +2716,7 @@ TSQuery *ts_query_new( QueryPattern *pattern = array_back(&self->patterns); pattern->steps.length = self->steps.size - start_step_index; pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; + pattern->end_byte = stream_offset(&stream); // If any pattern could not be parsed, then report the error information // and terminate. @@ -2873,6 +2875,13 @@ uint32_t ts_query_start_byte_for_pattern( return self->patterns.contents[pattern_index].start_byte; } +uint32_t ts_query_end_byte_for_pattern( + const TSQuery *self, + uint32_t pattern_index +) { + return self->patterns.contents[pattern_index].end_byte; +} + bool ts_query_is_pattern_rooted( const TSQuery *self, uint32_t pattern_index