Merge pull request #2431 from tree-sitter/text-provider1-fin

Improve TextProvider implementation to allow owned text
This commit is contained in:
Andrew Hlynskyi 2023-08-01 10:40:34 +03:00 committed by GitHub
commit 3672463df1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 241 additions and 43 deletions

View file

@ -9,4 +9,5 @@ mod query_test;
mod tags_test;
mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;

View file

@ -0,0 +1,173 @@
use std::{iter, sync::Arc};
use crate::tests::helpers::fixtures::get_language;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
/// Parses `text` in one shot and returns the resulting tree together with the
/// language obtained from `get_language("c")`.
///
/// # Panics
///
/// Panics if the language cannot be set on the parser or if parsing yields no tree.
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
    let c_language = get_language("c");

    let mut c_parser = Parser::new();
    c_parser.set_language(c_language).unwrap();

    let parsed = c_parser.parse(text, None).unwrap();
    (parsed, c_language)
}
/// Parses input that is handed to the parser chunk by chunk through `callback`
/// and returns the resulting tree together with the language obtained from
/// `get_language("c")`.
///
/// As a sanity check, the first child of the root node must be a `comment` node.
///
/// # Panics
///
/// Panics if the language cannot be set on the parser, if parsing yields no
/// tree, if the root node has no children, or if its first child is not a
/// `comment`.
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
    T: AsRef<[u8]>,
    F: FnMut(usize, Point) -> T,
{
    let language = get_language("c");
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();
    let tree = parser.parse_with(callback, None).unwrap();
    // Inspect the tree through a borrow; cloning it just to read one node kind
    // is unnecessary.
    assert_eq!("comment", tree.root_node().child(0).unwrap().kind());
    (tree, language)
}
/// Runs a query matching a `comment` node whose text equals `// comment`
/// against the root of `tree`, reading node text through `text`.
///
/// Asserts that the first capture produced is a `comment` node and that the
/// capture's own index agrees with the index reported by the captures iterator.
///
/// # Panics
///
/// Panics if the query fails to compile, if no capture is produced, or if one
/// of the assertions fails.
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: Language) {
    let comment_query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
    let mut query_cursor = QueryCursor::new();
    let root = tree.root_node();

    let mut capture_iter = query_cursor.captures(&comment_query, root, text);
    let (first_match, capture_idx) = capture_iter.next().unwrap();
    let first_capture = first_match.captures[capture_idx];

    assert_eq!(first_capture.index as usize, capture_idx);
    assert_eq!("comment", first_capture.node.kind());
}
/// Parses `parser_text` in one shot, then runs the comment query over the
/// resulting tree using `text_provider` as the source of node text.
fn check_parsing<I: AsRef<[u8]>>(
    parser_text: impl AsRef<[u8]>,
    text_provider: impl TextProvider<I>,
) {
    let parsed = parse_text(parser_text);
    tree_query(&parsed.0, text_provider, parsed.1);
}
/// Parses input supplied through `parser_callback`, then runs the comment query
/// over the resulting tree using `text_provider` as the source of node text.
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
    parser_callback: &mut F,
    text_provider: impl TextProvider<I>,
) where
    T: AsRef<[u8]>,
    F: FnMut(usize, Point) -> T,
{
    let parsed = parse_text_with(parser_callback);
    tree_query(&parsed.0, text_provider, parsed.1);
}
/// A byte slice borrowed from a `&str` works as a text provider, whether the
/// parser itself is fed the `&str` or its bytes.
#[test]
fn test_text_provider_for_str_slice() {
    let source: &str = "// comment";

    // Parser input as `&str`.
    check_parsing(source, source.as_bytes());
    // Parser input as `&[u8]`.
    check_parsing(source.as_bytes(), source.as_bytes());
}
/// Bytes borrowed from a `String` work as a text provider, with the parser fed
/// an owned `String`, its bytes, or an explicit `AsRef<[u8]>` view of it.
#[test]
fn test_text_provider_for_string() {
    let source = String::from("// comment");

    // Parser input as an owned `String`.
    check_parsing(source.clone(), source.as_bytes());
    // Parser input as borrowed bytes.
    check_parsing(source.as_bytes(), source.as_bytes());
    // Parser input via a fully qualified `AsRef<[u8]>` conversion.
    check_parsing(<_ as AsRef<[u8]>>::as_ref(&source), source.as_bytes());
}
/// Bytes borrowed from a `Box<str>` work as a text provider across several
/// ways of borrowing the boxed string for the parser input.
#[test]
fn test_text_provider_for_box_of_str_slice() {
    let boxed: Box<str> = Box::from("// comment");

    // Parser input as borrowed bytes.
    check_parsing(boxed.as_bytes(), boxed.as_bytes());
    // Parser input via a fully qualified `AsRef<str>` conversion.
    check_parsing(<_ as AsRef<str>>::as_ref(&boxed), boxed.as_bytes());
    // Parser input via method-call `as_ref`, provider bytes taken from that same view.
    check_parsing(boxed.as_ref(), boxed.as_ref().as_bytes());
    // Parser input via method-call `as_ref`, provider bytes taken from the box.
    check_parsing(boxed.as_ref(), boxed.as_bytes());
}
/// A slice borrowed from a `Box<[u8]>` works as a text provider, whether it is
/// obtained through `as_ref` or through an explicit reborrow.
#[test]
fn test_text_provider_for_box_of_bytes_slice() {
    let boxed: Box<[u8]> = Box::from("// comment".as_bytes());

    // `as_ref` on both sides.
    check_parsing(boxed.as_ref(), boxed.as_ref());
    // `as_ref` for the parser, reborrow for the provider.
    check_parsing(boxed.as_ref(), &*boxed);
    // Reborrow on both sides.
    check_parsing(&*boxed, &*boxed);
}
/// A slice reborrowed from a `Vec<u8>` works as both parser input and text provider.
#[test]
fn test_text_provider_for_vec_of_bytes() {
    let buffer: Vec<u8> = "// comment".as_bytes().to_vec();

    check_parsing(&*buffer, &*buffer);
}
/// A slice borrowed from an `Arc<[u8]>` works as a text provider, with the
/// parser fed a reborrow, an `as_ref` view, or a clone of the `Arc` itself.
#[test]
fn test_text_provider_for_arc_of_bytes_slice() {
    let shared: Arc<[u8]> = Arc::from("// comment".as_bytes());

    // Reborrow on both sides.
    check_parsing(&*shared, &*shared);
    // `as_ref` on both sides.
    check_parsing(shared.as_ref(), shared.as_ref());
    // Parser input as an owned `Arc<[u8]>` handle.
    check_parsing(Arc::clone(&shared), shared.as_ref());
}
/// A closure yielding a borrowed `&str` chunk works as a text provider, both
/// for a tree parsed in one shot and for a tree parsed through a read callback.
#[test]
fn test_text_provider_callback_with_str_slice() {
    let source: &str = "// comment";

    check_parsing(source, |_node: Node<'_>| iter::once(source));

    // Hand the parser the whole text while the offset is inside it, and an
    // empty slice once the offset reaches the end.
    let mut read_chunk = |offset: usize, _point: Point| {
        if offset < source.len() {
            source.as_bytes()
        } else {
            Default::default()
        }
    };
    check_parsing_callback(&mut read_chunk, |_node: Node<'_>| iter::once(source));
}
/// A closure yielding a freshly allocated `String` chunk works as a text provider.
#[test]
fn test_text_provider_callback_with_owned_string_slice() {
    let source: &str = "// comment";

    // Hand the parser the whole text while the offset is inside it, and an
    // empty slice once the offset reaches the end.
    let mut read_chunk = |offset: usize, _point: Point| {
        if offset < source.len() {
            source.as_bytes()
        } else {
            Default::default()
        }
    };

    check_parsing_callback(&mut read_chunk, |_node: Node<'_>| {
        iter::once(String::from(source))
    });
}
/// A closure yielding a freshly allocated `Vec<u8>` chunk works as a text provider.
#[test]
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
    let source: &str = "// comment";

    // Hand the parser the whole text while the offset is inside it, and an
    // empty slice once the offset reaches the end.
    let mut read_chunk = |offset: usize, _point: Point| {
        if offset < source.len() {
            source.as_bytes()
        } else {
            Default::default()
        }
    };

    check_parsing_callback(&mut read_chunk, |_node: Node<'_>| {
        let owned: Vec<u8> = source.as_bytes().to_vec();
        iter::once(owned)
    });
}
/// A closure yielding a freshly allocated `Arc<[u8]>` chunk works as a text provider.
#[test]
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
    let source: &str = "// comment";

    // Hand the parser the whole text while the offset is inside it, and an
    // empty slice once the offset reaches the end.
    let mut read_chunk = |offset: usize, _point: Point| {
        if offset < source.len() {
            source.as_bytes()
        } else {
            Default::default()
        }
    };

    check_parsing_callback(&mut read_chunk, |_node: Node<'_>| {
        let shared: Arc<[u8]> = Arc::from(source.as_bytes());
        iter::once(shared)
    });
}

View file

@ -165,7 +165,7 @@ where
struct HighlightIterLayer<'a> {
_tree: Tree,
cursor: QueryCursor,
captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8]>>,
captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
config: &'a HighlightConfiguration,
highlight_end_stack: Vec<usize>,
scope_stack: Vec<LocalScope<'a>>,

View file

@ -175,27 +175,30 @@ pub struct QueryMatch<'cursor, 'tree> {
}
/// A sequence of `QueryMatch`es associated with a given `QueryCursor`.
pub struct QueryMatches<'a, 'tree: 'a, T: TextProvider<'a>> {
pub struct QueryMatches<'a, 'tree: 'a, T: TextProvider<I>, I: AsRef<[u8]>> {
ptr: *mut ffi::TSQueryCursor,
query: &'a Query,
text_provider: T,
buffer1: Vec<u8>,
buffer2: Vec<u8>,
_tree: PhantomData<&'tree ()>,
_phantom: PhantomData<(&'tree (), I)>,
}
/// A sequence of `QueryCapture`s associated with a given `QueryCursor`.
pub struct QueryCaptures<'a, 'tree: 'a, T: TextProvider<'a>> {
pub struct QueryCaptures<'a, 'tree: 'a, T: TextProvider<I>, I: AsRef<[u8]>> {
ptr: *mut ffi::TSQueryCursor,
query: &'a Query,
text_provider: T,
buffer1: Vec<u8>,
buffer2: Vec<u8>,
_tree: PhantomData<&'tree ()>,
_phantom: PhantomData<(&'tree (), I)>,
}
pub trait TextProvider<'a> {
type I: Iterator<Item = &'a [u8]> + 'a;
pub trait TextProvider<I>
where
I: AsRef<[u8]>,
{
type I: Iterator<Item = I>;
fn text(&mut self, node: Node) -> Self::I;
}
@ -1900,12 +1903,12 @@ impl QueryCursor {
/// Because multiple patterns can match the same set of nodes, one match may contain
/// captures that appear *before* some of the captures from a previous match.
#[doc(alias = "ts_query_cursor_exec")]
pub fn matches<'a, 'tree: 'a, T: TextProvider<'a> + 'a>(
pub fn matches<'a, 'tree: 'a, T: TextProvider<I>, I: AsRef<[u8]>>(
&'a mut self,
query: &'a Query,
node: Node<'tree>,
text_provider: T,
) -> QueryMatches<'a, 'tree, T> {
) -> QueryMatches<'a, 'tree, T, I> {
let ptr = self.ptr.as_ptr();
unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) };
QueryMatches {
@ -1914,7 +1917,7 @@ impl QueryCursor {
text_provider,
buffer1: Default::default(),
buffer2: Default::default(),
_tree: PhantomData,
_phantom: PhantomData,
}
}
@ -1923,12 +1926,12 @@ impl QueryCursor {
/// This is useful if you don't care about which pattern matched, and just want a single,
/// ordered sequence of captures.
#[doc(alias = "ts_query_cursor_exec")]
pub fn captures<'a, 'tree: 'a, T: TextProvider<'a> + 'a>(
pub fn captures<'a, 'tree: 'a, T: TextProvider<I>, I: AsRef<[u8]>>(
&'a mut self,
query: &'a Query,
node: Node<'tree>,
text_provider: T,
) -> QueryCaptures<'a, 'tree, T> {
) -> QueryCaptures<'a, 'tree, T, I> {
let ptr = self.ptr.as_ptr();
unsafe { ffi::ts_query_cursor_exec(self.ptr.as_ptr(), query.ptr.as_ptr(), node.0) };
QueryCaptures {
@ -1937,7 +1940,7 @@ impl QueryCursor {
text_provider,
buffer1: Default::default(),
buffer2: Default::default(),
_tree: PhantomData,
_phantom: PhantomData,
}
}
@ -2017,31 +2020,47 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
}
}
fn satisfies_text_predicates(
fn satisfies_text_predicates<I: AsRef<[u8]>>(
&self,
query: &Query,
buffer1: &mut Vec<u8>,
buffer2: &mut Vec<u8>,
text_provider: &mut impl TextProvider<'a>,
buffer1: &'a mut Vec<u8>,
buffer2: &'a mut Vec<u8>,
text_provider: &mut impl TextProvider<I>,
) -> bool {
fn get_text<'a, 'b: 'a, I: Iterator<Item = &'b [u8]>>(
struct NodeText<'a, T> {
buffer: &'a mut Vec<u8>,
mut chunks: I,
) -> &'a [u8] {
let first_chunk = chunks.next().unwrap_or(&[]);
if let Some(next_chunk) = chunks.next() {
buffer.clear();
buffer.extend_from_slice(first_chunk);
buffer.extend_from_slice(next_chunk);
for chunk in chunks {
buffer.extend_from_slice(chunk);
first_chunk: Option<T>,
}
impl<'a, T: AsRef<[u8]>> NodeText<'a, T> {
fn new(buffer: &'a mut Vec<u8>) -> Self {
Self {
buffer,
first_chunk: None,
}
}
fn get_text(&mut self, chunks: &mut impl Iterator<Item = T>) -> &[u8] {
self.first_chunk = chunks.next();
if let Some(next_chunk) = chunks.next() {
self.buffer.clear();
self.buffer
.extend_from_slice(self.first_chunk.as_ref().unwrap().as_ref());
self.buffer.extend_from_slice(next_chunk.as_ref());
for chunk in chunks {
self.buffer.extend_from_slice(chunk.as_ref());
}
self.buffer.as_slice()
} else if let Some(ref first_chunk) = self.first_chunk {
first_chunk.as_ref()
} else {
&[]
}
buffer.as_slice()
} else {
first_chunk
}
}
let mut node_text1 = NodeText::new(buffer1);
let mut node_text2 = NodeText::new(buffer2);
query.text_predicates[self.pattern_index]
.iter()
.all(|predicate| match predicate {
@ -2050,8 +2069,10 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
let node2 = self.nodes_for_capture_index(*j).next();
match (node1, node2) {
(Some(node1), Some(node2)) => {
let text1 = get_text(buffer1, text_provider.text(node1));
let text2 = get_text(buffer2, text_provider.text(node2));
let mut text1 = text_provider.text(node1);
let mut text2 = text_provider.text(node2);
let text1 = node_text1.get_text(&mut text1);
let text2 = node_text2.get_text(&mut text2);
(text1 == text2) == *is_positive
}
_ => true,
@ -2061,7 +2082,8 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let text = get_text(buffer1, text_provider.text(node));
let mut text = text_provider.text(node);
let text = node_text1.get_text(&mut text);
(text == s.as_bytes()) == *is_positive
}
None => true,
@ -2071,7 +2093,8 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let text = get_text(buffer1, text_provider.text(node));
let mut text = text_provider.text(node);
let text = node_text1.get_text(&mut text);
r.is_match(text) == *is_positive
}
None => true,
@ -2091,7 +2114,7 @@ impl QueryProperty {
}
}
impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> {
impl<'a, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator for QueryMatches<'a, 'tree, T, I> {
type Item = QueryMatch<'a, 'tree>;
fn next(&mut self) -> Option<Self::Item> {
@ -2116,7 +2139,7 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> {
}
}
impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> {
impl<'a, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator for QueryCaptures<'a, 'tree, T, I> {
type Item = (QueryMatch<'a, 'tree>, usize);
fn next(&mut self) -> Option<Self::Item> {
@ -2148,7 +2171,7 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> {
}
}
impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> {
impl<'a, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> QueryMatches<'a, 'tree, T, I> {
#[doc(alias = "ts_query_cursor_set_byte_range")]
pub fn set_byte_range(&mut self, range: ops::Range<usize>) {
unsafe {
@ -2164,7 +2187,7 @@ impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> {
}
}
impl<'a, 'tree, T: TextProvider<'a>> QueryCaptures<'a, 'tree, T> {
impl<'a, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> QueryCaptures<'a, 'tree, T, I> {
#[doc(alias = "ts_query_cursor_set_byte_range")]
pub fn set_byte_range(&mut self, range: ops::Range<usize>) {
unsafe {
@ -2190,19 +2213,20 @@ impl<'cursor, 'tree> fmt::Debug for QueryMatch<'cursor, 'tree> {
}
}
impl<'a, F, I> TextProvider<'a> for F
impl<F, R, I> TextProvider<I> for F
where
F: FnMut(Node) -> I,
I: Iterator<Item = &'a [u8]> + 'a,
F: FnMut(Node) -> R,
R: Iterator<Item = I>,
I: AsRef<[u8]>,
{
type I = I;
type I = R;
fn text(&mut self, node: Node) -> Self::I {
(self)(node)
}
}
impl<'a> TextProvider<'a> for &'a [u8] {
impl<'a> TextProvider<&'a [u8]> for &'a [u8] {
type I = iter::Once<&'a [u8]>;
fn text(&mut self, node: Node) -> Self::I {