Merge pull request #2431 from tree-sitter/text-provider1-fin
Improve TextProvider implementation to allow owned text
This commit is contained in:
commit
3672463df1
4 changed files with 241 additions and 43 deletions
|
|
@ -9,4 +9,5 @@ mod query_test;
|
|||
mod tags_test;
|
||||
mod test_highlight_test;
|
||||
mod test_tags_test;
|
||||
mod text_provider_test;
|
||||
mod tree_test;
|
||||
|
|
|
|||
173
cli/src/tests/text_provider_test.rs
Normal file
173
cli/src/tests/text_provider_test.rs
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
use std::{iter, sync::Arc};
|
||||
|
||||
use crate::tests::helpers::fixtures::get_language;
|
||||
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
|
||||
|
||||
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
|
||||
let language = get_language("c");
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
(parser.parse(text, None).unwrap(), language)
|
||||
}
|
||||
|
||||
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
|
||||
where
|
||||
T: AsRef<[u8]>,
|
||||
F: FnMut(usize, Point) -> T,
|
||||
{
|
||||
let language = get_language("c");
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse_with(callback, None).unwrap();
|
||||
// eprintln!("{}", tree.clone().root_node().to_sexp());
|
||||
assert_eq!("comment", tree.clone().root_node().child(0).unwrap().kind());
|
||||
(tree, language)
|
||||
}
|
||||
|
||||
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: Language) {
|
||||
let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
|
||||
let mut cursor = QueryCursor::new();
|
||||
let mut captures = cursor.captures(&query, tree.root_node(), text);
|
||||
let (match_, idx) = captures.next().unwrap();
|
||||
let capture = match_.captures[idx];
|
||||
assert_eq!(capture.index as usize, idx);
|
||||
assert_eq!("comment", capture.node.kind());
|
||||
}
|
||||
|
||||
fn check_parsing<I: AsRef<[u8]>>(
|
||||
parser_text: impl AsRef<[u8]>,
|
||||
text_provider: impl TextProvider<I>,
|
||||
) {
|
||||
let (tree, language) = parse_text(parser_text);
|
||||
tree_query(&tree, text_provider, language);
|
||||
}
|
||||
|
||||
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
|
||||
parser_callback: &mut F,
|
||||
text_provider: impl TextProvider<I>,
|
||||
) where
|
||||
T: AsRef<[u8]>,
|
||||
F: FnMut(usize, Point) -> T,
|
||||
{
|
||||
let (tree, language) = parse_text_with(parser_callback);
|
||||
tree_query(&tree, text_provider, language);
|
||||
}
|
||||
|
||||
/// A byte slice borrowed from a `&str` works as the text provider, whether the
/// parser is fed the `&str` itself or its bytes.
#[test]
fn test_text_provider_for_str_slice() {
    let text = "// comment";
    let bytes = text.as_bytes();

    check_parsing(text, bytes);
    check_parsing(bytes, bytes);
}
|
||||
|
||||
/// A byte slice borrowed from a `String` works as the text provider, with the
/// parser fed an owned clone, the borrowed bytes, or an explicit `AsRef<[u8]>` view.
#[test]
fn test_text_provider_for_string() {
    let text = String::from("// comment");
    let bytes = text.as_bytes();

    check_parsing(text.clone(), bytes);
    check_parsing(bytes, bytes);
    check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), bytes);
}
|
||||
|
||||
/// A byte slice borrowed from a `Box<str>` works as the text provider, with the
/// parser fed the text through several different borrowing paths.
#[test]
fn test_text_provider_for_box_of_str_slice() {
    let text: Box<str> = Box::from("// comment");
    let bytes = text.as_bytes();

    check_parsing(bytes, bytes);
    check_parsing(<_ as AsRef<str>>::as_ref(&text), bytes);
    check_parsing(text.as_ref(), text.as_ref().as_bytes());
    check_parsing(text.as_ref(), bytes);
}
|
||||
|
||||
/// A byte slice borrowed from a `Box<[u8]>` works as the text provider, reached
/// either through `as_ref()` or through an explicit reborrow.
#[test]
fn test_text_provider_for_box_of_bytes_slice() {
    let text: Box<[u8]> = Box::from("// comment".as_bytes());

    check_parsing(text.as_ref(), text.as_ref());
    check_parsing(text.as_ref(), &*text);
    check_parsing(&*text, &*text);
}
|
||||
|
||||
/// A byte slice reborrowed from a `Vec<u8>` works both as parser input and as the
/// text provider.
#[test]
fn test_text_provider_for_vec_of_bytes() {
    let text: Vec<u8> = b"// comment".to_vec();

    check_parsing(&*text, &*text);
}
|
||||
|
||||
/// A byte slice borrowed from an `Arc<[u8]>` works as the text provider, with the
/// parser fed a reborrow, an `as_ref()` view, or a clone of the `Arc` itself.
#[test]
fn test_text_provider_for_arc_of_bytes_slice() {
    let text: Arc<[u8]> = Arc::from("// comment".as_bytes());

    check_parsing(&*text, &*text);
    check_parsing(text.as_ref(), text.as_ref());
    check_parsing(text.clone(), text.as_ref());
}
|
||||
|
||||
/// A closure yielding a borrowed `&str` chunk works as the text provider, both
/// when the tree was parsed in one shot and when it was parsed through a callback.
#[test]
fn test_text_provider_callback_with_str_slice() {
    let text: &str = "// comment";

    check_parsing(text, |_node: Node<'_>| iter::once(text));
    check_parsing_callback(
        // The parser callback must return the text starting at `offset`, and an
        // empty slice at or past the end of the input.
        &mut |offset: usize, _point| text.as_bytes().get(offset..).unwrap_or_default(),
        |_node: Node<'_>| iter::once(text),
    );
}
|
||||
|
||||
/// A closure yielding an owned `String` chunk works as the text provider.
#[test]
fn test_text_provider_callback_with_owned_string_slice() {
    let text: &str = "// comment";

    check_parsing_callback(
        // The parser callback must return the text starting at `offset`, and an
        // empty slice at or past the end of the input.
        &mut |offset: usize, _point| text.as_bytes().get(offset..).unwrap_or_default(),
        |_node: Node<'_>| {
            let slice: String = text.to_owned();
            iter::once(slice)
        },
    );
}
|
||||
|
||||
/// A closure yielding an owned `Vec<u8>` chunk works as the text provider.
#[test]
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
    let text: &str = "// comment";

    check_parsing_callback(
        // The parser callback must return the text starting at `offset`, and an
        // empty slice at or past the end of the input.
        &mut |offset: usize, _point| text.as_bytes().get(offset..).unwrap_or_default(),
        |_node: Node<'_>| {
            let slice: Vec<u8> = text.to_owned().into_bytes();
            iter::once(slice)
        },
    );
}
|
||||
|
||||
/// A closure yielding an owned `Arc<[u8]>` chunk works as the text provider.
#[test]
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
    let text: &str = "// comment";

    check_parsing_callback(
        // The parser callback must return the text starting at `offset`, and an
        // empty slice at or past the end of the input.
        &mut |offset: usize, _point| text.as_bytes().get(offset..).unwrap_or_default(),
        |_node: Node<'_>| {
            let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
            iter::once(slice)
        },
    );
}
|
||||
|
|
@ -165,7 +165,7 @@ where
|
|||
struct HighlightIterLayer<'a> {
|
||||
_tree: Tree,
|
||||
cursor: QueryCursor,
|
||||
captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8]>>,
|
||||
captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
|
||||
config: &'a HighlightConfiguration,
|
||||
highlight_end_stack: Vec<usize>,
|
||||
scope_stack: Vec<LocalScope<'a>>,
|
||||
|
|
|
|||
|
|
@ -175,27 +175,30 @@ pub struct QueryMatch<'cursor, 'tree> {
|
|||
}
|
||||
|
||||
/// A sequence of `QueryMatch`es associated with a given `QueryCursor`.
|
||||
pub struct QueryMatches<'a, 'tree: 'a, T: TextProvider<'a>> {
|
||||
pub struct QueryMatches<'a, 'tree: 'a, T: TextProvider<I>, I: AsRef<[u8]>> {
|
||||
ptr: *mut ffi::TSQueryCursor,
|
||||
query: &'a Query,
|
||||
text_provider: T,
|
||||
buffer1: Vec<u8>,
|
||||
buffer2: Vec<u8>,
|
||||
_tree: PhantomData<&'tree ()>,
|
||||
_phantom: PhantomData<(&'tree (), I)>,
|
||||
}
|
||||
|
||||
/// A sequence of `QueryCapture`s associated with a given `QueryCursor`.
|
||||
pub struct QueryCaptures<'a, 'tree: 'a, T: TextProvider<'a>> {
|
||||
pub struct QueryCaptures<'a, 'tree: 'a, T: TextProvider<I>, I: AsRef<[u8]>> {
|
||||
ptr: *mut ffi::TSQueryCursor,
|
||||
query: &'a Query,
|
||||
text_provider: T,
|
||||
buffer1: Vec<u8>,
|
||||
buffer2: Vec<u8>,
|
||||
_tree: PhantomData<&'tree ()>,
|
||||
_phantom: PhantomData<(&'tree (), I)>,
|
||||
}
|
||||
|
||||
pub trait TextProvider<'a> {
|
||||
type I: Iterator<Item = &'a [u8]> + 'a;
|
||||
pub trait TextProvider<I>
|
||||
where
|
||||
I: AsRef<[u8]>,
|
||||
{
|
||||
type I: Iterator<Item = I>;
|
||||
fn text(&mut self, node: Node) -> Self::I;
|
||||
}
|
||||
|
||||
|
|
@ -1900,12 +1903,12 @@ impl QueryCursor {
|
|||
/// Because multiple patterns can match the same set of nodes, one match may contain
|
||||
/// captures that appear *before* some of the captures from a previous match.
|
||||
#[doc(alias = "ts_query_cursor_exec")]
|
||||
pub fn matches<'a, 'tree: 'a, T: TextProvider<'a> + 'a>(
|
||||
pub fn matches<'a, 'tree: 'a, T: TextProvider<I>, I: AsRef<[u8]>>(
|
||||
&'a mut self,
|
||||
query: &'a Query,
|
||||
node: Node<'tree>,
|
||||
text_provider: T,
|
||||
) -> QueryMatches<'a, 'tree, T> {
|
||||
) -> QueryMatches<'a, 'tree, T, I> {
|
||||
let ptr = self.ptr.as_ptr();
|
||||
unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) };
|
||||
QueryMatches {
|
||||
|
|
@ -1914,7 +1917,7 @@ impl QueryCursor {
|
|||
text_provider,
|
||||
buffer1: Default::default(),
|
||||
buffer2: Default::default(),
|
||||
_tree: PhantomData,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1923,12 +1926,12 @@ impl QueryCursor {
|
|||
/// This is useful if you don't care about which pattern matched, and just want a single,
|
||||
/// ordered sequence of captures.
|
||||
#[doc(alias = "ts_query_cursor_exec")]
|
||||
pub fn captures<'a, 'tree: 'a, T: TextProvider<'a> + 'a>(
|
||||
pub fn captures<'a, 'tree: 'a, T: TextProvider<I>, I: AsRef<[u8]>>(
|
||||
&'a mut self,
|
||||
query: &'a Query,
|
||||
node: Node<'tree>,
|
||||
text_provider: T,
|
||||
) -> QueryCaptures<'a, 'tree, T> {
|
||||
) -> QueryCaptures<'a, 'tree, T, I> {
|
||||
let ptr = self.ptr.as_ptr();
|
||||
unsafe { ffi::ts_query_cursor_exec(self.ptr.as_ptr(), query.ptr.as_ptr(), node.0) };
|
||||
QueryCaptures {
|
||||
|
|
@ -1937,7 +1940,7 @@ impl QueryCursor {
|
|||
text_provider,
|
||||
buffer1: Default::default(),
|
||||
buffer2: Default::default(),
|
||||
_tree: PhantomData,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2017,31 +2020,47 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
|
|||
}
|
||||
}
|
||||
|
||||
fn satisfies_text_predicates(
|
||||
fn satisfies_text_predicates<I: AsRef<[u8]>>(
|
||||
&self,
|
||||
query: &Query,
|
||||
buffer1: &mut Vec<u8>,
|
||||
buffer2: &mut Vec<u8>,
|
||||
text_provider: &mut impl TextProvider<'a>,
|
||||
buffer1: &'a mut Vec<u8>,
|
||||
buffer2: &'a mut Vec<u8>,
|
||||
text_provider: &mut impl TextProvider<I>,
|
||||
) -> bool {
|
||||
fn get_text<'a, 'b: 'a, I: Iterator<Item = &'b [u8]>>(
|
||||
struct NodeText<'a, T> {
|
||||
buffer: &'a mut Vec<u8>,
|
||||
mut chunks: I,
|
||||
) -> &'a [u8] {
|
||||
let first_chunk = chunks.next().unwrap_or(&[]);
|
||||
if let Some(next_chunk) = chunks.next() {
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(first_chunk);
|
||||
buffer.extend_from_slice(next_chunk);
|
||||
for chunk in chunks {
|
||||
buffer.extend_from_slice(chunk);
|
||||
first_chunk: Option<T>,
|
||||
}
|
||||
impl<'a, T: AsRef<[u8]>> NodeText<'a, T> {
|
||||
fn new(buffer: &'a mut Vec<u8>) -> Self {
|
||||
Self {
|
||||
buffer,
|
||||
first_chunk: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_text(&mut self, chunks: &mut impl Iterator<Item = T>) -> &[u8] {
|
||||
self.first_chunk = chunks.next();
|
||||
if let Some(next_chunk) = chunks.next() {
|
||||
self.buffer.clear();
|
||||
self.buffer
|
||||
.extend_from_slice(self.first_chunk.as_ref().unwrap().as_ref());
|
||||
self.buffer.extend_from_slice(next_chunk.as_ref());
|
||||
for chunk in chunks {
|
||||
self.buffer.extend_from_slice(chunk.as_ref());
|
||||
}
|
||||
self.buffer.as_slice()
|
||||
} else if let Some(ref first_chunk) = self.first_chunk {
|
||||
first_chunk.as_ref()
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
buffer.as_slice()
|
||||
} else {
|
||||
first_chunk
|
||||
}
|
||||
}
|
||||
|
||||
let mut node_text1 = NodeText::new(buffer1);
|
||||
let mut node_text2 = NodeText::new(buffer2);
|
||||
|
||||
query.text_predicates[self.pattern_index]
|
||||
.iter()
|
||||
.all(|predicate| match predicate {
|
||||
|
|
@ -2050,8 +2069,10 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
|
|||
let node2 = self.nodes_for_capture_index(*j).next();
|
||||
match (node1, node2) {
|
||||
(Some(node1), Some(node2)) => {
|
||||
let text1 = get_text(buffer1, text_provider.text(node1));
|
||||
let text2 = get_text(buffer2, text_provider.text(node2));
|
||||
let mut text1 = text_provider.text(node1);
|
||||
let mut text2 = text_provider.text(node2);
|
||||
let text1 = node_text1.get_text(&mut text1);
|
||||
let text2 = node_text2.get_text(&mut text2);
|
||||
(text1 == text2) == *is_positive
|
||||
}
|
||||
_ => true,
|
||||
|
|
@ -2061,7 +2082,8 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
|
|||
let node = self.nodes_for_capture_index(*i).next();
|
||||
match node {
|
||||
Some(node) => {
|
||||
let text = get_text(buffer1, text_provider.text(node));
|
||||
let mut text = text_provider.text(node);
|
||||
let text = node_text1.get_text(&mut text);
|
||||
(text == s.as_bytes()) == *is_positive
|
||||
}
|
||||
None => true,
|
||||
|
|
@ -2071,7 +2093,8 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
|
|||
let node = self.nodes_for_capture_index(*i).next();
|
||||
match node {
|
||||
Some(node) => {
|
||||
let text = get_text(buffer1, text_provider.text(node));
|
||||
let mut text = text_provider.text(node);
|
||||
let text = node_text1.get_text(&mut text);
|
||||
r.is_match(text) == *is_positive
|
||||
}
|
||||
None => true,
|
||||
|
|
@ -2091,7 +2114,7 @@ impl QueryProperty {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> {
|
||||
impl<'a, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator for QueryMatches<'a, 'tree, T, I> {
|
||||
type Item = QueryMatch<'a, 'tree>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
|
@ -2116,7 +2139,7 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> {
|
||||
impl<'a, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator for QueryCaptures<'a, 'tree, T, I> {
|
||||
type Item = (QueryMatch<'a, 'tree>, usize);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
|
@ -2148,7 +2171,7 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> {
|
||||
impl<'a, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> QueryMatches<'a, 'tree, T, I> {
|
||||
#[doc(alias = "ts_query_cursor_set_byte_range")]
|
||||
pub fn set_byte_range(&mut self, range: ops::Range<usize>) {
|
||||
unsafe {
|
||||
|
|
@ -2164,7 +2187,7 @@ impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, 'tree, T: TextProvider<'a>> QueryCaptures<'a, 'tree, T> {
|
||||
impl<'a, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> QueryCaptures<'a, 'tree, T, I> {
|
||||
#[doc(alias = "ts_query_cursor_set_byte_range")]
|
||||
pub fn set_byte_range(&mut self, range: ops::Range<usize>) {
|
||||
unsafe {
|
||||
|
|
@ -2190,19 +2213,20 @@ impl<'cursor, 'tree> fmt::Debug for QueryMatch<'cursor, 'tree> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, F, I> TextProvider<'a> for F
|
||||
impl<F, R, I> TextProvider<I> for F
|
||||
where
|
||||
F: FnMut(Node) -> I,
|
||||
I: Iterator<Item = &'a [u8]> + 'a,
|
||||
F: FnMut(Node) -> R,
|
||||
R: Iterator<Item = I>,
|
||||
I: AsRef<[u8]>,
|
||||
{
|
||||
type I = I;
|
||||
type I = R;
|
||||
|
||||
fn text(&mut self, node: Node) -> Self::I {
|
||||
(self)(node)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TextProvider<'a> for &'a [u8] {
|
||||
impl<'a> TextProvider<&'a [u8]> for &'a [u8] {
|
||||
type I = iter::Once<&'a [u8]>;
|
||||
|
||||
fn text(&mut self, node: Node) -> Self::I {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue