binding_rust: Generalize the interface to callback-based parse methods
Fixes #386
This commit is contained in:
parent
09b46b87dd
commit
b3ab2e07a2
2 changed files with 107 additions and 25 deletions
|
|
@ -160,6 +160,24 @@ fn test_parsing_with_custom_utf16_input() {
|
|||
assert_eq!(root.child(0).unwrap().kind(), "function_item");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_with_callback_returning_owned_strings() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("rust")).unwrap();
|
||||
|
||||
let text = b"pub fn foo() { 1 }";
|
||||
|
||||
let tree = parser
|
||||
.parse_with(
|
||||
&mut |i, _| String::from_utf8(text[i..].to_vec()).unwrap(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let root = tree.root_node();
|
||||
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_text_with_byte_order_mark() {
|
||||
let mut parser = Parser::new();
|
||||
|
|
@ -380,11 +398,11 @@ fn test_parsing_cancelled_by_another_thread() {
|
|||
let tree = parser.parse_with(
|
||||
&mut |offset, _| {
|
||||
if offset == 0 {
|
||||
b" ["
|
||||
" [".as_bytes()
|
||||
} else if offset >= 20000 {
|
||||
b""
|
||||
"".as_bytes()
|
||||
} else {
|
||||
b"0,"
|
||||
"0,".as_bytes()
|
||||
}
|
||||
},
|
||||
None,
|
||||
|
|
@ -461,11 +479,11 @@ fn test_parsing_with_a_timeout() {
|
|||
.parse_with(
|
||||
&mut |offset, _| {
|
||||
if offset > 5000 {
|
||||
b""
|
||||
"".as_bytes()
|
||||
} else if offset == 5000 {
|
||||
b"]"
|
||||
"]".as_bytes()
|
||||
} else {
|
||||
b",0"
|
||||
",0".as_bytes()
|
||||
}
|
||||
},
|
||||
None,
|
||||
|
|
|
|||
|
|
@ -213,7 +213,9 @@ impl Parser {
|
|||
{
|
||||
Err(LanguageError { version })
|
||||
} else {
|
||||
unsafe { ffi::ts_parser_set_language(self.0, language.0); }
|
||||
unsafe {
|
||||
ffi::ts_parser_set_language(self.0, language.0);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -284,8 +286,21 @@ impl Parser {
|
|||
unsafe { ffi::ts_parser_print_dot_graphs(self.0, -1) }
|
||||
}
|
||||
|
||||
pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
|
||||
let bytes = input.as_ref();
|
||||
/// Parse a slice of UTF8 text.
|
||||
///
|
||||
/// # Arguments:
|
||||
/// * `text` The UTF8-encoded text to parse.
|
||||
/// * `old_tree` A previous syntax tree parsed from the same document.
|
||||
/// If the text of the document has changed since `old_tree` was
|
||||
/// created, then you must edit `old_tree` to match the new text using
|
||||
/// [Tree::edit].
|
||||
///
|
||||
/// Returns a [Tree] if parsing succeeded, or `None` if:
|
||||
/// * The parser has not yet had a language assigned with [Parser::set_language]
|
||||
/// * The timeout set with [Parser::set_timeout_micros] expired
|
||||
/// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped
|
||||
pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
|
||||
let bytes = text.as_ref();
|
||||
let len = bytes.len();
|
||||
self.parse_with(
|
||||
&mut |i, _| if i < len { &bytes[i..] } else { &[] },
|
||||
|
|
@ -293,6 +308,14 @@ impl Parser {
|
|||
)
|
||||
}
|
||||
|
||||
/// Parse a slice of UTF16 text.
|
||||
///
|
||||
/// # Arguments:
|
||||
/// * `text` The UTF16-encoded text to parse.
|
||||
/// * `old_tree` A previous syntax tree parsed from the same document.
|
||||
/// If the text of the document has changed since `old_tree` was
|
||||
/// created, then you must edit `old_tree` to match the new text using
|
||||
/// [Tree::edit].
|
||||
pub fn parse_utf16(
|
||||
&mut self,
|
||||
input: impl AsRef<[u16]>,
|
||||
|
|
@ -306,26 +329,46 @@ impl Parser {
|
|||
)
|
||||
}
|
||||
|
||||
pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>(
|
||||
/// Parse UTF8 text provided in chunks by a callback.
|
||||
///
|
||||
/// # Arguments:
|
||||
/// * `callback` A function that takes a byte offset and position and
|
||||
/// returns a slice of UTF8-encoded text starting at that byte offset
|
||||
/// and position. The slices can be of any length. If the given position
|
||||
/// is at the end of the text, the callback should return an empty slice.
|
||||
/// * `old_tree` A previous syntax tree parsed from the same document.
|
||||
/// If the text of the document has changed since `old_tree` was
|
||||
/// created, then you must edit `old_tree` to match the new text using
|
||||
/// [Tree::edit].
|
||||
pub fn parse_with<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
|
||||
&mut self,
|
||||
input: &mut T,
|
||||
callback: &mut F,
|
||||
old_tree: Option<&Tree>,
|
||||
) -> Option<Tree> {
|
||||
unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u8]>(
|
||||
// A pointer to this payload is passed on every call to the `read` C function.
|
||||
// The payload contains two things:
|
||||
// 1. A reference to the rust `callback`.
|
||||
// 2. The text that was returned from the previous call to `callback`.
|
||||
// This allows the callback to return owned values like vectors.
|
||||
let mut payload: (&mut F, Option<T>) = (callback, None);
|
||||
|
||||
// This C function is passed to Tree-sitter as the input callback.
|
||||
unsafe extern "C" fn read<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
|
||||
payload: *mut c_void,
|
||||
byte_offset: u32,
|
||||
position: ffi::TSPoint,
|
||||
bytes_read: *mut u32,
|
||||
) -> *const c_char {
|
||||
let input = (payload as *mut T).as_mut().unwrap();
|
||||
let slice = input(byte_offset as usize, position.into());
|
||||
let (callback, text) = (payload as *mut (&mut F, Option<T>)).as_mut().unwrap();
|
||||
*text = Some(callback(byte_offset as usize, position.into()));
|
||||
let slice = text.as_ref().unwrap().as_ref();
|
||||
*bytes_read = slice.len() as u32;
|
||||
return slice.as_ptr() as *const c_char;
|
||||
};
|
||||
|
||||
let c_input = ffi::TSInput {
|
||||
payload: input as *mut T as *mut c_void,
|
||||
read: Some(read::<T>),
|
||||
payload: &mut payload as *mut (&mut F, Option<T>) as *mut c_void,
|
||||
read: Some(read::<T, F>),
|
||||
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
|
||||
};
|
||||
|
||||
|
|
@ -338,32 +381,52 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_utf16_with<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>(
|
||||
/// Parse UTF16 text provided in chunks by a callback.
|
||||
///
|
||||
/// # Arguments:
|
||||
/// * `callback` A function that takes a code point offset and position and
|
||||
/// returns a slice of UTF16-encoded text starting at that offset
|
||||
/// and position. The slices can be of any length. If the given position
|
||||
/// is at the end of the text, the callback should return an empty slice.
|
||||
/// * `old_tree` A previous syntax tree parsed from the same document.
|
||||
/// If the text of the document has changed since `old_tree` was
|
||||
/// created, then you must edit `old_tree` to match the new text using
|
||||
/// [Tree::edit].
|
||||
pub fn parse_utf16_with<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>(
|
||||
&mut self,
|
||||
input: &mut T,
|
||||
callback: &mut F,
|
||||
old_tree: Option<&Tree>,
|
||||
) -> Option<Tree> {
|
||||
unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u16]>(
|
||||
// A pointer to this payload is passed on every call to the `read` C function.
|
||||
// The payload contains two things:
|
||||
// 1. A reference to the rust `callback`.
|
||||
// 2. The text that was returned from the previous call to `callback`.
|
||||
// This allows the callback to return owned values like vectors.
|
||||
let mut payload: (&mut F, Option<T>) = (callback, None);
|
||||
|
||||
// This C function is passed to Tree-sitter as the input callback.
|
||||
unsafe extern "C" fn read<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>(
|
||||
payload: *mut c_void,
|
||||
byte_offset: u32,
|
||||
position: ffi::TSPoint,
|
||||
bytes_read: *mut u32,
|
||||
) -> *const c_char {
|
||||
let input = (payload as *mut T).as_mut().unwrap();
|
||||
let slice = input(
|
||||
let (callback, text) = (payload as *mut (&mut F, Option<T>)).as_mut().unwrap();
|
||||
*text = Some(callback(
|
||||
(byte_offset / 2) as usize,
|
||||
Point {
|
||||
row: position.row as usize,
|
||||
column: position.column as usize / 2,
|
||||
},
|
||||
);
|
||||
));
|
||||
let slice = text.as_ref().unwrap().as_ref();
|
||||
*bytes_read = slice.len() as u32 * 2;
|
||||
slice.as_ptr() as *const c_char
|
||||
};
|
||||
|
||||
let c_input = ffi::TSInput {
|
||||
payload: input as *mut T as *mut c_void,
|
||||
read: Some(read::<T>),
|
||||
payload: &mut payload as *mut (&mut F, Option<T>) as *mut c_void,
|
||||
read: Some(read::<T, F>),
|
||||
encoding: ffi::TSInputEncoding_TSInputEncodingUTF16,
|
||||
};
|
||||
|
||||
|
|
@ -451,7 +514,8 @@ impl Tree {
|
|||
pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator<Item = Range> {
|
||||
let mut count = 0;
|
||||
unsafe {
|
||||
let ptr = ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32);
|
||||
let ptr =
|
||||
ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32);
|
||||
util::CBufferIter::new(ptr, count).map(|r| r.into())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue