binding_rust: Generalize the interface to callback-based parse methods

Fixes #386
This commit is contained in:
Max Brunsfeld 2019-08-29 10:30:57 -07:00
parent 09b46b87dd
commit b3ab2e07a2
2 changed files with 107 additions and 25 deletions

View file

@ -160,6 +160,24 @@ fn test_parsing_with_custom_utf16_input() {
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
/// Verifies that `Parser::parse_with` accepts a callback whose return value
/// is an owned `String` rather than a borrowed byte slice.
#[test]
fn test_parsing_with_callback_returning_owned_strings() {
    let source = b"pub fn foo() { 1 }";

    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();

    let tree = parser
        .parse_with(
            // Every call allocates a new `String` holding the remainder of
            // the source, starting at the requested byte offset.
            &mut |offset, _| {
                let remaining = source[offset..].to_vec();
                String::from_utf8(remaining).unwrap()
            },
            None,
        )
        .unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"
    );
}
#[test]
fn test_parsing_text_with_byte_order_mark() {
let mut parser = Parser::new();
@ -380,11 +398,11 @@ fn test_parsing_cancelled_by_another_thread() {
let tree = parser.parse_with(
&mut |offset, _| {
if offset == 0 {
b" ["
" [".as_bytes()
} else if offset >= 20000 {
b""
"".as_bytes()
} else {
b"0,"
"0,".as_bytes()
}
},
None,
@ -461,11 +479,11 @@ fn test_parsing_with_a_timeout() {
.parse_with(
&mut |offset, _| {
if offset > 5000 {
b""
"".as_bytes()
} else if offset == 5000 {
b"]"
"]".as_bytes()
} else {
b",0"
",0".as_bytes()
}
},
None,

View file

@ -213,7 +213,9 @@ impl Parser {
{
Err(LanguageError { version })
} else {
unsafe { ffi::ts_parser_set_language(self.0, language.0); }
unsafe {
ffi::ts_parser_set_language(self.0, language.0);
}
Ok(())
}
}
@ -284,8 +286,21 @@ impl Parser {
unsafe { ffi::ts_parser_print_dot_graphs(self.0, -1) }
}
pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
let bytes = input.as_ref();
/// Parse a slice of UTF8 text.
///
/// # Arguments:
/// * `text` The UTF8-encoded text to parse.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
///
/// Returns a [Tree] if parsing succeeded, or `None` if:
/// * The parser has not yet had a language assigned with [Parser::set_language]
/// * The timeout set with [Parser::set_timeout_micros] expired
/// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped
pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
let bytes = text.as_ref();
let len = bytes.len();
self.parse_with(
&mut |i, _| if i < len { &bytes[i..] } else { &[] },
@ -293,6 +308,14 @@ impl Parser {
)
}
/// Parse a slice of UTF16 text.
///
/// # Arguments:
/// * `text` The UTF16-encoded text to parse.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_utf16(
&mut self,
input: impl AsRef<[u16]>,
@ -306,26 +329,46 @@ impl Parser {
)
}
pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>(
/// Parse UTF8 text provided in chunks by a callback.
///
/// # Arguments:
/// * `callback` A function that takes a byte offset and position and
/// returns a slice of UTF8-encoded text starting at that byte offset
/// and position. The slices can be of any length. If the given position
/// is at the end of the text, the callback should return an empty slice.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_with<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
&mut self,
input: &mut T,
callback: &mut F,
old_tree: Option<&Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u8]>(
// A pointer to this payload is passed on every call to the `read` C function.
// The payload contains two things:
// 1. A reference to the rust `callback`.
// 2. The text that was returned from the previous call to `callback`.
// This allows the callback to return owned values like vectors.
let mut payload: (&mut F, Option<T>) = (callback, None);
// This C function is passed to Tree-sitter as the input callback.
unsafe extern "C" fn read<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
payload: *mut c_void,
byte_offset: u32,
position: ffi::TSPoint,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let slice = input(byte_offset as usize, position.into());
let (callback, text) = (payload as *mut (&mut F, Option<T>)).as_mut().unwrap();
*text = Some(callback(byte_offset as usize, position.into()));
let slice = text.as_ref().unwrap().as_ref();
*bytes_read = slice.len() as u32;
return slice.as_ptr() as *const c_char;
};
let c_input = ffi::TSInput {
payload: input as *mut T as *mut c_void,
read: Some(read::<T>),
payload: &mut payload as *mut (&mut F, Option<T>) as *mut c_void,
read: Some(read::<T, F>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
};
@ -338,32 +381,52 @@ impl Parser {
}
}
pub fn parse_utf16_with<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>(
/// Parse UTF16 text provided in chunks by a callback.
///
/// # Arguments:
/// * `callback` A function that takes a code unit offset and position and
/// returns a slice of UTF16-encoded text starting at that code unit offset
/// and position. The slices can be of any length. If the given position
/// is at the end of the text, the callback should return an empty slice.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_utf16_with<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>(
&mut self,
input: &mut T,
callback: &mut F,
old_tree: Option<&Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u16]>(
// A pointer to this payload is passed on every call to the `read` C function.
// The payload contains two things:
// 1. A reference to the rust `callback`.
// 2. The text that was returned from the previous call to `callback`.
// This allows the callback to return owned values like vectors.
let mut payload: (&mut F, Option<T>) = (callback, None);
// This C function is passed to Tree-sitter as the input callback.
unsafe extern "C" fn read<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>(
payload: *mut c_void,
byte_offset: u32,
position: ffi::TSPoint,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let slice = input(
let (callback, text) = (payload as *mut (&mut F, Option<T>)).as_mut().unwrap();
*text = Some(callback(
(byte_offset / 2) as usize,
Point {
row: position.row as usize,
column: position.column as usize / 2,
},
);
));
let slice = text.as_ref().unwrap().as_ref();
*bytes_read = slice.len() as u32 * 2;
slice.as_ptr() as *const c_char
};
let c_input = ffi::TSInput {
payload: input as *mut T as *mut c_void,
read: Some(read::<T>),
payload: &mut payload as *mut (&mut F, Option<T>) as *mut c_void,
read: Some(read::<T, F>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF16,
};
@ -451,7 +514,8 @@ impl Tree {
pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator<Item = Range> {
let mut count = 0;
unsafe {
let ptr = ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32);
let ptr =
ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32);
util::CBufferIter::new(ptr, count).map(|r| r.into())
}
}