binding_rust: Generalize the interface to callback-based parse methods

Fixes #386
This commit is contained in:
Max Brunsfeld 2019-08-29 10:30:57 -07:00
parent 09b46b87dd
commit b3ab2e07a2
2 changed files with 107 additions and 25 deletions

View file

@ -213,7 +213,9 @@ impl Parser {
{
Err(LanguageError { version })
} else {
unsafe { ffi::ts_parser_set_language(self.0, language.0); }
unsafe {
ffi::ts_parser_set_language(self.0, language.0);
}
Ok(())
}
}
@ -284,8 +286,21 @@ impl Parser {
unsafe { ffi::ts_parser_print_dot_graphs(self.0, -1) }
}
pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
let bytes = input.as_ref();
/// Parse a slice of UTF8 text.
///
/// # Arguments:
/// * `text` The UTF8-encoded text to parse.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
///
/// Returns a [Tree] if parsing succeeded, or `None` if:
/// * The parser has not yet had a language assigned with [Parser::set_language]
/// * The timeout set with [Parser::set_timeout_micros] expired
/// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped
pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
let bytes = text.as_ref();
let len = bytes.len();
self.parse_with(
&mut |i, _| if i < len { &bytes[i..] } else { &[] },
@ -293,6 +308,14 @@ impl Parser {
)
}
/// Parse a slice of UTF16 text.
///
/// # Arguments:
/// * `input` The UTF16-encoded text to parse.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_utf16(
&mut self,
input: impl AsRef<[u16]>,
@ -306,26 +329,46 @@ impl Parser {
)
}
pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>(
/// Parse UTF8 text provided in chunks by a callback.
///
/// # Arguments:
/// * `callback` A function that takes a byte offset and position and
/// returns a slice of UTF8-encoded text starting at that byte offset
/// and position. The slices can be of any length. If the given position
/// is at the end of the text, the callback should return an empty slice.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_with<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
&mut self,
input: &mut T,
callback: &mut F,
old_tree: Option<&Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u8]>(
// A pointer to this payload is passed on every call to the `read` C function.
// The payload contains two things:
// 1. A reference to the Rust `callback`.
// 2. The text that was returned from the previous call to `callback`.
// This allows the callback to return owned values like vectors.
let mut payload: (&mut F, Option<T>) = (callback, None);
// This C function is passed to Tree-sitter as the input callback.
unsafe extern "C" fn read<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
payload: *mut c_void,
byte_offset: u32,
position: ffi::TSPoint,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let slice = input(byte_offset as usize, position.into());
let (callback, text) = (payload as *mut (&mut F, Option<T>)).as_mut().unwrap();
*text = Some(callback(byte_offset as usize, position.into()));
let slice = text.as_ref().unwrap().as_ref();
*bytes_read = slice.len() as u32;
return slice.as_ptr() as *const c_char;
};
let c_input = ffi::TSInput {
payload: input as *mut T as *mut c_void,
read: Some(read::<T>),
payload: &mut payload as *mut (&mut F, Option<T>) as *mut c_void,
read: Some(read::<T, F>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
};
@ -338,32 +381,52 @@ impl Parser {
}
}
pub fn parse_utf16_with<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>(
/// Parse UTF16 text provided in chunks by a callback.
///
/// # Arguments:
/// * `callback` A function that takes a UTF16 code unit offset and position and
/// returns a slice of UTF16-encoded text starting at that code unit offset
/// and position. The slices can be of any length. If the given position
/// is at the end of the text, the callback should return an empty slice.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_utf16_with<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>(
&mut self,
input: &mut T,
callback: &mut F,
old_tree: Option<&Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u16]>(
// A pointer to this payload is passed on every call to the `read` C function.
// The payload contains two things:
// 1. A reference to the Rust `callback`.
// 2. The text that was returned from the previous call to `callback`.
// This allows the callback to return owned values like vectors.
let mut payload: (&mut F, Option<T>) = (callback, None);
// This C function is passed to Tree-sitter as the input callback.
unsafe extern "C" fn read<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>(
payload: *mut c_void,
byte_offset: u32,
position: ffi::TSPoint,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let slice = input(
let (callback, text) = (payload as *mut (&mut F, Option<T>)).as_mut().unwrap();
*text = Some(callback(
(byte_offset / 2) as usize,
Point {
row: position.row as usize,
column: position.column as usize / 2,
},
);
));
let slice = text.as_ref().unwrap().as_ref();
*bytes_read = slice.len() as u32 * 2;
slice.as_ptr() as *const c_char
};
let c_input = ffi::TSInput {
payload: input as *mut T as *mut c_void,
read: Some(read::<T>),
payload: &mut payload as *mut (&mut F, Option<T>) as *mut c_void,
read: Some(read::<T, F>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF16,
};
@ -451,7 +514,8 @@ impl Tree {
pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator<Item = Range> {
let mut count = 0;
unsafe {
let ptr = ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32);
let ptr =
ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32);
util::CBufferIter::new(ptr, count).map(|r| r.into())
}
}