From b3ab2e07a2bee917559fa7f113dc4d031b27fea4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 29 Aug 2019 10:30:57 -0700 Subject: [PATCH] binding_rust: Generalize the interface to callback-based parse methods Fixes #386 --- cli/src/tests/parser_test.rs | 30 ++++++++--- lib/binding_rust/lib.rs | 102 ++++++++++++++++++++++++++++------- 2 files changed, 107 insertions(+), 25 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 96b08af0..882f5963 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -160,6 +160,24 @@ fn test_parsing_with_custom_utf16_input() { assert_eq!(root.child(0).unwrap().kind(), "function_item"); } +#[test] +fn test_parsing_with_callback_returning_owned_strings() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let text = b"pub fn foo() { 1 }"; + + let tree = parser + .parse_with( + &mut |i, _| String::from_utf8(text[i..].to_vec()).unwrap(), + None, + ) + .unwrap(); + + let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); +} + #[test] fn test_parsing_text_with_byte_order_mark() { let mut parser = Parser::new(); @@ -380,11 +398,11 @@ fn test_parsing_cancelled_by_another_thread() { let tree = parser.parse_with( &mut |offset, _| { if offset == 0 { - b" [" + " [".as_bytes() } else if offset >= 20000 { - b"" + "".as_bytes() } else { - b"0," + "0,".as_bytes() } }, None, @@ -461,11 +479,11 @@ fn test_parsing_with_a_timeout() { .parse_with( &mut |offset, _| { if offset > 5000 { - b"" + "".as_bytes() } else if offset == 5000 { - b"]" + "]".as_bytes() } else { - b",0" + ",0".as_bytes() } }, None, diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 0ee168da..4c34d202 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -213,7 +213,9 @@ impl Parser { { Err(LanguageError { version }) } else { - 
unsafe { ffi::ts_parser_set_language(self.0, language.0); } + unsafe { + ffi::ts_parser_set_language(self.0, language.0); + } Ok(()) } } @@ -284,8 +286,21 @@ impl Parser { unsafe { ffi::ts_parser_print_dot_graphs(self.0, -1) } } - pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { - let bytes = input.as_ref(); + /// Parse a slice of UTF8 text. + /// + /// # Arguments: + /// * `text` The UTF8-encoded text to parse. + /// * `old_tree` A previous syntax tree parsed from the same document. + /// If the text of the document has changed since `old_tree` was + /// created, then you must edit `old_tree` to match the new text using + /// [Tree::edit]. + /// + /// Returns a [Tree] if parsing succeeded, or `None` if: + /// * The parser has not yet had a language assigned with [Parser::set_language] + /// * The timeout set with [Parser::set_timeout_micros] expired + /// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped + pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { + let bytes = text.as_ref(); let len = bytes.len(); self.parse_with( &mut |i, _| if i < len { &bytes[i..] } else { &[] }, @@ -293,6 +308,14 @@ impl Parser { ) } + /// Parse a slice of UTF16 text. + /// + /// # Arguments: + /// * `input` The UTF16-encoded text to parse. + /// * `old_tree` A previous syntax tree parsed from the same document. + /// If the text of the document has changed since `old_tree` was + /// created, then you must edit `old_tree` to match the new text using + /// [Tree::edit]. pub fn parse_utf16( &mut self, input: impl AsRef<[u16]>, @@ -306,26 +329,46 @@ impl Parser { ) } - pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>( + /// Parse UTF8 text provided in chunks by a callback. + /// + /// # Arguments: + /// * `callback` A function that takes a byte offset and position and + /// returns a slice of UTF8-encoded text starting at that byte offset + /// and position. 
The slices can be of any length. If the given position + /// is at the end of the text, the callback should return an empty slice. + /// * `old_tree` A previous syntax tree parsed from the same document. + /// If the text of the document has changed since `old_tree` was + /// created, then you must edit `old_tree` to match the new text using + /// [Tree::edit]. + pub fn parse_with<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>( &mut self, - input: &mut T, + callback: &mut F, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u8]>( + // A pointer to this payload is passed on every call to the `read` C function. + // The payload contains two things: + // 1. A reference to the rust `callback`. + // 2. The text that was returned from the previous call to `callback`. + // This allows the callback to return owned values like vectors. + let mut payload: (&mut F, Option) = (callback, None); + + // This C function is passed to Tree-sitter as the input callback. + unsafe extern "C" fn read<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { - let input = (payload as *mut T).as_mut().unwrap(); - let slice = input(byte_offset as usize, position.into()); + let (callback, text) = (payload as *mut (&mut F, Option)).as_mut().unwrap(); + *text = Some(callback(byte_offset as usize, position.into())); + let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32; return slice.as_ptr() as *const c_char; }; let c_input = ffi::TSInput { - payload: input as *mut T as *mut c_void, - read: Some(read::), + payload: &mut payload as *mut (&mut F, Option) as *mut c_void, + read: Some(read::), encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, }; @@ -338,32 +381,52 @@ impl Parser { } } - pub fn parse_utf16_with<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>( + /// Parse UTF16 text provided in chunks by a callback. 
+ /// + /// # Arguments: + /// * `callback` A function that takes a code unit offset and position and + /// returns a slice of UTF16-encoded text starting at that code unit offset + /// and position. The slices can be of any length. If the given position + /// is at the end of the text, the callback should return an empty slice. + /// * `old_tree` A previous syntax tree parsed from the same document. + /// If the text of the document has changed since `old_tree` was + /// created, then you must edit `old_tree` to match the new text using + /// [Tree::edit]. + pub fn parse_utf16_with<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>( &mut self, - input: &mut T, + callback: &mut F, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u16]>( + // A pointer to this payload is passed on every call to the `read` C function. + // The payload contains two things: + // 1. A reference to the rust `callback`. + // 2. The text that was returned from the previous call to `callback`. + // This allows the callback to return owned values like vectors. + let mut payload: (&mut F, Option) = (callback, None); + + // This C function is passed to Tree-sitter as the input callback. 
+ unsafe extern "C" fn read<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { - let input = (payload as *mut T).as_mut().unwrap(); - let slice = input( + let (callback, text) = (payload as *mut (&mut F, Option)).as_mut().unwrap(); + *text = Some(callback( (byte_offset / 2) as usize, Point { row: position.row as usize, column: position.column as usize / 2, }, - ); + )); + let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32 * 2; slice.as_ptr() as *const c_char }; let c_input = ffi::TSInput { - payload: input as *mut T as *mut c_void, - read: Some(read::), + payload: &mut payload as *mut (&mut F, Option) as *mut c_void, + read: Some(read::), encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, }; @@ -451,7 +514,8 @@ impl Tree { pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator { let mut count = 0; unsafe { - let ptr = ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32); + let ptr = + ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32); util::CBufferIter::new(ptr, count).map(|r| r.into()) } }