binding: Make parse methods more convenient

* Rename parse_str to parse and make it polymorphic.
* Rename parse_utf8 to parse_with, since it is now the callback-based
  version of parse
* Add a parse_utf16 method analogous to parse
* Rename existing parse_utf16 method to parse_utf16_with

This brings in the changes from tree-sitter/rust-tree-sitter#5
This commit is contained in:
Max Brunsfeld 2019-02-05 10:59:31 -08:00
parent 49392c8502
commit 50281637d7
10 changed files with 89 additions and 169 deletions

View file

@ -155,7 +155,7 @@ fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> us
let source_code = fs::read(example_path).unwrap();
let time = Instant::now();
let _tree = parser
.parse_utf8(&mut |byte, _| &source_code[byte..], None)
.parse(&source_code, None)
.expect("Incompatible language version");
let duration = time.elapsed();
let duration_ms =

View file

@ -33,7 +33,7 @@ pub fn parse_file_at_path(
let time = Instant::now();
let tree = parser
.parse_utf8(&mut |byte, _| &source_code[byte..], None)
.parse(&source_code, None)
.expect("Incompatible language version");
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;

View file

@ -141,9 +141,7 @@ fn run_tests(
return Ok(());
}
}
let tree = parser
.parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None)
.unwrap();
let tree = parser.parse(&input, None).unwrap();
let actual = tree.root_node().to_sexp();
for _ in 0..indent_level {
print!(" ");

View file

@ -75,7 +75,7 @@ fn test_real_language_corpus_files() {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let tree = parser.parse_utf8(&mut |i, _| &input[i..], None).unwrap();
let tree = parser.parse(&input, None).unwrap();
let actual_output = tree.root_node().to_sexp();
drop(tree);
drop(parser);
@ -91,9 +91,7 @@ fn test_real_language_corpus_files() {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser
.parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), None)
.unwrap();
let tree = parser.parse(&input, None).unwrap();
drop(parser);
for trial in 1..=TRIAL_COUNT {
@ -122,9 +120,7 @@ fn test_real_language_corpus_files() {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
let mut tree2 = parser
.parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), Some(&tree))
.unwrap();
let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
// Check that the new tree is consistent.
check_consistent_sizes(&tree2, &input);
@ -145,9 +141,7 @@ fn test_real_language_corpus_files() {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
let tree3 = parser
.parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), Some(&tree2))
.unwrap();
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
// Verify that the final tree matches the expectation from the corpus.
let actual_output = tree3.root_node().to_sexp();
@ -254,7 +248,7 @@ fn test_feature_corpus_files() {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let tree = parser.parse_utf8(&mut |i, _| &input[i..], None).unwrap();
let tree = parser.parse(&input, None).unwrap();
let actual_output = tree.root_node().to_sexp();
drop(tree);
drop(parser);

View file

@ -248,7 +248,7 @@ fn test_node_named_child_with_aliases_and_extras() {
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
let tree = parser.parse_str("b ... b ... c", None).unwrap();
let tree = parser.parse("b ... b ... c", None).unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(a (b) (comment) (B) (comment) (C))");
assert_eq!(root.named_child_count(), 5);
@ -360,5 +360,5 @@ fn get_all_nodes(tree: &Tree) -> Vec<Node> {
fn parse_json_example() -> Tree {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
parser.parse_str(JSON_EXAMPLE, None).unwrap()
parser.parse(JSON_EXAMPLE, None).unwrap()
}

View file

@ -10,7 +10,7 @@ fn test_basic_parsing() {
parser.set_language(get_language("rust")).unwrap();
let tree = parser
.parse_str(
.parse(
"
struct Stuff {}
fn main() {}
@ -42,7 +42,7 @@ fn test_parsing_with_logging() {
})));
parser
.parse_str(
.parse(
"
struct Stuff {}
fn main() {}
@ -66,7 +66,7 @@ fn test_parsing_with_custom_utf8_input() {
let lines = &["pub fn foo() {", " 1", "}"];
let tree = parser
.parse_utf8(
.parse_with(
&mut |_, position| {
let row = position.row;
let column = position.column;
@ -102,7 +102,7 @@ fn test_parsing_with_custom_utf16_input() {
.collect();
let tree = parser
.parse_utf16(
.parse_utf16_with(
&mut |_, position| {
let row = position.row;
let column = position.column;
@ -135,7 +135,7 @@ fn test_parsing_after_editing_beginning_of_code() {
parser.set_language(get_language("javascript")).unwrap();
let mut code = b"123 + 456 * (10 + x);".to_vec();
let mut tree = parser.parse_utf8(&mut |i, _| &code[i..], None).unwrap();
let mut tree = parser.parse(&code, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
concat!(
@ -157,7 +157,7 @@ fn test_parsing_after_editing_beginning_of_code() {
let mut recorder = ReadRecorder::new(&code);
let tree = parser
.parse_utf8(&mut |i, _| recorder.read(i), Some(&tree))
.parse_with(&mut |i, _| recorder.read(i), Some(&tree))
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -179,13 +179,13 @@ fn test_parsing_after_editing_end_of_code() {
parser.set_language(get_language("javascript")).unwrap();
let mut code = b"x * (100 + abc);".to_vec();
let mut tree = parser.parse_utf8(&mut |i, _| &code[i..], None).unwrap();
let mut tree = parser.parse(&code, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
concat!(
"(program (expression_statement (binary_expression ",
"(identifier) ",
"(parenthesized_expression (binary_expression (number) (identifier))))))",
"(identifier) ",
"(parenthesized_expression (binary_expression (number) (identifier))))))",
)
);
@ -202,7 +202,7 @@ fn test_parsing_after_editing_end_of_code() {
let mut recorder = ReadRecorder::new(&code);
let tree = parser
.parse_utf8(&mut |i, _| recorder.read(i), Some(&tree))
.parse_with(&mut |i, _| recorder.read(i), Some(&tree))
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -226,7 +226,7 @@ fn test_parsing_on_multiple_threads() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
let tree = parser.parse_str(this_file_source, None).unwrap();
let tree = parser.parse(this_file_source, None).unwrap();
let mut parse_threads = Vec::new();
for thread_id in 1..5 {
@ -254,9 +254,7 @@ fn test_parsing_on_multiple_threads() {
// Reparse using the old tree as a starting point.
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
parser
.parse_str(&prepended_source, Some(&tree_clone))
.unwrap()
parser.parse(&prepended_source, Some(&tree_clone)).unwrap()
}));
}
@ -281,7 +279,7 @@ fn test_parsing_with_an_operation_limit() {
// Start parsing from an infinite input. Parsing should abort after 5 "operations".
parser.set_operation_limit(5);
let mut call_count = 0;
let tree = parser.parse_utf8(
let tree = parser.parse_with(
&mut |_, _| {
if call_count == 0 {
call_count += 1;
@ -301,7 +299,7 @@ fn test_parsing_with_an_operation_limit() {
call_count = 0;
parser.set_operation_limit(20);
let tree = parser
.parse_utf8(
.parse_with(
&mut |_, _| {
if call_count == 0 {
call_count += 1;
@ -325,27 +323,27 @@ fn test_parsing_with_a_reset_after_reaching_an_operation_limit() {
parser.set_language(get_language("json")).unwrap();
parser.set_operation_limit(3);
let tree = parser.parse_str("[1234, 5, 6, 7, 8]", None);
let tree = parser.parse("[1234, 5, 6, 7, 8]", None);
assert!(tree.is_none());
// Without calling reset, the parser continues from where it left off, so
// it does not see the changes to the beginning of the source code.
parser.set_operation_limit(usize::MAX);
let tree = parser.parse_str("[null, 5, 6, 4, 5]", None).unwrap();
let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(value (array (number) (number) (number) (number) (number)))"
);
parser.set_operation_limit(3);
let tree = parser.parse_str("[1234, 5, 6, 7, 8]", None);
let tree = parser.parse("[1234, 5, 6, 7, 8]", None);
assert!(tree.is_none());
// By calling reset, we force the parser to start over from scratch so
// that it sees the changes to the beginning of the source code.
parser.set_operation_limit(usize::MAX);
parser.reset();
let tree = parser.parse_str("[null, 5, 6, 4, 5]", None).unwrap();
let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(value (array (null) (number) (number) (number) (number)))"
@ -360,13 +358,13 @@ fn test_parsing_with_one_included_range() {
let mut parser = Parser::new();
parser.set_language(get_language("html")).unwrap();
let html_tree = parser.parse_str(source_code, None).unwrap();
let html_tree = parser.parse(source_code, None).unwrap();
let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap();
assert_eq!(script_content_node.kind(), "raw_text");
parser.set_included_ranges(&[script_content_node.range()]);
parser.set_language(get_language("javascript")).unwrap();
let js_tree = parser.parse_str(source_code, None).unwrap();
let js_tree = parser.parse(source_code, None).unwrap();
assert_eq!(
js_tree.root_node().to_sexp(),
@ -388,7 +386,7 @@ fn test_parsing_with_multiple_included_ranges() {
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
let js_tree = parser.parse_str(source_code, None).unwrap();
let js_tree = parser.parse(source_code, None).unwrap();
let template_string_node = js_tree
.root_node()
.descendant_for_byte_range(
@ -424,7 +422,7 @@ fn test_parsing_with_multiple_included_ranges() {
end_point: close_quote_node.start_position(),
},
]);
let html_tree = parser.parse_str(source_code, None).unwrap();
let html_tree = parser.parse(source_code, None).unwrap();
assert_eq!(
html_tree.root_node().to_sexp(),
@ -488,9 +486,7 @@ fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() {
start_point: Point::new(0, start_byte),
end_point: Point::new(0, end_byte),
}]);
let tree = parser
.parse_utf16(&mut |i, _| &utf16_source_code[i..], None)
.unwrap();
let tree = parser.parse_utf16(&utf16_source_code, None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))");
}
@ -519,7 +515,7 @@ fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() {
},
]);
let tree = parser.parse_str(source_code, None).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let root = tree.root_node();
let statement1 = root.child(0).unwrap();
let statement2 = root.child(1).unwrap();
@ -546,7 +542,7 @@ fn test_parsing_with_a_newly_excluded_range() {
// Parse HTML including the template directive, which will cause an error
let mut parser = Parser::new();
parser.set_language(get_language("html")).unwrap();
let mut first_tree = parser.parse_str(&source_code, None).unwrap();
let mut first_tree = parser.parse(&source_code, None).unwrap();
// Insert code at the beginning of the document.
let prefix = "a very very long line of plain text. ";
@ -579,7 +575,7 @@ fn test_parsing_with_a_newly_excluded_range() {
end_point: Point::new(0, source_code_end),
},
]);
let tree = parser.parse_str(&source_code, Some(&first_tree)).unwrap();
let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -640,7 +636,7 @@ fn test_parsing_with_a_newly_included_range() {
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
parser.set_included_ranges(&ranges[0..1]);
let first_tree = parser.parse_str(source_code, None).unwrap();
let first_tree = parser.parse(source_code, None).unwrap();
assert_eq!(
first_tree.root_node().to_sexp(),
concat!(
@ -651,7 +647,7 @@ fn test_parsing_with_a_newly_included_range() {
// Parse both the code directives as JavaScript, using the old tree as a reference.
parser.set_included_ranges(&ranges);
let tree = parser.parse_str(&source_code, Some(&first_tree)).unwrap();
let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
concat!(
@ -721,7 +717,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
},
]);
let tree = parser.parse_str(source_code, None).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let root = tree.root_node();
assert_eq!(
root.to_sexp(),

View file

@ -38,7 +38,7 @@ fn test_walk_with_properties_with_nth_child() {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse_str(source_code, None).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let mut cursor = tree.walk_with_properties(&property_sheet, source_code);
assert_eq!(cursor.node().kind(), "program");
@ -96,7 +96,7 @@ fn test_walk_with_properties_with_regexes() {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse_str(source_code, None).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let mut cursor = tree.walk_with_properties(&property_sheet, source_code);
assert_eq!(cursor.node().kind(), "program");

View file

@ -7,7 +7,7 @@ use tree_sitter::{InputEdit, Parser, Point, Range, Tree};
fn test_tree_edit() {
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
let tree = parser.parse_str(" abc !== def", None).unwrap();
let tree = parser.parse(" abc !== def", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -194,7 +194,7 @@ fn test_tree_walk() {
parser.set_language(get_language("rust")).unwrap();
let tree = parser
.parse_str(
.parse(
"
struct Stuff {
a: A;
@ -228,7 +228,7 @@ fn test_tree_walk() {
fn test_tree_node_equality() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
let tree = parser.parse_str("struct A {}", None).unwrap();
let tree = parser.parse("struct A {}", None).unwrap();
let node1 = tree.root_node();
let node2 = tree.root_node();
assert_eq!(node1, node2);
@ -242,9 +242,7 @@ fn test_get_changed_ranges() {
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
let tree = parser
.parse_utf8(&mut |i, _| &source_code[i..], None)
.unwrap();
let tree = parser.parse(&source_code, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -370,9 +368,7 @@ fn get_changed_ranges(
edit: Edit,
) -> Vec<Range> {
perform_edit(tree, source_code, &edit);
let new_tree = parser
.parse_utf8(&mut |i, _| &source_code[i..], Some(tree))
.unwrap();
let new_tree = parser.parse(&source_code, Some(tree)).unwrap();
let result = tree.changed_ranges(&new_tree);
*tree = new_tree;
result

View file

@ -34,7 +34,7 @@ Now you can parse source code:
```rust
let source_code = "fn test() {}";
let tree = parser.parse_str(source_code, None);
let tree = parser.parse(source_code, None);
let root_node = tree.root_node();
assert_eq!(root_node.kind(), "source_file");
@ -58,12 +58,12 @@ tree.edit(InputEdit {
new_end_position: Point::new(0, 14),
});
let new_tree = parser.parse_str(new_source_code, Some(&tree));
let new_tree = parser.parse(new_source_code, Some(&tree));
```
### Text Input
The source code to parse can be provided either as a string or as a function that returns text encoded as either UTF8 or UTF16:
The source code to parse can be provided as a string, a slice, a vector, or a function that returns a slice. The text can be encoded as either UTF8 or UTF16:
```rust
// Store some source code in an array of lines.
@ -75,7 +75,7 @@ let lines = &[
// Parse the source code using a custom callback. The callback is called
// with both a byte offset and a row/column offset.
let tree = parser.parse_utf8(&mut |_byte: u32, position: Point| -> &[u8] {
let tree = parser.parse_with(&mut |_byte: u32, position: Point| -> &[u8] {
let row = position.row as usize;
let column = position.column as usize;
if row < lines.len() {

View file

@ -14,7 +14,6 @@ use serde::de::DeserializeOwned;
use std::collections::HashMap;
use std::ffi::CStr;
use std::fmt;
use std::io::{self, Read, Seek};
use std::marker::PhantomData;
use std::os::raw::{c_char, c_void};
use std::ptr;
@ -226,114 +225,35 @@ impl Parser {
unsafe { ffi::ts_parser_print_dot_graphs(self.0, -1) }
}
pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option<Tree> {
let bytes = input.as_bytes();
self.parse_utf8(
&mut |offset, _| {
if offset < bytes.len() {
&bytes[offset..]
} else {
&[]
}
},
old_tree,
)
/// Parse UTF-8 source text held entirely in memory.
///
/// `input` may be anything that can be viewed as a byte slice (for example
/// `&str`, `String`, `&[u8]` or `Vec<u8>`). `old_tree` is forwarded unchanged
/// to [`Parser::parse_with`], which performs the actual parse; the return
/// value is whatever `parse_with` returns.
pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
    let bytes = input.as_ref();
    // The read callback can be asked for an offset beyond the end of the
    // input. Slicing with `&bytes[i..]` would panic in that case, so hand
    // back an empty slice instead to signal "no more input".
    self.parse_with(&mut |i, _| bytes.get(i..).unwrap_or(&[]), old_tree)
}
pub fn parse_utf8<'a, T: FnMut(usize, Point) -> &'a [u8]>(
/// Parse UTF-16 source text held entirely in memory.
///
/// `input` may be anything that can be viewed as a slice of `u16` code
/// units. `old_tree` is forwarded unchanged to
/// [`Parser::parse_utf16_with`], which performs the actual parse; the
/// return value is whatever `parse_utf16_with` returns.
pub fn parse_utf16(
    &mut self,
    input: impl AsRef<[u16]>,
    old_tree: Option<&Tree>,
) -> Option<Tree> {
    let code_points = input.as_ref();
    // `i` is an index in `u16` units (`parse_utf16_with` halves the byte
    // offset before invoking the callback). It can lie beyond the end of
    // the input, where `&code_points[i..]` would panic, so return an empty
    // slice instead to signal "no more input".
    self.parse_utf16_with(&mut |i, _| code_points.get(i..).unwrap_or(&[]), old_tree)
}
pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>(
&mut self,
input: &mut T,
old_tree: Option<&Tree>,
) -> Option<Tree> {
self.parse_utf8_ptr(
&mut |byte, position| {
let slice = input(byte, position);
(slice.as_ptr(), slice.len())
},
old_tree,
)
}
pub fn parse_utf16<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>(
&mut self,
input: &mut T,
old_tree: Option<&Tree>,
) -> Option<Tree> {
self.parse_utf16_ptr(
&mut |byte, position| {
let slice = input(byte / 2, position);
(slice.as_ptr(), slice.len())
},
old_tree,
)
}
pub fn parse_utf8_io(
&mut self,
mut input: impl Read + Seek,
old_tree: Option<&Tree>,
) -> io::Result<Option<Tree>> {
let mut error = None;
let mut current_offset = 0;
let mut buffer = [0; 10 * 1024];
let result = self.parse_utf8_ptr(
&mut |byte, _| {
if byte as u64 != current_offset {
current_offset = byte as u64;
if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) {
error = Some(e);
return (ptr::null(), 0);
}
}
match input.read(&mut buffer) {
Err(e) => {
error = Some(e);
(ptr::null(), 0)
}
Ok(length) => (buffer.as_ptr(), length),
}
},
old_tree,
);
match error {
Some(e) => Err(e),
None => Ok(result),
}
}
pub fn reset(&mut self) {
unsafe { ffi::ts_parser_reset(self.0) }
}
pub fn set_operation_limit(&mut self, limit: usize) {
unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) }
}
pub fn set_included_ranges(&mut self, ranges: &[Range]) {
let ts_ranges: Vec<ffi::TSRange> =
ranges.iter().cloned().map(|range| range.into()).collect();
unsafe {
ffi::ts_parser_set_included_ranges(self.0, ts_ranges.as_ptr(), ts_ranges.len() as u32)
};
}
fn parse_utf8_ptr<T: FnMut(usize, Point) -> (*const u8, usize)>(
&mut self,
input: &mut T,
old_tree: Option<&Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<T: FnMut(usize, Point) -> (*const u8, usize)>(
unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u8]>(
payload: *mut c_void,
byte_offset: u32,
position: ffi::TSPoint,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let (ptr, length) = (*input)(byte_offset as usize, position.into());
*bytes_read = length as u32;
return ptr as *const c_char;
let slice = input(byte_offset as usize, position.into());
*bytes_read = slice.len() as u32;
return slice.as_ptr() as *const c_char;
};
let c_input = ffi::TSInput {
@ -351,27 +271,27 @@ impl Parser {
}
}
fn parse_utf16_ptr<T: FnMut(usize, Point) -> (*const u16, usize)>(
pub fn parse_utf16_with<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>(
&mut self,
input: &mut T,
old_tree: Option<&Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<T: FnMut(usize, Point) -> (*const u16, usize)>(
unsafe extern "C" fn read<'a, T: FnMut(usize, Point) -> &'a [u16]>(
payload: *mut c_void,
byte_offset: u32,
position: ffi::TSPoint,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let (ptr, length) = (*input)(
byte_offset as usize,
let slice = input(
(byte_offset / 2) as usize,
Point {
row: position.row as usize,
column: position.column as usize / 2,
},
);
*bytes_read = length as u32 * 2;
ptr as *const c_char
*bytes_read = slice.len() as u32 * 2;
slice.as_ptr() as *const c_char
};
let c_input = ffi::TSInput {
@ -388,6 +308,22 @@ impl Parser {
Some(Tree(c_new_tree))
}
}
/// Reset the parser by calling `ffi::ts_parser_reset` on the underlying
/// parser handle.
///
/// NOTE(review): the tests in this change describe the effect as forcing the
/// parser to start over from scratch after a parse that stopped early,
/// rather than resuming where it left off; confirm against the C API docs.
pub fn reset(&mut self) {
unsafe { ffi::ts_parser_reset(self.0) }
}
/// Pass `limit` to `ffi::ts_parser_set_operation_limit` on the underlying
/// parser handle.
///
/// NOTE(review): the tests in this change expect a parse to return `None`
/// when the limit is reached, and use `usize::MAX` to lift the limit again;
/// the exact definition of an "operation" is not visible here.
pub fn set_operation_limit(&mut self, limit: usize) {
unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) }
}
/// Hand the given ranges to the underlying parser via
/// `ffi::ts_parser_set_included_ranges`.
///
/// Each `Range` is converted into its `ffi::TSRange` counterpart first; the
/// converted ranges live in a temporary vector for the duration of the call.
pub fn set_included_ranges(&mut self, ranges: &[Range]) {
    // Build the C-compatible array the FFI function expects.
    let c_ranges: Vec<ffi::TSRange> = ranges.iter().map(|r| r.clone().into()).collect();
    let count = c_ranges.len() as u32;
    unsafe {
        ffi::ts_parser_set_included_ranges(self.0, c_ranges.as_ptr(), count)
    };
}
}
impl Drop for Parser {