Update to latest tree-sitter API

This commit is contained in:
Max Brunsfeld 2018-06-19 16:19:37 -07:00
parent 16a7366ec7
commit 5efc28f2f3
5 changed files with 175 additions and 189 deletions

View file

@ -12,6 +12,10 @@ Rust bindings to the [Tree-sitter][] parsing library.
First, create a parser:
```rust
use tree_sitter::{Parser, Language};
// ...
let parser = Parser::new();
```
@ -22,16 +26,17 @@ extern "C" fn tree_sitter_c() -> Language;
extern "C" fn tree_sitter_rust() -> Language;
extern "C" fn tree_sitter_javascript() -> Language;
parser.set_language(unsafe { tree_sitter_rust() }).unwrap();
let language = unsafe { tree_sitter_rust() };
parser.set_language(language).unwrap();
```
Now you can parse source code:
```rust
let source_code = "fn test() {}";
let tree = parser.parse_str(source_code, None);
let root_node = tree.root_node();
assert_eq!(root_node.kind(), "source_file");
assert_eq!(root_node.start_position().column, 0);
assert_eq!(root_node.end_position().column, 12);
@ -39,7 +44,7 @@ assert_eq!(root_node.end_position().column, 12);
### Editing
Once you have a syntax tree, you can update it when your source code changes:
Once you have a syntax tree, you can update it when your source code changes. Passing in the previously edited tree makes `parse` run much more quickly:
```rust
let new_source_code = "fn test(a: u32) {}"
@ -52,49 +57,42 @@ tree.edit(InputEdit {
old_end_position: Point::new(0, 8),
new_end_position: Point::new(0, 14),
});
let new_tree = parser.parse_str(new_source_code, Some(&tree));
```
### Text Input
The code can be provided either as a simple string or by any type that implements Tree-sitter's `Utf8Input` or `Utf16Input` traits:
The source code to parse can be provided either as a string or as a function that returns text encoded as either UTF-8 or UTF-16:
```rust
struct LineWiseInput {
lines: &'static [&'static str],
row: usize,
column: usize,
}
// Store some source code in an array of lines.
let lines = &[
"pub fn foo() {",
" 1",
"}",
];
impl tree_sitter::Utf8Input for LineWiseInput {
fn read(&mut self) -> &[u8] {
if self.row < self.lines.len() {
let result = &self.lines[self.row].as_bytes()[self.column..];
self.row += 1;
self.column = 0;
result
// Parse the source code using a custom callback. The callback is called
// with both a byte offset and a row/column offset.
let tree = parser.parse_utf8(&mut |_byte: u32, position: Point| -> &[u8] {
let row = position.row as usize;
let column = position.column as usize;
if row < lines.len() {
if column < lines[row].as_bytes().len() {
&lines[row].as_bytes()[column..]
} else {
&[]
"\n".as_bytes()
}
} else {
&[]
}
}, None).unwrap();
fn seek(&mut self, _byte: u32, position: Point) {
self.row = position.row as usize;
self.column = position.column as usize;
}
}
let mut input = LineBasedInput {
lines: &[
"pub fn main() {",
"}",
],
row: 0,
column: 0
};
let tree = parser.parse_utf8(&mut input, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"
);
```
[tree-sitter]: https://github.com/tree-sitter/tree-sitter

View file

@ -22,7 +22,6 @@ fn main() {
"node.c",
"parser.c",
"stack.c",
"string_input.c",
"subtree.c",
"tree_cursor.c",
"tree.c",

View file

@ -41,15 +41,12 @@ pub struct TSRange {
pub struct TSInput {
pub payload: *mut ::std::os::raw::c_void,
pub read: ::std::option::Option<
unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32)
-> *const ::std::os::raw::c_char,
>,
pub seek: ::std::option::Option<
unsafe extern "C" fn(
payload: *mut ::std::os::raw::c_void,
byte_index: u32,
position: TSPoint,
) -> ::std::os::raw::c_int,
bytes_read: *mut u32,
) -> *const ::std::os::raw::c_char,
>,
pub encoding: TSInputEncoding,
}
@ -127,6 +124,21 @@ extern "C" {
arg4: u32,
) -> *mut TSTree;
}
// Parser control entry points exported by the vendored tree-sitter C library.
// Only the signatures are declared here; the behavior of each function is
// defined by the C implementation and is not visible from this file.
extern "C" {
    /// Getter paired with `ts_parser_set_enabled`; takes the parser by const pointer.
    pub fn ts_parser_enabled(arg1: *const TSParser) -> bool;

    /// Setter paired with `ts_parser_enabled`; takes the parser by mutable pointer.
    pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool);

    /// Getter paired with `ts_parser_set_operation_limit`.
    pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize;

    /// Setter paired with `ts_parser_operation_limit`.
    pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize);

    /// NOTE(review): presumably discards in-progress parse state; confirm
    /// against the C header before relying on it.
    pub fn ts_parser_reset(arg1: *mut TSParser);
}
extern "C" {
pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree;
}

View file

@ -3,21 +3,11 @@ mod ffi;
use std::fmt;
use std::ffi::CStr;
use std::marker::PhantomData;
use std::os::raw::{c_char, c_int, c_void};
use std::os::raw::{c_char, c_void};
use std::ptr;
pub type Language = *const ffi::TSLanguage;
pub trait Utf16Input {
fn read(&mut self) -> &[u16];
fn seek(&mut self, u32, Point);
}
pub trait Utf8Input {
fn read(&mut self) -> &[u8];
fn seek(&mut self, u32, Point);
}
#[derive(Debug, PartialEq, Eq)]
pub enum LogType {
Parse,
@ -50,11 +40,6 @@ pub struct Tree(*mut ffi::TSTree);
pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
struct FlatInput<'a> {
bytes: &'a [u8],
offset: usize,
}
impl Parser {
pub fn new() -> Parser {
unsafe {
@ -124,105 +109,86 @@ impl Parser {
}
pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option<Tree> {
let mut input = FlatInput { bytes: input.as_bytes(), offset: 0};
self.parse_utf8(&mut input, old_tree)
let bytes = input.as_bytes();
self.parse_utf8(&mut |offset, _| &bytes[(offset as usize)..], old_tree)
}
pub fn parse_utf8<T: Utf8Input>(
pub fn parse_utf8<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>(
&mut self,
input: &mut T,
old_tree: Option<&Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<T: Utf8Input>(
unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>(
payload: *mut c_void,
byte_offset: u32,
position: ffi::TSPoint,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let result = input.read();
let result = (*input)(byte_offset, position.into());
*bytes_read = result.len() as u32;
return result.as_ptr() as *const c_char;
};
unsafe extern "C" fn seek<T: Utf8Input>(
payload: *mut c_void,
byte: u32,
position: ffi::TSPoint,
) -> c_int {
let input = (payload as *mut T).as_mut().unwrap();
input.seek(
byte,
Point {
row: position.row,
column: position.column,
},
);
return 1;
};
let c_input = ffi::TSInput {
payload: input as *mut T as *mut c_void,
read: Some(read::<T>),
seek: Some(seek::<T>),
read: Some(read::<'a, T>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
};
let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0);
let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0);
let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) };
if new_tree_ptr.is_null() {
let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) };
if c_new_tree.is_null() {
None
} else {
Some(Tree(new_tree_ptr))
Some(Tree(c_new_tree))
}
}
pub fn parse_utf16<T: Utf16Input>(
pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>(
&mut self,
input: &mut T,
old_tree: Option<&Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<T: Utf16Input>(
unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>(
payload: *mut c_void,
byte_offset: u32,
position: ffi::TSPoint,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let result = input.read();
let result = (*input)(byte_offset, Point {
row: position.row,
column: position.column / 2,
});
*bytes_read = result.len() as u32 * 2;
return result.as_ptr() as *const c_char;
};
unsafe extern "C" fn seek<T: Utf16Input>(
payload: *mut c_void,
byte: u32,
position: ffi::TSPoint,
) -> c_int {
let input = (payload as *mut T).as_mut().unwrap();
input.seek(
byte / 2,
Point {
row: position.row,
column: position.column / 2,
},
);
return 1;
};
let c_input = ffi::TSInput {
payload: input as *mut T as *mut c_void,
read: Some(read::<T>),
seek: Some(seek::<T>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
read: Some(read::<'a, T>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF16,
};
let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0);
let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0);
let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) };
if new_tree_ptr.is_null() {
let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) };
if c_new_tree.is_null() {
None
} else {
Some(Tree(new_tree_ptr))
Some(Tree(c_new_tree))
}
}
/// Calls the C library's `ts_parser_reset` on the parser handle wrapped by
/// this `Parser`.
pub fn reset(&mut self) {
    let raw_parser = self.0;
    // SAFETY: `raw_parser` is the handle stored in this `Parser`; it is
    // assumed to stay valid for the lifetime of `self` (TODO confirm against
    // `Parser::new` / `Drop`).
    unsafe {
        ffi::ts_parser_reset(raw_parser);
    }
}
/// Forwards `limit` to the C library's `ts_parser_set_operation_limit` for
/// the parser handle wrapped by this `Parser`.
///
/// NOTE(review): the unit and effect of the limit are defined by the C
/// library and are not visible here; confirm before documenting further.
pub fn set_operation_limit(&mut self, limit: usize) {
    let raw_parser = self.0;
    // SAFETY: `raw_parser` is the handle stored in this `Parser`; it is
    // assumed to stay valid for the lifetime of `self` (TODO confirm against
    // `Parser::new` / `Drop`).
    unsafe {
        ffi::ts_parser_set_operation_limit(raw_parser, limit);
    }
}
}
impl Drop for Parser {
@ -442,15 +408,12 @@ impl Into<ffi::TSPoint> for Point {
}
}
impl<'a> Utf8Input for FlatInput<'a> {
fn read(&mut self) -> &[u8] {
let result = &self.bytes[self.offset..];
self.offset = self.bytes.len();
result
}
fn seek(&mut self, offset: u32, _position: Point) {
self.offset = offset as usize;
impl From<ffi::TSPoint> for Point {
fn from(point: ffi::TSPoint) -> Self {
Self {
row: point.row,
column: point.column,
}
}
}
@ -536,49 +499,70 @@ mod tests {
#[test]
fn test_custom_utf8_input() {
struct LineBasedInput {
lines: &'static [&'static str],
row: usize,
column: usize,
}
impl Utf8Input for LineBasedInput {
fn read(&mut self) -> &[u8] {
if self.row < self.lines.len() {
let result = &self.lines[self.row].as_bytes()[self.column..];
self.row += 1;
self.column = 0;
result
} else {
&[]
}
}
fn seek(&mut self, _byte: u32, position: Point) {
self.row = position.row as usize;
self.column = position.column as usize;
}
}
let mut parser = Parser::new();
parser.set_language(rust()).unwrap();
let mut input = LineBasedInput {
lines: &[
"pub fn main() {",
"}",
],
row: 0,
column: 0
};
let lines = &[
"pub fn foo() {",
" 1",
"}",
];
let tree = parser.parse_utf8(&mut |_, position| {
let row = position.row as usize;
let column = position.column as usize;
if row < lines.len() {
if column < lines[row].as_bytes().len() {
&lines[row].as_bytes()[column..]
} else {
"\n".as_bytes()
}
} else {
&[]
}
}, None).unwrap();
let tree = parser.parse_utf8(&mut input, None).unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))");
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
let child = root.child(0).unwrap();
assert_eq!(child.kind(), "function_item");
// Parses a small Rust function supplied through a UTF-16 read callback and
// checks the shape of the resulting syntax tree.
#[test]
fn test_custom_utf16_input() {
let mut parser = Parser::new();
parser.set_language(rust()).unwrap();
// Print every log message the parser emits while this test runs.
parser.set_logger(Some(Box::new(|t, message| {
println!("log: {:?} {}", t, message);
})));
// Source text stored one line per entry, each encoded as UTF-16 code units.
// The entries contain no line terminators; the callback below supplies them.
let lines: Vec<Vec<u16>> = [
"pub fn foo() {",
" 1",
"}"
].iter().map(|s| s.encode_utf16().collect()).collect();
// The callback ignores its first argument and uses only the row/column position.
let tree = parser.parse_utf16(&mut |_, position| {
let row = position.row as usize;
let column = position.column as usize;
if row < lines.len() {
if column < lines[row].len() {
// Inside a line: return the rest of that line.
&lines[row][column..]
} else {
// At or past the end of a line: return a single line feed (code unit 10).
&[10]
}
} else {
// Past the last row: return an empty slice.
&[]
}
}, None).unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))");
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
#[test]
@ -595,16 +579,23 @@ mod tests {
#[test]
fn test_editing() {
let mut input = SpyInput {
bytes: "fn test(a: A, c: C) {}".as_bytes(),
offset: 0,
bytes_read: Vec::new(),
};
let mut parser = Parser::new();
parser.set_language(rust()).unwrap();
let mut tree = parser.parse_utf8(&mut input, None).unwrap();
let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes();
let mut input_bytes_read = Vec::new();
let mut tree = parser.parse_utf8(&mut |offset, _| {
let offset = offset as usize;
if offset < input_bytes.len() {
let result = &input_bytes[offset..offset + 1];
input_bytes_read.extend(result.iter());
result
} else {
&[]
}
}, None).unwrap();
let parameters_sexp = tree.root_node()
.named_child(0).unwrap()
.named_child(1).unwrap()
@ -614,9 +605,8 @@ mod tests {
"(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))"
);
input.offset = 0;
input.bytes_read.clear();
input.bytes = "fn test(a: A, b: B, c: C) {}".as_bytes();
input_bytes_read.clear();
input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes();
tree.edit(&InputEdit{
start_byte: 14,
old_end_byte: 14,
@ -626,7 +616,17 @@ mod tests {
new_end_position: Point::new(0, 20),
});
let tree = parser.parse_utf8(&mut input, Some(&tree)).unwrap();
let tree = parser.parse_utf8(&mut |offset, _| {
let offset = offset as usize;
if offset < input_bytes.len() {
let result = &input_bytes[offset..offset + 1];
input_bytes_read.extend(result.iter());
result
} else {
&[]
}
}, Some(&tree)).unwrap();
let parameters_sexp = tree.root_node()
.named_child(0).unwrap()
.named_child(1).unwrap()
@ -636,7 +636,7 @@ mod tests {
"(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))"
);
let retokenized_content = String::from_utf8(input.bytes_read).unwrap();
let retokenized_content = String::from_utf8(input_bytes_read).unwrap();
assert!(retokenized_content.contains("b: B"));
assert!(!retokenized_content.contains("a: A"));
assert!(!retokenized_content.contains("c: C"));
@ -694,27 +694,4 @@ mod tests {
assert_eq!(child_count_differences, &[1, 2, 3, 4]);
}
struct SpyInput {
bytes: &'static [u8],
offset: usize,
bytes_read: Vec<u8>,
}
impl Utf8Input for SpyInput {
fn read(&mut self) -> &[u8] {
if self.offset < self.bytes.len() {
let result = &self.bytes[self.offset..self.offset + 1];
self.bytes_read.extend(result.iter());
self.offset += 1;
result
} else {
&[]
}
}
fn seek(&mut self, byte: u32, _position: Point) {
self.offset = byte as usize;
}
}
}

2
vendor/tree-sitter vendored

@ -1 +1 @@
Subproject commit 78f28b14ce519ba085ab7886c2fc19739f7f7da0
Subproject commit 26ab57a6562aaeb48b579e3ca29eb064925e857c