Update to latest tree-sitter API
This commit is contained in:
parent
16a7366ec7
commit
5efc28f2f3
5 changed files with 175 additions and 189 deletions
66
README.md
66
README.md
|
|
@ -12,6 +12,10 @@ Rust bindings to the [Tree-sitter][] parsing library.
|
|||
First, create a parser:
|
||||
|
||||
```rust
|
||||
use tree_sitter::{Parser, Language};
|
||||
|
||||
// ...
|
||||
|
||||
let parser = Parser::new();
|
||||
```
|
||||
|
||||
|
|
@ -22,16 +26,17 @@ extern "C" fn tree_sitter_c() -> Language;
|
|||
extern "C" fn tree_sitter_rust() -> Language;
|
||||
extern "C" fn tree_sitter_javascript() -> Language;
|
||||
|
||||
parser.set_language(unsafe { tree_sitter_rust() }).unwrap();
|
||||
let language = unsafe { tree_sitter_rust() };
|
||||
parser.set_language(language).unwrap();
|
||||
```
|
||||
|
||||
Now you can parse source code:
|
||||
|
||||
```rust
|
||||
let source_code = "fn test() {}";
|
||||
|
||||
let tree = parser.parse_str(source_code, None);
|
||||
let root_node = tree.root_node();
|
||||
|
||||
assert_eq!(root_node.kind(), "source_file");
|
||||
assert_eq!(root_node.start_position().column, 0);
|
||||
assert_eq!(root_node.end_position().column, 12);
|
||||
|
|
@ -39,7 +44,7 @@ assert_eq!(root_node.end_position().column, 12);
|
|||
|
||||
### Editing
|
||||
|
||||
Once you have a syntax tree, you can update it when your source code changes:
|
||||
Once you have a syntax tree, you can update it when your source code changes. Passing in the previous edited tree makes `parse` run much more quickly:
|
||||
|
||||
```rust
|
||||
let new_source_code = "fn test(a: u32) {}"
|
||||
|
|
@ -52,49 +57,42 @@ tree.edit(InputEdit {
|
|||
old_end_position: Point::new(0, 8),
|
||||
new_end_position: Point::new(0, 14),
|
||||
});
|
||||
|
||||
let new_tree = parser.parse_str(new_source_code, Some(&tree));
|
||||
```
|
||||
|
||||
### Text Input
|
||||
|
||||
|
||||
The code can be provided either as a simple string or by any type that implements Tree-sitter's `Utf8Input` or `Utf16Input` traits:
|
||||
The source code to parse can be provided either as a string or as a function that returns text encoded as either UTF8 or UTF16:
|
||||
|
||||
```rust
|
||||
struct LineWiseInput {
|
||||
lines: &'static [&'static str],
|
||||
row: usize,
|
||||
column: usize,
|
||||
}
|
||||
// Store some source code in an array of lines.
|
||||
let lines = &[
|
||||
"pub fn foo() {",
|
||||
" 1",
|
||||
"}",
|
||||
];
|
||||
|
||||
impl tree_sitter::Utf8Input for LineWiseInput {
|
||||
fn read(&mut self) -> &[u8] {
|
||||
if self.row < self.lines.len() {
|
||||
let result = &self.lines[self.row].as_bytes()[self.column..];
|
||||
self.row += 1;
|
||||
self.column = 0;
|
||||
result
|
||||
// Parse the source code using a custom callback. The callback is called
|
||||
// with both a byte offset and a row/column offset.
|
||||
let tree = parser.parse_utf8(&mut |_byte: u32, position: Point| -> &[u8] {
|
||||
let row = position.row as usize;
|
||||
let column = position.column as usize;
|
||||
if row < lines.len() {
|
||||
if column < lines[row].as_bytes().len() {
|
||||
&lines[row].as_bytes()[column..]
|
||||
} else {
|
||||
&[]
|
||||
"\n".as_bytes()
|
||||
}
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}, None).unwrap();
|
||||
|
||||
fn seek(&mut self, _byte: u32, position: Point) {
|
||||
self.row = position.row as usize;
|
||||
self.column = position.column as usize;
|
||||
}
|
||||
}
|
||||
|
||||
let mut input = LineBasedInput {
|
||||
lines: &[
|
||||
"pub fn main() {",
|
||||
"}",
|
||||
],
|
||||
row: 0,
|
||||
column: 0
|
||||
};
|
||||
|
||||
let tree = parser.parse_utf8(&mut input, None).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"
|
||||
);
|
||||
```
|
||||
|
||||
[tree-sitter]: https://github.com/tree-sitter/tree-sitter
|
||||
|
|
|
|||
1
build.rs
1
build.rs
|
|
@ -22,7 +22,6 @@ fn main() {
|
|||
"node.c",
|
||||
"parser.c",
|
||||
"stack.c",
|
||||
"string_input.c",
|
||||
"subtree.c",
|
||||
"tree_cursor.c",
|
||||
"tree.c",
|
||||
|
|
|
|||
|
|
@ -41,15 +41,12 @@ pub struct TSRange {
|
|||
pub struct TSInput {
|
||||
pub payload: *mut ::std::os::raw::c_void,
|
||||
pub read: ::std::option::Option<
|
||||
unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32)
|
||||
-> *const ::std::os::raw::c_char,
|
||||
>,
|
||||
pub seek: ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
payload: *mut ::std::os::raw::c_void,
|
||||
byte_index: u32,
|
||||
position: TSPoint,
|
||||
) -> ::std::os::raw::c_int,
|
||||
bytes_read: *mut u32,
|
||||
) -> *const ::std::os::raw::c_char,
|
||||
>,
|
||||
pub encoding: TSInputEncoding,
|
||||
}
|
||||
|
|
@ -127,6 +124,21 @@ extern "C" {
|
|||
arg4: u32,
|
||||
) -> *mut TSTree;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_enabled(arg1: *const TSParser) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_reset(arg1: *mut TSParser);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree;
|
||||
}
|
||||
|
|
|
|||
273
src/lib.rs
273
src/lib.rs
|
|
@ -3,21 +3,11 @@ mod ffi;
|
|||
use std::fmt;
|
||||
use std::ffi::CStr;
|
||||
use std::marker::PhantomData;
|
||||
use std::os::raw::{c_char, c_int, c_void};
|
||||
use std::os::raw::{c_char, c_void};
|
||||
use std::ptr;
|
||||
|
||||
pub type Language = *const ffi::TSLanguage;
|
||||
|
||||
pub trait Utf16Input {
|
||||
fn read(&mut self) -> &[u16];
|
||||
fn seek(&mut self, u32, Point);
|
||||
}
|
||||
|
||||
pub trait Utf8Input {
|
||||
fn read(&mut self) -> &[u8];
|
||||
fn seek(&mut self, u32, Point);
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum LogType {
|
||||
Parse,
|
||||
|
|
@ -50,11 +40,6 @@ pub struct Tree(*mut ffi::TSTree);
|
|||
|
||||
pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
|
||||
|
||||
struct FlatInput<'a> {
|
||||
bytes: &'a [u8],
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Parser {
|
||||
unsafe {
|
||||
|
|
@ -124,105 +109,86 @@ impl Parser {
|
|||
}
|
||||
|
||||
pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option<Tree> {
|
||||
let mut input = FlatInput { bytes: input.as_bytes(), offset: 0};
|
||||
self.parse_utf8(&mut input, old_tree)
|
||||
let bytes = input.as_bytes();
|
||||
self.parse_utf8(&mut |offset, _| &bytes[(offset as usize)..], old_tree)
|
||||
}
|
||||
|
||||
pub fn parse_utf8<T: Utf8Input>(
|
||||
pub fn parse_utf8<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>(
|
||||
&mut self,
|
||||
input: &mut T,
|
||||
old_tree: Option<&Tree>,
|
||||
) -> Option<Tree> {
|
||||
unsafe extern "C" fn read<T: Utf8Input>(
|
||||
unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>(
|
||||
payload: *mut c_void,
|
||||
byte_offset: u32,
|
||||
position: ffi::TSPoint,
|
||||
bytes_read: *mut u32,
|
||||
) -> *const c_char {
|
||||
let input = (payload as *mut T).as_mut().unwrap();
|
||||
let result = input.read();
|
||||
let result = (*input)(byte_offset, position.into());
|
||||
*bytes_read = result.len() as u32;
|
||||
return result.as_ptr() as *const c_char;
|
||||
};
|
||||
|
||||
unsafe extern "C" fn seek<T: Utf8Input>(
|
||||
payload: *mut c_void,
|
||||
byte: u32,
|
||||
position: ffi::TSPoint,
|
||||
) -> c_int {
|
||||
let input = (payload as *mut T).as_mut().unwrap();
|
||||
input.seek(
|
||||
byte,
|
||||
Point {
|
||||
row: position.row,
|
||||
column: position.column,
|
||||
},
|
||||
);
|
||||
return 1;
|
||||
};
|
||||
|
||||
let c_input = ffi::TSInput {
|
||||
payload: input as *mut T as *mut c_void,
|
||||
read: Some(read::<T>),
|
||||
seek: Some(seek::<T>),
|
||||
read: Some(read::<'a, T>),
|
||||
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
|
||||
};
|
||||
|
||||
let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0);
|
||||
let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0);
|
||||
|
||||
let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) };
|
||||
if new_tree_ptr.is_null() {
|
||||
let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) };
|
||||
if c_new_tree.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(Tree(new_tree_ptr))
|
||||
Some(Tree(c_new_tree))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_utf16<T: Utf16Input>(
|
||||
pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>(
|
||||
&mut self,
|
||||
input: &mut T,
|
||||
old_tree: Option<&Tree>,
|
||||
) -> Option<Tree> {
|
||||
unsafe extern "C" fn read<T: Utf16Input>(
|
||||
unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>(
|
||||
payload: *mut c_void,
|
||||
byte_offset: u32,
|
||||
position: ffi::TSPoint,
|
||||
bytes_read: *mut u32,
|
||||
) -> *const c_char {
|
||||
let input = (payload as *mut T).as_mut().unwrap();
|
||||
let result = input.read();
|
||||
let result = (*input)(byte_offset, Point {
|
||||
row: position.row,
|
||||
column: position.column / 2,
|
||||
});
|
||||
*bytes_read = result.len() as u32 * 2;
|
||||
return result.as_ptr() as *const c_char;
|
||||
};
|
||||
|
||||
unsafe extern "C" fn seek<T: Utf16Input>(
|
||||
payload: *mut c_void,
|
||||
byte: u32,
|
||||
position: ffi::TSPoint,
|
||||
) -> c_int {
|
||||
let input = (payload as *mut T).as_mut().unwrap();
|
||||
input.seek(
|
||||
byte / 2,
|
||||
Point {
|
||||
row: position.row,
|
||||
column: position.column / 2,
|
||||
},
|
||||
);
|
||||
return 1;
|
||||
};
|
||||
|
||||
let c_input = ffi::TSInput {
|
||||
payload: input as *mut T as *mut c_void,
|
||||
read: Some(read::<T>),
|
||||
seek: Some(seek::<T>),
|
||||
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
|
||||
read: Some(read::<'a, T>),
|
||||
encoding: ffi::TSInputEncoding_TSInputEncodingUTF16,
|
||||
};
|
||||
|
||||
let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0);
|
||||
let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0);
|
||||
|
||||
let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) };
|
||||
if new_tree_ptr.is_null() {
|
||||
let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) };
|
||||
if c_new_tree.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(Tree(new_tree_ptr))
|
||||
Some(Tree(c_new_tree))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset(&mut self) {
|
||||
unsafe { ffi::ts_parser_reset(self.0) }
|
||||
}
|
||||
|
||||
pub fn set_operation_limit(&mut self, limit: usize) {
|
||||
unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Parser {
|
||||
|
|
@ -442,15 +408,12 @@ impl Into<ffi::TSPoint> for Point {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> Utf8Input for FlatInput<'a> {
|
||||
fn read(&mut self) -> &[u8] {
|
||||
let result = &self.bytes[self.offset..];
|
||||
self.offset = self.bytes.len();
|
||||
result
|
||||
}
|
||||
|
||||
fn seek(&mut self, offset: u32, _position: Point) {
|
||||
self.offset = offset as usize;
|
||||
impl From<ffi::TSPoint> for Point {
|
||||
fn from(point: ffi::TSPoint) -> Self {
|
||||
Self {
|
||||
row: point.row,
|
||||
column: point.column,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -536,49 +499,70 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_custom_utf8_input() {
|
||||
struct LineBasedInput {
|
||||
lines: &'static [&'static str],
|
||||
row: usize,
|
||||
column: usize,
|
||||
}
|
||||
|
||||
impl Utf8Input for LineBasedInput {
|
||||
fn read(&mut self) -> &[u8] {
|
||||
if self.row < self.lines.len() {
|
||||
let result = &self.lines[self.row].as_bytes()[self.column..];
|
||||
self.row += 1;
|
||||
self.column = 0;
|
||||
result
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}
|
||||
|
||||
fn seek(&mut self, _byte: u32, position: Point) {
|
||||
self.row = position.row as usize;
|
||||
self.column = position.column as usize;
|
||||
}
|
||||
}
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(rust()).unwrap();
|
||||
|
||||
let mut input = LineBasedInput {
|
||||
lines: &[
|
||||
"pub fn main() {",
|
||||
"}",
|
||||
],
|
||||
row: 0,
|
||||
column: 0
|
||||
};
|
||||
let lines = &[
|
||||
"pub fn foo() {",
|
||||
" 1",
|
||||
"}",
|
||||
];
|
||||
|
||||
let tree = parser.parse_utf8(&mut |_, position| {
|
||||
let row = position.row as usize;
|
||||
let column = position.column as usize;
|
||||
if row < lines.len() {
|
||||
if column < lines[row].as_bytes().len() {
|
||||
&lines[row].as_bytes()[column..]
|
||||
} else {
|
||||
"\n".as_bytes()
|
||||
}
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}, None).unwrap();
|
||||
|
||||
let tree = parser.parse_utf8(&mut input, None).unwrap();
|
||||
let root = tree.root_node();
|
||||
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))");
|
||||
assert_eq!(root.kind(), "source_file");
|
||||
assert_eq!(root.has_error(), false);
|
||||
assert_eq!(root.child(0).unwrap().kind(), "function_item");
|
||||
}
|
||||
|
||||
let child = root.child(0).unwrap();
|
||||
assert_eq!(child.kind(), "function_item");
|
||||
#[test]
|
||||
fn test_custom_utf16_input() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(rust()).unwrap();
|
||||
|
||||
parser.set_logger(Some(Box::new(|t, message| {
|
||||
println!("log: {:?} {}", t, message);
|
||||
})));
|
||||
|
||||
let lines: Vec<Vec<u16>> = [
|
||||
"pub fn foo() {",
|
||||
" 1",
|
||||
"}"
|
||||
].iter().map(|s| s.encode_utf16().collect()).collect();
|
||||
|
||||
let tree = parser.parse_utf16(&mut |_, position| {
|
||||
let row = position.row as usize;
|
||||
let column = position.column as usize;
|
||||
if row < lines.len() {
|
||||
if column < lines[row].len() {
|
||||
&lines[row][column..]
|
||||
} else {
|
||||
&[10]
|
||||
}
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}, None).unwrap();
|
||||
|
||||
let root = tree.root_node();
|
||||
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))");
|
||||
assert_eq!(root.kind(), "source_file");
|
||||
assert_eq!(root.has_error(), false);
|
||||
assert_eq!(root.child(0).unwrap().kind(), "function_item");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -595,16 +579,23 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_editing() {
|
||||
let mut input = SpyInput {
|
||||
bytes: "fn test(a: A, c: C) {}".as_bytes(),
|
||||
offset: 0,
|
||||
bytes_read: Vec::new(),
|
||||
};
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(rust()).unwrap();
|
||||
|
||||
let mut tree = parser.parse_utf8(&mut input, None).unwrap();
|
||||
let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes();
|
||||
let mut input_bytes_read = Vec::new();
|
||||
|
||||
let mut tree = parser.parse_utf8(&mut |offset, _| {
|
||||
let offset = offset as usize;
|
||||
if offset < input_bytes.len() {
|
||||
let result = &input_bytes[offset..offset + 1];
|
||||
input_bytes_read.extend(result.iter());
|
||||
result
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}, None).unwrap();
|
||||
|
||||
let parameters_sexp = tree.root_node()
|
||||
.named_child(0).unwrap()
|
||||
.named_child(1).unwrap()
|
||||
|
|
@ -614,9 +605,8 @@ mod tests {
|
|||
"(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))"
|
||||
);
|
||||
|
||||
input.offset = 0;
|
||||
input.bytes_read.clear();
|
||||
input.bytes = "fn test(a: A, b: B, c: C) {}".as_bytes();
|
||||
input_bytes_read.clear();
|
||||
input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes();
|
||||
tree.edit(&InputEdit{
|
||||
start_byte: 14,
|
||||
old_end_byte: 14,
|
||||
|
|
@ -626,7 +616,17 @@ mod tests {
|
|||
new_end_position: Point::new(0, 20),
|
||||
});
|
||||
|
||||
let tree = parser.parse_utf8(&mut input, Some(&tree)).unwrap();
|
||||
let tree = parser.parse_utf8(&mut |offset, _| {
|
||||
let offset = offset as usize;
|
||||
if offset < input_bytes.len() {
|
||||
let result = &input_bytes[offset..offset + 1];
|
||||
input_bytes_read.extend(result.iter());
|
||||
result
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}, Some(&tree)).unwrap();
|
||||
|
||||
let parameters_sexp = tree.root_node()
|
||||
.named_child(0).unwrap()
|
||||
.named_child(1).unwrap()
|
||||
|
|
@ -636,7 +636,7 @@ mod tests {
|
|||
"(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))"
|
||||
);
|
||||
|
||||
let retokenized_content = String::from_utf8(input.bytes_read).unwrap();
|
||||
let retokenized_content = String::from_utf8(input_bytes_read).unwrap();
|
||||
assert!(retokenized_content.contains("b: B"));
|
||||
assert!(!retokenized_content.contains("a: A"));
|
||||
assert!(!retokenized_content.contains("c: C"));
|
||||
|
|
@ -694,27 +694,4 @@ mod tests {
|
|||
|
||||
assert_eq!(child_count_differences, &[1, 2, 3, 4]);
|
||||
}
|
||||
|
||||
struct SpyInput {
|
||||
bytes: &'static [u8],
|
||||
offset: usize,
|
||||
bytes_read: Vec<u8>,
|
||||
}
|
||||
|
||||
impl Utf8Input for SpyInput {
|
||||
fn read(&mut self) -> &[u8] {
|
||||
if self.offset < self.bytes.len() {
|
||||
let result = &self.bytes[self.offset..self.offset + 1];
|
||||
self.bytes_read.extend(result.iter());
|
||||
self.offset += 1;
|
||||
result
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}
|
||||
|
||||
fn seek(&mut self, byte: u32, _position: Point) {
|
||||
self.offset = byte as usize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
2
vendor/tree-sitter
vendored
2
vendor/tree-sitter
vendored
|
|
@ -1 +1 @@
|
|||
Subproject commit 78f28b14ce519ba085ab7886c2fc19739f7f7da0
|
||||
Subproject commit 26ab57a6562aaeb48b579e3ca29eb064925e857c
|
||||
Loading…
Add table
Add a link
Reference in a new issue