Merge branch 'master' into wasm-language
This commit is contained in:
commit
f4e2f68f14
161 changed files with 10293 additions and 4253 deletions
|
|
@ -1,12 +1,13 @@
|
|||
# Rust Tree-sitter
|
||||
|
||||
[](https://travis-ci.org/tree-sitter/tree-sitter)
|
||||
[](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master)
|
||||
[](https://crates.io/crates/tree-sitter)
|
||||
[![crates.io badge]][crates.io]
|
||||
|
||||
[crates.io]: https://crates.io/crates/tree-sitter
|
||||
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter.svg?color=%23B48723
|
||||
|
||||
Rust bindings to the [Tree-sitter][] parsing library.
|
||||
|
||||
### Basic Usage
|
||||
## Basic Usage
|
||||
|
||||
First, create a parser:
|
||||
|
||||
|
|
@ -16,22 +17,6 @@ use tree_sitter::{Parser, Language};
|
|||
let mut parser = Parser::new();
|
||||
```
|
||||
|
||||
Tree-sitter languages consist of generated C code. To make sure they're properly compiled and linked, you can create a [build script](https://doc.rust-lang.org/cargo/reference/build-scripts.html) like the following (assuming `tree-sitter-javascript` is in your root directory):
|
||||
|
||||
```rust
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn main() {
|
||||
let dir: PathBuf = ["tree-sitter-javascript", "src"].iter().collect();
|
||||
|
||||
cc::Build::new()
|
||||
.include(&dir)
|
||||
.file(dir.join("parser.c"))
|
||||
.file(dir.join("scanner.c"))
|
||||
.compile("tree-sitter-javascript");
|
||||
}
|
||||
```
|
||||
|
||||
Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`:
|
||||
|
||||
```toml
|
||||
|
|
@ -39,15 +24,18 @@ Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`:
|
|||
cc="*"
|
||||
```
|
||||
|
||||
To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser.
|
||||
Then, add a language as a dependency:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
tree-sitter = "0.20.10"
|
||||
tree-sitter-rust = "0.20.3"
|
||||
```
|
||||
|
||||
To use a language, assign it to the parser.
|
||||
|
||||
```rust
|
||||
extern "C" { fn tree_sitter_c() -> Language; }
|
||||
extern "C" { fn tree_sitter_rust() -> Language; }
|
||||
extern "C" { fn tree_sitter_javascript() -> Language; }
|
||||
|
||||
let language = unsafe { tree_sitter_rust() };
|
||||
parser.set_language(language).unwrap();
|
||||
parser.set_language(tree_sitter_rust::language()).expect("Error loading Rust grammar");
|
||||
```
|
||||
|
||||
Now you can parse source code:
|
||||
|
|
@ -64,7 +52,8 @@ assert_eq!(root_node.end_position().column, 12);
|
|||
|
||||
### Editing
|
||||
|
||||
Once you have a syntax tree, you can update it when your source code changes. Passing in the previous edited tree makes `parse` run much more quickly:
|
||||
Once you have a syntax tree, you can update it when your source code changes.
|
||||
Passing in the previous edited tree makes `parse` run much more quickly:
|
||||
|
||||
```rust
|
||||
let new_source_code = "fn test(a: u32) {}"
|
||||
|
|
@ -83,7 +72,8 @@ let new_tree = parser.parse(new_source_code, Some(&tree));
|
|||
|
||||
### Text Input
|
||||
|
||||
The source code to parse can be provided either as a string, a slice, a vector, or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16:
|
||||
The source code to parse can be provided either as a string, a slice, a vector,
|
||||
or as a function that returns a slice. The text can be encoded as either UTF-8 or UTF-16:
|
||||
|
||||
```rust
|
||||
// Store some source code in an array of lines.
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -17,6 +17,9 @@ fn main() {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "bindgen")]
|
||||
generate_bindings();
|
||||
|
||||
let mut config = cc::Build::new();
|
||||
|
||||
println!("cargo:rerun-if-env-changed=CARGO_FEATURE_WASM");
|
||||
|
|
@ -33,7 +36,8 @@ fn main() {
|
|||
|
||||
config
|
||||
.flag_if_supported("-std=c99")
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-fvisibility=hidden")
|
||||
.flag_if_supported("-Wshadow")
|
||||
.include(src_path)
|
||||
.include(src_path.join("wasm"))
|
||||
.include("include")
|
||||
|
|
@ -41,6 +45,45 @@ fn main() {
|
|||
.compile("tree-sitter");
|
||||
}
|
||||
|
||||
/// Generates Rust FFI bindings from the C header at `include/tree_sitter/api.h`
/// and writes them to `$OUT_DIR/bindings.rs`.
///
/// Only compiled when the `bindgen` feature is enabled.
///
/// # Panics
///
/// Panics if binding generation fails, if the `OUT_DIR` environment variable
/// is not set, or if the output file cannot be written.
#[cfg(feature = "bindgen")]
|
||||
fn generate_bindings() {
|
||||
const HEADER_PATH: &str = "include/tree_sitter/api.h";
|
||||
|
||||
// Ask Cargo to re-run this build script whenever the header changes.
println!("cargo:rerun-if-changed={}", HEADER_PATH);
|
||||
|
||||
// Type names passed to bindgen's `no_copy` option below.
let no_copy = [
|
||||
"TSInput",
|
||||
"TSLanguage",
|
||||
"TSLogger",
|
||||
"TSLookaheadIterator",
|
||||
"TSParser",
|
||||
"TSTree",
|
||||
"TSQuery",
|
||||
"TSQueryCursor",
|
||||
"TSQueryCapture",
|
||||
"TSQueryMatch",
|
||||
"TSQueryPredicateStep",
|
||||
];
|
||||
|
||||
// Restrict the generated items to `TS*` types, `ts_*` functions and
// `TREE_SITTER*` variables.
let bindings = bindgen::Builder::default()
|
||||
.header(HEADER_PATH)
|
||||
.layout_tests(false)
|
||||
.allowlist_type("^TS.*")
|
||||
.allowlist_function("^ts_.*")
|
||||
.allowlist_var("^TREE_SITTER.*")
|
||||
.no_copy(no_copy.join("|")) // names joined with `|` into a single alternation pattern
|
||||
.prepend_enum_name(false)
|
||||
.generate()
|
||||
.expect("Failed to generate bindings");
|
||||
|
||||
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
||||
let bindings_rs = out_dir.join("bindings.rs");
|
||||
|
||||
// NOTE(review): `expect(&*format!(..))` formats the message even when the
// write succeeds; `unwrap_or_else(|_| panic!(..))` would defer that work.
bindings.write_to_file(&bindings_rs).expect(&*format!(
|
||||
"Failed to write bindings into path: {bindings_rs:?}"
|
||||
));
|
||||
}
|
||||
|
||||
fn which(exe_name: impl AsRef<Path>) -> Option<PathBuf> {
|
||||
env::var_os("PATH").and_then(|paths| {
|
||||
env::split_paths(&paths).find_map(|dir| {
|
||||
|
|
|
|||
|
|
@ -2,8 +2,153 @@
|
|||
#![allow(non_upper_case_globals)]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
#[cfg(feature = "bindgen")]
|
||||
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||
|
||||
#[cfg(not(feature = "bindgen"))]
|
||||
include!("./bindings.rs");
|
||||
|
||||
// Foreign declaration of a C function named `dup` that takes and returns a C `int`.
// NOTE(review): presumably this is POSIX `dup(2)` (file-descriptor duplication) — confirm.
extern "C" {
|
||||
pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int;
|
||||
}
|
||||
|
||||
use crate::{
|
||||
Language, LookaheadIterator, Node, Parser, Query, QueryCursor, QueryError, Tree, TreeCursor,
|
||||
};
|
||||
use std::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, str};
|
||||
|
||||
// Conversions between [`Language`] and the raw `*const TSLanguage` pointer it wraps.
impl Language {
|
||||
/// Reconstructs a [`Language`] by wrapping `ptr` directly; no check is performed.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `ptr` must be non-null.
|
||||
pub unsafe fn from_raw(ptr: *const TSLanguage) -> Language {
|
||||
Language(ptr)
|
||||
}
|
||||
|
||||
/// Consumes the [`Language`], returning the raw pointer to the underlying C structure.
/// The value is wrapped in [`ManuallyDrop`] first, so no destructor runs for it here.
|
||||
pub fn into_raw(self) -> *const TSLanguage {
|
||||
ManuallyDrop::new(self).0
|
||||
}
|
||||
}
|
||||
|
||||
// Conversions between [`Parser`] and the raw `*mut TSParser` pointer it wraps.
impl Parser {
|
||||
/// Reconstructs a [`Parser`] from a raw pointer.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `ptr` must be non-null: it is passed to `NonNull::new_unchecked` without a check.
|
||||
pub unsafe fn from_raw(ptr: *mut TSParser) -> Parser {
|
||||
Parser(NonNull::new_unchecked(ptr))
|
||||
}
|
||||
|
||||
/// Consumes the [`Parser`], returning a raw pointer to the underlying C structure.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// It is the caller's responsibility to adjust the parser's state first,
|
||||
/// for example by disabling logging or dot-graph printing, if leaving it
|
||||
/// enabled could cause issues such as a use-after-free.
|
||||
pub fn into_raw(self) -> *mut TSParser {
|
||||
ManuallyDrop::new(self).0.as_ptr()
|
||||
}
|
||||
}
|
||||
|
||||
// Conversions between [`Tree`] and the raw `*mut TSTree` pointer it wraps.
impl Tree {
|
||||
/// Reconstructs a [`Tree`] from a raw pointer.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `ptr` must be non-null: it is passed to `NonNull::new_unchecked` without a check.
|
||||
pub unsafe fn from_raw(ptr: *mut TSTree) -> Tree {
|
||||
Tree(NonNull::new_unchecked(ptr))
|
||||
}
|
||||
|
||||
/// Consumes the [`Tree`], returning a raw pointer to the underlying C structure.
/// The value is wrapped in [`ManuallyDrop`] first, so no destructor runs for it here.
|
||||
pub fn into_raw(self) -> *mut TSTree {
|
||||
ManuallyDrop::new(self).0.as_ptr()
|
||||
}
|
||||
}
|
||||
|
||||
// Conversions between [`Node`] and the raw `TSNode` value it wraps.
impl<'tree> Node<'tree> {
|
||||
/// Reconstructs a [`Node`] from a raw `TSNode` value.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// NOTE(review): `raw` is a `TSNode` value, not a pointer; presumably it must belong to a tree that outlives `'tree` — confirm.
|
||||
pub unsafe fn from_raw(raw: TSNode) -> Node<'tree> {
|
||||
Node(raw, PhantomData)
|
||||
}
|
||||
|
||||
/// Consumes the [`Node`], returning the underlying raw `TSNode` value.
|
||||
pub fn into_raw(self) -> TSNode {
|
||||
ManuallyDrop::new(self).0
|
||||
}
|
||||
}
|
||||
|
||||
// Conversions between [`TreeCursor`] and the raw `TSTreeCursor` value it wraps.
impl<'a> TreeCursor<'a> {
|
||||
/// Reconstructs a [`TreeCursor`] from a raw `TSTreeCursor` value.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// NOTE(review): `raw` is a `TSTreeCursor` value, not a pointer; presumably it must refer to a tree that outlives `'a` — confirm.
|
||||
pub unsafe fn from_raw(raw: TSTreeCursor) -> TreeCursor<'a> {
|
||||
TreeCursor(raw, PhantomData)
|
||||
}
|
||||
|
||||
/// Consumes the [`TreeCursor`], returning the underlying raw `TSTreeCursor` value.
/// The value is wrapped in [`ManuallyDrop`] first, so no destructor runs for it here.
|
||||
pub fn into_raw(self) -> TSTreeCursor {
|
||||
ManuallyDrop::new(self).0
|
||||
}
|
||||
}
|
||||
|
||||
// Conversions between [`Query`] and the raw `*mut TSQuery` pointer it wraps.
impl Query {
|
||||
/// Reconstructs a [`Query`] from a raw pointer and the query's `source` text.
/// Delegates to `Query::from_raw_parts` and returns its `Result` unchanged.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `ptr` must be non-null.
|
||||
pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result<Query, QueryError> {
|
||||
Query::from_raw_parts(ptr, source)
|
||||
}
|
||||
|
||||
/// Consumes the [`Query`], returning a raw pointer to the underlying C structure.
/// The value is wrapped in [`ManuallyDrop`] first, so no destructor runs for it here.
|
||||
pub fn into_raw(self) -> *mut TSQuery {
|
||||
ManuallyDrop::new(self).ptr.as_ptr()
|
||||
}
|
||||
}
|
||||
|
||||
// Conversions between [`QueryCursor`] and the raw `*mut TSQueryCursor` pointer it wraps.
impl QueryCursor {
|
||||
/// Reconstructs a [`QueryCursor`] from a raw pointer.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `ptr` must be non-null: it is passed to `NonNull::new_unchecked` without a check.
|
||||
pub unsafe fn from_raw(ptr: *mut TSQueryCursor) -> QueryCursor {
|
||||
QueryCursor {
|
||||
ptr: NonNull::new_unchecked(ptr),
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes the [`QueryCursor`], returning a raw pointer to the underlying C structure.
/// The value is wrapped in [`ManuallyDrop`] first, so no destructor runs for it here.
|
||||
pub fn into_raw(self) -> *mut TSQueryCursor {
|
||||
ManuallyDrop::new(self).ptr.as_ptr()
|
||||
}
|
||||
}
|
||||
|
||||
// Conversions between [`LookaheadIterator`] and the raw `*mut TSLookaheadIterator` pointer it wraps.
impl LookaheadIterator {
|
||||
/// Reconstructs a [`LookaheadIterator`] from a raw pointer.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `ptr` must be non-null: it is passed to `NonNull::new_unchecked` without a check.
|
||||
pub unsafe fn from_raw(ptr: *mut TSLookaheadIterator) -> LookaheadIterator {
|
||||
LookaheadIterator(NonNull::new_unchecked(ptr))
|
||||
}
|
||||
|
||||
/// Consumes the [`LookaheadIterator`], returning a raw pointer to the underlying C structure.
/// The value is wrapped in [`ManuallyDrop`] first, so no destructor runs for it here.
|
||||
pub fn into_raw(self) -> *mut TSLookaheadIterator {
|
||||
ManuallyDrop::new(self).0.as_ptr()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -37,6 +37,8 @@ impl<T: Copy> ExactSizeIterator for CBufferIter<T> {}
|
|||
|
||||
// Releases the iterator's buffer by passing `self.ptr` to `FREE_FN` on drop.
//
// NOTE(review): this text is a rendered diff hunk (`-37,6 +37,8`). The first,
// unconditional `FREE_FN` call appears to be the removed line and the
// null-guarded call its replacement — confirm against the real file. Taken
// literally, both calls would run and the pointer would be freed twice.
impl<T> Drop for CBufferIter<T> {
|
||||
fn drop(&mut self) {
|
||||
unsafe { (FREE_FN)(self.ptr as *mut c_void) };
|
||||
// Skip the free entirely when the pointer is null.
if !self.ptr.is_null() {
|
||||
unsafe { (FREE_FN)(self.ptr as *mut c_void) };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue