diff --git a/.gitignore b/.gitignore index 53eaa219..0e0e5bae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target **/*.rs.bk +fixtures/tree-sitter-rust diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..eef86f94 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/tree-sitter"] + path = vendor/tree-sitter + url = https://github.com/tree-sitter/tree-sitter diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..65c021cf --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +language: rust + +rust: + - stable + +env: + - TREE_SITTER_TEST=1 + +before_install: + - ./script/fetch-test-fixtures.sh + +branches: + only: + - master diff --git a/Cargo.lock b/Cargo.lock index 2312d362..758dcad7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -471,26 +471,6 @@ dependencies = [ "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "rust-tree-sitter-cli" -version = "0.1.0" -dependencies = [ - "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", - "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - 
"tree-sitter 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "rustc-demangle" version = "0.1.9" @@ -648,8 +628,7 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" +version = "0.3.5" dependencies = [ "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -658,6 +637,26 @@ dependencies = [ "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "tree-sitter-cli" +version = "0.1.0" +dependencies = [ + "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tree-sitter 0.3.5", +] + [[package]] name = "ucd-util" version = "0.1.3" @@ -822,7 +821,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" "checksum thread_local 0.3.6 
(registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" -"checksum tree-sitter 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "311adf1e004ac816285a1196c93ea36364857c3adc37ffc9fd5ed0d70545391a" "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..971b81f9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2018 Max Brunsfeld + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..22c8b96e --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,27 @@ +environment: + RUST_TREE_SITTER_TEST: true + +build: false + +install: + - git submodule update --init --recursive + + - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe + - rustup-init -yv --default-toolchain stable + - set PATH=%PATH%;%USERPROFILE%\.cargo\bin + - rustc -vV + - cargo -vV + + - script\fetch-test-fixtures.cmd + +test_script: + - cargo build + - cargo test + +branches: + only: + - master + +cache: + - fixtures + - C:\Users\appveyor\.cargo diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 29b10e17..6a9c253d 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,9 +1,13 @@ [package] -name = "rust-tree-sitter-cli" +name = "tree-sitter-cli" version = "0.1.0" authors = ["Max Brunsfeld "] edition = "2018" +[[bin]] +name = "tree-sitter" +path = "src/main.rs" + [dependencies] lazy_static = "1.2.0" smallbitvec = "2.3.0" @@ -15,9 +19,11 @@ libloading = "0.5" rusqlite = "0.14.0" serde = "1.0" serde_derive = "1.0" -tree-sitter = "0.3.1" regex-syntax = "0.6.4" +[dependencies.tree-sitter] +path = "../lib" + [dependencies.serde_json] version = "1.0" features = ["preserve_order"] diff --git a/fixtures/.gitkeep b/fixtures/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/lib/Cargo.toml b/lib/Cargo.toml new file mode 100644 index 00000000..e71d0c21 --- /dev/null +++ b/lib/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "tree-sitter" +description = "Rust bindings to the Tree-sitter parsing library" +version = "0.3.5" +authors = ["Max Brunsfeld "] +license = "MIT" +readme = "README.md" +keywords = ["incremental", "parsing"] +categories = 
["api-bindings", "parsing", "text-editors"] + +include = [ + "/build.rs", + "/Cargo.toml", + "/LICENSE", + "/README.md", + "/src/*", + "/core/tree-sitter/externals/utf8proc/utf8proc*", + "/core/tree-sitter/include/*", + "/core/tree-sitter/src/runtime/*", +] + +[dependencies] +regex = "1" +serde = "1.0" +serde_json = "1.0" +serde_derive = "1.0" + +[build-dependencies] +cc = "1.0" + +[lib] +path = "binding/lib.rs" diff --git a/lib/README.md b/lib/README.md new file mode 100644 index 00000000..449c6c46 --- /dev/null +++ b/lib/README.md @@ -0,0 +1,98 @@ +Rust Tree-sitter +=========================== + +[![Build Status](https://travis-ci.org/tree-sitter/rust-tree-sitter.svg)](https://travis-ci.org/tree-sitter/rust-tree-sitter) +[![Build status](https://ci.appveyor.com/api/projects/status/d0f6vqq3rflxx3y6/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/rust-tree-sitter/branch/master) +[![Crates.io](https://img.shields.io/crates/v/tree-sitter.svg)](https://crates.io/crates/tree-sitter) + +Rust bindings to the [Tree-sitter][] parsing library. + +### Basic Usage + +First, create a parser: + +```rust +use tree_sitter::{Parser, Language}; + +// ... + +let mut parser = Parser::new(); +``` + +Then assign a language to the parser. Tree-sitter languages consist of generated C code. 
To use them from Rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`: + +```rust +extern "C" { fn tree_sitter_c() -> Language; } +extern "C" { fn tree_sitter_rust() -> Language; } +extern "C" { fn tree_sitter_javascript() -> Language; } + +let language = unsafe { tree_sitter_rust() }; +parser.set_language(language).unwrap(); +``` + +Now you can parse source code: + +```rust +let source_code = "fn test() {}"; +let mut tree = parser.parse_str(source_code, None).unwrap(); +let root_node = tree.root_node(); + +assert_eq!(root_node.kind(), "source_file"); +assert_eq!(root_node.start_position().column, 0); +assert_eq!(root_node.end_position().column, 12); +``` + +### Editing + +Once you have a syntax tree, you can update it when your source code changes. Passing in the previously edited tree makes `parse_str` run much more quickly: + +```rust +let new_source_code = "fn test(a: u32) {}"; + +tree.edit(&InputEdit { + start_byte: 8, + old_end_byte: 8, + new_end_byte: 14, + start_position: Point::new(0, 8), + old_end_position: Point::new(0, 8), + new_end_position: Point::new(0, 14), +}); + +let new_tree = parser.parse_str(new_source_code, Some(&tree)); +``` + +### Text Input + +The source code to parse can be provided either as a string or as a function that returns text encoded as either UTF-8 or UTF-16: + +```rust +// Store some source code in an array of lines. +let lines = &[ + "pub fn foo() {", + " 1", + "}", +]; + +// Parse the source code using a custom callback. The callback is called +// with both a byte offset and a row/column offset. +let tree = parser.parse_utf8(&mut |_byte: usize, position: Point| -> &[u8] { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] 
+ } else { + "\n".as_bytes() + } + } else { + &[] + } +}, None).unwrap(); + +assert_eq!( + tree.root_node().to_sexp(), + "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))" +); +``` + +[tree-sitter]: https://github.com/tree-sitter/tree-sitter diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs new file mode 100644 index 00000000..58d0e510 --- /dev/null +++ b/lib/binding/bindings.rs @@ -0,0 +1,310 @@ +/* automatically generated by rust-bindgen */ + +pub type FILE = [u64; 19usize]; +pub type TSSymbol = u16; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLanguage { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSParser { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTree { + _unused: [u8; 0], +} +pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; +pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; +pub type TSInputEncoding = u32; +pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; +pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; +pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; +pub type TSSymbolType = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSPoint { + pub row: u32, + pub column: u32, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSRange { + pub start_point: TSPoint, + pub end_point: TSPoint, + pub start_byte: u32, + pub end_byte: u32, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInput { + pub payload: *mut ::std::os::raw::c_void, + pub read: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + byte_index: u32, + position: TSPoint, + bytes_read: *mut u32, + ) -> *const ::std::os::raw::c_char, + >, + pub encoding: TSInputEncoding, +} +pub const TSLogType_TSLogTypeParse: TSLogType = 0; +pub const TSLogType_TSLogTypeLex: TSLogType = 1; +pub type TSLogType = u32; +#[repr(C)] 
+#[derive(Debug, Copy, Clone)] +pub struct TSLogger { + pub payload: *mut ::std::os::raw::c_void, + pub log: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + arg1: TSLogType, + arg2: *const ::std::os::raw::c_char, + ), + >, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInputEdit { + pub start_byte: u32, + pub old_end_byte: u32, + pub new_end_byte: u32, + pub start_point: TSPoint, + pub old_end_point: TSPoint, + pub new_end_point: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSNode { + pub context: [u32; 4usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const TSTree, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTreeCursor { + pub context: [u32; 2usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +extern "C" { + pub fn ts_parser_new() -> *mut TSParser; +} +extern "C" { + pub fn ts_parser_delete(arg1: *mut TSParser); +} +extern "C" { + pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; +} +extern "C" { + pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; +} +extern "C" { + pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; +} +extern "C" { + pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); +} +extern "C" { + pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); +} +extern "C" { + pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); +} +extern "C" { + pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; +} +extern "C" { + pub fn ts_parser_parse_string( + arg1: *mut TSParser, + arg2: *const TSTree, + arg3: *const ::std::os::raw::c_char, + arg4: u32, + ) -> *mut TSTree; +} +extern "C" { + pub fn ts_parser_enabled(arg1: *const TSParser) -> bool; +} +extern "C" { + pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); +} +extern "C" { + pub fn 
ts_parser_operation_limit(arg1: *const TSParser) -> usize; +} +extern "C" { + pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize); +} +extern "C" { + pub fn ts_parser_reset(arg1: *mut TSParser); +} +extern "C" { + pub fn ts_parser_set_included_ranges(arg1: *mut TSParser, arg2: *const TSRange, arg3: u32); +} +extern "C" { + pub fn ts_parser_included_ranges(arg1: *const TSParser, arg2: *mut u32) -> *const TSRange; +} +extern "C" { + pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; +} +extern "C" { + pub fn ts_tree_delete(arg1: *mut TSTree); +} +extern "C" { + pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; +} +extern "C" { + pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); +} +extern "C" { + pub fn ts_tree_get_changed_ranges( + arg1: *const TSTree, + arg2: *const TSTree, + arg3: *mut u32, + ) -> *mut TSRange; +} +extern "C" { + pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); +} +extern "C" { + pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; +} +extern "C" { + pub fn ts_node_start_byte(arg1: TSNode) -> u32; +} +extern "C" { + pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + pub fn ts_node_end_byte(arg1: TSNode) -> u32; +} +extern "C" { + pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; +} +extern "C" { + pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; +} +extern "C" { + pub fn ts_node_is_null(arg1: TSNode) -> bool; +} +extern "C" { + pub fn ts_node_is_named(arg1: TSNode) -> bool; +} +extern "C" { + pub fn ts_node_is_missing(arg1: TSNode) -> bool; +} +extern "C" { + pub fn ts_node_has_changes(arg1: TSNode) -> bool; +} +extern "C" { + pub fn ts_node_has_error(arg1: TSNode) -> bool; +} +extern "C" { + pub fn 
ts_node_parent(arg1: TSNode) -> TSNode; +} +extern "C" { + pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + pub fn ts_node_child_count(arg1: TSNode) -> u32; +} +extern "C" { + pub fn ts_node_named_child_count(arg1: TSNode) -> u32; +} +extern "C" { + pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) + -> TSNode; +} +extern "C" { + pub fn ts_node_named_descendant_for_point_range( + arg1: TSNode, + arg2: TSPoint, + arg3: TSPoint, + ) -> TSNode; +} +extern "C" { + pub fn ts_node_edit(arg1: *mut TSNode, arg2: *const TSInputEdit); +} +extern "C" { + pub fn ts_tree_cursor_new(arg1: TSNode) -> TSTreeCursor; +} +extern "C" { + pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); +} +extern "C" { + pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; +} +extern "C" { + pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) 
-> TSNode; +} +extern "C" { + pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; +} +extern "C" { + pub fn ts_language_symbol_name( + arg1: *const TSLanguage, + arg2: TSSymbol, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn ts_language_symbol_for_name( + arg1: *const TSLanguage, + arg2: *const ::std::os::raw::c_char, + ) -> TSSymbol; +} +extern "C" { + pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; +} +extern "C" { + pub fn ts_language_version(arg1: *const TSLanguage) -> u32; +} + +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 9; diff --git a/lib/binding/ffi.rs b/lib/binding/ffi.rs new file mode 100644 index 00000000..323609e0 --- /dev/null +++ b/lib/binding/ffi.rs @@ -0,0 +1,4 @@ +#![allow(dead_code)] +#![allow(non_upper_case_globals)] + +include!("./bindings.rs"); diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs new file mode 100644 index 00000000..65a57d16 --- /dev/null +++ b/lib/binding/lib.rs @@ -0,0 +1,1349 @@ +mod ffi; + +#[macro_use] +extern crate serde_derive; +extern crate regex; +extern crate serde; +extern crate serde_json; + +use regex::Regex; +use serde::de::DeserializeOwned; +use std::collections::HashMap; +use std::ffi::CStr; +use std::fmt; +use std::io::{self, Read, Seek}; +use std::marker::PhantomData; +use std::os::raw::{c_char, c_void}; +use std::ptr; +use std::str; + +#[derive(Clone, Copy)] +#[repr(transparent)] +pub struct Language(*const ffi::TSLanguage); + +#[derive(Debug, PartialEq, Eq)] +pub enum LogType { + Parse, + Lex, +} + +type Logger<'a> = Box; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Point { + pub row: usize, + pub column: usize, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Range { + pub start_byte: usize, + pub end_byte: usize, + pub start_point: Point, + pub end_point: Point, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct InputEdit { + pub start_byte: usize, + pub 
old_end_byte: usize, + pub new_end_byte: usize, + pub start_position: Point, + pub old_end_position: Point, + pub new_end_position: Point, +} + +struct PropertyTransition { + state_id: usize, + child_index: Option, + text_regex_index: Option, +} + +struct PropertyState { + transitions: HashMap>, + property_set_id: usize, + default_next_state_id: usize, +} + +#[derive(Debug)] +pub enum PropertySheetError { + InvalidJSON(serde_json::Error), + InvalidRegex(regex::Error), +} + +pub struct PropertySheet

> { + states: Vec, + property_sets: Vec

, + text_regexes: Vec, +} + +#[derive(Clone, Copy)] +pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); + +pub struct Parser(*mut ffi::TSParser); + +pub struct Tree(*mut ffi::TSTree); + +pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); + +pub struct TreePropertyCursor<'a, P> { + cursor: TreeCursor<'a>, + state_stack: Vec, + child_index_stack: Vec, + property_sheet: &'a PropertySheet

, + source: &'a str, +} + +impl Language { + pub fn node_kind_count(&self) -> usize { + unsafe { ffi::ts_language_symbol_count(self.0) as usize } + } + + pub fn node_kind_for_id(&self, id: u16) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) } + .to_str() + .unwrap() + } + + pub fn node_kind_is_named(&self, id: u16) -> bool { + unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } + } +} + +unsafe impl Send for Language {} + +unsafe impl Sync for Language {} + +impl Parser { + pub fn new() -> Parser { + unsafe { + let parser = ffi::ts_parser_new(); + Parser(parser) + } + } + + pub fn set_language(&mut self, language: Language) -> Result<(), String> { + unsafe { + let version = ffi::ts_language_version(language.0) as usize; + if version == ffi::TREE_SITTER_LANGUAGE_VERSION { + ffi::ts_parser_set_language(self.0, language.0); + Ok(()) + } else { + Err(format!( + "Incompatible language version {}. Expected {}.", + version, + ffi::TREE_SITTER_LANGUAGE_VERSION + )) + } + } + } + + pub fn logger(&self) -> Option<&Logger> { + let logger = unsafe { ffi::ts_parser_logger(self.0) }; + unsafe { (logger.payload as *mut Logger).as_ref() } + } + + pub fn set_logger(&mut self, logger: Option) { + let prev_logger = unsafe { ffi::ts_parser_logger(self.0) }; + if !prev_logger.payload.is_null() { + unsafe { Box::from_raw(prev_logger.payload as *mut Logger) }; + } + + let c_logger; + if let Some(logger) = logger { + let container = Box::new(logger); + + unsafe extern "C" fn log( + payload: *mut c_void, + c_log_type: ffi::TSLogType, + c_message: *const c_char, + ) { + let callback = (payload as *mut Logger).as_mut().unwrap(); + if let Ok(message) = CStr::from_ptr(c_message).to_str() { + let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { + LogType::Parse + } else { + LogType::Lex + }; + callback(log_type, message); + } + }; + + let raw_container = Box::into_raw(container); + + c_logger = 
ffi::TSLogger { + payload: raw_container as *mut c_void, + log: Some(log), + }; + } else { + c_logger = ffi::TSLogger { + payload: ptr::null_mut(), + log: None, + }; + } + + unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; + } + + pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { + let bytes = input.as_bytes(); + self.parse_utf8( + &mut |offset, _| { + if offset < bytes.len() { + &bytes[offset..] + } else { + &[] + } + }, + old_tree, + ) + } + + pub fn parse_utf8<'a, T: FnMut(usize, Point) -> &'a [u8]>( + &mut self, + input: &mut T, + old_tree: Option<&Tree>, + ) -> Option { + self.parse_utf8_ptr( + &mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, + old_tree, + ) + } + + pub fn parse_utf16<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>( + &mut self, + input: &mut T, + old_tree: Option<&Tree>, + ) -> Option { + self.parse_utf16_ptr( + &mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, + old_tree, + ) + } + + pub fn parse_utf8_io( + &mut self, + mut input: impl Read + Seek, + old_tree: Option<&Tree>, + ) -> io::Result> { + let mut error = None; + let mut current_offset = 0; + let mut buffer = [0; 10 * 1024]; + let result = self.parse_utf8_ptr( + &mut |byte, _| { + if byte as u64 != current_offset { + current_offset = byte as u64; + if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) { + error = Some(e); + return (ptr::null(), 0); + } + } + + match input.read(&mut buffer) { + Err(e) => { + error = Some(e); + (ptr::null(), 0) + } + Ok(length) => (buffer.as_ptr(), length), + } + }, + old_tree, + ); + + match error { + Some(e) => Err(e), + None => Ok(result), + } + } + + pub fn reset(&mut self) { + unsafe { ffi::ts_parser_reset(self.0) } + } + + pub fn set_operation_limit(&mut self, limit: usize) { + unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } + } + + pub fn set_included_ranges(&mut self, ranges: &[Range]) { + let 
ts_ranges: Vec = + ranges.iter().cloned().map(|range| range.into()).collect(); + unsafe { + ffi::ts_parser_set_included_ranges(self.0, ts_ranges.as_ptr(), ts_ranges.len() as u32) + }; + } + + fn parse_utf8_ptr (*const u8, usize)>( + &mut self, + input: &mut T, + old_tree: Option<&Tree>, + ) -> Option { + unsafe extern "C" fn read (*const u8, usize)>( + payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let (ptr, length) = (*input)(byte_offset as usize, position.into()); + *bytes_read = length as u32; + return ptr as *const c_char; + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + }; + + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { + None + } else { + Some(Tree(c_new_tree)) + } + } + + fn parse_utf16_ptr (*const u16, usize)>( + &mut self, + input: &mut T, + old_tree: Option<&Tree>, + ) -> Option { + unsafe extern "C" fn read (*const u16, usize)>( + payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let (ptr, length) = (*input)( + byte_offset as usize, + Point { + row: position.row as usize, + column: position.column as usize / 2, + }, + ); + *bytes_read = length as u32 * 2; + ptr as *const c_char + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, + }; + + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { + None + } else { + Some(Tree(c_new_tree)) + } + } +} + +impl Drop for Parser { 
+ fn drop(&mut self) { + self.set_logger(None); + unsafe { ffi::ts_parser_delete(self.0) } + } +} + +unsafe impl Send for Parser {} + +impl Tree { + pub fn root_node(&self) -> Node { + Node::new(unsafe { ffi::ts_tree_root_node(self.0) }).unwrap() + } + + pub fn edit(&mut self, edit: &InputEdit) { + let edit = ffi::TSInputEdit { + start_byte: edit.start_byte as u32, + old_end_byte: edit.old_end_byte as u32, + new_end_byte: edit.new_end_byte as u32, + start_point: edit.start_position.into(), + old_end_point: edit.old_end_position.into(), + new_end_point: edit.new_end_position.into(), + }; + unsafe { ffi::ts_tree_edit(self.0, &edit) }; + } + + pub fn walk(&self) -> TreeCursor { + self.root_node().walk() + } + + pub fn walk_with_properties<'a, P>( + &'a self, + property_sheet: &'a PropertySheet

, + source: &'a str, + ) -> TreePropertyCursor<'a, P> { + TreePropertyCursor::new(self, property_sheet, source) + } +}
+
+// NOTE(review): this asserts that a `Tree` may be moved to another thread; the
+// guarantee has to come from the C library -- confirm.
+unsafe impl Send for Tree {}
+
+impl fmt::Debug for Tree {
+    /// Formats the tree as `{Tree <root node>}`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        write!(f, "{{Tree {:?}}}", self.root_node())
+    }
+}
+
+impl Drop for Tree {
+    /// Hands the wrapped pointer back to `ts_tree_delete`.
+    fn drop(&mut self) {
+        unsafe { ffi::ts_tree_delete(self.0) }
+    }
+}
+
+impl Clone for Tree {
+    /// Wraps the pointer returned by `ts_tree_copy` in a new `Tree`.
+    fn clone(&self) -> Tree {
+        unsafe { Tree(ffi::ts_tree_copy(self.0)) }
+    }
+}
+
+impl<'tree> Node<'tree> {
+    /// Wraps a raw node; a raw node whose `id` is null becomes `None`.
+    fn new(node: ffi::TSNode) -> Option<Self> {
+        if node.id.is_null() {
+            None
+        } else {
+            Some(Node(node, PhantomData))
+        }
+    }
+
+    /// Returns the numeric symbol reported by `ts_node_symbol`.
+    pub fn kind_id(&self) -> u16 {
+        unsafe { ffi::ts_node_symbol(self.0) }
+    }
+
+    /// Returns the node's type name.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the name supplied by the C library is not valid UTF-8.
+    pub fn kind(&self) -> &'static str {
+        unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }
+            .to_str()
+            .unwrap()
+    }
+
+    /// Returns the result of `ts_node_is_named`.
+    pub fn is_named(&self) -> bool {
+        unsafe { ffi::ts_node_is_named(self.0) }
+    }
+
+    /// Returns the result of `ts_node_has_changes`.
+    pub fn has_changes(&self) -> bool {
+        unsafe { ffi::ts_node_has_changes(self.0) }
+    }
+
+    /// Returns the result of `ts_node_has_error`.
+    pub fn has_error(&self) -> bool {
+        unsafe { ffi::ts_node_has_error(self.0) }
+    }
+
+    /// Byte offset at which the node starts.
+    pub fn start_byte(&self) -> usize {
+        unsafe { ffi::ts_node_start_byte(self.0) as usize }
+    }
+
+    /// Byte offset at which the node ends.
+    pub fn end_byte(&self) -> usize {
+        unsafe { ffi::ts_node_end_byte(self.0) as usize }
+    }
+
+    /// Bundles the node's byte offsets and positions into a `Range`.
+    pub fn range(&self) -> Range {
+        Range {
+            start_byte: self.start_byte(),
+            end_byte: self.end_byte(),
+            start_point: self.start_position(),
+            end_point: self.end_position(),
+        }
+    }
+
+    /// Position (row, column) at which the node starts.
+    pub fn start_position(&self) -> Point {
+        let result = unsafe { ffi::ts_node_start_point(self.0) };
+        result.into()
+    }
+
+    /// Position (row, column) at which the node ends.
+    pub fn end_position(&self) -> Point {
+        let result = unsafe { ffi::ts_node_end_point(self.0) };
+        result.into()
+    }
+
+    /// Returns the `i`-th child, or `None` when the C library yields a null node.
+    pub fn child(&self, i: usize) -> Option<Self> {
+        Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) })
+    }
+
+    /// Number of children, named or not.
+    pub fn child_count(&self) -> usize {
+        unsafe { ffi::ts_node_child_count(self.0) as usize }
+    }
+
+    /// Iterates over the children at indices `0..child_count()`.
+    pub fn children<'a>(&'a self) -> impl Iterator<Item = Node<'tree>> + 'a {
+        (0..self.child_count()).map(move |i| self.child(i).unwrap())
+    }
+
+    /// Returns the `i`-th named child, or `None` when the C library yields a null node.
+    pub fn named_child(&self, i: usize) -> Option<Self> {
+        Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) })
+    }
+
+    /// Number of named children.
+    pub fn named_child_count(&self) -> usize {
+        unsafe { ffi::ts_node_named_child_count(self.0) as usize }
+    }
+
+    /// Returns the parent node, if any.
+    pub fn parent(&self) -> Option<Self> {
+        Self::new(unsafe { ffi::ts_node_parent(self.0) })
+    }
+
+    /// Returns the next sibling, if any.
+    pub fn next_sibling(&self) -> Option<Self> {
+        Self::new(unsafe { ffi::ts_node_next_sibling(self.0) })
+    }
+
+    /// Returns the previous sibling, if any.
+    pub fn prev_sibling(&self) -> Option<Self> {
+        Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) })
+    }
+
+    /// Returns the next named sibling, if any.
+    pub fn next_named_sibling(&self) -> Option<Self> {
+        Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) })
+    }
+
+    /// Returns the previous named sibling, if any.
+    pub fn prev_named_sibling(&self) -> Option<Self> {
+        Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) })
+    }
+
+    /// Renders the node as an S-expression.
+    ///
+    /// The C string from `ts_node_string` is copied into a `String` and then
+    /// released with the C `free`.
+    pub fn to_sexp(&self) -> String {
+        extern "C" {
+            fn free(pointer: *mut c_void);
+        }
+
+        let c_string = unsafe { ffi::ts_node_string(self.0) };
+        let result = unsafe { CStr::from_ptr(c_string) }
+            .to_str()
+            .unwrap()
+            .to_string();
+        unsafe { free(c_string as *mut c_void) };
+        result
+    }
+
+    /// Returns the slice of `source` covered by this node's byte range.
+    pub fn utf8_text<'a>(&self, source: &'a str) -> Result<&'a str, str::Utf8Error> {
+        str::from_utf8(&source.as_bytes()[self.start_byte()..self.end_byte()])
+    }
+
+    /// Returns the slice of `source` covered by this node.
+    ///
+    /// The node's offsets are measured in bytes while `source` is indexed in
+    /// 16-bit code units, so the offsets are halved before slicing.
+    pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] {
+        &source[self.start_byte() / 2..self.end_byte() / 2]
+    }
+
+    /// Creates a cursor positioned on this node.
+    pub fn walk(&self) -> TreeCursor<'tree> {
+        TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData)
+    }
+}
+
+impl<'a> PartialEq for Node<'a> {
+    /// Two nodes are equal when their raw `id` pointers are equal.
+    fn eq(&self, other: &Self) -> bool {
+        self.0.id == other.0.id
+    }
+}
+
+impl<'a> fmt::Debug for Node<'a> {
+    /// Formats the node as `{Node <kind> <start> - <end>}`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        write!(
+            f,
+            "{{Node {} {} - {}}}",
+            self.kind(),
+            self.start_position(),
+            self.end_position()
+        )
+    }
+}
+
+impl<'a> TreeCursor<'a> {
+    /// Returns the node the cursor currently points at.
+    pub fn node(&self) -> Node<'a> {
+        Node(
+            unsafe { ffi::ts_tree_cursor_current_node(&self.0) },
+            PhantomData,
+        )
+    }
+
+    /// Moves to the first child; returns `false` if the cursor did not move.
+    pub fn goto_first_child(&mut self) -> bool {
+        unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }
+    }
+
+    /// Moves to the parent; returns `false` if the cursor did not move.
+    pub fn goto_parent(&mut self) -> bool {
+        unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }
+    }
+
+    /// Moves to the next sibling; returns `false` if the cursor did not move.
+    pub fn goto_next_sibling(&mut self) -> bool {
+        unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }
+    }
+
+    /// Forwards `index` to `ts_tree_cursor_goto_first_child_for_byte` and
+    /// returns its result, or `None` when that result is negative.
+    ///
+    /// NOTE(review): despite the parameter name, the value is passed to a
+    /// `*_for_byte` function, so it is presumably a byte offset -- confirm.
+    pub fn goto_first_child_for_index(&mut self, index: usize) -> Option<usize> {
+        let result =
+            unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) };
+        if result < 0 {
+            None
+        } else {
+            Some(result as usize)
+        }
+    }
+}
+
+impl<'a> Drop for TreeCursor<'a> {
+    /// Releases the cursor through `ts_tree_cursor_delete`.
+    fn drop(&mut self) {
+        unsafe { ffi::ts_tree_cursor_delete(&mut self.0) }
+    }
+}
+
+impl<'a, P> TreePropertyCursor<'a, P> { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet

<P>, source: &'a str) -> Self {
+        let mut result = Self {
+            cursor: tree.root_node().walk(),
+            child_index_stack: vec![0],
+            state_stack: vec![0],
+            property_sheet,
+            source,
+        };
+        let state = result.next_state(&result.current_state(), result.cursor.node().kind_id(), 0);
+        result.state_stack.push(state);
+        result
+    }
+
+    /// Returns the node the underlying cursor points at.
+    pub fn node(&self) -> Node<'a> {
+        self.cursor.node()
+    }
+
+    /// Returns the property set selected by the current state.
+    pub fn node_properties(&self) -> &'a P {
+        &self.property_sheet.property_sets[self.current_state().property_set_id]
+    }
+
+    /// Moves to the first child and pushes the state reached for it.
+    /// Returns `false`, leaving both stacks untouched, if the cursor did not move.
+    pub fn goto_first_child(&mut self) -> bool {
+        if self.cursor.goto_first_child() {
+            self.enter_current_node(0);
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Moves to the next sibling, replacing the top of both stacks with the
+    /// sibling's child index and state. Returns `false` if the cursor did not move.
+    pub fn goto_next_sibling(&mut self) -> bool {
+        if self.cursor.goto_next_sibling() {
+            let child_index = self.child_index_stack.pop().unwrap() + 1;
+            // Drop the previous sibling's state so that the parent's state is
+            // current again when the new state is computed.
+            self.state_stack.pop();
+            self.enter_current_node(child_index);
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Moves to the parent and pops one entry from both stacks.
+    /// Returns `false` if the cursor did not move.
+    pub fn goto_parent(&mut self) -> bool {
+        if self.cursor.goto_parent() {
+            self.state_stack.pop();
+            self.child_index_stack.pop();
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Computes the state for the node under the cursor, reached from the
+    /// current state, and pushes it together with `child_index`.
+    fn enter_current_node(&mut self, child_index: usize) {
+        let next_state_id = {
+            let kind_id = self.cursor.node().kind_id();
+            self.next_state(self.current_state(), kind_id, child_index)
+        };
+        self.state_stack.push(next_state_id);
+        self.child_index_stack.push(child_index);
+    }
+
+    /// Picks the state to enter from `state` for a node of kind `node_kind_id`
+    /// at position `node_child_index`.
+    ///
+    /// The transitions registered for the kind are tried in order; the first
+    /// one whose optional text regex and optional child index both accept the
+    /// node wins. Without a match the state's default successor is used.
+    fn next_state(
+        &self,
+        state: &PropertyState,
+        node_kind_id: u16,
+        node_child_index: usize,
+    ) -> usize {
+        state
+            .transitions
+            .get(&node_kind_id)
+            .and_then(|transitions| {
+                transitions
+                    .iter()
+                    .find(|transition| {
+                        let text_matches =
+                            transition.text_regex_index.map_or(true, |regex_index| {
+                                let node = self.cursor.node();
+                                let bytes =
+                                    &self.source.as_bytes()[node.start_byte()..node.end_byte()];
+                                // Text that is not valid UTF-8 cannot be tested
+                                // and therefore does not veto the transition.
+                                str::from_utf8(bytes)
+                                    .map(|text| {
+                                        self.property_sheet.text_regexes[regex_index].is_match(text)
+                                    })
+                                    .unwrap_or(true)
+                            });
+                        text_matches
+                            && transition
+                                .child_index
+                                .map_or(true, |index| index == node_child_index)
+                    })
+                    .map(|transition| transition.state_id)
+            })
+            .unwrap_or(state.default_next_state_id)
+    }
+
+    /// Returns the state on top of the state stack.
+    fn current_state(&self) -> &PropertyState {
+        &self.property_sheet.states[*self.state_stack.last().unwrap()]
+    }
+}
+
+impl Point {
+    /// Creates a point from a row and a column.
+    pub fn new(row: usize, column: usize) -> Self {
+        Point { row, column }
+    }
+}
+
+impl fmt::Display for Point {
+    /// Formats the point as `(row, column)`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        write!(f, "({}, {})", self.row, self.column)
+    }
+}
+
+// `From` is implemented instead of `Into`; the standard blanket impl keeps
+// `point.into()` working for existing callers.
+impl From<Point> for ffi::TSPoint {
+    /// Converts to the C representation; `as u32` truncates values above `u32::MAX`.
+    fn from(point: Point) -> Self {
+        ffi::TSPoint {
+            row: point.row as u32,
+            column: point.column as u32,
+        }
+    }
+}
+
+impl From<ffi::TSPoint> for Point {
+    /// Converts from the C representation.
+    fn from(point: ffi::TSPoint) -> Self {
+        Self {
+            row: point.row as usize,
+            column: point.column as usize,
+        }
+    }
+}
+
+impl From<Range> for ffi::TSRange {
+    /// Converts to the C representation; `as u32` truncates values above `u32::MAX`.
+    fn from(range: Range) -> Self {
+        ffi::TSRange {
+            start_byte: range.start_byte as u32,
+            end_byte: range.end_byte as u32,
+            start_point: range.start_point.into(),
+            end_point: range.end_point.into(),
+        }
+    }
+}
+
+impl

<P> PropertySheet<P> {
+    /// Builds a property sheet from its JSON description.
+    ///
+    /// Each transition names a node kind and whether it is named; it is
+    /// registered under every id of `language` whose kind name and named-ness
+    /// match. Identical `text` patterns are compiled once and shared.
+    ///
+    /// # Errors
+    ///
+    /// Returns `PropertySheetError::InvalidJSON` when `json` cannot be
+    /// deserialized and `PropertySheetError::InvalidRegex` when a `text`
+    /// pattern does not compile.
+    pub fn new(language: Language, json: &str) -> Result<Self, PropertySheetError>
+    where
+        P: DeserializeOwned,
+    {
+        #[derive(Deserialize, Debug)]
+        struct PropertyTransitionJSON {
+            #[serde(rename = "type")]
+            kind: String,
+            named: bool,
+            index: Option<usize>,
+            text: Option<String>,
+            state_id: usize,
+        }
+
+        #[derive(Deserialize, Debug)]
+        struct PropertyStateJSON {
+            transitions: Vec<PropertyTransitionJSON>,
+            property_set_id: usize,
+            default_next_state_id: usize,
+        }
+
+        #[derive(Deserialize, Debug)]
+        struct PropertySheetJSON<P> {
+            states: Vec<PropertyStateJSON>,
+            property_sets: Vec<P>,
+        }
+
+        let input: PropertySheetJSON<P> =
+            serde_json::from_str(json).map_err(PropertySheetError::InvalidJSON)?;
+        let mut states = Vec::new();
+        let mut text_regexes = Vec::new();
+        let mut text_regex_patterns = Vec::new();
+        // The number of node kinds does not change between states, so it is
+        // queried once rather than once per state.
+        let node_kind_count = language.node_kind_count();
+
+        for state in input.states.iter() {
+            let mut transitions = HashMap::new();
+            for transition in state.transitions.iter() {
+                let text_regex_index = match transition.text.as_ref() {
+                    None => None,
+                    Some(regex_pattern) => {
+                        let known = text_regex_patterns.iter().position(|r| *r == regex_pattern);
+                        match known {
+                            Some(index) => Some(index),
+                            None => {
+                                text_regex_patterns.push(regex_pattern);
+                                text_regexes.push(
+                                    Regex::new(regex_pattern)
+                                        .map_err(PropertySheetError::InvalidRegex)?,
+                                );
+                                Some(text_regexes.len() - 1)
+                            }
+                        }
+                    }
+                };
+
+                for i in 0..(node_kind_count as u16) {
+                    if transition.kind == language.node_kind_for_id(i)
+                        && transition.named == language.node_kind_is_named(i)
+                    {
+                        let entry = transitions.entry(i).or_insert_with(Vec::new);
+                        entry.push(PropertyTransition {
+                            child_index: transition.index,
+                            state_id: transition.state_id,
+                            text_regex_index,
+                        });
+                    }
+                }
+            }
+            states.push(PropertyState {
+                transitions,
+                default_next_state_id: state.default_next_state_id,
+                property_set_id: state.property_set_id,
+            });
+        }
+        Ok(Self {
+            property_sets: input.property_sets,
+            states,
+            text_regexes,
+        })
+    }
+
+    /// Converts every property set with `f`, keeping the states and regexes.
+    ///
+    /// # Errors
+    ///
+    /// Stops at, and returns, the first error produced by `f`.
+    pub fn map<F, T, E>(self, mut f: F) -> Result<PropertySheet<T>, E>
+    where
+        F: FnMut(P) -> Result<T, E>,
+    {
+        let property_sets = self
+            .property_sets
+            .into_iter()
+            .map(&mut f)
+            .collect::<Result<Vec<_>, E>>()?;
+        Ok(PropertySheet {
+            states: self.states,
+            text_regexes: self.text_regexes,
+            property_sets,
+        })
+    }
+}
+
+#[cfg(test)] +mod tests { + use super::*; + use std::thread; + + fn rust() -> Language { + unsafe { tree_sitter_rust() } + } + extern "C" { + fn tree_sitter_rust() -> Language; + } + + #[test] + fn test_basic_parsing() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let tree = parser +
.parse_str( + " + struct Stuff {} + fn main() {} + ", + None, + ) + .unwrap(); + + let root_node = tree.root_node(); + assert_eq!(root_node.kind(), "source_file"); + + assert_eq!( + root_node.to_sexp(), + "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" + ); + + let struct_node = root_node.child(0).unwrap(); + assert_eq!(struct_node.kind(), "struct_item"); + } + + #[test] + fn test_logging() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let mut messages = Vec::new(); + parser.set_logger(Some(Box::new(|log_type, message| { + messages.push((log_type, message.to_string())); + }))); + + parser + .parse_str( + " + struct Stuff {} + fn main() {} + ", + None, + ) + .unwrap(); + + assert!(messages.contains(&( + LogType::Parse, + "reduce sym:struct_item, child_count:3".to_string() + ))); + assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); + } + + #[test] + fn test_tree_cursor() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let tree = parser + .parse_str( + " + struct Stuff { + a: A; + b: Option, + } + ", + None, + ) + .unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct_item"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct"); + assert_eq!(cursor.node().is_named(), false); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "type_identifier"); + assert_eq!(cursor.node().is_named(), true); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration_list"); + assert_eq!(cursor.node().is_named(), true); + } + + #[test] + fn test_tree_property_matching() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let source_code = "fn f1() { f2(); }"; + let tree = 
parser.parse_str(source_code, None).unwrap(); + + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Properties { + reference: Option, + define: Option, + } + + let empty_properties = Properties { + reference: None, + define: None, + }; + + let property_sheet = PropertySheet::::new( + rust(), + r##" + { + "states": [ + { + "transitions": [ + {"type": "call_expression", "named": true, "state_id": 1}, + {"type": "function_item", "named": true, "state_id": 2} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [ + {"type": "identifier", "named": true, "state_id": 3} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [ + {"type": "identifier", "named": true, "state_id": 4} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 1 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 2 + } + ], + "property_sets": [ + {}, + {"reference": "function"}, + {"define": "function"} + ] + } + "##, + ) + .unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); + assert_eq!(cursor.node().kind(), "source_file"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "function_item"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "fn"); + assert_eq!(*cursor.node_properties(), empty_properties); + assert!(!cursor.goto_first_child()); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(cursor.node_properties().define, Some("function".to_owned())); + assert!(!cursor.goto_first_child()); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "parameters"); + assert_eq!(*cursor.node_properties(), empty_properties); + + 
assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "("); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), ")"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_parent()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!( + cursor.node_properties().reference, + Some("function".to_owned()) + ); + } + + #[test] + fn test_tree_property_matching_with_regexes() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let source_code = "fn f1() { None(a()) }"; + let tree = parser.parse_str(source_code, None).unwrap(); + + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Properties { + scope: Option, + } + + let empty_properties = Properties { scope: None }; + + let property_sheet = PropertySheet::::new( + rust(), + r##" + { + "states": [ + { + "id": 0, + "transitions": [ + {"type": "call_expression", "named": true, "state_id": 1} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "id": 1, + "transitions": [ + {"type": "identifier", "named": true, "text": "^[A-Z]", "state_id": 2}, + {"type": "identifier", "named": true, "state_id": 3} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 1 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 2 + } + ], + "property_sets": [ + {}, + {"scope": "constructor"}, + {"scope": "function"} + ] + } + "##, + ) + .unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); + 
assert_eq!(cursor.node().kind(), "source_file"); + assert_eq!(*cursor.node_properties(), empty_properties); + + cursor.goto_first_child(); + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert!(cursor.goto_next_sibling()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!( + cursor.node_properties().scope, + Some("constructor".to_owned()) + ); + } + + #[test] + fn test_custom_utf8_input() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let lines = &["pub fn foo() {", " 1", "}"]; + + let tree = parser + .parse_utf8( + &mut |_, position| { + let row = position.row; + let column = position.column; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] 
+ } else { + "\n".as_bytes() + } + } else { + &[] + } + }, + None, + ) + .unwrap(); + + let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!(root.kind(), "source_file"); + assert_eq!(root.has_error(), false); + assert_eq!(root.child(0).unwrap().kind(), "function_item"); + } + + #[test] + fn test_custom_utf16_input() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + parser.set_logger(Some(Box::new(|t, message| { + println!("log: {:?} {}", t, message); + }))); + + let lines: Vec> = ["pub fn foo() {", " 1", "}"] + .iter() + .map(|s| s.encode_utf16().collect()) + .collect(); + + let tree = parser + .parse_utf16( + &mut |_, position| { + let row = position.row; + let column = position.column; + if row < lines.len() { + if column < lines[row].len() { + &lines[row][column..] + } else { + &[10] + } + } else { + &[] + } + }, + None, + ) + .unwrap(); + + let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!(root.kind(), "source_file"); + assert_eq!(root.has_error(), false); + assert_eq!(root.child(0).unwrap().kind(), "function_item"); + } + + #[test] + fn test_node_equality() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = parser.parse_str("struct A {}", None).unwrap(); + let node1 = tree.root_node(); + let node2 = tree.root_node(); + assert_eq!(node1, node2); + assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); + assert_ne!(node1.child(0).unwrap(), node2); + } + + #[test] + fn test_editing() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); + let mut input_bytes_read = Vec::new(); + + let mut tree = parser + .parse_utf8( + &mut |offset, _| { + let offset = 
offset; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, + None, + ) + .unwrap(); + + let parameters_sexp = tree + .root_node() + .named_child(0) + .unwrap() + .named_child(1) + .unwrap() + .to_sexp(); + assert_eq!( + parameters_sexp, + "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + ); + + input_bytes_read.clear(); + input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); + tree.edit(&InputEdit { + start_byte: 14, + old_end_byte: 14, + new_end_byte: 20, + start_position: Point::new(0, 14), + old_end_position: Point::new(0, 14), + new_end_position: Point::new(0, 20), + }); + + let tree = parser + .parse_utf8( + &mut |offset, _| { + let offset = offset; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, + Some(&tree), + ) + .unwrap(); + + let parameters_sexp = tree + .root_node() + .named_child(0) + .unwrap() + .named_child(1) + .unwrap() + .to_sexp(); + assert_eq!( + parameters_sexp, + "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + ); + + let retokenized_content = String::from_utf8(input_bytes_read).unwrap(); + assert!(retokenized_content.contains("b: B")); + assert!(!retokenized_content.contains("a: A")); + assert!(!retokenized_content.contains("c: C")); + assert!(!retokenized_content.contains("{}")); + } + + #[test] + fn test_parallel_parsing() { + // Parse this source file so that each thread has a non-trivial amount of + // work to do. 
+ let this_file_source = include_str!("lib.rs"); + + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = parser.parse_str(this_file_source, None).unwrap(); + + let mut parse_threads = Vec::new(); + for thread_id in 1..5 { + let mut tree_clone = tree.clone(); + parse_threads.push(thread::spawn(move || { + // For each thread, prepend a different number of declarations to the + // source code. + let mut prepend_line_count = 0; + let mut prepended_source = String::new(); + for _ in 0..thread_id { + prepend_line_count += 2; + prepended_source += "struct X {}\n\n"; + } + + tree_clone.edit(&InputEdit { + start_byte: 0, + old_end_byte: 0, + new_end_byte: prepended_source.len(), + start_position: Point::new(0, 0), + old_end_position: Point::new(0, 0), + new_end_position: Point::new(prepend_line_count, 0), + }); + prepended_source += this_file_source; + + // Reparse using the old tree as a starting point. + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + parser + .parse_str(&prepended_source, Some(&tree_clone)) + .unwrap() + })); + } + + // Check that the trees have the expected relationship to one another. 
+ let trees = parse_threads + .into_iter() + .map(|thread| thread.join().unwrap()); + let child_count_differences = trees + .map(|t| t.root_node().child_count() - tree.root_node().child_count()) + .collect::>(); + + assert_eq!(child_count_differences, &[1, 2, 3, 4]); + } +}
diff --git a/lib/build.rs b/lib/build.rs
new file mode 100644
index 00000000..cee131bd
--- /dev/null
+++ b/lib/build.rs
@@ -0,0 +1,32 @@
+extern crate cc;
+
+use std::env;
+use std::path::PathBuf;
+
+/// Environment variables that request compiling the Rust grammar fixture.
+/// `script/test.sh` exports the first; the CI configuration exports the second.
+const TEST_ENV_VARS: [&str; 2] = ["RUST_TREE_SITTER_TEST", "TREE_SITTER_TEST"];
+
+/// Compiles the C runtime (`src/runtime.c`) and, when one of `TEST_ENV_VARS`
+/// is set, the tree-sitter-rust fixture parser and scanner as well.
+fn main() {
+    let mut config = cc::Build::new();
+    let src_path = PathBuf::from("src");
+
+    config
+        .define("UTF8PROC_STATIC", "")
+        .flag_if_supported("-std=c99")
+        .flag_if_supported("-Wno-unused-parameter")
+        .include("include")
+        .include("utf8proc")
+        .file(src_path.join("runtime.c"));
+
+    if TEST_ENV_VARS.iter().any(|name| env::var(name).is_ok()) {
+        let parser_dir: PathBuf = ["fixtures", "tree-sitter-rust", "src"].iter().collect();
+        config
+            .file(parser_dir.join("parser.c"))
+            .file(parser_dir.join("scanner.c"));
+    }
+
+    config.compile("tree-sitter-runtime");
+}
diff --git a/script/bindgen.sh b/script/bindgen.sh
new file mode 100755
index 00000000..699f0339
--- /dev/null
+++ b/script/bindgen.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Regenerates src/bindings.rs from the tree-sitter runtime header, then appends
+# the TREE_SITTER_LANGUAGE_VERSION constant read from the header's #define.
+
+# Abort on the first failure so that a failed bindgen run or a missing #define
+# is reported instead of silently producing a malformed bindings file.
+set -euo pipefail
+
+output_path=src/bindings.rs
+header_path='vendor/tree-sitter/include/tree_sitter/runtime.h'
+
+bindgen \
+  --no-layout-tests \
+  --whitelist-type '^TS.*' \
+  --whitelist-function '^ts_.*' \
+  --opaque-type FILE \
+  --distrust-clang-mangling \
+  "$header_path" > "$output_path"
+
+echo "" >> "$output_path"
+version_constant='TREE_SITTER_LANGUAGE_VERSION'
+version_number=$(grep -E "#define $version_constant (.*)" "$header_path" | cut -d' ' -f3)
+echo "pub const $version_constant: usize = $version_number;" >> "$output_path"
diff --git a/script/fetch-test-fixtures.cmd b/script/fetch-test-fixtures.cmd new file mode 100755 index 00000000..33543961 --- /dev/null +++ b/script/fetch-test-fixtures.cmd @@ -0,0 +1,16 @@ +@Echo off +SETLOCAL + +Set grammar_dir=fixtures\tree-sitter-rust +Set 
grammar_url=https://github.com/tree-sitter/tree-sitter-rust + +@IF NOT EXIST %grammar_dir% ( + git clone %grammar_url% %grammar_dir% --depth=1 +) + +pushd %grammar_dir% +git fetch origin master --depth=1 +git reset --hard origin/master +popd + +ENDLOCAL
diff --git a/script/fetch-test-fixtures.sh b/script/fetch-test-fixtures.sh
new file mode 100755
index 00000000..24cc316a
--- /dev/null
+++ b/script/fetch-test-fixtures.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Clones the tree-sitter-rust grammar used as a test fixture (first run only)
+# and resets the checkout to the tip of its upstream master branch.
+
+# Stop at the first failure: without this, a failed clone or `cd` would let the
+# `git reset --hard` below run against whichever repository the script was
+# started in.
+set -euo pipefail
+
+grammar_dir='fixtures/tree-sitter-rust'
+grammar_url='https://github.com/tree-sitter/tree-sitter-rust'
+
+if [ ! -d "$grammar_dir" ]; then
+  git clone "$grammar_url" "$grammar_dir" --depth=1
+fi
+
+(
+  cd "$grammar_dir"
+  git fetch origin master --depth=1
+  git reset --hard origin/master
+)
diff --git a/script/test.sh b/script/test.sh
new file mode 100755
index 00000000..eb6183c0
--- /dev/null
+++ b/script/test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# Runs `cargo test` with RUST_TREE_SITTER_TEST set for the build.
+# "$@" is quoted so that arguments containing spaces reach cargo intact.
+RUST_TREE_SITTER_TEST=1 cargo test "$@"