Initial commit

This commit is contained in:
Max Brunsfeld 2016-07-10 14:03:00 -07:00
commit 6e4115548c
8 changed files with 734 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
target
Cargo.lock

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "vendor/tree-sitter"]
path = vendor/tree-sitter
url = https://github.com/tree-sitter/tree-sitter

15
Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
# Manifest for the Rust bindings to the vendored tree-sitter runtime.
[package]
name = "tree-sitter"
version = "0.1.0"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
build = "build.rs"
# Cargo ignores `exclude` whenever `include` is present, so only the
# allow-list is kept. It has to name everything the packaged crate needs:
# the manifest, the build script, the Rust sources, and every vendored
# directory that build.rs compiles from or adds to the include path.
include = [
  "Cargo.toml",
  "build.rs",
  "src/**/*",
  "vendor/tree-sitter/include/**/*",
  "vendor/tree-sitter/src/runtime/*",
  "vendor/tree-sitter/externals/utf8proc/utf8proc*",
]

[dependencies]

[build-dependencies]
cc = "1.0"

35
build.rs Normal file
View file

@ -0,0 +1,35 @@
extern crate cc;
use std::path::Path;
/// Build script entry point: compiles the vendored tree-sitter runtime C
/// sources with `cc` into the static library passed to `compile`.
fn main() {
    let dir_path = Path::new("vendor/tree-sitter/src/runtime");

    // Every translation unit appears exactly once; listing a file twice hands
    // the same source to the compiler twice.
    let source_filenames = [
        "get_changed_ranges.c",
        "language.c",
        "lexer.c",
        "node.c",
        "parser.c",
        "stack.c",
        "subtree.c",
        "tree_cursor.c",
        "tree.c",
        "utf16.c",
    ];

    let mut config = cc::Build::new();
    config.include("vendor/tree-sitter/src");
    config.include("vendor/tree-sitter/include");
    config.include("vendor/tree-sitter/externals/utf8proc");
    config.flag_if_supported("-Wno-unused-parameter");

    for source_filename in source_filenames.iter() {
        // `file` accepts any `AsRef<Path>`, so the joined path is passed
        // directly instead of round-tripping through `to_str().unwrap()`.
        config.file(dir_path.join(source_filename));
    }

    config.compile("libruntime.a")
}

16
script/bindgen.sh Executable file
View file

@ -0,0 +1,16 @@
#!/bin/bash
#
# Regenerates src/ffi.rs from the vendored tree-sitter runtime header with
# bindgen, then appends a blank line and a `pub const` carrying the value of
# the header's TREE_SITTER_LANGUAGE_VERSION #define.

# Stop at the first failing command so a failed bindgen run is not followed by
# appending the constant to an incomplete bindings file.
set -e

output_path=src/ffi.rs
header_path='vendor/tree-sitter/include/tree_sitter/runtime.h'

bindgen \
  --no-layout-tests \
  --whitelist-type '^TS.*' \
  --whitelist-function '^ts_.*' \
  --opaque-type FILE \
  "$header_path" > "$output_path"

echo "" >> "$output_path"

version_constant='TREE_SITTER_LANGUAGE_VERSION'
# Third space-separated field of the `#define NAME VALUE` line.
version_number=$(grep -E "#define $version_constant (.*)" "$header_path" | cut -d' ' -f3)

# Refuse to emit `pub const ...: usize = ;` when the #define was not found.
if [ -z "$version_number" ]; then
  echo "error: $version_constant not found in $header_path" >&2
  exit 1
fi

echo "pub const $version_constant: usize = $version_number;" >> "$output_path"

333
src/ffi.rs Normal file
View file

@ -0,0 +1,333 @@
/* automatically generated by rust-bindgen */
/// Opaque stand-in for the C `FILE` type (bindgen is run with
/// `--opaque-type FILE`); the bindings only ever use it behind `*mut FILE`.
pub type FILE = [u64; 19usize];
/// Symbol identifier: returned by `ts_node_symbol` and accepted by
/// `ts_language_symbol_name` / `ts_language_symbol_type`.
pub type TSSymbol = ::std::os::raw::c_ushort;
/// Opaque handle for the C `TSLanguage` type. It has no visible fields and is
/// only used behind raw pointers in these bindings.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSLanguage {
_unused: [u8; 0],
}
/// Opaque handle for the C `TSParser` type; created by `ts_parser_new` and
/// released with `ts_parser_delete`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSParser {
_unused: [u8; 0],
}
/// Opaque handle for the C `TSTree` type; returned by the `ts_parser_parse*`
/// functions and `ts_tree_copy`, released with `ts_tree_delete`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSTree {
_unused: [u8; 0],
}
// Values of the C `TSInputEncoding` enum; stored in `TSInput::encoding`.
pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0;
pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1;
pub type TSInputEncoding = u32;
// Values of the C `TSSymbolType` enum; returned by `ts_language_symbol_type`.
pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0;
pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1;
pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2;
pub type TSSymbolType = u32;
/// A row/column pair, passed and returned by value across the FFI boundary.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSPoint {
pub row: u32,
pub column: u32,
}
/// A pair of points delimiting a range; `ts_tree_get_changed_ranges` returns
/// a pointer to values of this type.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSRange {
pub start: TSPoint,
pub end: TSPoint,
}
/// Callback-based text source handed by value to `ts_parser_parse`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSInput {
// Caller-owned pointer that is passed back as the first argument of both
// callbacks below.
pub payload: *mut ::std::os::raw::c_void,
// Returns a pointer to text and writes a count through `bytes_read`.
pub read: ::std::option::Option<
unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32)
-> *const ::std::os::raw::c_char,
>,
// Receives a byte index together with the matching point.
pub seek: ::std::option::Option<
unsafe extern "C" fn(
payload: *mut ::std::os::raw::c_void,
byte_index: u32,
position: TSPoint,
) -> ::std::os::raw::c_int,
>,
// One of the `TSInputEncoding_*` constants.
pub encoding: TSInputEncoding,
}
// Values of the C `TSLogType` enum; passed as the second argument of
// `TSLogger::log`.
pub const TSLogType_TSLogTypeParse: TSLogType = 0;
pub const TSLogType_TSLogTypeLex: TSLogType = 1;
pub type TSLogType = u32;
/// Logging callback plus its payload; installed with `ts_parser_set_logger`
/// and read back with `ts_parser_logger`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSLogger {
// Caller-owned pointer that is passed back as the first argument of `log`.
pub payload: *mut ::std::os::raw::c_void,
// Receives the payload, a `TSLogType` value and a C string.
pub log: ::std::option::Option<
unsafe extern "C" fn(
payload: *mut ::std::os::raw::c_void,
arg1: TSLogType,
arg2: *const ::std::os::raw::c_char,
),
>,
}
/// Description of a text edit, given both as byte offsets and as points;
/// passed by pointer to `ts_tree_edit`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSInputEdit {
pub start_byte: u32,
pub old_end_byte: u32,
pub new_end_byte: u32,
pub start_point: TSPoint,
pub old_end_point: TSPoint,
pub new_end_point: TSPoint,
}
/// Node value passed to and returned from the `ts_node_*` functions by value.
/// The safe wrapper in `lib.rs` treats a null `id` as "no node".
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSNode {
pub context: [u32; 4usize],
pub id: *const ::std::os::raw::c_void,
pub tree: *const ::std::os::raw::c_void,
}
/// Cursor value produced by `ts_tree_cursor_new` and mutated in place through
/// a pointer by the `ts_tree_cursor_goto_*` functions.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSTreeCursor {
pub context: [u32; 2usize],
pub id: *const ::std::os::raw::c_void,
pub tree: *const ::std::os::raw::c_void,
}
// Raw bindings for the `ts_parser_*` functions.
//
// The declarations rely on the default `extern "C"` symbol naming rather than
// `#[link_name = "\u{1}_..."]` attributes. The `\u{1}` prefix disables symbol
// mangling and pins the spelling with a leading underscore, which only exists
// on targets that prefix C symbols (for example Mach-O) and fails to link
// elsewhere. The default naming yields the correct symbol on every target.
extern "C" {
    pub fn ts_parser_new() -> *mut TSParser;
    pub fn ts_parser_delete(arg1: *mut TSParser);
    pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage;
    pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool;
    pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger;
    pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger);
    pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE);
    pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool);
    pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree;
    pub fn ts_parser_parse_string(
        arg1: *mut TSParser,
        arg2: *const TSTree,
        arg3: *const ::std::os::raw::c_char,
        arg4: u32,
    ) -> *mut TSTree;
}
// Raw bindings for the `ts_tree_*` functions.
//
// No `#[link_name = "\u{1}_..."]` attributes: that form hard-codes a leading
// underscore that only exists on targets which prefix C symbols, so it fails
// to link elsewhere. The default `extern "C"` naming is correct on every
// target.
extern "C" {
    pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree;
    pub fn ts_tree_delete(arg1: *mut TSTree);
    pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode;
    pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit);
    pub fn ts_tree_get_changed_ranges(
        arg1: *const TSTree,
        arg2: *const TSTree,
        arg3: *mut u32,
    ) -> *mut TSRange;
    pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE);
}
// Raw bindings for the `ts_node_*` functions. Every function takes the node
// by value.
//
// No `#[link_name = "\u{1}_..."]` attributes: that form hard-codes a leading
// underscore that only exists on targets which prefix C symbols, so it fails
// to link elsewhere. The default `extern "C"` naming is correct on every
// target.
extern "C" {
    pub fn ts_node_start_byte(arg1: TSNode) -> u32;
    pub fn ts_node_start_point(arg1: TSNode) -> TSPoint;
    pub fn ts_node_end_byte(arg1: TSNode) -> u32;
    pub fn ts_node_end_point(arg1: TSNode) -> TSPoint;
    pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol;
    pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char;
    pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char;
    pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool;
    pub fn ts_node_is_null(arg1: TSNode) -> bool;
    pub fn ts_node_is_named(arg1: TSNode) -> bool;
    pub fn ts_node_is_missing(arg1: TSNode) -> bool;
    pub fn ts_node_has_changes(arg1: TSNode) -> bool;
    pub fn ts_node_has_error(arg1: TSNode) -> bool;
    pub fn ts_node_parent(arg1: TSNode) -> TSNode;
    pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode;
    pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode;
    pub fn ts_node_child_count(arg1: TSNode) -> u32;
    pub fn ts_node_named_child_count(arg1: TSNode) -> u32;
    pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode;
    pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode;
    pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode;
    pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode;
    pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode;
    pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode;
    pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode;
    pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode;
    pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint)
        -> TSNode;
    pub fn ts_node_named_descendant_for_point_range(
        arg1: TSNode,
        arg2: TSPoint,
        arg3: TSPoint,
    ) -> TSNode;
}
// Raw bindings for the `ts_tree_cursor_*` functions.
//
// No `#[link_name = "\u{1}_..."]` attributes: that form hard-codes a leading
// underscore that only exists on targets which prefix C symbols, so it fails
// to link elsewhere. The default `extern "C"` naming is correct on every
// target.
extern "C" {
    pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor;
    pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor);
    pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool;
    // Returns a signed value; the safe wrapper treats a negative result as
    // "no such child".
    pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64;
    pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool;
    pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool;
    pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode;
}
// Raw bindings for the `ts_language_*` functions.
//
// No `#[link_name = "\u{1}_..."]` attributes: that form hard-codes a leading
// underscore that only exists on targets which prefix C symbols, so it fails
// to link elsewhere. The default `extern "C"` naming is correct on every
// target.
extern "C" {
    pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32;
    pub fn ts_language_symbol_name(
        arg1: *const TSLanguage,
        arg2: TSSymbol,
    ) -> *const ::std::os::raw::c_char;
    pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType;
    pub fn ts_language_version(arg1: *const TSLanguage) -> u32;
}
// Appended by script/bindgen.sh, which copies the value of the header's
// `#define TREE_SITTER_LANGUAGE_VERSION`.
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8;

329
src/lib.rs Normal file
View file

@ -0,0 +1,329 @@
mod ffi;
use std::ffi::CStr;
use std::marker::PhantomData;
use std::os::raw::{c_char, c_int, c_void};
use std::ptr;
/// Copyable wrapper around a raw `ffi::TSSymbol` value.
#[derive(Clone, Copy)]
pub struct Symbol(ffi::TSSymbol);
/// Copyable wrapper around a raw `*const ffi::TSLanguage`; this is what
/// `Parser::set_language` accepts.
#[derive(Clone, Copy)]
pub struct Language(*const ffi::TSLanguage);
pub trait Utf16Input {
fn read(&self) -> &[u16];
fn seek(&self, u32, Point);
}
pub trait Utf8Input {
fn read(&self) -> &[u8];
fn seek(&self, u32, Point);
}
/// Category of a message delivered to the callback installed with
/// `Parser::set_logger`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogType {
    /// Reported when the runtime passes `TSLogType_TSLogTypeParse`.
    Parse,
    /// Reported for every other runtime log type.
    Lex,
}
/// A row/column position; the safe counterpart of `ffi::TSPoint`.
///
/// Ordering compares `row` first and `column` second.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Point {
    pub row: u32,
    pub column: u32,
}

/// Description of a text edit, expressed both as byte offsets and as
/// positions; `Tree::edit` converts it into an `ffi::TSInputEdit`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct InputEdit {
    pub start_byte: u32,
    pub old_end_byte: u32,
    pub new_end_byte: u32,
    pub start_position: Point,
    pub old_end_position: Point,
    pub new_end_position: Point,
}
/// A syntax node: a raw `ffi::TSNode` value plus a `PhantomData` marker that
/// carries the `'a` lifetime (the raw value itself holds no Rust borrow).
pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>);
/// Owns a raw parser pointer obtained from `ts_parser_new`; the `Drop` impl
/// releases it with `ts_parser_delete`.
pub struct Parser(*mut ffi::TSParser);
/// Owns a raw tree pointer together with the input encoding recorded when the
/// tree was parsed; the `Drop` impl releases it with `ts_tree_delete`.
pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding);
/// A raw `ffi::TSTreeCursor` value plus a `PhantomData` marker carrying `'a`;
/// the `Drop` impl releases it with `ts_tree_cursor_delete`.
pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
impl Parser {
pub fn new() -> Parser {
unsafe {
let parser = ffi::ts_parser_new();
Parser(parser)
}
}
pub fn set_language(&mut self, language: Language) {
unsafe {
ffi::ts_parser_set_language(self.0, language.0);
}
}
pub fn set_logger<F: FnMut(LogType, &str) -> ()>(&mut self, logger: &mut F) {
unsafe extern "C" fn log<F: FnMut(LogType, &str) -> ()>(
payload: *mut c_void,
c_log_type: ffi::TSLogType,
c_message: *const c_char,
) {
let callback = (payload as *mut F).as_mut().unwrap();
if let Ok(message) = CStr::from_ptr(c_message).to_str() {
let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse {
LogType::Parse
} else {
LogType::Lex
};
callback(log_type, message);
}
};
let c_logger = ffi::TSLogger {
payload: logger as *mut F as *mut c_void,
log: Some(log::<F>),
};
unsafe { ffi::ts_parser_set_logger(self.0, c_logger) };
}
pub fn parse_utf8<T: Utf8Input>(
&mut self,
input: &mut T,
old_tree: Option<Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<T: Utf8Input>(
payload: *mut c_void,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let result = input.read();
*bytes_read = result.len() as u32;
return result.as_ptr() as *const c_char;
};
unsafe extern "C" fn seek<T: Utf8Input>(
payload: *mut c_void,
byte: u32,
position: ffi::TSPoint,
) -> c_int {
let input = (payload as *mut T).as_mut().unwrap();
input.seek(
byte,
Point {
row: position.row,
column: position.column,
},
);
return 1;
};
let c_input = ffi::TSInput {
payload: input as *mut T as *mut c_void,
read: Some(read::<T>),
seek: Some(seek::<T>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
};
let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0);
let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) };
if new_tree_ptr.is_null() {
None
} else {
Some(Tree(new_tree_ptr, ffi::TSInputEncoding_TSInputEncodingUTF8))
}
}
pub fn parse_utf16<T: Utf16Input>(
&mut self,
input: &mut T,
old_tree: Option<Tree>,
) -> Option<Tree> {
unsafe extern "C" fn read<T: Utf16Input>(
payload: *mut c_void,
bytes_read: *mut u32,
) -> *const c_char {
let input = (payload as *mut T).as_mut().unwrap();
let result = input.read();
*bytes_read = result.len() as u32 * 2;
return result.as_ptr() as *const c_char;
};
unsafe extern "C" fn seek<T: Utf16Input>(
payload: *mut c_void,
byte: u32,
position: ffi::TSPoint,
) -> c_int {
let input = (payload as *mut T).as_mut().unwrap();
input.seek(
byte / 2,
Point {
row: position.row,
column: position.column / 2,
},
);
return 1;
};
let c_input = ffi::TSInput {
payload: input as *mut T as *mut c_void,
read: Some(read::<T>),
seek: Some(seek::<T>),
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
};
let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0);
let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) };
if new_tree_ptr.is_null() {
None
} else {
Some(Tree(
new_tree_ptr,
ffi::TSInputEncoding_TSInputEncodingUTF16,
))
}
}
}
/// Releases the underlying C parser when the wrapper goes out of scope.
impl Drop for Parser {
fn drop(&mut self) {
// The wrapper owns the pointer, so it is handed to `ts_parser_delete` here.
unsafe { ffi::ts_parser_delete(self.0) }
}
}
impl Tree {
    /// Returns the tree's root node.
    ///
    /// Panics if the runtime hands back a node whose `id` is null.
    pub fn root_node(&self) -> Node {
        let raw_root = unsafe { ffi::ts_tree_root_node(self.0) };
        Node::new(raw_root).unwrap()
    }

    /// Applies `edit` to the tree by converting it to an `ffi::TSInputEdit`
    /// and passing that to `ts_tree_edit`.
    pub fn edit(&mut self, edit: &InputEdit) {
        let start_point: ffi::TSPoint = edit.start_position.into();
        let old_end_point: ffi::TSPoint = edit.old_end_position.into();
        let new_end_point: ffi::TSPoint = edit.new_end_position.into();

        let c_edit = ffi::TSInputEdit {
            start_byte: edit.start_byte,
            old_end_byte: edit.old_end_byte,
            new_end_byte: edit.new_end_byte,
            start_point,
            old_end_point,
            new_end_point,
        };
        unsafe { ffi::ts_tree_edit(self.0, &c_edit) };
    }

    /// Creates a cursor over this tree via `ts_tree_cursor_new`.
    pub fn walk(&self) -> TreeCursor {
        let raw_cursor = unsafe { ffi::ts_tree_cursor_new(self.0) };
        TreeCursor(raw_cursor, PhantomData)
    }
}
/// Releases the underlying C tree when the wrapper goes out of scope.
impl Drop for Tree {
fn drop(&mut self) {
// The wrapper owns the pointer, so it is handed to `ts_tree_delete` here.
unsafe { ffi::ts_tree_delete(self.0) }
}
}
/// Cloning goes through `ts_tree_copy`, so the clone owns its own tree
/// pointer; the recorded encoding is carried over unchanged.
impl Clone for Tree {
fn clone(&self) -> Tree {
unsafe { Tree(ffi::ts_tree_copy(self.0), self.1) }
}
}
impl<'a> Node<'a> {
    /// Wraps a raw node, mapping a null `id` to `None`.
    fn new(node: ffi::TSNode) -> Option<Self> {
        if node.id.is_null() {
            None
        } else {
            Some(Node(node, PhantomData))
        }
    }

    /// Returns the string reported by `ts_node_type`.
    ///
    /// Panics if that string is not valid UTF-8.
    ///
    /// NOTE(review): the `'static` lifetime assumes the runtime's type-name
    /// strings live for the whole program; confirm against the runtime.
    pub fn name(&self) -> &'static str {
        unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }
            .to_str()
            .unwrap()
    }

    /// Returns the value of `ts_node_start_byte`.
    pub fn start_index(&self) -> u32 {
        unsafe { ffi::ts_node_start_byte(self.0) }
    }

    /// Returns the value of `ts_node_end_byte`.
    pub fn end_index(&self) -> u32 {
        unsafe { ffi::ts_node_end_byte(self.0) }
    }

    /// Returns the value of `ts_node_start_point` as a `Point`.
    pub fn start_position(&self) -> Point {
        let result = unsafe { ffi::ts_node_start_point(self.0) };
        Point {
            row: result.row,
            column: result.column,
        }
    }

    /// Returns the value of `ts_node_end_point` as a `Point`.
    pub fn end_position(&self) -> Point {
        let result = unsafe { ffi::ts_node_end_point(self.0) };
        Point {
            row: result.row,
            column: result.column,
        }
    }

    /// Returns the `i`-th child, or `None` when the runtime returns a node
    /// with a null `id`.
    ///
    /// The result carries the node's own `'a` rather than a borrow of
    /// `self`, so `node = child` style traversal compiles.
    pub fn child(&self, i: u32) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_child(self.0, i) })
    }

    /// Returns the parent, or `None` when the runtime returns a node with a
    /// null `id`.
    ///
    /// The result carries the node's own `'a` rather than a borrow of
    /// `self`.
    pub fn parent(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_parent(self.0) })
    }
}
impl<'a> TreeCursor<'a> {
    /// Returns the node reported by `ts_tree_cursor_current_node`.
    ///
    /// The node carries the cursor's `'a` rather than a borrow of the cursor
    /// itself, so the cursor can still be moved while the node is held.
    pub fn node(&self) -> Node<'a> {
        Node(
            unsafe { ffi::ts_tree_cursor_current_node(&self.0) },
            PhantomData,
        )
    }

    /// Calls `ts_tree_cursor_goto_first_child` and returns its result.
    pub fn goto_first_child(&mut self) -> bool {
        unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }
    }

    /// Calls `ts_tree_cursor_goto_parent` and returns its result.
    pub fn goto_parent(&mut self) -> bool {
        unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }
    }

    /// Calls `ts_tree_cursor_goto_next_sibling` and returns its result.
    pub fn goto_next_sibling(&mut self) -> bool {
        unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }
    }

    /// Calls `ts_tree_cursor_goto_first_child_for_byte` with `index`.
    ///
    /// Returns `None` when the runtime reports a negative value, otherwise
    /// the reported value as a `u32`.
    pub fn goto_first_child_for_index(&mut self, index: u32) -> Option<u32> {
        let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index) };
        if result < 0 {
            None
        } else {
            Some(result as u32)
        }
    }
}
/// Releases the cursor's C-side resources when the wrapper goes out of scope.
impl<'a> Drop for TreeCursor<'a> {
fn drop(&mut self) {
// The raw cursor is stored inline, so a pointer to the field is passed.
unsafe { ffi::ts_tree_cursor_delete(&mut self.0) }
}
}
/// Field-by-field conversion into the raw FFI point.
///
/// Implemented as `From` so that `Point: Into<ffi::TSPoint>` (used through
/// `.into()` elsewhere in the crate) is provided by the standard blanket impl.
impl From<Point> for ffi::TSPoint {
    fn from(point: Point) -> ffi::TSPoint {
        ffi::TSPoint {
            row: point.row,
            column: point.column,
        }
    }
}
// Placeholder test module: `it_works` has an empty body and asserts nothing.
#[cfg(test)]
mod tests {
#[test]
fn it_works() {}
}

1
vendor/tree-sitter vendored Submodule

@ -0,0 +1 @@
Subproject commit 5ec3769cb4c9acfda64f80d7c14abce939e8b4c5