From 6e4115548c1982a764ca22f819544455ca9f7807 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 10 Jul 2016 14:03:00 -0700 Subject: [PATCH 01/58] Initial commit --- .gitignore | 2 + .gitmodules | 3 + Cargo.toml | 15 ++ build.rs | 35 +++++ script/bindgen.sh | 16 +++ src/ffi.rs | 333 +++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 329 ++++++++++++++++++++++++++++++++++++++++++++ vendor/tree-sitter | 1 + 8 files changed, 734 insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 Cargo.toml create mode 100644 build.rs create mode 100755 script/bindgen.sh create mode 100644 src/ffi.rs create mode 100644 src/lib.rs create mode 160000 vendor/tree-sitter diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..a9d37c56 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..eef86f94 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/tree-sitter"] + path = vendor/tree-sitter + url = https://github.com/tree-sitter/tree-sitter diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..0a93febe --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "tree-sitter" +version = "0.1.0" +authors = ["Max Brunsfeld "] +build = "build.rs" +exclude = ["vendor/tree-sitter/**/*"] +include = [ + "vendor/tree-sitter/src/runtime/*", + "vendor/tree-sitter/externals/utf8proc/utf8proc*" +] + +[dependencies] + +[build-dependencies] +cc = "1.0" diff --git a/build.rs b/build.rs new file mode 100644 index 00000000..3427ed5f --- /dev/null +++ b/build.rs @@ -0,0 +1,35 @@ +extern crate cc; + +use std::path::Path; + + +fn main() { + let dir_path = Path::new("vendor/tree-sitter/src/runtime"); + + let source_filenames = [ + "get_changed_ranges.c", + "language.c", + "lexer.c", + "node.c", + "parser.c", + "parser.c", + "stack.c", + "subtree.c", + "tree_cursor.c", + "tree.c", + "utf16.c", + ]; + + let mut 
config = cc::Build::new(); + config.include("vendor/tree-sitter/src"); + config.include("vendor/tree-sitter/include"); + config.include("vendor/tree-sitter/externals/utf8proc"); + config.flag_if_supported("-Wno-unused-parameter"); + + for source_filename in source_filenames.iter() { + let source_path = dir_path.join(Path::new(&source_filename)); + config.file(&source_path.to_str().unwrap()); + } + + config.compile("libruntime.a") +} diff --git a/script/bindgen.sh b/script/bindgen.sh new file mode 100755 index 00000000..190e7a4f --- /dev/null +++ b/script/bindgen.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +output_path=src/ffi.rs +header_path='vendor/tree-sitter/include/tree_sitter/runtime.h' + +bindgen \ + --no-layout-tests \ + --whitelist-type '^TS.*' \ + --whitelist-function '^ts_.*' \ + --opaque-type FILE \ + $header_path > $output_path + +echo "" >> $output_path +version_constant='TREE_SITTER_LANGUAGE_VERSION' +version_number=$(egrep "#define $version_constant (.*)" $header_path | cut -d' ' -f3) +echo "pub const $version_constant: usize = $version_number;" >> $output_path diff --git a/src/ffi.rs b/src/ffi.rs new file mode 100644 index 00000000..7d1c06e8 --- /dev/null +++ b/src/ffi.rs @@ -0,0 +1,333 @@ +/* automatically generated by rust-bindgen */ + +pub type FILE = [u64; 19usize]; +pub type TSSymbol = ::std::os::raw::c_ushort; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLanguage { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSParser { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTree { + _unused: [u8; 0], +} +pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; +pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; +pub type TSInputEncoding = u32; +pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; +pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; +pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; +pub type TSSymbolType 
= u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSPoint { + pub row: u32, + pub column: u32, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSRange { + pub start: TSPoint, + pub end: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInput { + pub payload: *mut ::std::os::raw::c_void, + pub read: ::std::option::Option< + unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32) + -> *const ::std::os::raw::c_char, + >, + pub seek: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + byte_index: u32, + position: TSPoint, + ) -> ::std::os::raw::c_int, + >, + pub encoding: TSInputEncoding, +} +pub const TSLogType_TSLogTypeParse: TSLogType = 0; +pub const TSLogType_TSLogTypeLex: TSLogType = 1; +pub type TSLogType = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLogger { + pub payload: *mut ::std::os::raw::c_void, + pub log: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + arg1: TSLogType, + arg2: *const ::std::os::raw::c_char, + ), + >, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInputEdit { + pub start_byte: u32, + pub old_end_byte: u32, + pub new_end_byte: u32, + pub start_point: TSPoint, + pub old_end_point: TSPoint, + pub new_end_point: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSNode { + pub context: [u32; 4usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTreeCursor { + pub context: [u32; 2usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +extern "C" { + #[link_name = "\u{1}_ts_parser_new"] + pub fn ts_parser_new() -> *mut TSParser; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_delete"] + pub fn ts_parser_delete(arg1: *mut TSParser); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_language"] + pub fn 
ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_set_language"] + pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_logger"] + pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_set_logger"] + pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_print_dot_graphs"] + pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_halt_on_error"] + pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_parse"] + pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_parse_string"] + pub fn ts_parser_parse_string( + arg1: *mut TSParser, + arg2: *const TSTree, + arg3: *const ::std::os::raw::c_char, + arg4: u32, + ) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_copy"] + pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_delete"] + pub fn ts_tree_delete(arg1: *mut TSTree); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_root_node"] + pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_edit"] + pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_get_changed_ranges"] + pub fn ts_tree_get_changed_ranges( + arg1: *const TSTree, + arg2: *const TSTree, + arg3: *mut u32, + ) -> *mut TSRange; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_print_dot_graph"] + pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); +} +extern "C" { + #[link_name = "\u{1}_ts_node_start_byte"] + pub fn ts_node_start_byte(arg1: 
TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_start_point"] + pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + #[link_name = "\u{1}_ts_node_end_byte"] + pub fn ts_node_end_byte(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_end_point"] + pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + #[link_name = "\u{1}_ts_node_symbol"] + pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; +} +extern "C" { + #[link_name = "\u{1}_ts_node_type"] + pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_node_string"] + pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_node_eq"] + pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_null"] + pub fn ts_node_is_null(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_named"] + pub fn ts_node_is_named(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_missing"] + pub fn ts_node_is_missing(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_has_changes"] + pub fn ts_node_has_changes(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_has_error"] + pub fn ts_node_has_error(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_parent"] + pub fn ts_node_parent(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_child"] + pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_child"] + pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_child_count"] + pub fn ts_node_child_count(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_child_count"] + pub fn ts_node_named_child_count(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = 
"\u{1}_ts_node_next_sibling"] + pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_next_named_sibling"] + pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_prev_sibling"] + pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_prev_named_sibling"] + pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_first_child_for_byte"] + pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_first_named_child_for_byte"] + pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_descendant_for_byte_range"] + pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_descendant_for_byte_range"] + pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_descendant_for_point_range"] + pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) + -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_descendant_for_point_range"] + pub fn ts_node_named_descendant_for_point_range( + arg1: TSNode, + arg2: TSPoint, + arg3: TSPoint, + ) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_new"] + pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_delete"] + pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_first_child"] + pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_first_child_for_byte"] + pub fn 
ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_next_sibling"] + pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_parent"] + pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_current_node"] + pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_count"] + pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_name"] + pub fn ts_language_symbol_name( + arg1: *const TSLanguage, + arg2: TSSymbol, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_type"] + pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; +} +extern "C" { + #[link_name = "\u{1}_ts_language_version"] + pub fn ts_language_version(arg1: *const TSLanguage) -> u32; +} + +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8; diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..ef11757a --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,329 @@ +mod ffi; + +use std::ffi::CStr; +use std::marker::PhantomData; +use std::os::raw::{c_char, c_int, c_void}; +use std::ptr; + +#[derive(Clone, Copy)] +pub struct Symbol(ffi::TSSymbol); + +#[derive(Clone, Copy)] +pub struct Language(*const ffi::TSLanguage); + +pub trait Utf16Input { + fn read(&self) -> &[u16]; + fn seek(&self, u32, Point); +} + +pub trait Utf8Input { + fn read(&self) -> &[u8]; + fn seek(&self, u32, Point); +} + +pub enum LogType { + Parse, + Lex, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Point { + pub row: u32, + pub column: u32, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct InputEdit { + pub start_byte: u32, + pub old_end_byte: u32, + pub 
new_end_byte: u32, + pub start_position: Point, + pub old_end_position: Point, + pub new_end_position: Point, +} + +pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); + +pub struct Parser(*mut ffi::TSParser); + +pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding); + +pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); + +impl Parser { + pub fn new() -> Parser { + unsafe { + let parser = ffi::ts_parser_new(); + Parser(parser) + } + } + + pub fn set_language(&mut self, language: Language) { + unsafe { + ffi::ts_parser_set_language(self.0, language.0); + } + } + + pub fn set_logger ()>(&mut self, logger: &mut F) { + unsafe extern "C" fn log ()>( + payload: *mut c_void, + c_log_type: ffi::TSLogType, + c_message: *const c_char, + ) { + let callback = (payload as *mut F).as_mut().unwrap(); + if let Ok(message) = CStr::from_ptr(c_message).to_str() { + let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { + LogType::Parse + } else { + LogType::Lex + }; + callback(log_type, message); + } + }; + + let c_logger = ffi::TSLogger { + payload: logger as *mut F as *mut c_void, + log: Some(log::), + }; + + unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; + } + + pub fn parse_utf8( + &mut self, + input: &mut T, + old_tree: Option, + ) -> Option { + unsafe extern "C" fn read( + payload: *mut c_void, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let result = input.read(); + *bytes_read = result.len() as u32; + return result.as_ptr() as *const c_char; + }; + + unsafe extern "C" fn seek( + payload: *mut c_void, + byte: u32, + position: ffi::TSPoint, + ) -> c_int { + let input = (payload as *mut T).as_mut().unwrap(); + input.seek( + byte, + Point { + row: position.row, + column: position.column, + }, + ); + return 1; + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + seek: Some(seek::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, 
+ }; + + let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0); + + let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) }; + if new_tree_ptr.is_null() { + None + } else { + Some(Tree(new_tree_ptr, ffi::TSInputEncoding_TSInputEncodingUTF8)) + } + } + + pub fn parse_utf16( + &mut self, + input: &mut T, + old_tree: Option, + ) -> Option { + unsafe extern "C" fn read( + payload: *mut c_void, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let result = input.read(); + *bytes_read = result.len() as u32 * 2; + return result.as_ptr() as *const c_char; + }; + + unsafe extern "C" fn seek( + payload: *mut c_void, + byte: u32, + position: ffi::TSPoint, + ) -> c_int { + let input = (payload as *mut T).as_mut().unwrap(); + input.seek( + byte / 2, + Point { + row: position.row, + column: position.column / 2, + }, + ); + return 1; + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + seek: Some(seek::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + }; + + let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0); + + let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) }; + if new_tree_ptr.is_null() { + None + } else { + Some(Tree( + new_tree_ptr, + ffi::TSInputEncoding_TSInputEncodingUTF16, + )) + } + } +} + +impl Drop for Parser { + fn drop(&mut self) { + unsafe { ffi::ts_parser_delete(self.0) } + } +} + +impl Tree { + pub fn root_node(&self) -> Node { + Node::new(unsafe { ffi::ts_tree_root_node(self.0) }).unwrap() + } + + pub fn edit(&mut self, edit: &InputEdit) { + let edit = ffi::TSInputEdit { + start_byte: edit.start_byte, + old_end_byte: edit.old_end_byte, + new_end_byte: edit.new_end_byte, + start_point: edit.start_position.into(), + old_end_point: edit.old_end_position.into(), + new_end_point: edit.new_end_position.into(), + }; + unsafe { ffi::ts_tree_edit(self.0, &edit) }; + } + + pub fn 
walk(&self) -> TreeCursor { + TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) + } +} + +impl Drop for Tree { + fn drop(&mut self) { + unsafe { ffi::ts_tree_delete(self.0) } + } +} + +impl Clone for Tree { + fn clone(&self) -> Tree { + unsafe { Tree(ffi::ts_tree_copy(self.0), self.1) } + } +} + +impl<'a> Node<'a> { + fn new(node: ffi::TSNode) -> Option { + if node.id.is_null() { + None + } else { + Some(Node(node, PhantomData)) + } + } + + pub fn name(&self) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } + .to_str() + .unwrap() + } + + pub fn start_index(&self) -> u32 { + unsafe { ffi::ts_node_start_byte(self.0) } + } + + pub fn end_index(&self) -> u32 { + unsafe { ffi::ts_node_end_byte(self.0) } + } + + pub fn start_position(&self) -> Point { + let result = unsafe { ffi::ts_node_start_point(self.0) }; + Point { + row: result.row, + column: result.column, + } + } + + pub fn end_position(&self) -> Point { + let result = unsafe { ffi::ts_node_end_point(self.0) }; + Point { + row: result.row, + column: result.column, + } + } + + pub fn child(&self, i: u32) -> Option { + Self::new(unsafe { ffi::ts_node_child(self.0, i) }) + } + + pub fn parent(&self) -> Option { + Self::new(unsafe { ffi::ts_node_parent(self.0) }) + } +} + +impl<'a> TreeCursor<'a> { + fn node(&'a self) -> Node<'a> { + Node( + unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, + PhantomData, + ) + } + + fn goto_first_child(&mut self) -> bool { + return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; + } + + fn goto_parent(&mut self) -> bool { + return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }; + } + + fn goto_next_sibling(&mut self) -> bool { + return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }; + } + + fn goto_first_child_for_index(&mut self, index: u32) -> Option { + let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index) }; + if result < 0 { + None + } else { + Some(result as 
u32) + } + } +} + +impl<'a> Drop for TreeCursor<'a> { + fn drop(&mut self) { + unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } + } +} + +impl Into for Point { + fn into(self) -> ffi::TSPoint { + ffi::TSPoint { + row: self.row, + column: self.column, + } + } +} + +#[cfg(test)] +mod tests { + #[test] + fn it_works() {} +} diff --git a/vendor/tree-sitter b/vendor/tree-sitter new file mode 160000 index 00000000..5ec3769c --- /dev/null +++ b/vendor/tree-sitter @@ -0,0 +1 @@ +Subproject commit 5ec3769cb4c9acfda64f80d7c14abce939e8b4c5 From 8918d1a5b14f9a54ef23dcb4b29d8bf2bccd6384 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 14:35:31 -0700 Subject: [PATCH 02/58] Add boilerplate --- .travis.yml | 8 ++++++++ LICENSE | 21 +++++++++++++++++++++ README.md | 8 ++++++++ 3 files changed, 37 insertions(+) create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 README.md diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..32e3a71f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,8 @@ +language: rust + +rust: + - stable + +branches: + only: + - master diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..971b81f9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2018 Max Brunsfeld + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..08df0e4e --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +Rust Tree-sitter +=========================== + +[![Build Status](https://travis-ci.org/tree-sitter/rust-tree-sitter.svg)](https://travis-ci.org/tree-sitter/rust-tree-sitter) + +Rust bindings to the [Tree-sitter][] parsing library. + +[tree-sitter]: https://github.com/tree-sitter/tree-sitter From f07f710db7633dc26d86163972512799ae407540 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 14:40:31 -0700 Subject: [PATCH 03/58] Compile tree-sitter sources in c99 mode --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 3427ed5f..53265655 100644 --- a/build.rs +++ b/build.rs @@ -24,6 +24,7 @@ fn main() { config.include("vendor/tree-sitter/src"); config.include("vendor/tree-sitter/include"); config.include("vendor/tree-sitter/externals/utf8proc"); + config.flag_if_supported("-std=c99"); config.flag_if_supported("-Wno-unused-parameter"); for source_filename in source_filenames.iter() { From ead0e312624a4e20a312875c073be4dc51a2f29b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 14:43:30 -0700 Subject: [PATCH 04/58] Fix duplicated compile of parser.c --- build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/build.rs b/build.rs index 53265655..ad62f3a1 100644 --- a/build.rs +++ b/build.rs @@ -12,7 +12,6 @@ fn main() { "lexer.c", "node.c", "parser.c", - "parser.c", "stack.c", "subtree.c", "tree_cursor.c", From 
08217fff8dfc7a80b2348679144ff44344d63008 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 17:16:35 -0700 Subject: [PATCH 05/58] Get basic parsing working, add some unit tests --- .gitignore | 1 + .travis.yml | 6 ++ Cargo.toml | 2 - build.rs | 31 ++++++---- fixtures/.gitkeep | 0 script/fetch-test-fixtures.sh | 14 +++++ src/lib.rs | 113 +++++++++++++++++++++++++++++----- 7 files changed, 138 insertions(+), 29 deletions(-) create mode 100644 fixtures/.gitkeep create mode 100755 script/fetch-test-fixtures.sh diff --git a/.gitignore b/.gitignore index a9d37c56..fbd4fda0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +fixtures/tree-sitter-rust diff --git a/.travis.yml b/.travis.yml index 32e3a71f..10fcfe94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,12 @@ language: rust rust: - stable +env: + - RUST_TREE_SITTER_TEST=1 + +before_install: + - ./script/fetch-test-fixtures.sh + branches: only: - master diff --git a/Cargo.toml b/Cargo.toml index 0a93febe..e20d40aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,5 @@ include = [ "vendor/tree-sitter/externals/utf8proc/utf8proc*" ] -[dependencies] - [build-dependencies] cc = "1.0" diff --git a/build.rs b/build.rs index ad62f3a1..fa8b41ea 100644 --- a/build.rs +++ b/build.rs @@ -1,10 +1,17 @@ extern crate cc; +use std::env; use std::path::Path; - fn main() { - let dir_path = Path::new("vendor/tree-sitter/src/runtime"); + let root_path = Path::new("vendor/tree-sitter"); + + let mut config = cc::Build::new(); + config.flag_if_supported("-std=c99"); + config.flag_if_supported("-Wno-unused-parameter"); + config.include(root_path.join(Path::new("src"))); + config.include(root_path.join(Path::new("include"))); + config.include(root_path.join(Path::new("externals/utf8proc"))); let source_filenames = [ "get_changed_ranges.c", @@ -19,16 +26,18 @@ fn main() { "utf16.c", ]; - let mut config = cc::Build::new(); - config.include("vendor/tree-sitter/src"); - 
config.include("vendor/tree-sitter/include"); - config.include("vendor/tree-sitter/externals/utf8proc"); - config.flag_if_supported("-std=c99"); - config.flag_if_supported("-Wno-unused-parameter"); + config.files(source_filenames.iter().map(|source_filename| { + root_path + .join(Path::new(&"src/runtime")) + .join(Path::new(&source_filename)) + })); - for source_filename in source_filenames.iter() { - let source_path = dir_path.join(Path::new(&source_filename)); - config.file(&source_path.to_str().unwrap()); + config.file(root_path.join(Path::new("externals/utf8proc/utf8proc.c"))); + + if env::var("RUST_TREE_SITTER_TEST").is_ok() { + let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); + config.file(parser_dir.join("parser.c")); + config.file(parser_dir.join("scanner.c")); } config.compile("libruntime.a") diff --git a/fixtures/.gitkeep b/fixtures/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/script/fetch-test-fixtures.sh b/script/fetch-test-fixtures.sh new file mode 100755 index 00000000..24cc316a --- /dev/null +++ b/script/fetch-test-fixtures.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +grammar_dir='fixtures/tree-sitter-rust' +grammar_url='https://github.com/tree-sitter/tree-sitter-rust' + +if [ ! 
-d $grammar_dir ]; then + git clone $grammar_url $grammar_dir --depth=1 +fi + +( + cd $grammar_dir; + git fetch origin master --depth=1 + git reset --hard origin/master; +) diff --git a/src/lib.rs b/src/lib.rs index ef11757a..fa1db0f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,19 +8,19 @@ use std::ptr; #[derive(Clone, Copy)] pub struct Symbol(ffi::TSSymbol); -#[derive(Clone, Copy)] -pub struct Language(*const ffi::TSLanguage); +pub type Language = *const ffi::TSLanguage; pub trait Utf16Input { - fn read(&self) -> &[u16]; - fn seek(&self, u32, Point); + fn read(&mut self) -> &[u16]; + fn seek(&mut self, u32, Point); } pub trait Utf8Input { - fn read(&self) -> &[u8]; - fn seek(&self, u32, Point); + fn read(&mut self) -> &[u8]; + fn seek(&mut self, u32, Point); } +#[derive(Debug, PartialEq, Eq)] pub enum LogType { Parse, Lex, @@ -50,6 +50,11 @@ pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +struct FlatInput<'a> { + bytes: &'a [u8], + offset: usize, +} + impl Parser { pub fn new() -> Parser { unsafe { @@ -60,11 +65,11 @@ impl Parser { pub fn set_language(&mut self, language: Language) { unsafe { - ffi::ts_parser_set_language(self.0, language.0); + ffi::ts_parser_set_language(self.0, language); } } - pub fn set_logger ()>(&mut self, logger: &mut F) { + pub fn set_logger ()>(&mut self, logger: Option<&mut F>) { unsafe extern "C" fn log ()>( payload: *mut c_void, c_log_type: ffi::TSLogType, @@ -81,14 +86,24 @@ impl Parser { } }; - let c_logger = ffi::TSLogger { - payload: logger as *mut F as *mut c_void, - log: Some(log::), - }; + let c_logger; + if let Some(logger) = logger { + c_logger = ffi::TSLogger { + payload: logger as *mut F as *mut c_void, + log: Some(log::), + }; + } else { + c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None }; + } unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; } + pub fn parse_str(&mut self, input: &str, old_tree: Option) -> Option { + let 
mut input = FlatInput { bytes: input.as_bytes(), offset: 0}; + self.parse_utf8(&mut input, old_tree) + } + pub fn parse_utf8( &mut self, input: &mut T, @@ -239,9 +254,7 @@ impl<'a> Node<'a> { } pub fn name(&self) -> &'static str { - unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } - .to_str() - .unwrap() + unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() } pub fn start_index(&self) -> u32 { @@ -272,11 +285,24 @@ impl<'a> Node<'a> { Self::new(unsafe { ffi::ts_node_child(self.0, i) }) } + pub fn child_count(&self) -> u32 { + unsafe { ffi::ts_node_child_count(self.0) } + } + pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } + + pub fn to_sexp(&self) -> String { + let c_string = unsafe { ffi::ts_node_string(self.0) }; + let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); + unsafe { free(c_string as *mut c_void) }; + result + } } +extern "C" { fn free(pointer: *mut c_void); } + impl<'a> TreeCursor<'a> { fn node(&'a self) -> Node<'a> { Node( @@ -322,8 +348,63 @@ impl Into for Point { } } +impl<'a> Utf8Input for FlatInput<'a> { + fn read(&mut self) -> &[u8] { + let result = &self.bytes[self.offset..]; + self.offset = self.bytes.len(); + result + } + + fn seek(&mut self, offset: u32, _position: Point) { + self.offset = offset as usize; + } +} + #[cfg(test)] mod tests { + use super::*; + + fn rust() -> Language { unsafe { tree_sitter_rust() } } + extern "C" { fn tree_sitter_rust() -> Language; } + #[test] - fn it_works() {} + fn test_basic_parsing() { + let mut parser = Parser::new(); + parser.set_language(rust()); + + let tree = parser.parse_str(" + struct Stuff {} + fn main() {} + ", None).unwrap(); + + let root_node = tree.root_node(); + assert_eq!(root_node.name(), "source_file"); + + assert_eq!( + root_node.to_sexp(), + "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" + ); + + let struct_node = 
root_node.child(0).unwrap(); + assert_eq!(struct_node.name(), "struct_item"); + } + + #[test] + fn test_logging() { + let mut parser = Parser::new(); + parser.set_language(rust()); + + let mut messages = Vec::new(); + parser.set_logger(Some(&mut |log_type, message| { + messages.push((log_type, message.to_string())); + })); + + parser.parse_str(" + struct Stuff {} + fn main() {} + ", None).unwrap(); + + assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string()))); + assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); + } } From 7e6675d56effa6177eaf387b13942c8219107ae1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 17:23:35 -0700 Subject: [PATCH 06/58] Use a more unique library name when building C sources --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index fa8b41ea..4e2c3b8f 100644 --- a/build.rs +++ b/build.rs @@ -40,5 +40,5 @@ fn main() { config.file(parser_dir.join("scanner.c")); } - config.compile("libruntime.a") + config.compile("treesitter") } From 572a60183c86920b0c1bc83941d70b3772534e3a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 17:29:23 -0700 Subject: [PATCH 07/58] Suppress warnings associated w/ generated bindings --- build.rs | 1 + script/bindgen.sh | 2 +- src/bindings.rs | 333 +++++++++++++++++++++++++++++++++++++++++++++ src/ffi.rs | 335 +--------------------------------------------- 4 files changed, 338 insertions(+), 333 deletions(-) create mode 100644 src/bindings.rs diff --git a/build.rs b/build.rs index 4e2c3b8f..5fa5d408 100644 --- a/build.rs +++ b/build.rs @@ -36,6 +36,7 @@ fn main() { if env::var("RUST_TREE_SITTER_TEST").is_ok() { let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); + config.flag_if_supported("-Wno-typedef-redefinition"); config.file(parser_dir.join("parser.c")); config.file(parser_dir.join("scanner.c")); } diff --git a/script/bindgen.sh b/script/bindgen.sh 
index 190e7a4f..1b9008b2 100755 --- a/script/bindgen.sh +++ b/script/bindgen.sh @@ -1,6 +1,6 @@ #!/bin/bash -output_path=src/ffi.rs +output_path=src/bindings.rs header_path='vendor/tree-sitter/include/tree_sitter/runtime.h' bindgen \ diff --git a/src/bindings.rs b/src/bindings.rs new file mode 100644 index 00000000..7d1c06e8 --- /dev/null +++ b/src/bindings.rs @@ -0,0 +1,333 @@ +/* automatically generated by rust-bindgen */ + +pub type FILE = [u64; 19usize]; +pub type TSSymbol = ::std::os::raw::c_ushort; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLanguage { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSParser { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTree { + _unused: [u8; 0], +} +pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; +pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; +pub type TSInputEncoding = u32; +pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; +pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; +pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; +pub type TSSymbolType = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSPoint { + pub row: u32, + pub column: u32, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSRange { + pub start: TSPoint, + pub end: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInput { + pub payload: *mut ::std::os::raw::c_void, + pub read: ::std::option::Option< + unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32) + -> *const ::std::os::raw::c_char, + >, + pub seek: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + byte_index: u32, + position: TSPoint, + ) -> ::std::os::raw::c_int, + >, + pub encoding: TSInputEncoding, +} +pub const TSLogType_TSLogTypeParse: TSLogType = 0; +pub const TSLogType_TSLogTypeLex: TSLogType = 1; +pub type TSLogType = u32; 
+#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLogger { + pub payload: *mut ::std::os::raw::c_void, + pub log: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + arg1: TSLogType, + arg2: *const ::std::os::raw::c_char, + ), + >, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInputEdit { + pub start_byte: u32, + pub old_end_byte: u32, + pub new_end_byte: u32, + pub start_point: TSPoint, + pub old_end_point: TSPoint, + pub new_end_point: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSNode { + pub context: [u32; 4usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTreeCursor { + pub context: [u32; 2usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +extern "C" { + #[link_name = "\u{1}_ts_parser_new"] + pub fn ts_parser_new() -> *mut TSParser; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_delete"] + pub fn ts_parser_delete(arg1: *mut TSParser); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_language"] + pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_set_language"] + pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_logger"] + pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_set_logger"] + pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_print_dot_graphs"] + pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_halt_on_error"] + pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_parse"] + pub fn ts_parser_parse(arg1: *mut 
TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_parse_string"] + pub fn ts_parser_parse_string( + arg1: *mut TSParser, + arg2: *const TSTree, + arg3: *const ::std::os::raw::c_char, + arg4: u32, + ) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_copy"] + pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_delete"] + pub fn ts_tree_delete(arg1: *mut TSTree); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_root_node"] + pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_edit"] + pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_get_changed_ranges"] + pub fn ts_tree_get_changed_ranges( + arg1: *const TSTree, + arg2: *const TSTree, + arg3: *mut u32, + ) -> *mut TSRange; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_print_dot_graph"] + pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); +} +extern "C" { + #[link_name = "\u{1}_ts_node_start_byte"] + pub fn ts_node_start_byte(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_start_point"] + pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + #[link_name = "\u{1}_ts_node_end_byte"] + pub fn ts_node_end_byte(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_end_point"] + pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + #[link_name = "\u{1}_ts_node_symbol"] + pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; +} +extern "C" { + #[link_name = "\u{1}_ts_node_type"] + pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_node_string"] + pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_node_eq"] + pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; +} +extern "C" { 
+ #[link_name = "\u{1}_ts_node_is_null"] + pub fn ts_node_is_null(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_named"] + pub fn ts_node_is_named(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_missing"] + pub fn ts_node_is_missing(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_has_changes"] + pub fn ts_node_has_changes(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_has_error"] + pub fn ts_node_has_error(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_parent"] + pub fn ts_node_parent(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_child"] + pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_child"] + pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_child_count"] + pub fn ts_node_child_count(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_child_count"] + pub fn ts_node_named_child_count(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_next_sibling"] + pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_next_named_sibling"] + pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_prev_sibling"] + pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_prev_named_sibling"] + pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_first_child_for_byte"] + pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_first_named_child_for_byte"] + pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_descendant_for_byte_range"] + pub 
fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_descendant_for_byte_range"] + pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_descendant_for_point_range"] + pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) + -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_descendant_for_point_range"] + pub fn ts_node_named_descendant_for_point_range( + arg1: TSNode, + arg2: TSPoint, + arg3: TSPoint, + ) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_new"] + pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_delete"] + pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_first_child"] + pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_first_child_for_byte"] + pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_next_sibling"] + pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_parent"] + pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_current_node"] + pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_count"] + pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_name"] + pub fn ts_language_symbol_name( + arg1: *const TSLanguage, + arg2: TSSymbol, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[link_name = 
"\u{1}_ts_language_symbol_type"] + pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; +} +extern "C" { + #[link_name = "\u{1}_ts_language_version"] + pub fn ts_language_version(arg1: *const TSLanguage) -> u32; +} + +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8; diff --git a/src/ffi.rs b/src/ffi.rs index 7d1c06e8..323609e0 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -1,333 +1,4 @@ -/* automatically generated by rust-bindgen */ +#![allow(dead_code)] +#![allow(non_upper_case_globals)] -pub type FILE = [u64; 19usize]; -pub type TSSymbol = ::std::os::raw::c_ushort; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSLanguage { - _unused: [u8; 0], -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSParser { - _unused: [u8; 0], -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSTree { - _unused: [u8; 0], -} -pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; -pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; -pub type TSInputEncoding = u32; -pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; -pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; -pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; -pub type TSSymbolType = u32; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSPoint { - pub row: u32, - pub column: u32, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSRange { - pub start: TSPoint, - pub end: TSPoint, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSInput { - pub payload: *mut ::std::os::raw::c_void, - pub read: ::std::option::Option< - unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32) - -> *const ::std::os::raw::c_char, - >, - pub seek: ::std::option::Option< - unsafe extern "C" fn( - payload: *mut ::std::os::raw::c_void, - byte_index: u32, - position: TSPoint, - ) -> ::std::os::raw::c_int, - >, - pub encoding: TSInputEncoding, -} -pub const TSLogType_TSLogTypeParse: 
TSLogType = 0; -pub const TSLogType_TSLogTypeLex: TSLogType = 1; -pub type TSLogType = u32; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSLogger { - pub payload: *mut ::std::os::raw::c_void, - pub log: ::std::option::Option< - unsafe extern "C" fn( - payload: *mut ::std::os::raw::c_void, - arg1: TSLogType, - arg2: *const ::std::os::raw::c_char, - ), - >, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSInputEdit { - pub start_byte: u32, - pub old_end_byte: u32, - pub new_end_byte: u32, - pub start_point: TSPoint, - pub old_end_point: TSPoint, - pub new_end_point: TSPoint, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSNode { - pub context: [u32; 4usize], - pub id: *const ::std::os::raw::c_void, - pub tree: *const ::std::os::raw::c_void, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSTreeCursor { - pub context: [u32; 2usize], - pub id: *const ::std::os::raw::c_void, - pub tree: *const ::std::os::raw::c_void, -} -extern "C" { - #[link_name = "\u{1}_ts_parser_new"] - pub fn ts_parser_new() -> *mut TSParser; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_delete"] - pub fn ts_parser_delete(arg1: *mut TSParser); -} -extern "C" { - #[link_name = "\u{1}_ts_parser_language"] - pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_set_language"] - pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_logger"] - pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_set_logger"] - pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); -} -extern "C" { - #[link_name = "\u{1}_ts_parser_print_dot_graphs"] - pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); -} -extern "C" { - #[link_name = "\u{1}_ts_parser_halt_on_error"] - pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); -} 
-extern "C" { - #[link_name = "\u{1}_ts_parser_parse"] - pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_parse_string"] - pub fn ts_parser_parse_string( - arg1: *mut TSParser, - arg2: *const TSTree, - arg3: *const ::std::os::raw::c_char, - arg4: u32, - ) -> *mut TSTree; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_copy"] - pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_delete"] - pub fn ts_tree_delete(arg1: *mut TSTree); -} -extern "C" { - #[link_name = "\u{1}_ts_tree_root_node"] - pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_edit"] - pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); -} -extern "C" { - #[link_name = "\u{1}_ts_tree_get_changed_ranges"] - pub fn ts_tree_get_changed_ranges( - arg1: *const TSTree, - arg2: *const TSTree, - arg3: *mut u32, - ) -> *mut TSRange; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_print_dot_graph"] - pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); -} -extern "C" { - #[link_name = "\u{1}_ts_node_start_byte"] - pub fn ts_node_start_byte(arg1: TSNode) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_node_start_point"] - pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; -} -extern "C" { - #[link_name = "\u{1}_ts_node_end_byte"] - pub fn ts_node_end_byte(arg1: TSNode) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_node_end_point"] - pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; -} -extern "C" { - #[link_name = "\u{1}_ts_node_symbol"] - pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; -} -extern "C" { - #[link_name = "\u{1}_ts_node_type"] - pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; -} -extern "C" { - #[link_name = "\u{1}_ts_node_string"] - pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; -} -extern "C" { - #[link_name = 
"\u{1}_ts_node_eq"] - pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_is_null"] - pub fn ts_node_is_null(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_is_named"] - pub fn ts_node_is_named(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_is_missing"] - pub fn ts_node_is_missing(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_has_changes"] - pub fn ts_node_has_changes(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_has_error"] - pub fn ts_node_has_error(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_parent"] - pub fn ts_node_parent(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_child"] - pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_named_child"] - pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_child_count"] - pub fn ts_node_child_count(arg1: TSNode) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_node_named_child_count"] - pub fn ts_node_named_child_count(arg1: TSNode) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_node_next_sibling"] - pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_next_named_sibling"] - pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_prev_sibling"] - pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_prev_named_sibling"] - pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_first_child_for_byte"] - pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_first_named_child_for_byte"] - pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) 
-> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_descendant_for_byte_range"] - pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_named_descendant_for_byte_range"] - pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_descendant_for_point_range"] - pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) - -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_named_descendant_for_point_range"] - pub fn ts_node_named_descendant_for_point_range( - arg1: TSNode, - arg2: TSPoint, - arg3: TSPoint, - ) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_new"] - pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_delete"] - pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_first_child"] - pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_first_child_for_byte"] - pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_next_sibling"] - pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_parent"] - pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_current_node"] - pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_language_symbol_count"] - pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_language_symbol_name"] - pub fn ts_language_symbol_name( - arg1: *const 
TSLanguage, - arg2: TSSymbol, - ) -> *const ::std::os::raw::c_char; -} -extern "C" { - #[link_name = "\u{1}_ts_language_symbol_type"] - pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; -} -extern "C" { - #[link_name = "\u{1}_ts_language_version"] - pub fn ts_language_version(arg1: *const TSLanguage) -> u32; -} - -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8; +include!("./bindings.rs"); From b1ff399960cb4a72fe9a4323ecfc9b633c35e545 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 18:02:01 -0700 Subject: [PATCH 08/58] :arrow_up: tree-sitter for warning fixes --- build.rs | 21 +++++++++++---------- vendor/tree-sitter | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/build.rs b/build.rs index 5fa5d408..8736b645 100644 --- a/build.rs +++ b/build.rs @@ -4,14 +4,15 @@ use std::env; use std::path::Path; fn main() { + let mut config = cc::Build::new(); let root_path = Path::new("vendor/tree-sitter"); - let mut config = cc::Build::new(); - config.flag_if_supported("-std=c99"); - config.flag_if_supported("-Wno-unused-parameter"); - config.include(root_path.join(Path::new("src"))); - config.include(root_path.join(Path::new("include"))); - config.include(root_path.join(Path::new("externals/utf8proc"))); + config + .flag("-std=c99") + .flag("-Wno-unused-parameter") + .include(root_path.join(Path::new("src"))) + .include(root_path.join(Path::new("include"))) + .include(root_path.join(Path::new("externals/utf8proc"))); let source_filenames = [ "get_changed_ranges.c", @@ -36,10 +37,10 @@ fn main() { if env::var("RUST_TREE_SITTER_TEST").is_ok() { let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); - config.flag_if_supported("-Wno-typedef-redefinition"); - config.file(parser_dir.join("parser.c")); - config.file(parser_dir.join("scanner.c")); + config + .file(parser_dir.join("parser.c")) + .file(parser_dir.join("scanner.c")); } - config.compile("treesitter") + 
config.compile("treesitter_ffi"); } diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 5ec3769c..3c01382b 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 5ec3769cb4c9acfda64f80d7c14abce939e8b4c5 +Subproject commit 3c01382b95364ce40f0cf9856865a30af77f9690 From 13e26b5007b19f2f98584adf594b478f2cbb9175 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 18:08:44 -0700 Subject: [PATCH 09/58] Try a static flag --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 8736b645..b7433f54 100644 --- a/build.rs +++ b/build.rs @@ -10,6 +10,7 @@ fn main() { config .flag("-std=c99") .flag("-Wno-unused-parameter") + .static_flag(true) .include(root_path.join(Path::new("src"))) .include(root_path.join(Path::new("include"))) .include(root_path.join(Path::new("externals/utf8proc"))); From 29dfa0550413cecb9f2fb13798e60f95522bb0ba Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 19:40:06 -0700 Subject: [PATCH 10/58] Try clang --- .travis.yml | 12 +++++++++++- build.rs | 1 - 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 10fcfe94..5b99d596 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,11 +4,21 @@ rust: - stable env: - - RUST_TREE_SITTER_TEST=1 + - CC=clang-3.6 RUST_TREE_SITTER_TEST=1 before_install: - ./script/fetch-test-fixtures.sh +compiler: clang-3.6 + +addons: + apt: + sources: + - llvm-toolchain-precise-3.6 + - ubuntu-toolchain-r-test + packages: + - clang-3.6 + branches: only: - master diff --git a/build.rs b/build.rs index b7433f54..8736b645 100644 --- a/build.rs +++ b/build.rs @@ -10,7 +10,6 @@ fn main() { config .flag("-std=c99") .flag("-Wno-unused-parameter") - .static_flag(true) .include(root_path.join(Path::new("src"))) .include(root_path.join(Path::new("include"))) .include(root_path.join(Path::new("externals/utf8proc"))); From e61edf539824631b4e59a8d8ed022f7a065cf95a Mon Sep 17 00:00:00 2001 From: Max 
Brunsfeld Date: Fri, 18 May 2018 09:30:00 -0700 Subject: [PATCH 11/58] Don't perform platform-specific name mangling on C functions for bindings --- script/bindgen.sh | 1 + src/bindings.rs | 57 +---------------------------------------------- 2 files changed, 2 insertions(+), 56 deletions(-) diff --git a/script/bindgen.sh b/script/bindgen.sh index 1b9008b2..699f0339 100755 --- a/script/bindgen.sh +++ b/script/bindgen.sh @@ -8,6 +8,7 @@ bindgen \ --whitelist-type '^TS.*' \ --whitelist-function '^ts_.*' \ --opaque-type FILE \ + --distrust-clang-mangling \ $header_path > $output_path echo "" >> $output_path diff --git a/src/bindings.rs b/src/bindings.rs index 7d1c06e8..1ab49bde 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -1,7 +1,7 @@ /* automatically generated by rust-bindgen */ pub type FILE = [u64; 19usize]; -pub type TSSymbol = ::std::os::raw::c_ushort; +pub type TSSymbol = u16; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSLanguage { @@ -93,43 +93,33 @@ pub struct TSTreeCursor { pub tree: *const ::std::os::raw::c_void, } extern "C" { - #[link_name = "\u{1}_ts_parser_new"] pub fn ts_parser_new() -> *mut TSParser; } extern "C" { - #[link_name = "\u{1}_ts_parser_delete"] pub fn ts_parser_delete(arg1: *mut TSParser); } extern "C" { - #[link_name = "\u{1}_ts_parser_language"] pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; } extern "C" { - #[link_name = "\u{1}_ts_parser_set_language"] pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_parser_logger"] pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; } extern "C" { - #[link_name = "\u{1}_ts_parser_set_logger"] pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); } extern "C" { - #[link_name = "\u{1}_ts_parser_print_dot_graphs"] pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); } extern "C" { - #[link_name = "\u{1}_ts_parser_halt_on_error"] pub fn 
ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); } extern "C" { - #[link_name = "\u{1}_ts_parser_parse"] pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; } extern "C" { - #[link_name = "\u{1}_ts_parser_parse_string"] pub fn ts_parser_parse_string( arg1: *mut TSParser, arg2: *const TSTree, @@ -138,23 +128,18 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[link_name = "\u{1}_ts_tree_copy"] pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; } extern "C" { - #[link_name = "\u{1}_ts_tree_delete"] pub fn ts_tree_delete(arg1: *mut TSTree); } extern "C" { - #[link_name = "\u{1}_ts_tree_root_node"] pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_tree_edit"] pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); } extern "C" { - #[link_name = "\u{1}_ts_tree_get_changed_ranges"] pub fn ts_tree_get_changed_ranges( arg1: *const TSTree, arg2: *const TSTree, @@ -162,120 +147,91 @@ extern "C" { ) -> *mut TSRange; } extern "C" { - #[link_name = "\u{1}_ts_tree_print_dot_graph"] pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); } extern "C" { - #[link_name = "\u{1}_ts_node_start_byte"] pub fn ts_node_start_byte(arg1: TSNode) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_node_start_point"] pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; } extern "C" { - #[link_name = "\u{1}_ts_node_end_byte"] pub fn ts_node_end_byte(arg1: TSNode) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_node_end_point"] pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; } extern "C" { - #[link_name = "\u{1}_ts_node_symbol"] pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; } extern "C" { - #[link_name = "\u{1}_ts_node_type"] pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; } extern "C" { - #[link_name = "\u{1}_ts_node_string"] pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; } extern "C" { - #[link_name = 
"\u{1}_ts_node_eq"] pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_is_null"] pub fn ts_node_is_null(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_is_named"] pub fn ts_node_is_named(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_is_missing"] pub fn ts_node_is_missing(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_has_changes"] pub fn ts_node_has_changes(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_has_error"] pub fn ts_node_has_error(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_parent"] pub fn ts_node_parent(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_child"] pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_named_child"] pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_child_count"] pub fn ts_node_child_count(arg1: TSNode) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_node_named_child_count"] pub fn ts_node_named_child_count(arg1: TSNode) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_node_next_sibling"] pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_next_named_sibling"] pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_prev_sibling"] pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_prev_named_sibling"] pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_first_child_for_byte"] pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_first_named_child_for_byte"] pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[link_name = 
"\u{1}_ts_node_descendant_for_byte_range"] pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_named_descendant_for_byte_range"] pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_descendant_for_point_range"] pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_named_descendant_for_point_range"] pub fn ts_node_named_descendant_for_point_range( arg1: TSNode, arg2: TSPoint, @@ -283,50 +239,39 @@ extern "C" { ) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_new"] pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_delete"] pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_first_child"] pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_first_child_for_byte"] pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_next_sibling"] pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_parent"] pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_current_node"] pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_language_symbol_count"] pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_language_symbol_name"] pub fn ts_language_symbol_name( arg1: *const TSLanguage, arg2: TSSymbol, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[link_name = 
"\u{1}_ts_language_symbol_type"] pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; } extern "C" { - #[link_name = "\u{1}_ts_language_version"] pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } From 29c0cd3aa4d9e569c0ea2d1b4ea2652e207ca51a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 09:48:24 -0700 Subject: [PATCH 12/58] Add appveyor config --- README.md | 1 + appveyor.yml | 24 ++++++++++++++++++++++++ script/fetch-test-fixtures.cmd | 16 ++++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 appveyor.yml create mode 100755 script/fetch-test-fixtures.cmd diff --git a/README.md b/README.md index 08df0e4e..40f5624f 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ Rust Tree-sitter =========================== [![Build Status](https://travis-ci.org/tree-sitter/rust-tree-sitter.svg)](https://travis-ci.org/tree-sitter/rust-tree-sitter) +[![Build status](https://ci.appveyor.com/api/projects/status/d0f6vqq3rflxx3y6/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/rust-tree-sitter/branch/master) Rust bindings to the [Tree-sitter][] parsing library. 
diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..23fe3d97 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,24 @@ +environment: + RUST_TREE_SITTER_TEST: true + +build: false + +install: + - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe + - rustup-init -yv --default-toolchain stable + - set PATH=%PATH%;%USERPROFILE%\.cargo\bin + - rustc -vV + - cargo -vV + - script\fetch-test-fixtures.cmd + +test_script: + - cargo build + - cargo test + +branches: + only: + - master + +cache: + - fixtures + - C:\Users\appveyor\.cargo diff --git a/script/fetch-test-fixtures.cmd b/script/fetch-test-fixtures.cmd new file mode 100755 index 00000000..33543961 --- /dev/null +++ b/script/fetch-test-fixtures.cmd @@ -0,0 +1,16 @@ +@Echo off +SETLOCAL + +Set grammar_dir=fixtures\tree-sitter-rust +Set grammar_url=https://github.com/tree-sitter/tree-sitter-rust + +@IF NOT EXIST %grammar_dir% ( + git clone %grammar_url% %grammar_dir% --depth=1 +) + +pushd %grammar_dir% +git fetch origin master --depth=1 +git reset --hard origin/master +popd + +ENDLOCAL From 8d485857e10d90f76c344811a2da645ddfb74bd2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:01:37 -0700 Subject: [PATCH 13/58] Tweak build script for windows --- build.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/build.rs b/build.rs index 8736b645..c1e768ff 100644 --- a/build.rs +++ b/build.rs @@ -1,18 +1,18 @@ extern crate cc; use std::env; -use std::path::Path; +use std::path::PathBuf; fn main() { let mut config = cc::Build::new(); - let root_path = Path::new("vendor/tree-sitter"); + let root_path: PathBuf = ["vendor", "tree-sitter"].iter().collect(); config - .flag("-std=c99") - .flag("-Wno-unused-parameter") - .include(root_path.join(Path::new("src"))) - .include(root_path.join(Path::new("include"))) - .include(root_path.join(Path::new("externals/utf8proc"))); + .flag_if_supported("-std=c99") + 
.flag_if_supported("-Wno-unused-parameter") + .include(root_path.join("src")) + .include(root_path.join("include")) + .include(root_path.join("externals").join("utf8proc")); let source_filenames = [ "get_changed_ranges.c", @@ -29,18 +29,19 @@ fn main() { config.files(source_filenames.iter().map(|source_filename| { root_path - .join(Path::new(&"src/runtime")) - .join(Path::new(&source_filename)) + .join("src") + .join("runtime") + .join(&source_filename) })); - config.file(root_path.join(Path::new("externals/utf8proc/utf8proc.c"))); + config.file(root_path.join("externals").join("utf8proc").join("utf8proc.c")); if env::var("RUST_TREE_SITTER_TEST").is_ok() { - let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); + let parser_dir: PathBuf = ["fixtures", "tree-sitter-rust", "src"].iter().collect(); config .file(parser_dir.join("parser.c")) .file(parser_dir.join("scanner.c")); } - config.compile("treesitter_ffi"); + config.compile("tree-sitter-runtime"); } From 7748f8e1687042fc477890378fc653c152bc2b31 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:16:28 -0700 Subject: [PATCH 14/58] Fetch submodules on appveyor --- appveyor.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/appveyor.yml b/appveyor.yml index 23fe3d97..22c8b96e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -4,11 +4,14 @@ environment: build: false install: + - git submodule update --init --recursive + - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - rustup-init -yv --default-toolchain stable - set PATH=%PATH%;%USERPROFILE%\.cargo\bin - rustc -vV - cargo -vV + - script\fetch-test-fixtures.cmd test_script: From 654789f92534b4fd6d59006a13353edc923da1cb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:27:36 -0700 Subject: [PATCH 15/58] Use UTF8PROC_STATIC macro --- build.rs | 1 + vendor/tree-sitter | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index c1e768ff..7d9ee83e 100644 --- 
a/build.rs +++ b/build.rs @@ -8,6 +8,7 @@ fn main() { let root_path: PathBuf = ["vendor", "tree-sitter"].iter().collect(); config + .define("UTF8PROC_STATIC", "") .flag_if_supported("-std=c99") .flag_if_supported("-Wno-unused-parameter") .include(root_path.join("src")) diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 3c01382b..9c1e82a7 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 3c01382b95364ce40f0cf9856865a30af77f9690 +Subproject commit 9c1e82a7eac97767cee0469faa2722fd5753b065 From 993bfea669b1ba49fa4a37b11abd82c5206f0209 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:39:00 -0700 Subject: [PATCH 16/58] Add missing source file --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 7d9ee83e..2843c758 100644 --- a/build.rs +++ b/build.rs @@ -22,6 +22,7 @@ fn main() { "node.c", "parser.c", "stack.c", + "string_input.c", "subtree.c", "tree_cursor.c", "tree.c", From 4603542747743e0f0bb1361a8cdb3d4abbb089b0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:44:14 -0700 Subject: [PATCH 17/58] Add more public methods and tests --- src/lib.rs | 134 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 124 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fa1db0f9..ef53e4de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -253,15 +253,27 @@ impl<'a> Node<'a> { } } - pub fn name(&self) -> &'static str { + pub fn kind(&self) -> &'static str { unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() } - pub fn start_index(&self) -> u32 { + pub fn is_named(&self) -> bool { + unsafe { ffi::ts_node_is_named(self.0) } + } + + pub fn has_changes(&self) -> bool { + unsafe { ffi::ts_node_has_changes(self.0) } + } + + pub fn has_error(&self) -> bool { + unsafe { ffi::ts_node_has_error(self.0) } + } + + pub fn start_byte(&self) -> u32 { unsafe { ffi::ts_node_start_byte(self.0) } } - pub fn end_index(&self) -> 
u32 { + pub fn end_byte(&self) -> u32 { unsafe { ffi::ts_node_end_byte(self.0) } } @@ -289,10 +301,34 @@ impl<'a> Node<'a> { unsafe { ffi::ts_node_child_count(self.0) } } + pub fn named_child(&self, i: u32) -> Option { + Self::new(unsafe { ffi::ts_node_named_child(self.0, i) }) + } + + pub fn named_child_count(&self) -> u32 { + unsafe { ffi::ts_node_named_child_count(self.0) } + } + pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } + pub fn next_sibling(&self) -> Option { + Self::new(unsafe { ffi::ts_node_next_sibling(self.0) }) + } + + pub fn prev_sibling(&self) -> Option { + Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) }) + } + + pub fn next_named_sibling(&self) -> Option { + Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) }) + } + + pub fn prev_named_sibling(&self) -> Option { + Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) + } + pub fn to_sexp(&self) -> String { let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); @@ -304,26 +340,26 @@ impl<'a> Node<'a> { extern "C" { fn free(pointer: *mut c_void); } impl<'a> TreeCursor<'a> { - fn node(&'a self) -> Node<'a> { + pub fn node(&'a self) -> Node<'a> { Node( unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, PhantomData, ) } - fn goto_first_child(&mut self) -> bool { + pub fn goto_first_child(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; } - fn goto_parent(&mut self) -> bool { + pub fn goto_parent(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }; } - fn goto_next_sibling(&mut self) -> bool { + pub fn goto_next_sibling(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }; } - fn goto_first_child_for_index(&mut self, index: u32) -> Option { + pub fn goto_first_child_for_index(&mut self, index: u32) -> Option { let result = unsafe { 
ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index) }; if result < 0 { None @@ -378,7 +414,7 @@ mod tests { ", None).unwrap(); let root_node = tree.root_node(); - assert_eq!(root_node.name(), "source_file"); + assert_eq!(root_node.kind(), "source_file"); assert_eq!( root_node.to_sexp(), @@ -386,7 +422,7 @@ mod tests { ); let struct_node = root_node.child(0).unwrap(); - assert_eq!(struct_node.name(), "struct_item"); + assert_eq!(struct_node.kind(), "struct_item"); } #[test] @@ -407,4 +443,82 @@ mod tests { assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string()))); assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); } + + #[test] + fn test_tree_cursor() { + let mut parser = Parser::new(); + parser.set_language(rust()); + + let tree = parser.parse_str(" + struct Stuff { + a: A; + b: Option, + } + ", None).unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct_item"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct"); + assert_eq!(cursor.node().is_named(), false); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "type_identifier"); + assert_eq!(cursor.node().is_named(), true); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration_list"); + assert_eq!(cursor.node().is_named(), true); + } + + #[test] + fn test_custom_utf8_input() { + struct LineBasedInput { + lines: &'static [&'static str], + row: usize, + column: usize, + } + + impl Utf8Input for LineBasedInput { + fn read(&mut self) -> &[u8] { + if self.row < self.lines.len() { + let result = &self.lines[self.row].as_bytes()[self.column..]; + self.row += 1; + self.column = 0; + result + } else { + &[] + } + } + + fn seek(&mut self, _byte: u32, position: Point) { + self.row = position.row as usize; + self.column = 
position.column as usize; + } + } + + let mut parser = Parser::new(); + parser.set_language(rust()); + + let mut input = LineBasedInput { + lines: &[ + "pub fn main() {", + "}", + ], + row: 0, + column: 0 + }; + + let tree = parser.parse_utf8(&mut input, None).unwrap(); + let root = tree.root_node(); + assert_eq!(root.kind(), "source_file"); + assert_eq!(root.has_error(), false); + + let child = root.child(0).unwrap(); + assert_eq!(child.kind(), "function_item"); + } } From e10a817704c3982b4ed41928b2b504cdbdbaf702 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:55:42 -0700 Subject: [PATCH 18/58] Switch back to default c compiler on travis --- .travis.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5b99d596..10fcfe94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,21 +4,11 @@ rust: - stable env: - - CC=clang-3.6 RUST_TREE_SITTER_TEST=1 + - RUST_TREE_SITTER_TEST=1 before_install: - ./script/fetch-test-fixtures.sh -compiler: clang-3.6 - -addons: - apt: - sources: - - llvm-toolchain-precise-3.6 - - ubuntu-toolchain-r-test - packages: - - clang-3.6 - branches: only: - master From 870dc11f791425f441eb6e84f86332f4a6b1a21a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 11:15:37 -0700 Subject: [PATCH 19/58] Implement Eq and Debug for Node --- src/lib.rs | 66 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ef53e4de..2ecc7341 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,11 @@ mod ffi; +use std::fmt; use std::ffi::CStr; use std::marker::PhantomData; use std::os::raw::{c_char, c_int, c_void}; use std::ptr; -#[derive(Clone, Copy)] -pub struct Symbol(ffi::TSSymbol); - pub type Language = *const ffi::TSLanguage; pub trait Utf16Input { @@ -26,13 +24,13 @@ pub enum LogType { Lex, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] 
pub struct Point { pub row: u32, pub column: u32, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct InputEdit { pub start_byte: u32, pub old_end_byte: u32, @@ -63,9 +61,19 @@ impl Parser { } } - pub fn set_language(&mut self, language: Language) { + pub fn set_language(&mut self, language: Language) -> Result<(), String> { unsafe { - ffi::ts_parser_set_language(self.0, language); + let version = ffi::ts_language_version(language) as usize; + if version == ffi::TREE_SITTER_LANGUAGE_VERSION { + ffi::ts_parser_set_language(self.0, language); + Ok(()) + } else { + Err(format!( + "Incompatible language version {}. Expected {}.", + version, + ffi::TREE_SITTER_LANGUAGE_VERSION + )) + } } } @@ -253,6 +261,10 @@ impl<'a> Node<'a> { } } + pub fn kind_id(&self) -> u16 { + unsafe { ffi::ts_node_symbol(self.0) } + } + pub fn kind(&self) -> &'static str { unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() } @@ -330,6 +342,8 @@ impl<'a> Node<'a> { } pub fn to_sexp(&self) -> String { + extern "C" { fn free(pointer: *mut c_void); } + let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); unsafe { free(c_string as *mut c_void) }; @@ -337,7 +351,17 @@ impl<'a> Node<'a> { } } -extern "C" { fn free(pointer: *mut c_void); } +impl<'a> PartialEq for Node<'a> { + fn eq(&self, other: &Self) -> bool { + self.0.id == other.0.id + } +} + +impl<'a> fmt::Debug for Node<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{{Node {} {} - {}}}", self.kind(), self.start_position(), self.end_position()) + } +} impl<'a> TreeCursor<'a> { pub fn node(&'a self) -> Node<'a> { @@ -375,6 +399,12 @@ impl<'a> Drop for TreeCursor<'a> { } } +impl fmt::Display for Point { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "({}, {})", self.row, self.column) + } +} + impl Into for Point { fn into(self) -> 
ffi::TSPoint { ffi::TSPoint { @@ -406,7 +436,7 @@ mod tests { #[test] fn test_basic_parsing() { let mut parser = Parser::new(); - parser.set_language(rust()); + parser.set_language(rust()).unwrap(); let tree = parser.parse_str(" struct Stuff {} @@ -428,7 +458,7 @@ mod tests { #[test] fn test_logging() { let mut parser = Parser::new(); - parser.set_language(rust()); + parser.set_language(rust()).unwrap(); let mut messages = Vec::new(); parser.set_logger(Some(&mut |log_type, message| { @@ -447,7 +477,7 @@ mod tests { #[test] fn test_tree_cursor() { let mut parser = Parser::new(); - parser.set_language(rust()); + parser.set_language(rust()).unwrap(); let tree = parser.parse_str(" struct Stuff { @@ -502,7 +532,7 @@ mod tests { } let mut parser = Parser::new(); - parser.set_language(rust()); + parser.set_language(rust()).unwrap(); let mut input = LineBasedInput { lines: &[ @@ -521,4 +551,16 @@ mod tests { let child = root.child(0).unwrap(); assert_eq!(child.kind(), "function_item"); } + + #[test] + fn test_node_equality() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = parser.parse_str("struct A {}", None).unwrap(); + let node1 = tree.root_node(); + let node2 = tree.root_node(); + assert_eq!(node1, node2); + assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); + assert_ne!(node1.child(0).unwrap(), node2); + } } From a27ac49dea32cb296ff4ebdd939c7fa01a3d72e7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 11:42:13 -0700 Subject: [PATCH 20/58] Flesh out README --- README.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/README.md b/README.md index 40f5624f..43270713 100644 --- a/README.md +++ b/README.md @@ -6,4 +6,94 @@ Rust Tree-sitter Rust bindings to the [Tree-sitter][] parsing library. +### Basic Usage + +First, create a parser: + +```rust +let parser = Parser::new(); +``` + +Then assign a language to the parser. 
Tree-sitter languages consist of generated C code. To use them from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`: + +```rust +extern "C" fn tree_sitter_c() -> Language; +extern "C" fn tree_sitter_rust() -> Language; +extern "C" fn tree_sitter_javascript() -> Language; + +parser.set_language(unsafe { tree_sitter_rust() }).unwrap(); +``` + +Now you can parse source code: + +```rust +let source_code = "fn test() {}"; + +let tree = parser.parse_str(source_code, None); +let root_node = tree.root_node(); +assert_eq!(root_node.kind(), "source_file"); +assert_eq!(root_node.start_position().column, 0); +assert_eq!(root_node.end_position().column, 12); +``` + +### Editing + +Once you have a syntax tree, you can update it when your source code changes: + +```rust +let new_source_code = "fn test(a: u32) {}" + +tree.edit(InputEdit { + start_byte: 8, + old_end_byte: 8, + new_end_byte: 14, + start_position: Point::new(0, 8), + old_end_position: Point::new(0, 8), + new_end_position: Point::new(0, 14), +}); +let new_tree = parser.parse_str(new_source_code, Some(tree)); +``` + +### Text Input + + +The code can be provided either as a simple string or by any type that implements Tree-sitter's `Utf8Input` or `Utf16Input` traits: + +```rust +struct LineWiseInput { + lines: &'static [&'static str], + row: usize, + column: usize, +} + +impl tree_sitter::Utf8Input for LineWiseInput { + fn read(&mut self) -> &[u8] { + if self.row < self.lines.len() { + let result = &self.lines[self.row].as_bytes()[self.column..]; + self.row += 1; + self.column = 0; + result + } else { + &[] + } + } + + fn seek(&mut self, _byte: u32, position: Point) { + self.row = position.row as usize; + self.column = position.column as usize; + } +} + +let mut input = LineBasedInput { + lines: &[ + "pub fn main() {", + "}", + ], + row: 0, + column: 0 +}; + +let tree = parser.parse_utf8(&mut input, None).unwrap(); +``` + [tree-sitter]: https://github.com/tree-sitter/tree-sitter From 
c0b49e99357fbe25d62d800c9da2fd47566e9b31 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 11:51:46 -0700 Subject: [PATCH 21/58] Fix include globs in package manifest --- Cargo.toml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e20d40aa..560d9a71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,19 @@ [package] name = "tree-sitter" +description = "Rust bindings to the Tree-sitter parsing library" version = "0.1.0" authors = ["Max Brunsfeld "] build = "build.rs" -exclude = ["vendor/tree-sitter/**/*"] +license = "MIT" include = [ - "vendor/tree-sitter/src/runtime/*", - "vendor/tree-sitter/externals/utf8proc/utf8proc*" + "/build.rs", + "/Cargo.toml", + "/LICENSE", + "/README.md", + "/src/*", + "/vendor/tree-sitter/externals/utf8proc/utf8proc*", + "/vendor/tree-sitter/include/*", + "/vendor/tree-sitter/src/runtime/*", ] [build-dependencies] From e6d580597d5925f3d43bf01b2101d6e0ca9643fc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 12:02:52 -0700 Subject: [PATCH 22/58] Add crates.io badge to README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 43270713..da6e1a80 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Rust Tree-sitter [![Build Status](https://travis-ci.org/tree-sitter/rust-tree-sitter.svg)](https://travis-ci.org/tree-sitter/rust-tree-sitter) [![Build status](https://ci.appveyor.com/api/projects/status/d0f6vqq3rflxx3y6/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/rust-tree-sitter/branch/master) +[![Crates.io](https://img.shields.io/crates/v/tree-sitter.svg)](https://crates.io/crates/tree-sitter) Rust bindings to the [Tree-sitter][] parsing library. 
From 819b14070123c4f6c61aa73c72654ce1b97fef16 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 14:06:49 -0700 Subject: [PATCH 23/58] Make set_logger take a boxed function --- src/lib.rs | 68 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2ecc7341..5ef80f70 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,8 @@ pub enum LogType { Lex, } +type Logger<'a> = Box; + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Point { pub row: u32, @@ -44,7 +46,7 @@ pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); pub struct Parser(*mut ffi::TSParser); -pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding); +pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); @@ -77,28 +79,42 @@ impl Parser { } } - pub fn set_logger ()>(&mut self, logger: Option<&mut F>) { - unsafe extern "C" fn log ()>( - payload: *mut c_void, - c_log_type: ffi::TSLogType, - c_message: *const c_char, - ) { - let callback = (payload as *mut F).as_mut().unwrap(); - if let Ok(message) = CStr::from_ptr(c_message).to_str() { - let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { - LogType::Parse - } else { - LogType::Lex - }; - callback(log_type, message); - } - }; + pub fn logger(&self) -> Option<&Logger> { + let logger = unsafe { ffi::ts_parser_logger(self.0) }; + unsafe { (logger.payload as *mut Logger).as_ref() } + } + + pub fn set_logger(&mut self, logger: Option) { + let prev_logger = unsafe { ffi::ts_parser_logger(self.0) }; + if !prev_logger.payload.is_null() { + unsafe { Box::from_raw(prev_logger.payload as *mut Logger) }; + } let c_logger; if let Some(logger) = logger { + let container = Box::new(logger); + + unsafe extern "C" fn log( + payload: *mut c_void, + c_log_type: ffi::TSLogType, + c_message: *const c_char, + ) { + let callback = (payload as *mut Logger).as_mut().unwrap(); + if let Ok(message) = 
CStr::from_ptr(c_message).to_str() { + let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { + LogType::Parse + } else { + LogType::Lex + }; + callback(log_type, message); + } + }; + + let raw_container = Box::into_raw(container); + c_logger = ffi::TSLogger { - payload: logger as *mut F as *mut c_void, - log: Some(log::), + payload: raw_container as *mut c_void, + log: Some(log), }; } else { c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None }; @@ -156,7 +172,7 @@ impl Parser { if new_tree_ptr.is_null() { None } else { - Some(Tree(new_tree_ptr, ffi::TSInputEncoding_TSInputEncodingUTF8)) + Some(Tree(new_tree_ptr)) } } @@ -204,16 +220,14 @@ impl Parser { if new_tree_ptr.is_null() { None } else { - Some(Tree( - new_tree_ptr, - ffi::TSInputEncoding_TSInputEncodingUTF16, - )) + Some(Tree(new_tree_ptr)) } } } impl Drop for Parser { fn drop(&mut self) { + self.set_logger(None); unsafe { ffi::ts_parser_delete(self.0) } } } @@ -248,7 +262,7 @@ impl Drop for Tree { impl Clone for Tree { fn clone(&self) -> Tree { - unsafe { Tree(ffi::ts_tree_copy(self.0), self.1) } + unsafe { Tree(ffi::ts_tree_copy(self.0)) } } } @@ -461,9 +475,9 @@ mod tests { parser.set_language(rust()).unwrap(); let mut messages = Vec::new(); - parser.set_logger(Some(&mut |log_type, message| { + parser.set_logger(Some(Box::new(|log_type, message| { messages.push((log_type, message.to_string())); - })); + }))); parser.parse_str(" struct Stuff {} From 4da669ce8d23cbfaeaba2d2c5969b678779ff0e9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 14:27:08 -0700 Subject: [PATCH 24/58] Fix bugs in editing/reparsing --- README.md | 2 +- src/lib.rs | 101 ++++++++++++++++++++++++++++++++++++++++----- vendor/tree-sitter | 2 +- 3 files changed, 92 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index da6e1a80..d0806bbb 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ tree.edit(InputEdit { old_end_position: Point::new(0, 8), new_end_position: Point::new(0, 
14), }); -let new_tree = parser.parse_str(new_source_code, Some(tree)); +let new_tree = parser.parse_str(new_source_code, Some(&tree)); ``` ### Text Input diff --git a/src/lib.rs b/src/lib.rs index 5ef80f70..0ac1300e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -123,7 +123,7 @@ impl Parser { unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; } - pub fn parse_str(&mut self, input: &str, old_tree: Option) -> Option { + pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { let mut input = FlatInput { bytes: input.as_bytes(), offset: 0}; self.parse_utf8(&mut input, old_tree) } @@ -131,7 +131,7 @@ impl Parser { pub fn parse_utf8( &mut self, input: &mut T, - old_tree: Option, + old_tree: Option<&Tree>, ) -> Option { unsafe extern "C" fn read( payload: *mut c_void, @@ -179,7 +179,7 @@ impl Parser { pub fn parse_utf16( &mut self, input: &mut T, - old_tree: Option, + old_tree: Option<&Tree>, ) -> Option { unsafe extern "C" fn read( payload: *mut c_void, @@ -266,7 +266,7 @@ impl Clone for Tree { } } -impl<'a> Node<'a> { +impl<'tree> Node<'tree> { fn new(node: ffi::TSNode) -> Option { if node.id.is_null() { None @@ -319,7 +319,7 @@ impl<'a> Node<'a> { } } - pub fn child(&self, i: u32) -> Option { + pub fn child(&self, i: u32) -> Option { Self::new(unsafe { ffi::ts_node_child(self.0, i) }) } @@ -327,7 +327,7 @@ impl<'a> Node<'a> { unsafe { ffi::ts_node_child_count(self.0) } } - pub fn named_child(&self, i: u32) -> Option { + pub fn named_child<'a>(&'a self, i: u32) -> Option { Self::new(unsafe { ffi::ts_node_named_child(self.0, i) }) } @@ -335,23 +335,23 @@ impl<'a> Node<'a> { unsafe { ffi::ts_node_named_child_count(self.0) } } - pub fn parent(&self) -> Option { + pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } - pub fn next_sibling(&self) -> Option { + pub fn next_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_sibling(self.0) }) } - pub fn prev_sibling(&self) -> Option { + pub fn 
prev_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) }) } - pub fn next_named_sibling(&self) -> Option { + pub fn next_named_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) }) } - pub fn prev_named_sibling(&self) -> Option { + pub fn prev_named_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) } @@ -413,6 +413,12 @@ impl<'a> Drop for TreeCursor<'a> { } } +impl Point { + pub fn new(row: u32, column: u32) -> Self { + Point { row, column } + } +} + impl fmt::Display for Point { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "({}, {})", self.row, self.column) @@ -577,4 +583,77 @@ mod tests { assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); assert_ne!(node1.child(0).unwrap(), node2); } + + #[test] + fn test_editing() { + struct SpyInput { + bytes: &'static [u8], + offset: usize, + bytes_read: Vec, + } + + impl Utf8Input for SpyInput { + fn read(&mut self) -> &[u8] { + if self.offset < self.bytes.len() { + let result = &self.bytes[self.offset..self.offset + 1]; + self.bytes_read.extend(result.iter()); + self.offset += 1; + result + } else { + &[] + } + } + + fn seek(&mut self, byte: u32, _position: Point) { + self.offset = byte as usize; + } + } + + let mut input = SpyInput { + bytes: "fn test(a: A, c: C) {}".as_bytes(), + offset: 0, + bytes_read: Vec::new(), + }; + + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let mut tree = parser.parse_utf8(&mut input, None).unwrap(); + let parameters_sexp = tree.root_node() + .named_child(0).unwrap() + .named_child(1).unwrap() + .to_sexp(); + assert_eq!( + parameters_sexp, + "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + ); + + input.offset = 0; + input.bytes_read.clear(); + input.bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); + tree.edit(&InputEdit{ + start_byte: 14, + old_end_byte: 14, + 
new_end_byte: 20, + start_position: Point::new(0, 14), + old_end_position: Point::new(0, 14), + new_end_position: Point::new(0, 20), + }); + + let tree = parser.parse_utf8(&mut input, Some(&tree)).unwrap(); + let parameters_sexp = tree.root_node() + .named_child(0).unwrap() + .named_child(1).unwrap() + .to_sexp(); + assert_eq!( + parameters_sexp, + "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + ); + + let retokenized_content = String::from_utf8(input.bytes_read).unwrap(); + assert!(retokenized_content.contains("b: B")); + assert!(!retokenized_content.contains("a: A")); + assert!(!retokenized_content.contains("c: C")); + assert!(!retokenized_content.contains("{}")); + } } diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 9c1e82a7..78f28b14 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 9c1e82a7eac97767cee0469faa2722fd5753b065 +Subproject commit 78f28b14ce519ba085ab7886c2fc19739f7f7da0 From 45660e7b4e5db579905924717fa4da22f6a1d97d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 14:27:08 -0700 Subject: [PATCH 25/58] Make syntax trees implement Send --- src/lib.rs | 107 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 84 insertions(+), 23 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0ac1300e..6084516c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -254,6 +254,14 @@ impl Tree { } } +unsafe impl Send for Tree {} + +impl fmt::Debug for Tree { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{{Tree {:?}}}", self.root_node()) + } +} + impl Drop for Tree { fn drop(&mut self) { unsafe { ffi::ts_tree_delete(self.0) } @@ -448,6 +456,7 @@ impl<'a> Utf8Input for FlatInput<'a> { #[cfg(test)] mod tests { + use std::thread; use super::*; fn rust() -> Language { unsafe { tree_sitter_rust() } } @@ -586,29 +595,6 @@ mod tests { #[test] fn test_editing() { - 
struct SpyInput { - bytes: &'static [u8], - offset: usize, - bytes_read: Vec, - } - - impl Utf8Input for SpyInput { - fn read(&mut self) -> &[u8] { - if self.offset < self.bytes.len() { - let result = &self.bytes[self.offset..self.offset + 1]; - self.bytes_read.extend(result.iter()); - self.offset += 1; - result - } else { - &[] - } - } - - fn seek(&mut self, byte: u32, _position: Point) { - self.offset = byte as usize; - } - } - let mut input = SpyInput { bytes: "fn test(a: A, c: C) {}".as_bytes(), offset: 0, @@ -656,4 +642,79 @@ mod tests { assert!(!retokenized_content.contains("c: C")); assert!(!retokenized_content.contains("{}")); } + + #[test] + fn test_parallel_parsing() { + // Parse this source file so that each thread has a non-trivial amount of + // work to do. + let this_file_source = include_str!("lib.rs"); + + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = parser.parse_str(this_file_source, None).unwrap(); + + let mut parse_threads = Vec::new(); + for thread_id in 1..5 { + let mut tree_clone = tree.clone(); + parse_threads.push(thread::spawn(move || { + + // For each thread, prepend a different number of declarations to the + // source code. + let mut prepend_line_count = 0; + let mut prepended_source = String::new(); + for _ in 0..thread_id { + prepend_line_count += 2; + prepended_source += "struct X {}\n\n"; + } + + tree_clone.edit(&InputEdit{ + start_byte: 0, + old_end_byte: 0, + new_end_byte: prepended_source.len() as u32, + start_position: Point::new(0, 0), + old_end_position: Point::new(0, 0), + new_end_position: Point::new(prepend_line_count, 0), + }); + prepended_source += this_file_source; + + // Reparse using the old tree as a starting point. + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + parser.parse_str(&prepended_source, Some(&tree_clone)).unwrap() + })); + } + + // Check that the trees have the expected relationship to one another. 
+ let trees = parse_threads + .into_iter() + .map(|thread| thread.join().unwrap()); + let child_count_differences = trees + .map(|t| t.root_node().child_count() - tree.root_node().child_count()) + .collect::>(); + + assert_eq!(child_count_differences, &[1, 2, 3, 4]); + } + + struct SpyInput { + bytes: &'static [u8], + offset: usize, + bytes_read: Vec, + } + + impl Utf8Input for SpyInput { + fn read(&mut self) -> &[u8] { + if self.offset < self.bytes.len() { + let result = &self.bytes[self.offset..self.offset + 1]; + self.bytes_read.extend(result.iter()); + self.offset += 1; + result + } else { + &[] + } + } + + fn seek(&mut self, byte: u32, _position: Point) { + self.offset = byte as usize; + } + } } From 0034fce8093374bc5193727c96d45d98b9816a32 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 15:05:31 -0700 Subject: [PATCH 26/58] Add some fields to the cargo manifest --- Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 560d9a71..13c84759 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,10 @@ version = "0.1.0" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" +readme = "README.md" +keywords = ["incremental", "parsing"] +categories = ["parsing", "text editors", "api bindings"] + include = [ "/build.rs", "/Cargo.toml", From 16a7366ec75f5c03d497a12bb796d883bfd32466 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 15:06:05 -0700 Subject: [PATCH 27/58] 0.1.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 13c84759..12d92923 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.1.0" +version = "0.1.1" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From 5efc28f2f3741e9f3b1ff376be5de2890df80ed0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 19 Jun 2018 16:19:37 -0700 Subject: 
[PATCH 28/58] Update to latest tree-sitter API --- README.md | 66 ++++++----- build.rs | 1 - src/bindings.rs | 22 +++- src/lib.rs | 273 +++++++++++++++++++++------------------------ vendor/tree-sitter | 2 +- 5 files changed, 175 insertions(+), 189 deletions(-) diff --git a/README.md b/README.md index d0806bbb..ff7140c5 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,10 @@ Rust bindings to the [Tree-sitter][] parsing library. First, create a parser: ```rust +use tree_sitter::{Parser, Language}; + +// ... + let parser = Parser::new(); ``` @@ -22,16 +26,17 @@ extern "C" fn tree_sitter_c() -> Language; extern "C" fn tree_sitter_rust() -> Language; extern "C" fn tree_sitter_javascript() -> Language; -parser.set_language(unsafe { tree_sitter_rust() }).unwrap(); +let language = unsafe { tree_sitter_rust() }; +parser.set_language(language).unwrap(); ``` Now you can parse source code: ```rust let source_code = "fn test() {}"; - let tree = parser.parse_str(source_code, None); let root_node = tree.root_node(); + assert_eq!(root_node.kind(), "source_file"); assert_eq!(root_node.start_position().column, 0); assert_eq!(root_node.end_position().column, 12); @@ -39,7 +44,7 @@ assert_eq!(root_node.end_position().column, 12); ### Editing -Once you have a syntax tree, you can update it when your source code changes: +Once you have a syntax tree, you can update it when your source code changes. 
Passing in the previous edited tree makes `parse` run much more quickly: ```rust let new_source_code = "fn test(a: u32) {}" @@ -52,49 +57,42 @@ tree.edit(InputEdit { old_end_position: Point::new(0, 8), new_end_position: Point::new(0, 14), }); + let new_tree = parser.parse_str(new_source_code, Some(&tree)); ``` ### Text Input - -The code can be provided either as a simple string or by any type that implements Tree-sitter's `Utf8Input` or `Utf16Input` traits: +The source code to parse can be provided either as a string or as a function that returns text encoded as either UTF8 or UTF16: ```rust -struct LineWiseInput { - lines: &'static [&'static str], - row: usize, - column: usize, -} +// Store some source code in an array of lines. +let lines = &[ + "pub fn foo() {", + " 1", + "}", +]; -impl tree_sitter::Utf8Input for LineWiseInput { - fn read(&mut self) -> &[u8] { - if self.row < self.lines.len() { - let result = &self.lines[self.row].as_bytes()[self.column..]; - self.row += 1; - self.column = 0; - result +// Parse the source code using a custom callback. The callback is called +// with both a byte offset and a row/column offset. +let tree = parser.parse_utf8(&mut |_byte: u32, position: Point| -> &[u8] { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] 
} else { - &[] + "\n".as_bytes() } + } else { + &[] } +}, None).unwrap(); - fn seek(&mut self, _byte: u32, position: Point) { - self.row = position.row as usize; - self.column = position.column as usize; - } -} - -let mut input = LineBasedInput { - lines: &[ - "pub fn main() {", - "}", - ], - row: 0, - column: 0 -}; - -let tree = parser.parse_utf8(&mut input, None).unwrap(); +assert_eq!( + tree.root_node().to_sexp(), + "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))" +); ``` [tree-sitter]: https://github.com/tree-sitter/tree-sitter diff --git a/build.rs b/build.rs index 2843c758..7d9ee83e 100644 --- a/build.rs +++ b/build.rs @@ -22,7 +22,6 @@ fn main() { "node.c", "parser.c", "stack.c", - "string_input.c", "subtree.c", "tree_cursor.c", "tree.c", diff --git a/src/bindings.rs b/src/bindings.rs index 1ab49bde..b2d83729 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -41,15 +41,12 @@ pub struct TSRange { pub struct TSInput { pub payload: *mut ::std::os::raw::c_void, pub read: ::std::option::Option< - unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32) - -> *const ::std::os::raw::c_char, - >, - pub seek: ::std::option::Option< unsafe extern "C" fn( payload: *mut ::std::os::raw::c_void, byte_index: u32, position: TSPoint, - ) -> ::std::os::raw::c_int, + bytes_read: *mut u32, + ) -> *const ::std::os::raw::c_char, >, pub encoding: TSInputEncoding, } @@ -127,6 +124,21 @@ extern "C" { arg4: u32, ) -> *mut TSTree; } +extern "C" { + pub fn ts_parser_enabled(arg1: *const TSParser) -> bool; +} +extern "C" { + pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); +} +extern "C" { + pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize; +} +extern "C" { + pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize); +} +extern "C" { + pub fn ts_parser_reset(arg1: *mut TSParser); +} extern "C" { pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; } diff 
--git a/src/lib.rs b/src/lib.rs index 6084516c..84d51f04 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,21 +3,11 @@ mod ffi; use std::fmt; use std::ffi::CStr; use std::marker::PhantomData; -use std::os::raw::{c_char, c_int, c_void}; +use std::os::raw::{c_char, c_void}; use std::ptr; pub type Language = *const ffi::TSLanguage; -pub trait Utf16Input { - fn read(&mut self) -> &[u16]; - fn seek(&mut self, u32, Point); -} - -pub trait Utf8Input { - fn read(&mut self) -> &[u8]; - fn seek(&mut self, u32, Point); -} - #[derive(Debug, PartialEq, Eq)] pub enum LogType { Parse, @@ -50,11 +40,6 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -struct FlatInput<'a> { - bytes: &'a [u8], - offset: usize, -} - impl Parser { pub fn new() -> Parser { unsafe { @@ -124,105 +109,86 @@ impl Parser { } pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { - let mut input = FlatInput { bytes: input.as_bytes(), offset: 0}; - self.parse_utf8(&mut input, old_tree) + let bytes = input.as_bytes(); + self.parse_utf8(&mut |offset, _| &bytes[(offset as usize)..], old_tree) } - pub fn parse_utf8( + pub fn parse_utf8<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read( + unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>( payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); - let result = input.read(); + let result = (*input)(byte_offset, position.into()); *bytes_read = result.len() as u32; return result.as_ptr() as *const c_char; }; - unsafe extern "C" fn seek( - payload: *mut c_void, - byte: u32, - position: ffi::TSPoint, - ) -> c_int { - let input = (payload as *mut T).as_mut().unwrap(); - input.seek( - byte, - Point { - row: position.row, - column: position.column, - }, - ); - return 1; - }; - let c_input = 
ffi::TSInput { payload: input as *mut T as *mut c_void, - read: Some(read::), - seek: Some(seek::), + read: Some(read::<'a, T>), encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, }; - let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); - let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) }; - if new_tree_ptr.is_null() { + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { None } else { - Some(Tree(new_tree_ptr)) + Some(Tree(c_new_tree)) } } - pub fn parse_utf16( + pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read( + unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); - let result = input.read(); + let result = (*input)(byte_offset, Point { + row: position.row, + column: position.column / 2, + }); *bytes_read = result.len() as u32 * 2; return result.as_ptr() as *const c_char; }; - unsafe extern "C" fn seek( - payload: *mut c_void, - byte: u32, - position: ffi::TSPoint, - ) -> c_int { - let input = (payload as *mut T).as_mut().unwrap(); - input.seek( - byte / 2, - Point { - row: position.row, - column: position.column / 2, - }, - ); - return 1; - }; - let c_input = ffi::TSInput { payload: input as *mut T as *mut c_void, - read: Some(read::), - seek: Some(seek::), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + read: Some(read::<'a, T>), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, }; - let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); - let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) }; - if 
new_tree_ptr.is_null() { + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { None } else { - Some(Tree(new_tree_ptr)) + Some(Tree(c_new_tree)) } } + + pub fn reset(&mut self) { + unsafe { ffi::ts_parser_reset(self.0) } + } + + pub fn set_operation_limit(&mut self, limit: usize) { + unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } + } } impl Drop for Parser { @@ -442,15 +408,12 @@ impl Into for Point { } } -impl<'a> Utf8Input for FlatInput<'a> { - fn read(&mut self) -> &[u8] { - let result = &self.bytes[self.offset..]; - self.offset = self.bytes.len(); - result - } - - fn seek(&mut self, offset: u32, _position: Point) { - self.offset = offset as usize; +impl From for Point { + fn from(point: ffi::TSPoint) -> Self { + Self { + row: point.row, + column: point.column, + } } } @@ -536,49 +499,70 @@ mod tests { #[test] fn test_custom_utf8_input() { - struct LineBasedInput { - lines: &'static [&'static str], - row: usize, - column: usize, - } - - impl Utf8Input for LineBasedInput { - fn read(&mut self) -> &[u8] { - if self.row < self.lines.len() { - let result = &self.lines[self.row].as_bytes()[self.column..]; - self.row += 1; - self.column = 0; - result - } else { - &[] - } - } - - fn seek(&mut self, _byte: u32, position: Point) { - self.row = position.row as usize; - self.column = position.column as usize; - } - } - let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let mut input = LineBasedInput { - lines: &[ - "pub fn main() {", - "}", - ], - row: 0, - column: 0 - }; + let lines = &[ + "pub fn foo() {", + " 1", + "}", + ]; + + let tree = parser.parse_utf8(&mut |_, position| { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] 
+ } else { + "\n".as_bytes() + } + } else { + &[] + } + }, None).unwrap(); - let tree = parser.parse_utf8(&mut input, None).unwrap(); let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); + assert_eq!(root.child(0).unwrap().kind(), "function_item"); + } - let child = root.child(0).unwrap(); - assert_eq!(child.kind(), "function_item"); + #[test] + fn test_custom_utf16_input() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + parser.set_logger(Some(Box::new(|t, message| { + println!("log: {:?} {}", t, message); + }))); + + let lines: Vec> = [ + "pub fn foo() {", + " 1", + "}" + ].iter().map(|s| s.encode_utf16().collect()).collect(); + + let tree = parser.parse_utf16(&mut |_, position| { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].len() { + &lines[row][column..] 
+ } else { + &[10] + } + } else { + &[] + } + }, None).unwrap(); + + let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"); + assert_eq!(root.kind(), "source_file"); + assert_eq!(root.has_error(), false); + assert_eq!(root.child(0).unwrap().kind(), "function_item"); } #[test] @@ -595,16 +579,23 @@ mod tests { #[test] fn test_editing() { - let mut input = SpyInput { - bytes: "fn test(a: A, c: C) {}".as_bytes(), - offset: 0, - bytes_read: Vec::new(), - }; - let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let mut tree = parser.parse_utf8(&mut input, None).unwrap(); + let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); + let mut input_bytes_read = Vec::new(); + + let mut tree = parser.parse_utf8(&mut |offset, _| { + let offset = offset as usize; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, None).unwrap(); + let parameters_sexp = tree.root_node() .named_child(0).unwrap() .named_child(1).unwrap() @@ -614,9 +605,8 @@ mod tests { "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" ); - input.offset = 0; - input.bytes_read.clear(); - input.bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); + input_bytes_read.clear(); + input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); tree.edit(&InputEdit{ start_byte: 14, old_end_byte: 14, @@ -626,7 +616,17 @@ mod tests { new_end_position: Point::new(0, 20), }); - let tree = parser.parse_utf8(&mut input, Some(&tree)).unwrap(); + let tree = parser.parse_utf8(&mut |offset, _| { + let offset = offset as usize; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, Some(&tree)).unwrap(); + let parameters_sexp = tree.root_node() 
.named_child(0).unwrap() .named_child(1).unwrap() @@ -636,7 +636,7 @@ mod tests { "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" ); - let retokenized_content = String::from_utf8(input.bytes_read).unwrap(); + let retokenized_content = String::from_utf8(input_bytes_read).unwrap(); assert!(retokenized_content.contains("b: B")); assert!(!retokenized_content.contains("a: A")); assert!(!retokenized_content.contains("c: C")); @@ -694,27 +694,4 @@ mod tests { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } - - struct SpyInput { - bytes: &'static [u8], - offset: usize, - bytes_read: Vec, - } - - impl Utf8Input for SpyInput { - fn read(&mut self) -> &[u8] { - if self.offset < self.bytes.len() { - let result = &self.bytes[self.offset..self.offset + 1]; - self.bytes_read.extend(result.iter()); - self.offset += 1; - result - } else { - &[] - } - } - - fn seek(&mut self, byte: u32, _position: Point) { - self.offset = byte as usize; - } - } } diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 78f28b14..26ab57a6 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 78f28b14ce519ba085ab7886c2fc19739f7f7da0 +Subproject commit 26ab57a6562aaeb48b579e3ca29eb064925e857c From 86c8206e35757694d37d3fe627236d22a75eb3ec Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 19 Jun 2018 16:20:58 -0700 Subject: [PATCH 29/58] 0.2.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 12d92923..bfc6b2e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.1.1" +version = "0.2.0" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From 2eff3225bac3422b19fc442482eb45f0462fa478 Mon Sep 17 00:00:00 2001 From: Stephan Renatus Date: Thu, 28 Jun 2018 10:25:01 +0200 Subject: 
[PATCH 30/58] README.md: small fixes To call .set_language on parser, it needs to be mut; also, the syntax for the extern "C" blocks seemed to be a bit off. Both now corresponds to what's in the tests. Signed-off-by: Stephan Renatus --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ff7140c5..449c6c46 100644 --- a/README.md +++ b/README.md @@ -16,15 +16,15 @@ use tree_sitter::{Parser, Language}; // ... -let parser = Parser::new(); +let mut parser = Parser::new(); ``` Then assign a language to the parser. Tree-sitter languages consist of generated C code. To use them from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`: ```rust -extern "C" fn tree_sitter_c() -> Language; -extern "C" fn tree_sitter_rust() -> Language; -extern "C" fn tree_sitter_javascript() -> Language; +extern "C" { fn tree_sitter_c() -> Language; } +extern "C" { fn tree_sitter_rust() -> Language; } +extern "C" { fn tree_sitter_javascript() -> Language; } let language = unsafe { tree_sitter_rust() }; parser.set_language(language).unwrap(); From c477e45fccf746fcb9335ba777ace035a6292a48 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Jul 2018 13:32:22 -0700 Subject: [PATCH 31/58] Update to the latest Tree-sitter --- src/bindings.rs | 30 +++++++++++++++++++++++++----- src/lib.rs | 6 +++++- vendor/tree-sitter | 2 +- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/bindings.rs b/src/bindings.rs index b2d83729..58d0e510 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -33,8 +33,10 @@ pub struct TSPoint { #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSRange { - pub start: TSPoint, - pub end: TSPoint, + pub start_point: TSPoint, + pub end_point: TSPoint, + pub start_byte: u32, + pub end_byte: u32, } #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -80,7 +82,7 @@ pub struct TSInputEdit { pub struct TSNode { pub context: [u32; 4usize], pub id: *const ::std::os::raw::c_void, 
- pub tree: *const ::std::os::raw::c_void, + pub tree: *const TSTree, } #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -139,6 +141,12 @@ extern "C" { extern "C" { pub fn ts_parser_reset(arg1: *mut TSParser); } +extern "C" { + pub fn ts_parser_set_included_ranges(arg1: *mut TSParser, arg2: *const TSRange, arg3: u32); +} +extern "C" { + pub fn ts_parser_included_ranges(arg1: *const TSParser, arg2: *mut u32) -> *const TSRange; +} extern "C" { pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; } @@ -161,6 +169,9 @@ extern "C" { extern "C" { pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); } +extern "C" { + pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; +} extern "C" { pub fn ts_node_start_byte(arg1: TSNode) -> u32; } @@ -251,7 +262,10 @@ extern "C" { ) -> TSNode; } extern "C" { - pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; + pub fn ts_node_edit(arg1: *mut TSNode, arg2: *const TSInputEdit); +} +extern "C" { + pub fn ts_tree_cursor_new(arg1: TSNode) -> TSTreeCursor; } extern "C" { pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); @@ -280,6 +294,12 @@ extern "C" { arg2: TSSymbol, ) -> *const ::std::os::raw::c_char; } +extern "C" { + pub fn ts_language_symbol_for_name( + arg1: *const TSLanguage, + arg2: *const ::std::os::raw::c_char, + ) -> TSSymbol; +} extern "C" { pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; } @@ -287,4 +307,4 @@ extern "C" { pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 9; diff --git a/src/lib.rs b/src/lib.rs index 84d51f04..9f0ef9b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -216,7 +216,7 @@ impl Tree { } pub fn walk(&self) -> TreeCursor { - TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) + self.root_node().walk() } } @@ -337,6 +337,10 @@ impl<'tree> Node<'tree> { unsafe { free(c_string as *mut c_void) }; 
result } + + pub fn walk(&self) -> TreeCursor<'tree> { + TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) + } } impl<'a> PartialEq for Node<'a> { diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 26ab57a6..16376c43 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 26ab57a6562aaeb48b579e3ca29eb064925e857c +Subproject commit 16376c43f5cc75bbc5297e6d5716bd94d55ccc05 From 47a7430da319b8e2a55cdb8998acc3f3f099a1c7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Jul 2018 13:32:56 -0700 Subject: [PATCH 32/58] 0.3.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bfc6b2e2..746d2d47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.2.0" +version = "0.3.0" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From bdd52376a82ae2354b6226d9bb3b23649b81df4d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Jul 2018 13:36:12 -0700 Subject: [PATCH 33/58] Fix cargo category slugs --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 746d2d47..c2d733f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ build = "build.rs" license = "MIT" readme = "README.md" keywords = ["incremental", "parsing"] -categories = ["parsing", "text editors", "api bindings"] +categories = ["api-bindings", "parsing", "text-editors"] include = [ "/build.rs", From 5fbb261316737117c827db935e667bcfd3932348 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Jul 2018 13:36:42 -0700 Subject: [PATCH 34/58] 0.3.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c2d733f2..9adbcfd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter 
parsing library" -version = "0.3.0" +version = "0.3.1" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From c8125ec617ec4a3e2d93c460bcc22c89f1c06981 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Oct 2018 11:32:40 -0700 Subject: [PATCH 35/58] Make Language send + sync, add language methods --- src/lib.rs | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9f0ef9b9..434d05fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,8 @@ use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; -pub type Language = *const ffi::TSLanguage; +#[repr(transparent)] +pub struct Language (*const ffi::TSLanguage); #[derive(Debug, PartialEq, Eq)] pub enum LogType { @@ -50,9 +51,9 @@ impl Parser { pub fn set_language(&mut self, language: Language) -> Result<(), String> { unsafe { - let version = ffi::ts_language_version(language) as usize; + let version = ffi::ts_language_version(language.0) as usize; if version == ffi::TREE_SITTER_LANGUAGE_VERSION { - ffi::ts_parser_set_language(self.0, language); + ffi::ts_parser_set_language(self.0, language.0); Ok(()) } else { Err(format!( @@ -222,6 +223,24 @@ impl Tree { unsafe impl Send for Tree {} +impl Language { + pub fn node_kind_count(&self) -> usize { + unsafe { ffi::ts_language_symbol_count(self.0) as usize } + } + + pub fn node_kind_for_id(&self, id: u16) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) }.to_str().unwrap() + } + + pub fn node_kind_is_named(&self, id: u16) -> bool { + unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } + } +} + +unsafe impl Send for Language {} + +unsafe impl Sync for Language {} + impl fmt::Debug for Tree { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "{{Tree {:?}}}", self.root_node()) @@ -527,7 +546,7 @@ mod tests { }, None).unwrap(); let root = tree.root_node(); - 
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); @@ -563,7 +582,7 @@ mod tests { }, None).unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); From 0c2e1c189b2c4f696a1a1b48ee1ad04c7ef49936 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Oct 2018 22:32:58 -0700 Subject: [PATCH 36/58] Implement Clone for Language --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 434d05fb..81b4d09a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; +#[derive(Clone, Copy)] #[repr(transparent)] pub struct Language (*const ffi::TSLanguage); From 572e8c202e36c98e875a67f2edadbbad341602cf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Oct 2018 22:33:11 -0700 Subject: [PATCH 37/58] Implement Send for Parser --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 81b4d09a..c547974b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -200,6 +200,8 @@ impl Drop for Parser { } } +unsafe impl Send for Parser {} + impl Tree { pub fn root_node(&self) -> Node { Node::new(unsafe { ffi::ts_tree_root_node(self.0) }).unwrap() From 91d35dec7d4ddf60054efbbc6631489af74c09f0 Mon Sep 17 
00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Oct 2018 22:33:43 -0700 Subject: [PATCH 38/58] Add Parser.parser_utf8_io() method --- src/lib.rs | 159 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 107 insertions(+), 52 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c547974b..ff272a29 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ use std::ffi::CStr; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; +use std::io::{self, Read, Seek}; #[derive(Clone, Copy)] #[repr(transparent)] @@ -115,37 +116,15 @@ impl Parser { self.parse_utf8(&mut |offset, _| &bytes[(offset as usize)..], old_tree) } - pub fn parse_utf8<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>( + pub fn parse_utf8<'a, T: FnMut(u32, Point) -> &'a [u8]>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>( - payload: *mut c_void, - byte_offset: u32, - position: ffi::TSPoint, - bytes_read: *mut u32, - ) -> *const c_char { - let input = (payload as *mut T).as_mut().unwrap(); - let result = (*input)(byte_offset, position.into()); - *bytes_read = result.len() as u32; - return result.as_ptr() as *const c_char; - }; - - let c_input = ffi::TSInput { - payload: input as *mut T as *mut c_void, - read: Some(read::<'a, T>), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, - }; - - let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); - - let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; - if c_new_tree.is_null() { - None - } else { - Some(Tree(c_new_tree)) - } + self.parse_utf8_ptr(&mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, old_tree) } pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( @@ -153,34 +132,43 @@ impl Parser { input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( - payload: *mut 
c_void, - byte_offset: u32, - position: ffi::TSPoint, - bytes_read: *mut u32, - ) -> *const c_char { - let input = (payload as *mut T).as_mut().unwrap(); - let result = (*input)(byte_offset, Point { - row: position.row, - column: position.column / 2, - }); - *bytes_read = result.len() as u32 * 2; - return result.as_ptr() as *const c_char; - }; + self.parse_utf16_ptr(&mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, old_tree) + } - let c_input = ffi::TSInput { - payload: input as *mut T as *mut c_void, - read: Some(read::<'a, T>), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, - }; + pub fn parse_utf8_io( + &mut self, + mut input: impl Read + Seek, + old_tree: Option<&Tree>, + ) -> io::Result> { + let mut error = None; + let mut current_offset = 0; + let mut buffer = [0; 10 * 1024]; + let result = self.parse_utf8_ptr(&mut |byte, _| { + if byte as u64 != current_offset { + current_offset = byte as u64; + if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) { + error = Some(e); + return (ptr::null(), 0) + } + } - let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); + match input.read(&mut buffer) { + Err(e) => { + error = Some(e); + (ptr::null(), 0) + }, + Ok(length) => { + (buffer.as_ptr(), length) + } + } + }, old_tree); - let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; - if c_new_tree.is_null() { - None - } else { - Some(Tree(c_new_tree)) + match error { + Some(e) => Err(e), + None => Ok(result) } } @@ -191,6 +179,73 @@ impl Parser { pub fn set_operation_limit(&mut self, limit: usize) { unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } } + + fn parse_utf8_ptr (*const u8, usize)>( + &mut self, + input: &mut T, + old_tree: Option<&Tree>, + ) -> Option { + unsafe extern "C" fn read (*const u8, usize)> ( + payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut 
T).as_mut().unwrap(); + let (ptr, length) = (*input)(byte_offset, position.into()); + *bytes_read = length as u32; + return ptr as *const c_char; + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + }; + + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { + None + } else { + Some(Tree(c_new_tree)) + } + } + + fn parse_utf16_ptr (*const u16, usize)>( + &mut self, + input: &mut T, + old_tree: Option<&Tree>, + ) -> Option { + unsafe extern "C" fn read (*const u16, usize)>( + payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let (ptr, length) = (*input)(byte_offset, Point { + row: position.row, + column: position.column / 2, + }); + *bytes_read = length as u32 * 2; + ptr as *const c_char + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, + }; + + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { + None + } else { + Some(Tree(c_new_tree)) + } + } } impl Drop for Parser { From a8cbde6dbfbc8ae9b7b37075ad0dffeed3e079b8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 9 Oct 2018 08:23:02 -0700 Subject: [PATCH 39/58] Run rustfmt on lib.rs --- src/lib.rs | 336 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 204 insertions(+), 132 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ff272a29..4a132a3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,15 @@ mod ffi; -use std::fmt; use std::ffi::CStr; +use std::fmt; +use std::io::{self, Read, Seek}; use std::marker::PhantomData; use 
std::os::raw::{c_char, c_void}; use std::ptr; -use std::io::{self, Read, Seek}; #[derive(Clone, Copy)] #[repr(transparent)] -pub struct Language (*const ffi::TSLanguage); +pub struct Language(*const ffi::TSLanguage); #[derive(Debug, PartialEq, Eq)] pub enum LogType { @@ -43,6 +43,26 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +impl Language { + pub fn node_kind_count(&self) -> usize { + unsafe { ffi::ts_language_symbol_count(self.0) as usize } + } + + pub fn node_kind_for_id(&self, id: u16) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) } + .to_str() + .unwrap() + } + + pub fn node_kind_is_named(&self, id: u16) -> bool { + unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } + } +} + +unsafe impl Send for Language {} + +unsafe impl Sync for Language {} + impl Parser { pub fn new() -> Parser { unsafe { @@ -105,7 +125,10 @@ impl Parser { log: Some(log), }; } else { - c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None }; + c_logger = ffi::TSLogger { + payload: ptr::null_mut(), + log: None, + }; } unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; @@ -121,10 +144,13 @@ impl Parser { input: &mut T, old_tree: Option<&Tree>, ) -> Option { - self.parse_utf8_ptr(&mut |byte, position| { - let slice = input(byte, position); - (slice.as_ptr(), slice.len()) - }, old_tree) + self.parse_utf8_ptr( + &mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, + old_tree, + ) } pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( @@ -132,10 +158,13 @@ impl Parser { input: &mut T, old_tree: Option<&Tree>, ) -> Option { - self.parse_utf16_ptr(&mut |byte, position| { - let slice = input(byte, position); - (slice.as_ptr(), slice.len()) - }, old_tree) + self.parse_utf16_ptr( + &mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, + old_tree, 
+ ) } pub fn parse_utf8_io( @@ -146,29 +175,30 @@ impl Parser { let mut error = None; let mut current_offset = 0; let mut buffer = [0; 10 * 1024]; - let result = self.parse_utf8_ptr(&mut |byte, _| { - if byte as u64 != current_offset { - current_offset = byte as u64; - if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) { - error = Some(e); - return (ptr::null(), 0) + let result = self.parse_utf8_ptr( + &mut |byte, _| { + if byte as u64 != current_offset { + current_offset = byte as u64; + if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) { + error = Some(e); + return (ptr::null(), 0); + } } - } - match input.read(&mut buffer) { - Err(e) => { - error = Some(e); - (ptr::null(), 0) - }, - Ok(length) => { - (buffer.as_ptr(), length) + match input.read(&mut buffer) { + Err(e) => { + error = Some(e); + (ptr::null(), 0) + } + Ok(length) => (buffer.as_ptr(), length), } - } - }, old_tree); + }, + old_tree, + ); match error { Some(e) => Err(e), - None => Ok(result) + None => Ok(result), } } @@ -185,7 +215,7 @@ impl Parser { input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read (*const u8, usize)> ( + unsafe extern "C" fn read (*const u8, usize)>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, @@ -224,10 +254,13 @@ impl Parser { bytes_read: *mut u32, ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); - let (ptr, length) = (*input)(byte_offset, Point { - row: position.row, - column: position.column / 2, - }); + let (ptr, length) = (*input)( + byte_offset, + Point { + row: position.row, + column: position.column / 2, + }, + ); *bytes_read = length as u32 * 2; ptr as *const c_char }; @@ -281,24 +314,6 @@ impl Tree { unsafe impl Send for Tree {} -impl Language { - pub fn node_kind_count(&self) -> usize { - unsafe { ffi::ts_language_symbol_count(self.0) as usize } - } - - pub fn node_kind_for_id(&self, id: u16) -> &'static str { - unsafe { 
CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) }.to_str().unwrap() - } - - pub fn node_kind_is_named(&self, id: u16) -> bool { - unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } - } -} - -unsafe impl Send for Language {} - -unsafe impl Sync for Language {} - impl fmt::Debug for Tree { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "{{Tree {:?}}}", self.root_node()) @@ -331,7 +346,9 @@ impl<'tree> Node<'tree> { } pub fn kind(&self) -> &'static str { - unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() + unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } + .to_str() + .unwrap() } pub fn is_named(&self) -> bool { @@ -407,10 +424,15 @@ impl<'tree> Node<'tree> { } pub fn to_sexp(&self) -> String { - extern "C" { fn free(pointer: *mut c_void); } + extern "C" { + fn free(pointer: *mut c_void); + } let c_string = unsafe { ffi::ts_node_string(self.0) }; - let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); + let result = unsafe { CStr::from_ptr(c_string) } + .to_str() + .unwrap() + .to_string(); unsafe { free(c_string as *mut c_void) }; result } @@ -428,7 +450,13 @@ impl<'a> PartialEq for Node<'a> { impl<'a> fmt::Debug for Node<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!(f, "{{Node {} {} - {}}}", self.kind(), self.start_position(), self.end_position()) + write!( + f, + "{{Node {} {} - {}}}", + self.kind(), + self.start_position(), + self.end_position() + ) } } @@ -500,21 +528,30 @@ impl From for Point { #[cfg(test)] mod tests { - use std::thread; use super::*; + use std::thread; - fn rust() -> Language { unsafe { tree_sitter_rust() } } - extern "C" { fn tree_sitter_rust() -> Language; } + fn rust() -> Language { + unsafe { tree_sitter_rust() } + } + extern "C" { + fn tree_sitter_rust() -> Language; + } #[test] fn test_basic_parsing() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - 
let tree = parser.parse_str(" + let tree = parser + .parse_str( + " struct Stuff {} fn main() {} - ", None).unwrap(); + ", + None, + ) + .unwrap(); let root_node = tree.root_node(); assert_eq!(root_node.kind(), "source_file"); @@ -538,12 +575,20 @@ mod tests { messages.push((log_type, message.to_string())); }))); - parser.parse_str(" + parser + .parse_str( + " struct Stuff {} fn main() {} - ", None).unwrap(); + ", + None, + ) + .unwrap(); - assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string()))); + assert!(messages.contains(&( + LogType::Parse, + "reduce sym:struct_item, child_count:3".to_string() + ))); assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); } @@ -552,12 +597,17 @@ mod tests { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let tree = parser.parse_str(" + let tree = parser + .parse_str( + " struct Stuff { a: A; b: Option, } - ", None).unwrap(); + ", + None, + ) + .unwrap(); let mut cursor = tree.walk(); assert_eq!(cursor.node().kind(), "source_file"); @@ -583,25 +633,26 @@ mod tests { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let lines = &[ - "pub fn foo() {", - " 1", - "}", - ]; + let lines = &["pub fn foo() {", " 1", "}"]; - let tree = parser.parse_utf8(&mut |_, position| { - let row = position.row as usize; - let column = position.column as usize; - if row < lines.len() { - if column < lines[row].as_bytes().len() { - &lines[row].as_bytes()[column..] - } else { - "\n".as_bytes() - } - } else { - &[] - } - }, None).unwrap(); + let tree = parser + .parse_utf8( + &mut |_, position| { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] 
+ } else { + "\n".as_bytes() + } + } else { + &[] + } + }, + None, + ) + .unwrap(); let root = tree.root_node(); assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); @@ -619,25 +670,29 @@ mod tests { println!("log: {:?} {}", t, message); }))); - let lines: Vec> = [ - "pub fn foo() {", - " 1", - "}" - ].iter().map(|s| s.encode_utf16().collect()).collect(); + let lines: Vec> = ["pub fn foo() {", " 1", "}"] + .iter() + .map(|s| s.encode_utf16().collect()) + .collect(); - let tree = parser.parse_utf16(&mut |_, position| { - let row = position.row as usize; - let column = position.column as usize; - if row < lines.len() { - if column < lines[row].len() { - &lines[row][column..] - } else { - &[10] - } - } else { - &[] - } - }, None).unwrap(); + let tree = parser + .parse_utf16( + &mut |_, position| { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].len() { + &lines[row][column..] 
+ } else { + &[10] + } + } else { + &[] + } + }, + None, + ) + .unwrap(); let root = tree.root_node(); assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); @@ -666,20 +721,28 @@ mod tests { let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); let mut input_bytes_read = Vec::new(); - let mut tree = parser.parse_utf8(&mut |offset, _| { - let offset = offset as usize; - if offset < input_bytes.len() { - let result = &input_bytes[offset..offset + 1]; - input_bytes_read.extend(result.iter()); - result - } else { - &[] - } - }, None).unwrap(); + let mut tree = parser + .parse_utf8( + &mut |offset, _| { + let offset = offset as usize; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, + None, + ) + .unwrap(); - let parameters_sexp = tree.root_node() - .named_child(0).unwrap() - .named_child(1).unwrap() + let parameters_sexp = tree + .root_node() + .named_child(0) + .unwrap() + .named_child(1) + .unwrap() .to_sexp(); assert_eq!( parameters_sexp, @@ -688,7 +751,7 @@ mod tests { input_bytes_read.clear(); input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); - tree.edit(&InputEdit{ + tree.edit(&InputEdit { start_byte: 14, old_end_byte: 14, new_end_byte: 20, @@ -697,20 +760,28 @@ mod tests { new_end_position: Point::new(0, 20), }); - let tree = parser.parse_utf8(&mut |offset, _| { - let offset = offset as usize; - if offset < input_bytes.len() { - let result = &input_bytes[offset..offset + 1]; - input_bytes_read.extend(result.iter()); - result - } else { - &[] - } - }, Some(&tree)).unwrap(); + let tree = parser + .parse_utf8( + &mut |offset, _| { + let offset = offset as usize; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, + Some(&tree), + ) + .unwrap(); - let 
parameters_sexp = tree.root_node() - .named_child(0).unwrap() - .named_child(1).unwrap() + let parameters_sexp = tree + .root_node() + .named_child(0) + .unwrap() + .named_child(1) + .unwrap() .to_sexp(); assert_eq!( parameters_sexp, @@ -738,7 +809,6 @@ mod tests { for thread_id in 1..5 { let mut tree_clone = tree.clone(); parse_threads.push(thread::spawn(move || { - // For each thread, prepend a different number of declarations to the // source code. let mut prepend_line_count = 0; @@ -748,7 +818,7 @@ mod tests { prepended_source += "struct X {}\n\n"; } - tree_clone.edit(&InputEdit{ + tree_clone.edit(&InputEdit { start_byte: 0, old_end_byte: 0, new_end_byte: prepended_source.len() as u32, @@ -761,7 +831,9 @@ mod tests { // Reparse using the old tree as a starting point. let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - parser.parse_str(&prepended_source, Some(&tree_clone)).unwrap() + parser + .parse_str(&prepended_source, Some(&tree_clone)) + .unwrap() })); } From db360b73fb33d5c03a226b42b1bfa60398645873 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 13 Oct 2018 14:09:36 -0700 Subject: [PATCH 40/58] Add Tree.walk_with_properties --- Cargo.toml | 5 + src/lib.rs | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 292 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9adbcfd1..485d369e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,5 +20,10 @@ include = [ "/vendor/tree-sitter/src/runtime/*", ] +[dependencies] +serde = "1.0" +serde_json = "1.0" +serde_derive = "1.0" + [build-dependencies] cc = "1.0" diff --git a/src/lib.rs b/src/lib.rs index 4a132a3f..19b9a670 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,17 @@ mod ffi; +#[macro_use] +extern crate serde_derive; +extern crate serde_json; + +use std::collections::HashMap; use std::ffi::CStr; use std::fmt; use std::io::{self, Read, Seek}; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; +use std::str; 
#[derive(Clone, Copy)] #[repr(transparent)] @@ -19,7 +25,7 @@ pub enum LogType { type Logger<'a> = Box; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Point { pub row: u32, pub column: u32, @@ -35,6 +41,22 @@ pub struct InputEdit { pub new_end_position: Point, } +struct PropertyTransition { + state_id: u32, + child_index: Option, +} + +struct PropertyState { + transitions: HashMap>, + property_set_id: u32, + default_next_state_id: u32, +} + +pub struct PropertySheet { + states: Vec, + property_sets: Vec>, +} + pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); pub struct Parser(*mut ffi::TSParser); @@ -43,6 +65,13 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +pub struct TreePropertyCursor<'a> { + cursor: TreeCursor<'a>, + state_stack: Vec, + child_index_stack: Vec, + property_sheet: &'a PropertySheet, +} + impl Language { pub fn node_kind_count(&self) -> usize { unsafe { ffi::ts_language_symbol_count(self.0) as usize } @@ -310,6 +339,13 @@ impl Tree { pub fn walk(&self) -> TreeCursor { self.root_node().walk() } + + pub fn walk_with_properties<'a>( + &'a self, + property_sheet: &'a PropertySheet, + ) -> TreePropertyCursor<'a> { + TreePropertyCursor::new(self, property_sheet) + } } unsafe impl Send for Tree {} @@ -437,6 +473,14 @@ impl<'tree> Node<'tree> { result } + pub fn utf8_text<'a>(&self, source: &'a str) -> Result<&'a str, str::Utf8Error> { + str::from_utf8(&source.as_bytes()[self.start_byte() as usize..self.end_byte() as usize]) + } + + pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] { + &source[self.start_byte() as usize..self.end_byte() as usize] + } + pub fn walk(&self) -> TreeCursor<'tree> { TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) } @@ -461,7 +505,7 @@ impl<'a> fmt::Debug for Node<'a> { } impl<'a> TreeCursor<'a> { - pub fn node(&'a self) -> Node<'a> { + pub fn node(&self) -> 
Node<'a> { Node( unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, PhantomData, @@ -496,6 +540,87 @@ impl<'a> Drop for TreeCursor<'a> { } } +impl<'a> TreePropertyCursor<'a> { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet) -> Self { + Self { + cursor: tree.root_node().walk(), + child_index_stack: vec![0], + state_stack: vec![0], + property_sheet, + } + } + + pub fn node(&self) -> Node<'a> { + self.cursor.node() + } + + pub fn node_properties(&self) -> &'a HashMap { + &self.property_sheet.property_sets[self.current_state().property_set_id as usize] + } + + pub fn goto_first_child(&mut self) -> bool { + if self.cursor.goto_first_child() { + let child_index = 0; + let next_state_id = { + let state = &self.current_state(); + let kind_id = self.cursor.node().kind_id(); + self.next_state(state, kind_id, child_index) + }; + self.state_stack.push(next_state_id); + self.child_index_stack.push(child_index); + true + } else { + false + } + } + + pub fn goto_next_sibling(&mut self) -> bool { + if self.cursor.goto_next_sibling() { + let child_index = self.child_index_stack.pop().unwrap() + 1; + self.state_stack.pop(); + let next_state_id = { + let state = &self.current_state(); + let kind_id = self.cursor.node().kind_id(); + self.next_state(state, kind_id, child_index) + }; + self.state_stack.push(next_state_id); + self.child_index_stack.push(child_index); + true + } else { + false + } + } + + pub fn goto_parent(&mut self) -> bool { + if self.cursor.goto_parent() { + self.state_stack.pop(); + self.child_index_stack.pop(); + true + } else { + false + } + } + + fn next_state(&self, state: &PropertyState, node_kind_id: u16, node_child_index: u32) -> u32 { + state + .transitions + .get(&node_kind_id) + .and_then(|transitions| { + for transition in transitions.iter() { + if transition.child_index == Some(node_child_index) || transition.child_index == None { + return Some(transition.state_id); + } + } + None + }) + .unwrap_or(state.default_next_state_id) + } + + fn 
current_state(&self) -> &PropertyState { + &self.property_sheet.states[*self.state_stack.last().unwrap() as usize] + } +} + impl Point { pub fn new(row: u32, column: u32) -> Self { Point { row, column } @@ -526,6 +651,64 @@ impl From for Point { } } +impl PropertySheet { + pub fn new(language: Language, json: &str) -> Result { + #[derive(Deserialize, Debug)] + struct PropertyTransitionJSON { + #[serde(rename = "type")] + kind: String, + named: bool, + index: Option, + state_id: u32, + } + + #[derive(Deserialize, Debug)] + struct PropertyStateJSON { + transitions: Vec, + property_set_id: u32, + default_next_state_id: u32, + } + + #[derive(Deserialize, Debug)] + struct PropertySheetJSON { + states: Vec, + property_sets: Vec>, + } + + let input: PropertySheetJSON = serde_json::from_str(json)?; + Ok(PropertySheet { + property_sets: input.property_sets, + states: input + .states + .iter() + .map(|state| { + let mut transitions = HashMap::new(); + let node_kind_count = language.node_kind_count(); + for transition in state.transitions.iter() { + for i in 0..node_kind_count { + let i = i as u16; + if language.node_kind_is_named(i) == transition.named + && transition.kind == language.node_kind_for_id(i) + { + let entry = transitions.entry(i).or_insert(Vec::new()); + entry.push(PropertyTransition { + child_index: transition.index, + state_id: transition.state_id, + }); + } + } + } + PropertyState { + transitions, + default_next_state_id: state.default_next_state_id, + property_set_id: state.property_set_id, + } + }) + .collect(), + }) + } +} + #[cfg(test)] mod tests { use super::*; @@ -600,11 +783,11 @@ mod tests { let tree = parser .parse_str( " - struct Stuff { - a: A; - b: Option, - } - ", + struct Stuff { + a: A; + b: Option, + } + ", None, ) .unwrap(); @@ -628,6 +811,103 @@ mod tests { assert_eq!(cursor.node().is_named(), true); } + #[test] + fn test_tree_property_matching() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = 
parser.parse_str("fn f1() { f2(); }", None).unwrap(); + + let property_sheet = PropertySheet::new( + rust(), + r##" + { + "states": [ + { + "transitions": [ + {"type": "call_expression", "named": true, "state_id": 1}, + {"type": "function_item", "named": true, "state_id": 2} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [ + {"type": "identifier", "named": true, "state_id": 3} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [ + {"type": "identifier", "named": true, "state_id": 4} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 1 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 2 + } + ], + "property_sets": [ + {}, + {"reference": "function"}, + {"define": "function"} + ] + } + "##, + ) + .unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet); + assert_eq!(cursor.node().kind(), "source_file"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "function_item"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "fn"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + assert!(!cursor.goto_first_child()); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(cursor.node_properties()["define"], "function"); + assert!(!cursor.goto_first_child()); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "parameters"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "("); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), ")"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_parent()); + 
assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(cursor.node_properties()["reference"], "function"); + } + #[test] fn test_custom_utf8_input() { let mut parser = Parser::new(); From afe722358236dfb1389471a1037531b7c5422d0f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 20 Nov 2018 15:56:16 -0800 Subject: [PATCH 41/58] Upgrade Tree-sitter, use single source file in build script --- build.rs | 25 ++----------------------- vendor/tree-sitter | 2 +- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/build.rs b/build.rs index 7d9ee83e..add3bec7 100644 --- a/build.rs +++ b/build.rs @@ -13,29 +13,8 @@ fn main() { .flag_if_supported("-Wno-unused-parameter") .include(root_path.join("src")) .include(root_path.join("include")) - .include(root_path.join("externals").join("utf8proc")); - - let source_filenames = [ - "get_changed_ranges.c", - "language.c", - "lexer.c", - "node.c", - "parser.c", - "stack.c", - "subtree.c", - "tree_cursor.c", - "tree.c", - "utf16.c", - ]; - - config.files(source_filenames.iter().map(|source_filename| { - root_path - .join("src") - .join("runtime") - .join(&source_filename) - })); - - config.file(root_path.join("externals").join("utf8proc").join("utf8proc.c")); + .include(root_path.join("externals").join("utf8proc")) + .file(root_path.join("src").join("runtime").join("runtime.c")); if env::var("RUST_TREE_SITTER_TEST").is_ok() { let parser_dir: PathBuf = ["fixtures", "tree-sitter-rust", "src"].iter().collect(); diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 16376c43..6b8e5bd1 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 
@@ -Subproject commit 16376c43f5cc75bbc5297e6d5716bd94d55ccc05 +Subproject commit 6b8e5bd1f96ab63f17873ef9f7a72569a421810f From 8fdcf84ff3396e4c8fc8ee4cdc9e37ebe9f126cf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 20 Nov 2018 16:00:45 -0800 Subject: [PATCH 42/58] 0.3.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 485d369e..2c92acc5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.3.1" +version = "0.3.2" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From a741265ead8dc67de991046d295e2f316681cce0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Nov 2018 17:26:16 -0800 Subject: [PATCH 43/58] Replace all u32s in the API with usizes Co-Authored-By: Timothy Clem --- src/lib.rs | 146 +++++++++++++++++++++++++++-------------------------- 1 file changed, 75 insertions(+), 71 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 19b9a670..fa3d970e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,29 +27,36 @@ type Logger<'a> = Box; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Point { - pub row: u32, - pub column: u32, + pub row: usize, + pub column: usize, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct InputEdit { - pub start_byte: u32, - pub old_end_byte: u32, - pub new_end_byte: u32, + pub start_byte: usize, + pub old_end_byte: usize, + pub new_end_byte: usize, pub start_position: Point, pub old_end_position: Point, pub new_end_position: Point, } struct PropertyTransition { - state_id: u32, - child_index: Option, + state_id: usize, + child_index: Option, + text_regex: Option, } struct PropertyState { transitions: HashMap>, - property_set_id: u32, - default_next_state_id: u32, + property_set_id: usize, + default_next_state_id: usize, +} + +#[derive(Debug)] +pub enum PropertySheetError { + 
InvalidJSON(serde_json::Error), + InvalidRegex(regex::Error) } pub struct PropertySheet { @@ -67,9 +74,10 @@ pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); pub struct TreePropertyCursor<'a> { cursor: TreeCursor<'a>, - state_stack: Vec, - child_index_stack: Vec, + state_stack: Vec, + child_index_stack: Vec, property_sheet: &'a PropertySheet, + source: &'a str, } impl Language { @@ -165,10 +173,10 @@ impl Parser { pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { let bytes = input.as_bytes(); - self.parse_utf8(&mut |offset, _| &bytes[(offset as usize)..], old_tree) + self.parse_utf8(&mut |offset, _| &bytes[offset..], old_tree) } - pub fn parse_utf8<'a, T: FnMut(u32, Point) -> &'a [u8]>( + pub fn parse_utf8<'a, T: FnMut(usize, Point) -> &'a [u8]>( &mut self, input: &mut T, old_tree: Option<&Tree>, @@ -182,7 +190,7 @@ impl Parser { ) } - pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( + pub fn parse_utf16<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>( &mut self, input: &mut T, old_tree: Option<&Tree>, @@ -239,19 +247,19 @@ impl Parser { unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } } - fn parse_utf8_ptr (*const u8, usize)>( + fn parse_utf8_ptr (*const u8, usize)>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read (*const u8, usize)>( + unsafe extern "C" fn read (*const u8, usize)>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); - let (ptr, length) = (*input)(byte_offset, position.into()); + let (ptr, length) = (*input)(byte_offset as usize, position.into()); *bytes_read = length as u32; return ptr as *const c_char; }; @@ -271,12 +279,12 @@ impl Parser { } } - fn parse_utf16_ptr (*const u16, usize)>( + fn parse_utf16_ptr (*const u16, usize)>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read (*const 
u16, usize)>( + unsafe extern "C" fn read (*const u16, usize)>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, @@ -284,10 +292,10 @@ impl Parser { ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); let (ptr, length) = (*input)( - byte_offset, + byte_offset as usize, Point { - row: position.row, - column: position.column / 2, + row: position.row as usize, + column: position.column as usize / 2, }, ); *bytes_read = length as u32 * 2; @@ -326,9 +334,9 @@ impl Tree { pub fn edit(&mut self, edit: &InputEdit) { let edit = ffi::TSInputEdit { - start_byte: edit.start_byte, - old_end_byte: edit.old_end_byte, - new_end_byte: edit.new_end_byte, + start_byte: edit.start_byte as u32, + old_end_byte: edit.old_end_byte as u32, + new_end_byte: edit.new_end_byte as u32, start_point: edit.start_position.into(), old_end_point: edit.old_end_position.into(), new_end_point: edit.new_end_position.into(), @@ -399,44 +407,38 @@ impl<'tree> Node<'tree> { unsafe { ffi::ts_node_has_error(self.0) } } - pub fn start_byte(&self) -> u32 { - unsafe { ffi::ts_node_start_byte(self.0) } + pub fn start_byte(&self) -> usize { + unsafe { ffi::ts_node_start_byte(self.0) as usize } } - pub fn end_byte(&self) -> u32 { - unsafe { ffi::ts_node_end_byte(self.0) } + pub fn end_byte(&self) -> usize { + unsafe { ffi::ts_node_end_byte(self.0) as usize } } pub fn start_position(&self) -> Point { let result = unsafe { ffi::ts_node_start_point(self.0) }; - Point { - row: result.row, - column: result.column, - } + result.into() } pub fn end_position(&self) -> Point { let result = unsafe { ffi::ts_node_end_point(self.0) }; - Point { - row: result.row, - column: result.column, - } + result.into() } - pub fn child(&self, i: u32) -> Option { - Self::new(unsafe { ffi::ts_node_child(self.0, i) }) + pub fn child(&self, i: usize) -> Option { + Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) }) } - pub fn child_count(&self) -> u32 { - unsafe { ffi::ts_node_child_count(self.0) } + 
pub fn child_count(&self) -> usize { + unsafe { ffi::ts_node_child_count(self.0) as usize } } - pub fn named_child<'a>(&'a self, i: u32) -> Option { - Self::new(unsafe { ffi::ts_node_named_child(self.0, i) }) + pub fn named_child<'a>(&'a self, i: usize) -> Option { + Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) }) } - pub fn named_child_count(&self) -> u32 { - unsafe { ffi::ts_node_named_child_count(self.0) } + pub fn named_child_count(&self) -> usize { + unsafe { ffi::ts_node_named_child_count(self.0) as usize } } pub fn parent(&self) -> Option { @@ -474,11 +476,11 @@ impl<'tree> Node<'tree> { } pub fn utf8_text<'a>(&self, source: &'a str) -> Result<&'a str, str::Utf8Error> { - str::from_utf8(&source.as_bytes()[self.start_byte() as usize..self.end_byte() as usize]) + str::from_utf8(&source.as_bytes()[self.start_byte()..self.end_byte()]) } pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] { - &source[self.start_byte() as usize..self.end_byte() as usize] + &source[self.start_byte()..self.end_byte()] } pub fn walk(&self) -> TreeCursor<'tree> { @@ -524,12 +526,12 @@ impl<'a> TreeCursor<'a> { return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }; } - pub fn goto_first_child_for_index(&mut self, index: u32) -> Option { - let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index) }; + pub fn goto_first_child_for_index(&mut self, index: usize) -> Option { + let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; if result < 0 { None } else { - Some(result as u32) + Some(result as usize) } } } @@ -541,12 +543,13 @@ impl<'a> Drop for TreeCursor<'a> { } impl<'a> TreePropertyCursor<'a> { - fn new(tree: &'a Tree, property_sheet: &'a PropertySheet) -> Self { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet, source: &'a str) -> Self { Self { cursor: tree.root_node().walk(), child_index_stack: vec![0], state_stack: vec![0], property_sheet, + source, } } @@ 
-555,7 +558,7 @@ impl<'a> TreePropertyCursor<'a> { } pub fn node_properties(&self) -> &'a HashMap { - &self.property_sheet.property_sets[self.current_state().property_set_id as usize] + &self.property_sheet.property_sets[self.current_state().property_set_id] } pub fn goto_first_child(&mut self) -> bool { @@ -601,7 +604,7 @@ impl<'a> TreePropertyCursor<'a> { } } - fn next_state(&self, state: &PropertyState, node_kind_id: u16, node_child_index: u32) -> u32 { + fn next_state(&self, state: &PropertyState, node_kind_id: u16, node_child_index: usize) -> usize { state .transitions .get(&node_kind_id) @@ -617,12 +620,12 @@ impl<'a> TreePropertyCursor<'a> { } fn current_state(&self) -> &PropertyState { - &self.property_sheet.states[*self.state_stack.last().unwrap() as usize] + &self.property_sheet.states[*self.state_stack.last().unwrap()] } } impl Point { - pub fn new(row: u32, column: u32) -> Self { + pub fn new(row: usize, column: usize) -> Self { Point { row, column } } } @@ -636,8 +639,8 @@ impl fmt::Display for Point { impl Into for Point { fn into(self) -> ffi::TSPoint { ffi::TSPoint { - row: self.row, - column: self.column, + row: self.row as u32, + column: self.column as u32, } } } @@ -645,28 +648,29 @@ impl Into for Point { impl From for Point { fn from(point: ffi::TSPoint) -> Self { Self { - row: point.row, - column: point.column, + row: point.row as usize, + column: point.column as usize, } } } impl PropertySheet { - pub fn new(language: Language, json: &str) -> Result { + pub fn new(language: Language, json: &str) -> Result { #[derive(Deserialize, Debug)] struct PropertyTransitionJSON { #[serde(rename = "type")] kind: String, named: bool, - index: Option, - state_id: u32, + index: Option, + text: Option, + state_id: usize, } #[derive(Deserialize, Debug)] struct PropertyStateJSON { transitions: Vec, - property_set_id: u32, - default_next_state_id: u32, + property_set_id: usize, + default_next_state_id: usize, } #[derive(Deserialize, Debug)] @@ -918,8 +922,8 @@ 
mod tests { let tree = parser .parse_utf8( &mut |_, position| { - let row = position.row as usize; - let column = position.column as usize; + let row = position.row; + let column = position.column; if row < lines.len() { if column < lines[row].as_bytes().len() { &lines[row].as_bytes()[column..] @@ -958,8 +962,8 @@ mod tests { let tree = parser .parse_utf16( &mut |_, position| { - let row = position.row as usize; - let column = position.column as usize; + let row = position.row; + let column = position.column; if row < lines.len() { if column < lines[row].len() { &lines[row][column..] @@ -1004,7 +1008,7 @@ mod tests { let mut tree = parser .parse_utf8( &mut |offset, _| { - let offset = offset as usize; + let offset = offset; if offset < input_bytes.len() { let result = &input_bytes[offset..offset + 1]; input_bytes_read.extend(result.iter()); @@ -1043,7 +1047,7 @@ mod tests { let tree = parser .parse_utf8( &mut |offset, _| { - let offset = offset as usize; + let offset = offset; if offset < input_bytes.len() { let result = &input_bytes[offset..offset + 1]; input_bytes_read.extend(result.iter()); @@ -1101,7 +1105,7 @@ mod tests { tree_clone.edit(&InputEdit { start_byte: 0, old_end_byte: 0, - new_end_byte: prepended_source.len() as u32, + new_end_byte: prepended_source.len(), start_position: Point::new(0, 0), old_end_position: Point::new(0, 0), new_end_position: Point::new(prepend_line_count, 0), From d5b53cde7dded6ebbc0d78ed131e9a10f2a62c5b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Nov 2018 17:26:48 -0800 Subject: [PATCH 44/58] Respect the `:text` pseudo-class in TreePropertyCursor Co-Authored-By: Timothy Clem --- Cargo.toml | 1 + src/lib.rs | 87 ++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2c92acc5..0ffee772 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ include = [ ] [dependencies] +regex = "1" serde = "1.0" serde_json = "1.0" 
serde_derive = "1.0" diff --git a/src/lib.rs b/src/lib.rs index fa3d970e..a76ed115 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,9 @@ mod ffi; #[macro_use] extern crate serde_derive; extern crate serde_json; +extern crate regex; +use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; use std::fmt; @@ -351,8 +353,9 @@ impl Tree { pub fn walk_with_properties<'a>( &'a self, property_sheet: &'a PropertySheet, + source: &'a str, ) -> TreePropertyCursor<'a> { - TreePropertyCursor::new(self, property_sheet) + TreePropertyCursor::new(self, property_sheet, source) } } @@ -610,9 +613,23 @@ impl<'a> TreePropertyCursor<'a> { .get(&node_kind_id) .and_then(|transitions| { for transition in transitions.iter() { - if transition.child_index == Some(node_child_index) || transition.child_index == None { - return Some(transition.state_id); + if let Some(text_regex) = transition.text_regex.as_ref() { + let node = self.cursor.node(); + let text = &self.source.as_bytes()[node.start_byte()..node.end_byte()]; + if let Ok(text) = str::from_utf8(text) { + if !text_regex.is_match(text) { + continue; + } + } } + + if let Some(child_index) = transition.child_index { + if child_index != node_child_index { + continue; + } + } + + return Some(transition.state_id); } None }) @@ -679,36 +696,42 @@ impl PropertySheet { property_sets: Vec>, } - let input: PropertySheetJSON = serde_json::from_str(json)?; + let input: PropertySheetJSON = serde_json::from_str(json) + .map_err(|e| PropertySheetError::InvalidJSON(e))?; + let mut states = Vec::new(); + + for state in input.states.iter() { + let mut transitions = HashMap::new(); + let node_kind_count = language.node_kind_count(); + for transition in state.transitions.iter() { + for i in 0..node_kind_count { + let i = i as u16; + if language.node_kind_is_named(i) == transition.named + && transition.kind == language.node_kind_for_id(i) + { + let entry = transitions.entry(i).or_insert(Vec::new()); + let text_regex = if let Some(text) = 
transition.text.as_ref() { + Some(Regex::new(&text).map_err(|e| PropertySheetError::InvalidRegex(e))?) + } else { + None + }; + entry.push(PropertyTransition { + child_index: transition.index, + state_id: transition.state_id, + text_regex + }); + } + } + } + states.push(PropertyState { + transitions, + default_next_state_id: state.default_next_state_id, + property_set_id: state.property_set_id, + }); + } Ok(PropertySheet { property_sets: input.property_sets, - states: input - .states - .iter() - .map(|state| { - let mut transitions = HashMap::new(); - let node_kind_count = language.node_kind_count(); - for transition in state.transitions.iter() { - for i in 0..node_kind_count { - let i = i as u16; - if language.node_kind_is_named(i) == transition.named - && transition.kind == language.node_kind_for_id(i) - { - let entry = transitions.entry(i).or_insert(Vec::new()); - entry.push(PropertyTransition { - child_index: transition.index, - state_id: transition.state_id, - }); - } - } - } - PropertyState { - transitions, - default_next_state_id: state.default_next_state_id, - property_set_id: state.property_set_id, - } - }) - .collect(), + states, }) } } @@ -869,7 +892,7 @@ mod tests { ) .unwrap(); - let mut cursor = tree.walk_with_properties(&property_sheet); + let mut cursor = tree.walk_with_properties(&property_sheet, ""); assert_eq!(cursor.node().kind(), "source_file"); assert_eq!(*cursor.node_properties(), HashMap::new()); From c9ce314695a5bad674aed9b267b9c430411bb731 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 29 Nov 2018 16:21:01 -0800 Subject: [PATCH 45/58] Make PropertySheet generic on the properties type Co-Authored-By: Timothy Clem --- src/lib.rs | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a76ed115..68715879 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,9 @@ mod ffi; extern crate serde_derive; extern crate serde_json; extern crate regex; +extern crate 
serde; +use serde::Deserialize; use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; @@ -61,9 +63,10 @@ pub enum PropertySheetError { InvalidRegex(regex::Error) } -pub struct PropertySheet { +pub struct PropertySheet<'d, P: Deserialize<'d>> { states: Vec, - property_sets: Vec>, + property_sets: Vec<P>
, + _phantom: &'d std::marker::PhantomData<()>, } pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); @@ -74,11 +77,11 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -pub struct TreePropertyCursor<'a> { +pub struct TreePropertyCursor<'a, 'd, P: Deserialize<'d>> { cursor: TreeCursor<'a>, state_stack: Vec, child_index_stack: Vec, - property_sheet: &'a PropertySheet, + property_sheet: &'a PropertySheet<'d, P>, source: &'a str, } @@ -350,11 +353,11 @@ impl Tree { self.root_node().walk() } - pub fn walk_with_properties<'a>( + pub fn walk_with_properties<'a, 'd, P: Deserialize<'d>>( &'a self, - property_sheet: &'a PropertySheet, + property_sheet: &'a PropertySheet<'d, P>, source: &'a str, - ) -> TreePropertyCursor<'a> { + ) -> TreePropertyCursor<'a, 'd, P> { TreePropertyCursor::new(self, property_sheet, source) } } @@ -545,8 +548,8 @@ impl<'a> Drop for TreeCursor<'a> { } } -impl<'a> TreePropertyCursor<'a> { - fn new(tree: &'a Tree, property_sheet: &'a PropertySheet, source: &'a str) -> Self { +impl<'a, 'd, P: Deserialize<'d>> TreePropertyCursor<'a, 'd, P> { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet<'d, P>, source: &'a str) -> Self { Self { cursor: tree.root_node().walk(), child_index_stack: vec![0], @@ -560,7 +563,7 @@ impl<'a> TreePropertyCursor<'a> { self.cursor.node() } - pub fn node_properties(&self) -> &'a HashMap { + pub fn node_properties(&self) -> &'a P { &self.property_sheet.property_sets[self.current_state().property_set_id] } @@ -671,8 +674,8 @@ impl From for Point { } } -impl PropertySheet { - pub fn new(language: Language, json: &str) -> Result { +impl<'a, P: Deserialize<'a>> PropertySheet<'a, P> { + pub fn new(language: Language, json: &'a str) -> Result { #[derive(Deserialize, Debug)] struct PropertyTransitionJSON { #[serde(rename = "type")] @@ -691,12 +694,12 @@ impl PropertySheet { } #[derive(Deserialize, Debug)] - struct PropertySheetJSON { + struct PropertySheetJSON

{ states: Vec, - property_sets: Vec>, + property_sets: Vec<P>
, } - let input: PropertySheetJSON = serde_json::from_str(json) + let input: PropertySheetJSON<P>
= serde_json::from_str(json) .map_err(|e| PropertySheetError::InvalidJSON(e))?; let mut states = Vec::new(); @@ -729,9 +732,10 @@ impl PropertySheet { property_set_id: state.property_set_id, }); } - Ok(PropertySheet { + Ok(Self { property_sets: input.property_sets, states, + _phantom: &std::marker::PhantomData, }) } } @@ -844,7 +848,7 @@ mod tests { parser.set_language(rust()).unwrap(); let tree = parser.parse_str("fn f1() { f2(); }", None).unwrap(); - let property_sheet = PropertySheet::new( + let property_sheet = PropertySheet::>::new( rust(), r##" { From 11610e1df66214a1bf58bff2565b52d270bf0d5b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 29 Nov 2018 20:51:50 -0800 Subject: [PATCH 46/58] Eliminate deserializer lifetime on PropertySheet The PropertySheet is intended to be a long-lived object, whereas its JSON source is not needed once the property sheet is instantiated. Co-Authored-By: Timothy Clem --- src/lib.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 68715879..681af7fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,7 @@ extern crate serde_json; extern crate regex; extern crate serde; -use serde::Deserialize; +use serde::de::DeserializeOwned; use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; @@ -63,10 +63,9 @@ pub enum PropertySheetError { InvalidRegex(regex::Error) } -pub struct PropertySheet<'d, P: Deserialize<'d>> { +pub struct PropertySheet> { states: Vec, property_sets: Vec

, - _phantom: &'d std::marker::PhantomData<()>, } pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); @@ -77,11 +76,11 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -pub struct TreePropertyCursor<'a, 'd, P: Deserialize<'d>> { +pub struct TreePropertyCursor<'a, P: 'a + DeserializeOwned> { cursor: TreeCursor<'a>, state_stack: Vec, child_index_stack: Vec, - property_sheet: &'a PropertySheet<'d, P>, + property_sheet: &'a PropertySheet<P>
, source: &'a str, } @@ -353,11 +352,11 @@ impl Tree { self.root_node().walk() } - pub fn walk_with_properties<'a, 'd, P: Deserialize<'d>>( + pub fn walk_with_properties<'a, P: DeserializeOwned>( &'a self, - property_sheet: &'a PropertySheet<'d, P>, + property_sheet: &'a PropertySheet<P>
, source: &'a str, - ) -> TreePropertyCursor<'a, 'd, P> { + ) -> TreePropertyCursor<'a, P> { TreePropertyCursor::new(self, property_sheet, source) } } @@ -548,8 +547,8 @@ impl<'a> Drop for TreeCursor<'a> { } } -impl<'a, 'd, P: Deserialize<'d>> TreePropertyCursor<'a, 'd, P> { - fn new(tree: &'a Tree, property_sheet: &'a PropertySheet<'d, P>, source: &'a str) -> Self { +impl<'a, P: DeserializeOwned> TreePropertyCursor<'a, P> { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet<P>
, source: &'a str) -> Self { Self { cursor: tree.root_node().walk(), child_index_stack: vec![0], @@ -674,8 +673,8 @@ impl From for Point { } } -impl<'a, P: Deserialize<'a>> PropertySheet<'a, P> { - pub fn new(language: Language, json: &'a str) -> Result { +impl PropertySheet

{ + pub fn new(language: Language, json: &str) -> Result { #[derive(Deserialize, Debug)] struct PropertyTransitionJSON { #[serde(rename = "type")] @@ -735,7 +734,6 @@ impl<'a, P: Deserialize<'a>> PropertySheet<'a, P> { Ok(Self { property_sets: input.property_sets, states, - _phantom: &std::marker::PhantomData, }) } } From fbb220f19302ff44f172b6a48362ece7f62167ee Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 3 Dec 2018 10:43:58 -0800 Subject: [PATCH 47/58] Add test for regexes in property sheets --- src/lib.rs | 112 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 681af7fb..724a08bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -844,9 +844,18 @@ mod tests { fn test_tree_property_matching() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let tree = parser.parse_str("fn f1() { f2(); }", None).unwrap(); + let source_code = "fn f1() { f2(); }"; + let tree = parser.parse_str(source_code, None).unwrap(); - let property_sheet = PropertySheet::>::new( + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Properties { + reference: Option, + define: Option, + } + + let empty_properties = Properties { reference: None, define: None }; + + let property_sheet = PropertySheet::::new( rust(), r##" { @@ -894,47 +903,126 @@ mod tests { ) .unwrap(); - let mut cursor = tree.walk_with_properties(&property_sheet, ""); + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); assert_eq!(cursor.node().kind(), "source_file"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "function_item"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "fn"); - 
assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(!cursor.goto_first_child()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties()["define"], "function"); + assert_eq!(cursor.node_properties().define, Some("function".to_owned())); assert!(!cursor.goto_first_child()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "parameters"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "("); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), ")"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_parent()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "block"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "call_expression"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties()["reference"], "function"); + assert_eq!(cursor.node_properties().reference, Some("function".to_owned())); + } + + #[test] + fn test_tree_property_matching_with_regexes() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let source_code = "fn f1() { None(a()) }"; + let tree = parser.parse_str(source_code, None).unwrap(); + + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Properties { + scope: Option, + } + + let empty_properties = Properties { scope: None }; + + let property_sheet = 
PropertySheet::::new( + rust(), + r##" + { + "states": [ + { + "id": 0, + "transitions": [ + {"type": "call_expression", "named": true, "state_id": 1} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "id": 1, + "transitions": [ + {"type": "identifier", "named": true, "text": "^[A-Z]", "state_id": 2}, + {"type": "identifier", "named": true, "state_id": 3} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 1 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 2 + } + ], + "property_sets": [ + {}, + {"scope": "constructor"}, + {"scope": "function"} + ] + } + "##, + ) + .unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); + assert_eq!(cursor.node().kind(), "source_file"); + assert_eq!(*cursor.node_properties(), empty_properties); + + cursor.goto_first_child(); + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert!(cursor.goto_next_sibling()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(cursor.node_properties().scope, Some("constructor".to_owned())); } #[test] From beb60194d12b62cf70bc6b9e8652258ae07a9b44 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 3 Dec 2018 14:42:18 -0800 Subject: [PATCH 48/58] 0.3.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0ffee772..f61b1583 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter 
parsing library" -version = "0.3.2" +version = "0.3.3" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From b0a7c854a4939915703980c229093e70147a1615 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 10 Dec 2018 14:57:46 -0800 Subject: [PATCH 49/58] Avoid redundant regex compilation when instantiating PropertySheets --- src/lib.rs | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 724a08bd..d70dc607 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,7 +48,7 @@ pub struct InputEdit { struct PropertyTransition { state_id: usize, child_index: Option, - text_regex: Option, + text_regex_index: Option, } struct PropertyState { @@ -66,6 +66,7 @@ pub enum PropertySheetError { pub struct PropertySheet> { states: Vec, property_sets: Vec<P>
, + text_regexes: Vec, } pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); @@ -615,11 +616,11 @@ impl<'a, P: DeserializeOwned> TreePropertyCursor<'a, P> { .get(&node_kind_id) .and_then(|transitions| { for transition in transitions.iter() { - if let Some(text_regex) = transition.text_regex.as_ref() { + if let Some(text_regex_index) = transition.text_regex_index { let node = self.cursor.node(); let text = &self.source.as_bytes()[node.start_byte()..node.end_byte()]; if let Ok(text) = str::from_utf8(text) { - if !text_regex.is_match(text) { + if !self.property_sheet.text_regexes[text_regex_index].is_match(text) { continue; } } @@ -699,28 +700,37 @@ impl PropertySheet

{ } let input: PropertySheetJSON<P>
= serde_json::from_str(json) - .map_err(|e| PropertySheetError::InvalidJSON(e))?; + .map_err(PropertySheetError::InvalidJSON)?; let mut states = Vec::new(); + let mut text_regexes = Vec::new(); + let mut text_regex_patterns = Vec::new(); for state in input.states.iter() { let mut transitions = HashMap::new(); let node_kind_count = language.node_kind_count(); for transition in state.transitions.iter() { - for i in 0..node_kind_count { - let i = i as u16; - if language.node_kind_is_named(i) == transition.named - && transition.kind == language.node_kind_for_id(i) + let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() { + if let Some(index) = text_regex_patterns.iter().position(|r| *r == regex_pattern) { + Some(index) + } else { + text_regex_patterns.push(regex_pattern); + text_regexes.push(Regex::new(&regex_pattern).map_err(PropertySheetError::InvalidRegex)?); + Some(text_regexes.len() - 1) + } + } else { + None + }; + + for i in 0..(node_kind_count as u16) { + if + transition.kind == language.node_kind_for_id(i) && + transition.named == language.node_kind_is_named(i) { let entry = transitions.entry(i).or_insert(Vec::new()); - let text_regex = if let Some(text) = transition.text.as_ref() { - Some(Regex::new(&text).map_err(|e| PropertySheetError::InvalidRegex(e))?) - } else { - None - }; entry.push(PropertyTransition { child_index: transition.index, state_id: transition.state_id, - text_regex + text_regex_index, }); } } @@ -734,6 +744,7 @@ impl PropertySheet

{ Ok(Self { property_sets: input.property_sets, states, + text_regexes, }) } } From ba9da0a9b48dd7d374438eece53749061453fefe Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Dec 2018 10:35:03 -0800 Subject: [PATCH 50/58] 0.3.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f61b1583..fde4fd31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.3.3" +version = "0.3.4" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From 85347541f155736e423203944903033c76993187 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Dec 2018 17:30:12 -0800 Subject: [PATCH 51/58] Allow PropertySheet selectors to match the root node Co-Authored-By: Timothy Clem --- src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d70dc607..ad31d3c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -550,13 +550,16 @@ impl<'a> Drop for TreeCursor<'a> { impl<'a, P: DeserializeOwned> TreePropertyCursor<'a, P> { fn new(tree: &'a Tree, property_sheet: &'a PropertySheet

, source: &'a str) -> Self { - Self { + let mut result = Self { cursor: tree.root_node().walk(), child_index_stack: vec![0], state_stack: vec![0], property_sheet, source, - } + }; + let state = result.next_state(&result.current_state(), result.cursor.node().kind_id(), 0); + result.state_stack.push(state); + result } pub fn node(&self) -> Node<'a> { From 494329c93b4c54b583e68634132e1f45b383e91f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 10:08:25 -0800 Subject: [PATCH 52/58] Add Parser.set_included_ranges and Node.range --- src/lib.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index ad31d3c4..98d2234e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,14 @@ pub struct Point { pub column: usize, } +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Range { + pub start_byte: usize, + pub end_byte: usize, + pub start_point: Point, + pub end_point: Point, +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct InputEdit { pub start_byte: usize, @@ -252,6 +260,14 @@ impl Parser { unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } } + pub fn set_included_ranges(&mut self, ranges: &[Range]) { + let ts_ranges: Vec = + ranges.iter().cloned().map(|range| range.into()).collect(); + unsafe { + ffi::ts_parser_set_included_ranges(self.0, ts_ranges.as_ptr(), ts_ranges.len() as u32) + }; + } + fn parse_utf8_ptr (*const u8, usize)>( &mut self, input: &mut T, @@ -421,6 +437,15 @@ impl<'tree> Node<'tree> { unsafe { ffi::ts_node_end_byte(self.0) as usize } } + pub fn range(&self) -> Range { + Range { + start_byte: self.start_byte(), + end_byte: self.end_byte(), + start_point: self.start_position(), + end_point: self.end_position(), + } + } + pub fn start_position(&self) -> Point { let result = unsafe { ffi::ts_node_start_point(self.0) }; result.into() @@ -677,6 +702,17 @@ impl From for Point { } } +impl Into for Range { + fn into(self) -> ffi::TSRange { 
+ ffi::TSRange { + start_byte: self.start_byte as u32, + end_byte: self.end_byte as u32, + start_point: self.start_point.into(), + end_point: self.end_point.into(), + } + } +} + impl PropertySheet

{ pub fn new(language: Language, json: &str) -> Result { #[derive(Deserialize, Debug)] From 4a361fbb3fafa41ffa1247501f8199938e5aab6c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 10:08:50 -0800 Subject: [PATCH 53/58] Implement Copy for Node --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 98d2234e..428e8101 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,6 +77,7 @@ pub struct PropertySheet> { text_regexes: Vec, } +#[derive(Clone, Copy)] pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); pub struct Parser(*mut ffi::TSParser); From bdd3f20522eefe01831ad9cd74002dfe95de20d1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:30:40 -0800 Subject: [PATCH 54/58] Add PropertySheet::map method --- src/lib.rs | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 428e8101..0a53e320 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,7 @@ pub enum PropertySheetError { InvalidRegex(regex::Error) } -pub struct PropertySheet> { +pub struct PropertySheet

> { states: Vec, property_sets: Vec

, text_regexes: Vec, @@ -86,7 +86,7 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -pub struct TreePropertyCursor<'a, P: 'a + DeserializeOwned> { +pub struct TreePropertyCursor<'a, P> { cursor: TreeCursor<'a>, state_stack: Vec, child_index_stack: Vec, @@ -370,7 +370,7 @@ impl Tree { self.root_node().walk() } - pub fn walk_with_properties<'a, P: DeserializeOwned>( + pub fn walk_with_properties<'a, P>( &'a self, property_sheet: &'a PropertySheet

, source: &'a str, @@ -574,7 +574,7 @@ impl<'a> Drop for TreeCursor<'a> { } } -impl<'a, P: DeserializeOwned> TreePropertyCursor<'a, P> { +impl<'a, P> TreePropertyCursor<'a, P> { fn new(tree: &'a Tree, property_sheet: &'a PropertySheet

, source: &'a str) -> Self { let mut result = Self { cursor: tree.root_node().walk(), @@ -714,8 +714,11 @@ impl Into for Range { } } -impl PropertySheet

{ - pub fn new(language: Language, json: &str) -> Result { +impl

PropertySheet

{ + pub fn new(language: Language, json: &str) -> Result + where + P: DeserializeOwned, + { #[derive(Deserialize, Debug)] struct PropertyTransitionJSON { #[serde(rename = "type")] @@ -787,6 +790,21 @@ impl PropertySheet

{ text_regexes, }) } + + pub fn map(self, mut f: F) -> Result, E> + where + F: FnMut(P) -> Result, + { + let mut property_sets = Vec::with_capacity(self.property_sets.len()); + for set in self.property_sets { + property_sets.push(f(set)?); + } + Ok(PropertySheet { + states: self.states, + text_regexes: self.text_regexes, + property_sets, + }) + } } #[cfg(test)] From 6d3835d292e7bc37965ad5623c3688c4862ee4b1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:32:10 -0800 Subject: [PATCH 55/58] Add Node::children method --- src/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 0a53e320..f1a83203 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -465,6 +465,12 @@ impl<'tree> Node<'tree> { unsafe { ffi::ts_node_child_count(self.0) as usize } } + pub fn children<'a>(&'a self) -> impl Iterator> + 'a { + (0..self.child_count()) + .into_iter() + .map(move |i| self.child(i).unwrap()) + } + pub fn named_child<'a>(&'a self, i: usize) -> Option { Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) }) } From 3f1fc65a2736a573920c4139a844d99187ebb894 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:32:22 -0800 Subject: [PATCH 56/58] Auto-format lib.rs --- src/lib.rs | 59 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f1a83203..65a57d16 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,12 +2,12 @@ mod ffi; #[macro_use] extern crate serde_derive; -extern crate serde_json; extern crate regex; extern crate serde; +extern crate serde_json; -use serde::de::DeserializeOwned; use regex::Regex; +use serde::de::DeserializeOwned; use std::collections::HashMap; use std::ffi::CStr; use std::fmt; @@ -68,7 +68,7 @@ struct PropertyState { #[derive(Debug)] pub enum PropertySheetError { InvalidJSON(serde_json::Error), - InvalidRegex(regex::Error) + InvalidRegex(regex::Error), } pub struct PropertySheet

> { @@ -187,7 +187,16 @@ impl Parser { pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { let bytes = input.as_bytes(); - self.parse_utf8(&mut |offset, _| &bytes[offset..], old_tree) + self.parse_utf8( + &mut |offset, _| { + if offset < bytes.len() { + &bytes[offset..] + } else { + &[] + } + }, + old_tree, + ) } pub fn parse_utf8<'a, T: FnMut(usize, Point) -> &'a [u8]>( @@ -565,7 +574,8 @@ impl<'a> TreeCursor<'a> { } pub fn goto_first_child_for_index(&mut self, index: usize) -> Option { - let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; + let result = + unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; if result < 0 { None } else { @@ -645,7 +655,12 @@ impl<'a, P> TreePropertyCursor<'a, P> { } } - fn next_state(&self, state: &PropertyState, node_kind_id: u16, node_child_index: usize) -> usize { + fn next_state( + &self, + state: &PropertyState, + node_kind_id: u16, + node_child_index: usize, + ) -> usize { state .transitions .get(&node_kind_id) @@ -748,8 +763,8 @@ impl

PropertySheet

{ property_sets: Vec

, } - let input: PropertySheetJSON

= serde_json::from_str(json) - .map_err(PropertySheetError::InvalidJSON)?; + let input: PropertySheetJSON

= + serde_json::from_str(json).map_err(PropertySheetError::InvalidJSON)?; let mut states = Vec::new(); let mut text_regexes = Vec::new(); let mut text_regex_patterns = Vec::new(); @@ -759,11 +774,15 @@ impl

PropertySheet

{ let node_kind_count = language.node_kind_count(); for transition in state.transitions.iter() { let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() { - if let Some(index) = text_regex_patterns.iter().position(|r| *r == regex_pattern) { + if let Some(index) = + text_regex_patterns.iter().position(|r| *r == regex_pattern) + { Some(index) } else { text_regex_patterns.push(regex_pattern); - text_regexes.push(Regex::new(&regex_pattern).map_err(PropertySheetError::InvalidRegex)?); + text_regexes.push( + Regex::new(&regex_pattern).map_err(PropertySheetError::InvalidRegex)?, + ); Some(text_regexes.len() - 1) } } else { @@ -771,9 +790,8 @@ impl

PropertySheet

{ }; for i in 0..(node_kind_count as u16) { - if - transition.kind == language.node_kind_for_id(i) && - transition.named == language.node_kind_is_named(i) + if transition.kind == language.node_kind_for_id(i) + && transition.named == language.node_kind_is_named(i) { let entry = transitions.entry(i).or_insert(Vec::new()); entry.push(PropertyTransition { @@ -928,7 +946,10 @@ mod tests { define: Option, } - let empty_properties = Properties { reference: None, define: None }; + let empty_properties = Properties { + reference: None, + define: None, + }; let property_sheet = PropertySheet::::new( rust(), @@ -1018,7 +1039,10 @@ mod tests { assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties().reference, Some("function".to_owned())); + assert_eq!( + cursor.node_properties().reference, + Some("function".to_owned()) + ); } #[test] @@ -1097,7 +1121,10 @@ mod tests { assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties().scope, Some("constructor".to_owned())); + assert_eq!( + cursor.node_properties().scope, + Some("constructor".to_owned()) + ); } #[test] From d79203f58c7e3bb06232385a6da701ed5dfde739 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:42:46 -0800 Subject: [PATCH 57/58] Add test script --- script/test.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 script/test.sh diff --git a/script/test.sh b/script/test.sh new file mode 100755 index 00000000..eb6183c0 --- /dev/null +++ b/script/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +RUST_TREE_SITTER_TEST=1 cargo test $@ From 7bd9eaa97065c3153ae44d1f219d3bfc741e82a6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:43:44 -0800 Subject: [PATCH 58/58] 0.3.5 --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fde4fd31..7f0458ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,8 @@ 
[package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.3.4" +version = "0.3.5" authors = ["Max Brunsfeld "] -build = "build.rs" license = "MIT" readme = "README.md" keywords = ["incremental", "parsing"]