diff --git a/.gitignore b/.gitignore index a9d37c56..fbd4fda0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +fixtures/tree-sitter-rust diff --git a/.travis.yml b/.travis.yml index 32e3a71f..10fcfe94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,12 @@ language: rust rust: - stable +env: + - RUST_TREE_SITTER_TEST=1 + +before_install: + - ./script/fetch-test-fixtures.sh + branches: only: - master diff --git a/Cargo.toml b/Cargo.toml index 0a93febe..e20d40aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,5 @@ include = [ "vendor/tree-sitter/externals/utf8proc/utf8proc*" ] -[dependencies] - [build-dependencies] cc = "1.0" diff --git a/build.rs b/build.rs index ad62f3a1..fa8b41ea 100644 --- a/build.rs +++ b/build.rs @@ -1,10 +1,17 @@ extern crate cc; +use std::env; use std::path::Path; - fn main() { - let dir_path = Path::new("vendor/tree-sitter/src/runtime"); + let root_path = Path::new("vendor/tree-sitter"); + + let mut config = cc::Build::new(); + config.flag_if_supported("-std=c99"); + config.flag_if_supported("-Wno-unused-parameter"); + config.include(root_path.join(Path::new("src"))); + config.include(root_path.join(Path::new("include"))); + config.include(root_path.join(Path::new("externals/utf8proc"))); let source_filenames = [ "get_changed_ranges.c", @@ -19,16 +26,18 @@ fn main() { "utf16.c", ]; - let mut config = cc::Build::new(); - config.include("vendor/tree-sitter/src"); - config.include("vendor/tree-sitter/include"); - config.include("vendor/tree-sitter/externals/utf8proc"); - config.flag_if_supported("-std=c99"); - config.flag_if_supported("-Wno-unused-parameter"); + config.files(source_filenames.iter().map(|source_filename| { + root_path + .join(Path::new(&"src/runtime")) + .join(Path::new(&source_filename)) + })); - for source_filename in source_filenames.iter() { - let source_path = dir_path.join(Path::new(&source_filename)); - config.file(&source_path.to_str().unwrap()); + config.file(root_path.join(Path::new("externals/utf8proc/utf8proc.c"))); + + if env::var("RUST_TREE_SITTER_TEST").is_ok() { + let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); + config.file(parser_dir.join("parser.c")); + config.file(parser_dir.join("scanner.c")); } config.compile("libruntime.a") diff --git a/fixtures/.gitkeep b/fixtures/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/script/fetch-test-fixtures.sh b/script/fetch-test-fixtures.sh new file mode 100755 index 00000000..24cc316a --- /dev/null +++ b/script/fetch-test-fixtures.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +grammar_dir='fixtures/tree-sitter-rust' +grammar_url='https://github.com/tree-sitter/tree-sitter-rust' + +if [ ! -d $grammar_dir ]; then + git clone $grammar_url $grammar_dir --depth=1 +fi + +( + cd $grammar_dir; + git fetch origin master --depth=1 + git reset --hard origin/master; +) diff --git a/src/lib.rs b/src/lib.rs index ef11757a..fa1db0f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,19 +8,19 @@ use std::ptr; #[derive(Clone, Copy)] pub struct Symbol(ffi::TSSymbol); -#[derive(Clone, Copy)] -pub struct Language(*const ffi::TSLanguage); +pub type Language = *const ffi::TSLanguage; pub trait Utf16Input { - fn read(&self) -> &[u16]; - fn seek(&self, u32, Point); + fn read(&mut self) -> &[u16]; + fn seek(&mut self, u32, Point); } pub trait Utf8Input { - fn read(&self) -> &[u8]; - fn seek(&self, u32, Point); + fn read(&mut self) -> &[u8]; + fn seek(&mut self, u32, Point); } +#[derive(Debug, PartialEq, Eq)] pub enum LogType { Parse, Lex, @@ -50,6 +50,11 @@ pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +struct FlatInput<'a> { + bytes: &'a [u8], + offset: usize, +} + impl Parser { pub fn new() -> Parser { unsafe { @@ -60,11 +65,11 @@ impl Parser { pub fn set_language(&mut self, language: Language) { unsafe { - ffi::ts_parser_set_language(self.0, language.0); + ffi::ts_parser_set_language(self.0, language); } } - pub fn set_logger ()>(&mut self, logger: &mut F) { + pub fn set_logger ()>(&mut self, logger: Option<&mut F>) { unsafe extern "C" fn log ()>( payload: *mut c_void, c_log_type: ffi::TSLogType, @@ -81,14 +86,24 @@ impl Parser { } }; - let c_logger = ffi::TSLogger { - payload: logger as *mut F as *mut c_void, - log: Some(log::), - }; + let c_logger; + if let Some(logger) = logger { + c_logger = ffi::TSLogger { + payload: logger as *mut F as *mut c_void, + log: Some(log::), + }; + } else { + c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None }; + } unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; } + pub fn parse_str(&mut self, input: &str, old_tree: Option) -> Option { + let mut input = FlatInput { bytes: input.as_bytes(), offset: 0}; + self.parse_utf8(&mut input, old_tree) + } + pub fn parse_utf8( &mut self, input: &mut T, @@ -239,9 +254,7 @@ impl<'a> Node<'a> { } pub fn name(&self) -> &'static str { - unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } - .to_str() - .unwrap() + unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() } pub fn start_index(&self) -> u32 { @@ -272,11 +285,24 @@ impl<'a> Node<'a> { Self::new(unsafe { ffi::ts_node_child(self.0, i) }) } + pub fn child_count(&self) -> u32 { + unsafe { ffi::ts_node_child_count(self.0) } + } + pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } + + pub fn to_sexp(&self) -> String { + let c_string = unsafe { ffi::ts_node_string(self.0) }; + let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); + unsafe { free(c_string as *mut c_void) }; + result + } } +extern "C" { fn free(pointer: *mut c_void); } + impl<'a> TreeCursor<'a> { fn node(&'a self) -> Node<'a> { Node( @@ -322,8 +348,63 @@ impl Into for Point { } } +impl<'a> Utf8Input for FlatInput<'a> { + fn read(&mut self) -> &[u8] { + let result = &self.bytes[self.offset..]; + self.offset = self.bytes.len(); + result + } + + fn seek(&mut self, offset: u32, _position: Point) { + self.offset = offset as usize; + } +} + #[cfg(test)] mod tests { + use super::*; + + fn rust() -> Language { unsafe { tree_sitter_rust() } } + extern "C" { fn tree_sitter_rust() -> Language; } + #[test] - fn it_works() {} + fn test_basic_parsing() { + let mut parser = Parser::new(); + parser.set_language(rust()); + + let tree = parser.parse_str(" + struct Stuff {} + fn main() {} + ", None).unwrap(); + + let root_node = tree.root_node(); + assert_eq!(root_node.name(), "source_file"); + + assert_eq!( + root_node.to_sexp(), + "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" + ); + + let struct_node = root_node.child(0).unwrap(); + assert_eq!(struct_node.name(), "struct_item"); + } + + #[test] + fn test_logging() { + let mut parser = Parser::new(); + parser.set_language(rust()); + + let mut messages = Vec::new(); + parser.set_logger(Some(&mut |log_type, message| { + messages.push((log_type, message.to_string())); + })); + + parser.parse_str(" + struct Stuff {} + fn main() {} + ", None).unwrap(); + + assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string()))); + assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); + } }