Get basic parsing working, add some unit tests

This commit is contained in:
Max Brunsfeld 2018-05-17 17:16:35 -07:00
parent ead0e31262
commit 08217fff8d
7 changed files with 138 additions and 29 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
target
Cargo.lock
fixtures/tree-sitter-rust

View file

@ -3,6 +3,12 @@ language: rust
rust:
- stable
env:
- RUST_TREE_SITTER_TEST=1
before_install:
- ./script/fetch-test-fixtures.sh
branches:
only:
- master

View file

@ -9,7 +9,5 @@ include = [
"vendor/tree-sitter/externals/utf8proc/utf8proc*"
]
[dependencies]
[build-dependencies]
cc = "1.0"

View file

@ -1,10 +1,17 @@
extern crate cc;
use std::env;
use std::path::Path;
fn main() {
let dir_path = Path::new("vendor/tree-sitter/src/runtime");
let root_path = Path::new("vendor/tree-sitter");
let mut config = cc::Build::new();
config.flag_if_supported("-std=c99");
config.flag_if_supported("-Wno-unused-parameter");
config.include(root_path.join(Path::new("src")));
config.include(root_path.join(Path::new("include")));
config.include(root_path.join(Path::new("externals/utf8proc")));
let source_filenames = [
"get_changed_ranges.c",
@ -19,16 +26,18 @@ fn main() {
"utf16.c",
];
let mut config = cc::Build::new();
config.include("vendor/tree-sitter/src");
config.include("vendor/tree-sitter/include");
config.include("vendor/tree-sitter/externals/utf8proc");
config.flag_if_supported("-std=c99");
config.flag_if_supported("-Wno-unused-parameter");
config.files(source_filenames.iter().map(|source_filename| {
root_path
.join(Path::new(&"src/runtime"))
.join(Path::new(&source_filename))
}));
for source_filename in source_filenames.iter() {
let source_path = dir_path.join(Path::new(&source_filename));
config.file(&source_path.to_str().unwrap());
config.file(root_path.join(Path::new("externals/utf8proc/utf8proc.c")));
if env::var("RUST_TREE_SITTER_TEST").is_ok() {
let parser_dir = Path::new("fixtures/tree-sitter-rust/src");
config.file(parser_dir.join("parser.c"));
config.file(parser_dir.join("scanner.c"));
}
config.compile("libruntime.a")

0
fixtures/.gitkeep Normal file
View file

14
script/fetch-test-fixtures.sh Executable file
View file

@ -0,0 +1,14 @@
#!/bin/bash
grammar_dir='fixtures/tree-sitter-rust'
grammar_url='https://github.com/tree-sitter/tree-sitter-rust'
if [ ! -d $grammar_dir ]; then
git clone $grammar_url $grammar_dir --depth=1
fi
(
cd $grammar_dir;
git fetch origin master --depth=1
git reset --hard origin/master;
)

View file

@ -8,19 +8,19 @@ use std::ptr;
#[derive(Clone, Copy)]
pub struct Symbol(ffi::TSSymbol);
#[derive(Clone, Copy)]
pub struct Language(*const ffi::TSLanguage);
pub type Language = *const ffi::TSLanguage;
pub trait Utf16Input {
fn read(&self) -> &[u16];
fn seek(&self, u32, Point);
fn read(&mut self) -> &[u16];
fn seek(&mut self, u32, Point);
}
pub trait Utf8Input {
fn read(&self) -> &[u8];
fn seek(&self, u32, Point);
fn read(&mut self) -> &[u8];
fn seek(&mut self, u32, Point);
}
#[derive(Debug, PartialEq, Eq)]
pub enum LogType {
Parse,
Lex,
@ -50,6 +50,11 @@ pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding);
pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
struct FlatInput<'a> {
bytes: &'a [u8],
offset: usize,
}
impl Parser {
pub fn new() -> Parser {
unsafe {
@ -60,11 +65,11 @@ impl Parser {
pub fn set_language(&mut self, language: Language) {
unsafe {
ffi::ts_parser_set_language(self.0, language.0);
ffi::ts_parser_set_language(self.0, language);
}
}
pub fn set_logger<F: FnMut(LogType, &str) -> ()>(&mut self, logger: &mut F) {
pub fn set_logger<F: FnMut(LogType, &str) -> ()>(&mut self, logger: Option<&mut F>) {
unsafe extern "C" fn log<F: FnMut(LogType, &str) -> ()>(
payload: *mut c_void,
c_log_type: ffi::TSLogType,
@ -81,14 +86,24 @@ impl Parser {
}
};
let c_logger = ffi::TSLogger {
payload: logger as *mut F as *mut c_void,
log: Some(log::<F>),
};
let c_logger;
if let Some(logger) = logger {
c_logger = ffi::TSLogger {
payload: logger as *mut F as *mut c_void,
log: Some(log::<F>),
};
} else {
c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None };
}
unsafe { ffi::ts_parser_set_logger(self.0, c_logger) };
}
pub fn parse_str(&mut self, input: &str, old_tree: Option<Tree>) -> Option<Tree> {
let mut input = FlatInput { bytes: input.as_bytes(), offset: 0};
self.parse_utf8(&mut input, old_tree)
}
pub fn parse_utf8<T: Utf8Input>(
&mut self,
input: &mut T,
@ -239,9 +254,7 @@ impl<'a> Node<'a> {
}
pub fn name(&self) -> &'static str {
unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }
.to_str()
.unwrap()
unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap()
}
pub fn start_index(&self) -> u32 {
@ -272,11 +285,24 @@ impl<'a> Node<'a> {
Self::new(unsafe { ffi::ts_node_child(self.0, i) })
}
pub fn child_count(&self) -> u32 {
unsafe { ffi::ts_node_child_count(self.0) }
}
pub fn parent(&self) -> Option<Node> {
Self::new(unsafe { ffi::ts_node_parent(self.0) })
}
pub fn to_sexp(&self) -> String {
let c_string = unsafe { ffi::ts_node_string(self.0) };
let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string();
unsafe { free(c_string as *mut c_void) };
result
}
}
extern "C" { fn free(pointer: *mut c_void); }
impl<'a> TreeCursor<'a> {
fn node(&'a self) -> Node<'a> {
Node(
@ -322,8 +348,63 @@ impl Into<ffi::TSPoint> for Point {
}
}
impl<'a> Utf8Input for FlatInput<'a> {
fn read(&mut self) -> &[u8] {
let result = &self.bytes[self.offset..];
self.offset = self.bytes.len();
result
}
fn seek(&mut self, offset: u32, _position: Point) {
self.offset = offset as usize;
}
}
#[cfg(test)]
mod tests {
use super::*;
fn rust() -> Language { unsafe { tree_sitter_rust() } }
extern "C" { fn tree_sitter_rust() -> Language; }
#[test]
fn it_works() {}
fn test_basic_parsing() {
let mut parser = Parser::new();
parser.set_language(rust());
let tree = parser.parse_str("
struct Stuff {}
fn main() {}
", None).unwrap();
let root_node = tree.root_node();
assert_eq!(root_node.name(), "source_file");
assert_eq!(
root_node.to_sexp(),
"(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))"
);
let struct_node = root_node.child(0).unwrap();
assert_eq!(struct_node.name(), "struct_item");
}
#[test]
fn test_logging() {
let mut parser = Parser::new();
parser.set_language(rust());
let mut messages = Vec::new();
parser.set_logger(Some(&mut |log_type, message| {
messages.push((log_type, message.to_string()));
}));
parser.parse_str("
struct Stuff {}
fn main() {}
", None).unwrap();
assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string())));
assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string())));
}
}