Get basic parsing working, add some unit tests
This commit is contained in:
parent
ead0e31262
commit
08217fff8d
7 changed files with 138 additions and 29 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,2 +1,3 @@
|
|||
target
|
||||
Cargo.lock
|
||||
fixtures/tree-sitter-rust
|
||||
|
|
|
|||
|
|
@ -3,6 +3,12 @@ language: rust
|
|||
rust:
|
||||
- stable
|
||||
|
||||
env:
|
||||
- RUST_TREE_SITTER_TEST=1
|
||||
|
||||
before_install:
|
||||
- ./script/fetch-test-fixtures.sh
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
|
|
|
|||
|
|
@ -9,7 +9,5 @@ include = [
|
|||
"vendor/tree-sitter/externals/utf8proc/utf8proc*"
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0"
|
||||
|
|
|
|||
31
build.rs
31
build.rs
|
|
@ -1,10 +1,17 @@
|
|||
extern crate cc;
|
||||
|
||||
use std::env;
|
||||
use std::path::Path;
|
||||
|
||||
|
||||
fn main() {
|
||||
let dir_path = Path::new("vendor/tree-sitter/src/runtime");
|
||||
let root_path = Path::new("vendor/tree-sitter");
|
||||
|
||||
let mut config = cc::Build::new();
|
||||
config.flag_if_supported("-std=c99");
|
||||
config.flag_if_supported("-Wno-unused-parameter");
|
||||
config.include(root_path.join(Path::new("src")));
|
||||
config.include(root_path.join(Path::new("include")));
|
||||
config.include(root_path.join(Path::new("externals/utf8proc")));
|
||||
|
||||
let source_filenames = [
|
||||
"get_changed_ranges.c",
|
||||
|
|
@ -19,16 +26,18 @@ fn main() {
|
|||
"utf16.c",
|
||||
];
|
||||
|
||||
let mut config = cc::Build::new();
|
||||
config.include("vendor/tree-sitter/src");
|
||||
config.include("vendor/tree-sitter/include");
|
||||
config.include("vendor/tree-sitter/externals/utf8proc");
|
||||
config.flag_if_supported("-std=c99");
|
||||
config.flag_if_supported("-Wno-unused-parameter");
|
||||
config.files(source_filenames.iter().map(|source_filename| {
|
||||
root_path
|
||||
.join(Path::new(&"src/runtime"))
|
||||
.join(Path::new(&source_filename))
|
||||
}));
|
||||
|
||||
for source_filename in source_filenames.iter() {
|
||||
let source_path = dir_path.join(Path::new(&source_filename));
|
||||
config.file(&source_path.to_str().unwrap());
|
||||
config.file(root_path.join(Path::new("externals/utf8proc/utf8proc.c")));
|
||||
|
||||
if env::var("RUST_TREE_SITTER_TEST").is_ok() {
|
||||
let parser_dir = Path::new("fixtures/tree-sitter-rust/src");
|
||||
config.file(parser_dir.join("parser.c"));
|
||||
config.file(parser_dir.join("scanner.c"));
|
||||
}
|
||||
|
||||
config.compile("libruntime.a")
|
||||
|
|
|
|||
0
fixtures/.gitkeep
Normal file
0
fixtures/.gitkeep
Normal file
14
script/fetch-test-fixtures.sh
Executable file
14
script/fetch-test-fixtures.sh
Executable file
|
|
@ -0,0 +1,14 @@
|
|||
#!/bin/bash
|
||||
|
||||
grammar_dir='fixtures/tree-sitter-rust'
|
||||
grammar_url='https://github.com/tree-sitter/tree-sitter-rust'
|
||||
|
||||
if [ ! -d $grammar_dir ]; then
|
||||
git clone $grammar_url $grammar_dir --depth=1
|
||||
fi
|
||||
|
||||
(
|
||||
cd $grammar_dir;
|
||||
git fetch origin master --depth=1
|
||||
git reset --hard origin/master;
|
||||
)
|
||||
113
src/lib.rs
113
src/lib.rs
|
|
@ -8,19 +8,19 @@ use std::ptr;
|
|||
#[derive(Clone, Copy)]
|
||||
pub struct Symbol(ffi::TSSymbol);
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Language(*const ffi::TSLanguage);
|
||||
pub type Language = *const ffi::TSLanguage;
|
||||
|
||||
pub trait Utf16Input {
|
||||
fn read(&self) -> &[u16];
|
||||
fn seek(&self, u32, Point);
|
||||
fn read(&mut self) -> &[u16];
|
||||
fn seek(&mut self, u32, Point);
|
||||
}
|
||||
|
||||
pub trait Utf8Input {
|
||||
fn read(&self) -> &[u8];
|
||||
fn seek(&self, u32, Point);
|
||||
fn read(&mut self) -> &[u8];
|
||||
fn seek(&mut self, u32, Point);
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum LogType {
|
||||
Parse,
|
||||
Lex,
|
||||
|
|
@ -50,6 +50,11 @@ pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding);
|
|||
|
||||
pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
|
||||
|
||||
struct FlatInput<'a> {
|
||||
bytes: &'a [u8],
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Parser {
|
||||
unsafe {
|
||||
|
|
@ -60,11 +65,11 @@ impl Parser {
|
|||
|
||||
pub fn set_language(&mut self, language: Language) {
|
||||
unsafe {
|
||||
ffi::ts_parser_set_language(self.0, language.0);
|
||||
ffi::ts_parser_set_language(self.0, language);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_logger<F: FnMut(LogType, &str) -> ()>(&mut self, logger: &mut F) {
|
||||
pub fn set_logger<F: FnMut(LogType, &str) -> ()>(&mut self, logger: Option<&mut F>) {
|
||||
unsafe extern "C" fn log<F: FnMut(LogType, &str) -> ()>(
|
||||
payload: *mut c_void,
|
||||
c_log_type: ffi::TSLogType,
|
||||
|
|
@ -81,14 +86,24 @@ impl Parser {
|
|||
}
|
||||
};
|
||||
|
||||
let c_logger = ffi::TSLogger {
|
||||
payload: logger as *mut F as *mut c_void,
|
||||
log: Some(log::<F>),
|
||||
};
|
||||
let c_logger;
|
||||
if let Some(logger) = logger {
|
||||
c_logger = ffi::TSLogger {
|
||||
payload: logger as *mut F as *mut c_void,
|
||||
log: Some(log::<F>),
|
||||
};
|
||||
} else {
|
||||
c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None };
|
||||
}
|
||||
|
||||
unsafe { ffi::ts_parser_set_logger(self.0, c_logger) };
|
||||
}
|
||||
|
||||
pub fn parse_str(&mut self, input: &str, old_tree: Option<Tree>) -> Option<Tree> {
|
||||
let mut input = FlatInput { bytes: input.as_bytes(), offset: 0};
|
||||
self.parse_utf8(&mut input, old_tree)
|
||||
}
|
||||
|
||||
pub fn parse_utf8<T: Utf8Input>(
|
||||
&mut self,
|
||||
input: &mut T,
|
||||
|
|
@ -239,9 +254,7 @@ impl<'a> Node<'a> {
|
|||
}
|
||||
|
||||
pub fn name(&self) -> &'static str {
|
||||
unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }
|
||||
.to_str()
|
||||
.unwrap()
|
||||
unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap()
|
||||
}
|
||||
|
||||
pub fn start_index(&self) -> u32 {
|
||||
|
|
@ -272,11 +285,24 @@ impl<'a> Node<'a> {
|
|||
Self::new(unsafe { ffi::ts_node_child(self.0, i) })
|
||||
}
|
||||
|
||||
pub fn child_count(&self) -> u32 {
|
||||
unsafe { ffi::ts_node_child_count(self.0) }
|
||||
}
|
||||
|
||||
pub fn parent(&self) -> Option<Node> {
|
||||
Self::new(unsafe { ffi::ts_node_parent(self.0) })
|
||||
}
|
||||
|
||||
pub fn to_sexp(&self) -> String {
|
||||
let c_string = unsafe { ffi::ts_node_string(self.0) };
|
||||
let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string();
|
||||
unsafe { free(c_string as *mut c_void) };
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" { fn free(pointer: *mut c_void); }
|
||||
|
||||
impl<'a> TreeCursor<'a> {
|
||||
fn node(&'a self) -> Node<'a> {
|
||||
Node(
|
||||
|
|
@ -322,8 +348,63 @@ impl Into<ffi::TSPoint> for Point {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> Utf8Input for FlatInput<'a> {
|
||||
fn read(&mut self) -> &[u8] {
|
||||
let result = &self.bytes[self.offset..];
|
||||
self.offset = self.bytes.len();
|
||||
result
|
||||
}
|
||||
|
||||
fn seek(&mut self, offset: u32, _position: Point) {
|
||||
self.offset = offset as usize;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn rust() -> Language { unsafe { tree_sitter_rust() } }
|
||||
extern "C" { fn tree_sitter_rust() -> Language; }
|
||||
|
||||
#[test]
|
||||
fn it_works() {}
|
||||
fn test_basic_parsing() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(rust());
|
||||
|
||||
let tree = parser.parse_str("
|
||||
struct Stuff {}
|
||||
fn main() {}
|
||||
", None).unwrap();
|
||||
|
||||
let root_node = tree.root_node();
|
||||
assert_eq!(root_node.name(), "source_file");
|
||||
|
||||
assert_eq!(
|
||||
root_node.to_sexp(),
|
||||
"(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))"
|
||||
);
|
||||
|
||||
let struct_node = root_node.child(0).unwrap();
|
||||
assert_eq!(struct_node.name(), "struct_item");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_logging() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(rust());
|
||||
|
||||
let mut messages = Vec::new();
|
||||
parser.set_logger(Some(&mut |log_type, message| {
|
||||
messages.push((log_type, message.to_string()));
|
||||
}));
|
||||
|
||||
parser.parse_str("
|
||||
struct Stuff {}
|
||||
fn main() {}
|
||||
", None).unwrap();
|
||||
|
||||
assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string())));
|
||||
assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string())));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue