Add test subcommand
Co-Authored-By: Timothy Clem <timothy.clem@gmail.com>
This commit is contained in:
parent
f059557a9d
commit
8291d294fb
6 changed files with 496 additions and 9 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -641,6 +641,7 @@ dependencies = [
|
|||
name = "tree-sitter-cli"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
|
@ -648,6 +649,7 @@ dependencies = [
|
|||
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ name = "tree-sitter"
|
|||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
ansi_term = "0.11"
|
||||
lazy_static = "1.2.0"
|
||||
smallbitvec = "2.3.0"
|
||||
clap = "2.32"
|
||||
|
|
@ -20,6 +21,7 @@ rusqlite = "0.14.0"
|
|||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
regex-syntax = "0.6.4"
|
||||
regex = "1"
|
||||
|
||||
[dependencies.tree-sitter]
|
||||
path = "../lib"
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
use std::io;
|
||||
|
||||
// A simple string-based error type for the CLI: every lower-level error is
// flattened into a human-readable message via the `From` impls below.
#[derive(Debug)]
pub struct Error(pub String);
|
||||
|
||||
|
|
@ -22,3 +24,15 @@ impl From<serde_json::Error> for Error {
|
|||
Error(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(error: io::Error) -> Self {
|
||||
Error(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Error {
|
||||
fn from(error: String) -> Self {
|
||||
Error(error)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
241
cli/src/loader.rs
Normal file
241
cli/src/loader.rs
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
use libloading::{Library, Symbol};
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::mem;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
|
||||
// Well-known file locations inside a grammar repository, relative to its root.
const PACKAGE_JSON_PATH: &'static str = "package.json";
const PARSER_C_PATH: &'static str = "src/parser.c";
const SCANNER_C_PATH: &'static str = "src/scanner.c";
const SCANNER_CC_PATH: &'static str = "src/scanner.cc";

// Extension used for the compiled parser dynamic library on this platform.
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";

#[cfg(windows)]
const DYLIB_EXTENSION: &'static str = "dll";
|
||||
|
||||
// One grammar repository (a `tree-sitter-*` directory) known to the loader.
struct LanguageRepo {
    // Language name, i.e. the directory name with the `tree-sitter-` prefix stripped.
    name: String,
    // Absolute-or-relative path to the repository root.
    path: PathBuf,
    // Lazily-loaded compiled language; `None` until first requested.
    language: Option<Language>,
    // Configurations parsed from the repo's `package.json` "tree-sitter" array.
    configurations: Vec<LanguageConfiguration>,
}
|
||||
|
||||
// One entry of the `"tree-sitter"` array in a grammar repo's `package.json`.
pub struct LanguageConfiguration {
    name: String,
    // Optional regex matched against file content to disambiguate languages.
    content_regex: Option<Regex>,
    // Optional regex matched against a file's first line (e.g. shebangs).
    first_line_regex: Option<Regex>,
    // File names / extensions this configuration claims.
    file_types: Vec<String>,
    // `Err(path)` until the highlight sheet is loaded from `path`; `None` if
    // the configuration declares no highlights.
    highlight_property_sheet: Option<Result<PropertySheet, PathBuf>>,
}
|
||||
|
||||
// Finds grammar repositories, compiles their parsers into dynamic libraries,
// and loads `Language` values from those libraries on demand.
pub struct Loader {
    // Directory where compiled parser dylibs are cached (e.g. `~/.tree-sitter`).
    parser_lib_path: PathBuf,
    language_repos: Vec<LanguageRepo>,
    // Maps a file name or extension to `(repo index, configuration index)` pairs.
    language_configuration_indices_by_file_type: HashMap<String, Vec<(usize, usize)>>,
}
|
||||
|
||||
// NOTE(review): these impls assert that `Loader` (including the `Language` /
// `PropertySheet` values it caches, which presumably hold raw pointers into
// loaded dylibs) is safe to move and share across threads. That invariant is
// not provable from this file — confirm against the tree-sitter bindings
// before relying on cross-thread use.
unsafe impl Send for Loader {}
unsafe impl Sync for Loader {}
|
||||
|
||||
impl Loader {
    /// Create a loader that caches compiled parser dylibs under `parser_lib_path`.
    pub fn new(parser_lib_path: PathBuf) -> Self {
        Loader {
            parser_lib_path,
            language_repos: Vec::new(),
            language_configuration_indices_by_file_type: HashMap::new(),
        }
    }

    /// Scan each directory in `parser_src_paths` for `tree-sitter-*` child
    /// directories and register their language configurations. A repo that
    /// fails to load is reported on stderr but does not abort the scan.
    pub fn find_parsers(&mut self, parser_src_paths: &Vec<PathBuf>) -> io::Result<()> {
        for parser_container_dir in parser_src_paths.iter() {
            for entry in fs::read_dir(parser_container_dir)? {
                let entry = entry?;
                if let Some(parser_dir_name) = entry.file_name().to_str() {
                    if parser_dir_name.starts_with("tree-sitter-") {
                        if self.load_language_configurations(
                            &parser_container_dir.join(parser_dir_name),
                        ).is_err() {
                            eprintln!("Error loading {}", parser_dir_name);
                        }
                    }
                }
            }
        }
        Ok(())
    }

    /// Treat `path` as a grammar repository root, register it, and load its
    /// language together with its first configuration (index 0).
    pub fn language_configuration_at_path(
        &mut self,
        path: &Path,
    ) -> io::Result<Option<(Language, &LanguageConfiguration)>> {
        let repo_index = self.load_language_configurations(path)?;
        self.load_language_from_repo(repo_index, 0)
    }

    /// Look up a language for a file: the full file name is tried first, then
    /// the extension. Returns `None` when no registered configuration matches.
    pub fn language_for_file_name(
        &mut self,
        path: &Path,
    ) -> io::Result<Option<(Language, &LanguageConfiguration)>> {
        let indices = path
            .file_name()
            .and_then(|n| n.to_str())
            .and_then(|file_name| {
                self.language_configuration_indices_by_file_type
                    .get(file_name)
            })
            .or_else(|| {
                path.extension()
                    .and_then(|extension| extension.to_str())
                    .and_then(|extension| {
                        self.language_configuration_indices_by_file_type
                            .get(extension)
                    })
            });

        if let Some(indices) = indices {
            // TODO use `content-regex` to pick one
            // (currently the first matching configuration wins unconditionally)
            for (repo_index, conf_index) in indices {
                return self.load_language_from_repo(*repo_index, *conf_index);
            }
        }
        Ok(None)
    }

    // Load (or reuse the cached) compiled language for one repo, pairing it
    // with the configuration at `conf_index` if that index exists.
    fn load_language_from_repo(
        &mut self,
        repo_index: usize,
        conf_index: usize,
    ) -> io::Result<Option<(Language, &LanguageConfiguration)>> {
        let repo = &self.language_repos[repo_index];
        let language = if let Some(language) = repo.language {
            language
        } else {
            // First request for this repo: compile/load the dylib and memoize.
            let language = self.load_language_at_path(&repo.name, &repo.path)?;
            self.language_repos[repo_index].language = Some(language);
            language
        };
        if let Some(configuration) = self.language_repos[repo_index]
            .configurations
            .get(conf_index)
        {
            Ok(Some((language, configuration)))
        } else {
            Ok(None)
        }
    }

    // Compile the repo's parser (and optional scanner) into a cached dylib if
    // the cache is missing or stale, then dlopen it and call the exported
    // `tree_sitter_<name>()` constructor.
    fn load_language_at_path(&self, name: &str, language_path: &Path) -> io::Result<Language> {
        let parser_c_path = language_path.join(PARSER_C_PATH);
        let mut library_path = self.parser_lib_path.join(name);
        library_path.set_extension(DYLIB_EXTENSION);

        // Recompile when the cached dylib is absent or older than parser.c.
        if !library_path.exists() || was_modified_more_recently(&parser_c_path, &library_path)? {
            let compiler_name = std::env::var("CXX").unwrap_or("c++".to_owned());
            let mut command = Command::new(compiler_name);
            command
                .arg("-shared")
                .arg("-fPIC")
                .arg("-I")
                .arg(language_path.join("src"))
                .arg("-o")
                .arg(&library_path)
                .arg("-xc")
                .arg(parser_c_path);
            let scanner_c_path = language_path.join(SCANNER_C_PATH);
            let scanner_cc_path = language_path.join(SCANNER_CC_PATH);
            if scanner_c_path.exists() {
                command.arg("-xc").arg(scanner_c_path);
            } else if scanner_cc_path.exists() {
                command.arg("-xc++").arg(scanner_cc_path);
            }
            // NOTE(review): `output()` only propagates spawn failures; a
            // non-zero compiler exit status is silently ignored here, so a
            // failed compile surfaces later as a dlopen error instead.
            command.output()?;
        }

        let library = Library::new(library_path)?;
        let language_fn_name = format!("tree_sitter_{}", name);
        let language = unsafe {
            // SAFETY-relevant: assumes the dylib exports a C function named
            // `tree_sitter_<name>` returning a valid `Language`.
            let language_fn: Symbol<unsafe extern "C" fn() -> Language> =
                library.get(language_fn_name.as_bytes())?;
            language_fn()
        };
        // Deliberately leak the library handle: the returned `Language` points
        // into the dylib's code, which must stay mapped for the process lifetime.
        mem::forget(library);
        Ok(language)
    }

    // Parse `package.json` in `parser_path`, register its configurations and
    // file-type index entries, and return the new repo's index.
    fn load_language_configurations<'a>(&'a mut self, parser_path: &Path) -> io::Result<usize> {
        // Language name = directory name minus the "tree-sitter-" prefix.
        let name = parser_path
            .file_name()
            .unwrap()
            .to_str()
            .unwrap()
            .split_at("tree-sitter-".len())
            .1;

        // Local mirror of the `"tree-sitter"` entry shape in package.json.
        #[derive(Deserialize)]
        struct LanguageConfigurationJSON {
            name: String,
            #[serde(rename = "file-types")]
            file_types: Option<Vec<String>>,
            #[serde(rename = "content-regex")]
            content_regex: Option<String>,
            #[serde(rename = "first-line-regex")]
            first_line_regex: Option<String>,
            highlights: Option<String>,
        }

        #[derive(Deserialize)]
        struct PackageJSON {
            #[serde(rename = "tree-sitter")]
            tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
        }

        let package_json_contents = fs::read_to_string(&parser_path.join(PACKAGE_JSON_PATH))?;
        let package_json: PackageJSON = serde_json::from_str(&package_json_contents)?;
        let configurations = package_json
            .tree_sitter
            .map_or(Vec::new(), |configurations| {
                configurations
                    .into_iter()
                    .map(|conf| LanguageConfiguration {
                        name: conf.name,
                        file_types: conf.file_types.unwrap_or(Vec::new()),
                        // Invalid user-supplied regexes are silently dropped (-> None).
                        content_regex: conf
                            .content_regex
                            .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
                        first_line_regex: conf
                            .first_line_regex
                            .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
                        // Record the sheet's path now; the sheet itself loads lazily.
                        highlight_property_sheet: conf.highlights.map(|d| Err(d.into())),
                    })
                    .collect()
            });

        // Index every declared file type to (this repo, configuration i).
        for (i, configuration) in configurations.iter().enumerate() {
            for file_type in &configuration.file_types {
                self.language_configuration_indices_by_file_type
                    .entry(file_type.to_string())
                    .or_insert(Vec::new())
                    .push((self.language_repos.len(), i));
            }
        }

        self.language_repos.push(LanguageRepo {
            name: name.to_string(),
            path: parser_path.to_owned(),
            language: None,
            configurations,
        });

        Ok(self.language_repos.len() - 1)
    }
}
|
||||
|
||||
/// Return `true` when the file at `a` has a strictly newer mtime than the file
/// at `b`. Propagates an `io::Error` when either path is missing or its
/// metadata/mtime cannot be read.
fn was_modified_more_recently(a: &Path, b: &Path) -> io::Result<bool> {
    let a_mtime = fs::metadata(a)?.modified()?;
    let b_mtime = fs::metadata(b)?.modified()?;
    Ok(a_mtime > b_mtime)
}
|
||||
|
|
@ -5,14 +5,20 @@ extern crate log;
|
|||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
extern crate hashbrown;
|
||||
extern crate regex;
|
||||
extern crate serde_json;
|
||||
|
||||
mod error;
|
||||
mod generate;
|
||||
mod loader;
|
||||
mod logger;
|
||||
mod parse;
|
||||
mod test;
|
||||
|
||||
use self::loader::Loader;
|
||||
use clap::{App, Arg, SubCommand};
|
||||
use std::env;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
use std::usize;
|
||||
|
||||
|
|
@ -44,15 +50,13 @@ fn run() -> error::Result<()> {
|
|||
.about("Parse a file")
|
||||
.arg(Arg::with_name("path").index(1)),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("test")
|
||||
.about("Run a parser's tests")
|
||||
.arg(Arg::with_name("path").index(1).required(true))
|
||||
.arg(Arg::with_name("line").index(2).required(true))
|
||||
.arg(Arg::with_name("column").index(3).required(true)),
|
||||
)
|
||||
.subcommand(SubCommand::with_name("test").about("Run a parser's tests"))
|
||||
.get_matches();
|
||||
|
||||
let home_dir = dirs::home_dir().unwrap();
|
||||
let current_dir = env::current_dir().unwrap();
|
||||
let mut loader = Loader::new(home_dir.join(".tree-sitter"));
|
||||
|
||||
if let Some(matches) = matches.subcommand_matches("generate") {
|
||||
if matches.is_present("log") {
|
||||
logger::init();
|
||||
|
|
@ -65,11 +69,23 @@ fn run() -> error::Result<()> {
|
|||
ids.filter_map(|id| usize::from_str_radix(id, 10).ok())
|
||||
.collect()
|
||||
});
|
||||
let mut grammar_path = env::current_dir().expect("Failed to read CWD");
|
||||
grammar_path.push("grammar.js");
|
||||
let grammar_path = current_dir.join("grammar.js");
|
||||
let code =
|
||||
generate::generate_parser_for_grammar(&grammar_path, minimize, state_ids_to_log)?;
|
||||
println!("{}", code);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Some(_matches) = matches.subcommand_matches("test") {
|
||||
let corpus_path = current_dir.join("corpus");
|
||||
let home_dir = dirs::home_dir().unwrap();
|
||||
let mut loader = Loader::new(home_dir.join(".tree-sitter"));
|
||||
if let Some((language, _)) = loader.language_configuration_at_path(¤t_dir)? {
|
||||
test::run_tests_at_path(language, &corpus_path)?;
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
212
cli/src/test.rs
Normal file
212
cli/src/test.rs
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
use super::error::Result;
|
||||
use ansi_term::Colour;
|
||||
use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
|
||||
use regex::Regex;
|
||||
use std::char;
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
use tree_sitter::{Language, Parser};
|
||||
|
||||
lazy_static! {
    // A test header: a line of '='s, the test name, another line of '='s.
    // The capture group holds the name line(s); `multi_line` anchors `^` per line.
    static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n")
        .multi_line(true)
        .build()
        .unwrap();
    // The divider separating a test's input from its expected S-expression output.
    static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+\r?\n")
        .multi_line(true)
        .build()
        .unwrap();
    // Used to collapse whitespace runs when normalizing expected output.
    static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap();
}
|
||||
|
||||
// One node of the corpus test tree: directories become `Group`s, and each
// header/input/divider/output section of a corpus file becomes an `Example`.
#[derive(Debug, PartialEq, Eq)]
pub enum TestEntry {
    Group {
        name: String,
        children: Vec<TestEntry>,
    },
    Example {
        name: String,
        // Raw source bytes fed to the parser.
        input: Vec<u8>,
        // Expected S-expression, whitespace-normalized to single spaces.
        output: String,
    },
}
|
||||
|
||||
pub fn run_tests_at_path(language: Language, path: &Path) -> Result<()> {
|
||||
let test_entry = parse_tests(path)?;
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language)?;
|
||||
|
||||
let mut failures = Vec::new();
|
||||
if let TestEntry::Group { children, .. } = test_entry {
|
||||
for child in children {
|
||||
run_tests(&mut parser, child, 0, &mut failures)?;
|
||||
}
|
||||
}
|
||||
|
||||
if failures.len() > 0 {
|
||||
println!("");
|
||||
|
||||
if failures.len() == 1 {
|
||||
println!("1 failure:")
|
||||
} else {
|
||||
println!("{} failures:", failures.len())
|
||||
}
|
||||
|
||||
for (name, actual, expected) in failures {
|
||||
println!("\n {}:", name);
|
||||
println!(" Expected: {}", expected);
|
||||
println!(" Actual: {}", actual);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_tests(
|
||||
parser: &mut Parser,
|
||||
test_entry: TestEntry,
|
||||
mut indent_level: i32,
|
||||
failures: &mut Vec<(String, String, String)>,
|
||||
) -> Result<()> {
|
||||
for _ in 0..indent_level {
|
||||
print!(" ");
|
||||
}
|
||||
match test_entry {
|
||||
TestEntry::Example {
|
||||
name,
|
||||
input,
|
||||
output,
|
||||
} => {
|
||||
let tree = parser
|
||||
.parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None)
|
||||
.unwrap();
|
||||
let actual = tree.root_node().to_sexp();
|
||||
if actual == output {
|
||||
println!("✓ {}", Colour::Green.paint(&name));
|
||||
} else {
|
||||
println!("✗ {}", Colour::Red.paint(&name));
|
||||
failures.push((name, actual, output));
|
||||
}
|
||||
}
|
||||
TestEntry::Group { name, children } => {
|
||||
println!("{}:", name);
|
||||
indent_level += 1;
|
||||
for child in children {
|
||||
run_tests(parser, child, indent_level, failures)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
|
||||
let name = path
|
||||
.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
if path.is_dir() {
|
||||
let mut children = Vec::new();
|
||||
for entry in fs::read_dir(path)? {
|
||||
let entry = entry?;
|
||||
children.push(parse_tests(&entry.path())?);
|
||||
}
|
||||
Ok(TestEntry::Group { name, children })
|
||||
} else {
|
||||
let content = fs::read_to_string(path)?;
|
||||
Ok(parse_test_content(name, content))
|
||||
}
|
||||
}
|
||||
|
||||
// Split one corpus file's text into `Example` entries.
//
// The file alternates `=== name ===` headers, input text, a `---` divider,
// and expected output. We iterate over header matches (plus one sentinel
// "header" at EOF so the final example is flushed), and on each iteration
// emit the example that ended just before the current header.
fn parse_test_content(name: String, content: String) -> TestEntry {
    let mut children = Vec::new();
    let bytes = content.as_bytes();
    let mut previous_name = String::new();
    // Byte offset just past the previously seen header; 0 means "no header yet".
    let mut previous_header_end = 0;
    for header_match in HEADER_REGEX
        .find_iter(&bytes)
        .map(|m| (m.start(), m.end()))
        // Sentinel match at EOF flushes the last example.
        .chain(Some((bytes.len(), bytes.len())))
    {
        let (header_start, header_end) = header_match;
        if previous_header_end > 0 {
            // The previous example spans [previous_header_end, header_start);
            // find the divider inside it to split input from expected output.
            if let Some(divider_match) =
                DIVIDER_REGEX.find(&bytes[previous_header_end..header_start])
            {
                // Translate divider offsets back into whole-file coordinates.
                let (divider_start, divider_end) = (
                    previous_header_end + divider_match.start(),
                    previous_header_end + divider_match.end(),
                );
                // Examples with non-UTF-8 expected output are silently skipped.
                if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) {
                    let input = bytes[previous_header_end..divider_start].to_vec();
                    // Normalize the expected sexp: trim, collapse whitespace runs.
                    let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string();
                    children.push(TestEntry::Example {
                        name: previous_name,
                        input,
                        output,
                    });
                }
            }
        }
        // Extract the test name from the header by stripping '=' fences and
        // surrounding whitespace (lossy for non-UTF-8 header bytes).
        previous_name = String::from_utf8_lossy(&bytes[header_start..header_end])
            .trim_matches(|c| char::is_whitespace(c) || c == '=')
            .to_string();
        previous_header_end = header_end;
    }
    TestEntry::Group { name, children }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Verifies header/divider splitting, name extraction, input byte spans,
    // and whitespace normalization of the expected output.
    #[test]
    fn test_parse_test_content() {
        let entry = parse_test_content(
            "the-filename".to_string(),
            r#"
===============
The first test
===============

a b c

---

(a
(b c))

================
The second test
================
d
---
(d)
"#
            .trim()
            .to_string(),
        );

        assert_eq!(
            entry,
            TestEntry::Group {
                name: "the-filename".to_string(),
                children: vec![
                    TestEntry::Example {
                        name: "The first test".to_string(),
                        input: "\na b c\n\n".as_bytes().to_vec(),
                        output: "(a (b c))".to_string(),
                    },
                    TestEntry::Example {
                        name: "The second test".to_string(),
                        input: "d\n".as_bytes().to_vec(),
                        output: "(d)".to_string(),
                    },
                ]
            }
        );
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue