From 8291d294fb0b251addc745c90863e22792f5cc28 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Jan 2019 17:57:27 -0800 Subject: [PATCH] Add test subcommand Co-Authored-By: Timothy Clem --- Cargo.lock | 2 + cli/Cargo.toml | 2 + cli/src/error.rs | 14 +++ cli/src/loader.rs | 241 ++++++++++++++++++++++++++++++++++++++++++++++ cli/src/main.rs | 34 +++++-- cli/src/test.rs | 212 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 496 insertions(+), 9 deletions(-) create mode 100644 cli/src/loader.rs create mode 100644 cli/src/test.rs diff --git a/Cargo.lock b/Cargo.lock index 758dcad7..7a249312 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -641,6 +641,7 @@ dependencies = [ name = "tree-sitter-cli" version = "0.1.0" dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", @@ -648,6 +649,7 @@ dependencies = [ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 6a9c253d..200fd2f1 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -9,6 +9,7 @@ name = "tree-sitter" path = "src/main.rs" [dependencies] +ansi_term = "0.11" lazy_static = "1.2.0" smallbitvec = "2.3.0" clap = "2.32" @@ -20,6 +21,7 @@ rusqlite = "0.14.0" serde = "1.0" serde_derive = "1.0" regex-syntax = "0.6.4" +regex = "1" [dependencies.tree-sitter] path = "../lib" diff --git a/cli/src/error.rs b/cli/src/error.rs index 9a5801f8..1b8b1a79 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,3 +1,5 @@ +use std::io; + #[derive(Debug)] pub struct Error(pub String); @@ -22,3 +24,15 @@ impl From for Error { Error(error.to_string()) } } + +impl From for Error { + fn from(error: io::Error) -> Self { + Error(error.to_string()) + } +} + +impl From for Error { + fn from(error: String) -> Self { + Error(error) + } +} diff --git a/cli/src/loader.rs b/cli/src/loader.rs new file mode 100644 index 00000000..7dfb233b --- /dev/null +++ b/cli/src/loader.rs @@ -0,0 +1,241 @@ +use libloading::{Library, Symbol}; +use regex::{Regex, RegexBuilder}; +use std::collections::HashMap; +use std::fs; +use std::io; +use std::mem; +use std::path::{Path, PathBuf}; +use std::process::Command; +use tree_sitter::{Language, PropertySheet}; + +const PACKAGE_JSON_PATH: &'static str = "package.json"; +const PARSER_C_PATH: &'static str = "src/parser.c"; +const SCANNER_C_PATH: &'static str = "src/scanner.c"; +const SCANNER_CC_PATH: &'static str = "src/scanner.cc"; + +#[cfg(unix)] +const DYLIB_EXTENSION: &'static str = "so"; + +#[cfg(windows)] +const DYLIB_EXTENSION: &'static str = "dll"; + +struct LanguageRepo { + name: String, + path: PathBuf, + language: Option, + configurations: Vec, +} + +pub struct LanguageConfiguration { + name: String, + content_regex: Option, + first_line_regex: Option, + file_types: Vec, + highlight_property_sheet: Option>, +} + +pub struct Loader { + parser_lib_path: PathBuf, + language_repos: Vec, + language_configuration_indices_by_file_type: HashMap>, +} + +unsafe impl Send for Loader {} +unsafe impl Sync for Loader {} + +impl Loader { + pub fn new(parser_lib_path: PathBuf) -> Self { + Loader { + parser_lib_path, + language_repos: Vec::new(), + language_configuration_indices_by_file_type: HashMap::new(), + } + } + + pub fn find_parsers(&mut self, parser_src_paths: &Vec) -> io::Result<()> { + for parser_container_dir in parser_src_paths.iter() { + for entry in fs::read_dir(parser_container_dir)? { + let entry = entry?; + if let Some(parser_dir_name) = entry.file_name().to_str() { + if parser_dir_name.starts_with("tree-sitter-") { + if self.load_language_configurations( + &parser_container_dir.join(parser_dir_name), + ).is_err() { + eprintln!("Error loading {}", parser_dir_name); + } + } + } + } + } + Ok(()) + } + + pub fn language_configuration_at_path( + &mut self, + path: &Path, + ) -> io::Result> { + let repo_index = self.load_language_configurations(path)?; + self.load_language_from_repo(repo_index, 0) + } + + pub fn language_for_file_name( + &mut self, + path: &Path, + ) -> io::Result> { + let indices = path + .file_name() + .and_then(|n| n.to_str()) + .and_then(|file_name| { + self.language_configuration_indices_by_file_type + .get(file_name) + }) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| { + self.language_configuration_indices_by_file_type + .get(extension) + }) + }); + + if let Some(indices) = indices { + // TODO use `content-regex` to pick one + for (repo_index, conf_index) in indices { + return self.load_language_from_repo(*repo_index, *conf_index); + } + } + Ok(None) + } + + fn load_language_from_repo( + &mut self, + repo_index: usize, + conf_index: usize, + ) -> io::Result> { + let repo = &self.language_repos[repo_index]; + let language = if let Some(language) = repo.language { + language + } else { + let language = self.load_language_at_path(&repo.name, &repo.path)?; + self.language_repos[repo_index].language = Some(language); + language + }; + if let Some(configuration) = self.language_repos[repo_index] + .configurations + .get(conf_index) + { + Ok(Some((language, configuration))) + } else { + Ok(None) + } + } + + fn load_language_at_path(&self, name: &str, language_path: &Path) -> io::Result { + let parser_c_path = language_path.join(PARSER_C_PATH); + let mut library_path = self.parser_lib_path.join(name); + library_path.set_extension(DYLIB_EXTENSION); + + if !library_path.exists() || was_modified_more_recently(&parser_c_path, &library_path)? { + let compiler_name = std::env::var("CXX").unwrap_or("c++".to_owned()); + let mut command = Command::new(compiler_name); + command + .arg("-shared") + .arg("-fPIC") + .arg("-I") + .arg(language_path.join("src")) + .arg("-o") + .arg(&library_path) + .arg("-xc") + .arg(parser_c_path); + let scanner_c_path = language_path.join(SCANNER_C_PATH); + let scanner_cc_path = language_path.join(SCANNER_CC_PATH); + if scanner_c_path.exists() { + command.arg("-xc").arg(scanner_c_path); + } else if scanner_cc_path.exists() { + command.arg("-xc++").arg(scanner_cc_path); + } + command.output()?; + } + + let library = Library::new(library_path)?; + let language_fn_name = format!("tree_sitter_{}", name); + let language = unsafe { + let language_fn: Symbol Language> = + library.get(language_fn_name.as_bytes())?; + language_fn() + }; + mem::forget(library); + Ok(language) + } + + fn load_language_configurations<'a>(&'a mut self, parser_path: &Path) -> io::Result { + let name = parser_path + .file_name() + .unwrap() + .to_str() + .unwrap() + .split_at("tree-sitter-".len()) + .1; + + #[derive(Deserialize)] + struct LanguageConfigurationJSON { + name: String, + #[serde(rename = "file-types")] + file_types: Option>, + #[serde(rename = "content-regex")] + content_regex: Option, + #[serde(rename = "first-line-regex")] + first_line_regex: Option, + highlights: Option, + } + + #[derive(Deserialize)] + struct PackageJSON { + #[serde(rename = "tree-sitter")] + tree_sitter: Option>, + } + + let package_json_contents = fs::read_to_string(&parser_path.join(PACKAGE_JSON_PATH))?; + let package_json: PackageJSON = serde_json::from_str(&package_json_contents)?; + let configurations = package_json + .tree_sitter + .map_or(Vec::new(), |configurations| { + configurations + .into_iter() + .map(|conf| LanguageConfiguration { + name: conf.name, + file_types: conf.file_types.unwrap_or(Vec::new()), + content_regex: conf + .content_regex + .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), + first_line_regex: conf + .first_line_regex + .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), + highlight_property_sheet: conf.highlights.map(|d| Err(d.into())), + }) + .collect() + }); + + for (i, configuration) in configurations.iter().enumerate() { + for file_type in &configuration.file_types { + self.language_configuration_indices_by_file_type + .entry(file_type.to_string()) + .or_insert(Vec::new()) + .push((self.language_repos.len(), i)); + } + } + + self.language_repos.push(LanguageRepo { + name: name.to_string(), + path: parser_path.to_owned(), + language: None, + configurations, + }); + + Ok(self.language_repos.len() - 1) + } +} + +fn was_modified_more_recently(a: &Path, b: &Path) -> io::Result { + Ok(fs::metadata(a)?.modified()? > fs::metadata(b)?.modified()?) +} diff --git a/cli/src/main.rs b/cli/src/main.rs index fe6ffd8c..87f9e26d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -5,14 +5,20 @@ extern crate log; #[macro_use] extern crate serde_derive; extern crate hashbrown; +extern crate regex; extern crate serde_json; mod error; mod generate; +mod loader; mod logger; +mod parse; +mod test; +use self::loader::Loader; use clap::{App, Arg, SubCommand}; use std::env; +use std::path::Path; use std::process::exit; use std::usize; @@ -44,15 +50,13 @@ fn run() -> error::Result<()> { .about("Parse a file") .arg(Arg::with_name("path").index(1)), ) - .subcommand( - SubCommand::with_name("test") - .about("Run a parser's tests") - .arg(Arg::with_name("path").index(1).required(true)) - .arg(Arg::with_name("line").index(2).required(true)) - .arg(Arg::with_name("column").index(3).required(true)), - ) + .subcommand(SubCommand::with_name("test").about("Run a parser's tests")) .get_matches(); + let home_dir = dirs::home_dir().unwrap(); + let current_dir = env::current_dir().unwrap(); + let mut loader = Loader::new(home_dir.join(".tree-sitter")); + if let Some(matches) = matches.subcommand_matches("generate") { if matches.is_present("log") { logger::init(); @@ -65,11 +69,23 @@ fn run() -> error::Result<()> { ids.filter_map(|id| usize::from_str_radix(id, 10).ok()) .collect() }); - let mut grammar_path = env::current_dir().expect("Failed to read CWD"); - grammar_path.push("grammar.js"); + let grammar_path = current_dir.join("grammar.js"); let code = generate::generate_parser_for_grammar(&grammar_path, minimize, state_ids_to_log)?; println!("{}", code); + return Ok(()); + } + + if let Some(_matches) = matches.subcommand_matches("test") { + let corpus_path = current_dir.join("corpus"); + let home_dir = dirs::home_dir().unwrap(); + let mut loader = Loader::new(home_dir.join(".tree-sitter")); + if let Some((language, _)) = loader.language_configuration_at_path(¤t_dir)? { + test::run_tests_at_path(language, &corpus_path)?; + } else { + eprintln!("No language found"); + } + } } Ok(()) diff --git a/cli/src/test.rs b/cli/src/test.rs new file mode 100644 index 00000000..7ef63bb7 --- /dev/null +++ b/cli/src/test.rs @@ -0,0 +1,212 @@ +use super::error::Result; +use ansi_term::Colour; +use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; +use regex::Regex; +use std::char; +use std::fs; +use std::io; +use std::path::Path; +use std::str; +use tree_sitter::{Language, Parser}; + +lazy_static! { + static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n") + .multi_line(true) + .build() + .unwrap(); + static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+\r?\n") + .multi_line(true) + .build() + .unwrap(); + static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap(); +} + +#[derive(Debug, PartialEq, Eq)] +pub enum TestEntry { + Group { + name: String, + children: Vec, + }, + Example { + name: String, + input: Vec, + output: String, + }, +} + +pub fn run_tests_at_path(language: Language, path: &Path) -> Result<()> { + let test_entry = parse_tests(path)?; + let mut parser = Parser::new(); + parser.set_language(language)?; + + let mut failures = Vec::new(); + if let TestEntry::Group { children, .. } = test_entry { + for child in children { + run_tests(&mut parser, child, 0, &mut failures)?; + } + } + + if failures.len() > 0 { + println!(""); + + if failures.len() == 1 { + println!("1 failure:") + } else { + println!("{} failures:", failures.len()) + } + + for (name, actual, expected) in failures { + println!("\n {}:", name); + println!(" Expected: {}", expected); + println!(" Actual: {}", actual); + } + } + + Ok(()) +} + +fn run_tests( + parser: &mut Parser, + test_entry: TestEntry, + mut indent_level: i32, + failures: &mut Vec<(String, String, String)>, +) -> Result<()> { + for _ in 0..indent_level { + print!(" "); + } + match test_entry { + TestEntry::Example { + name, + input, + output, + } => { + let tree = parser + .parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None) + .unwrap(); + let actual = tree.root_node().to_sexp(); + if actual == output { + println!("✓ {}", Colour::Green.paint(&name)); + } else { + println!("✗ {}", Colour::Red.paint(&name)); + failures.push((name, actual, output)); + } + } + TestEntry::Group { name, children } => { + println!("{}:", name); + indent_level += 1; + for child in children { + run_tests(parser, child, indent_level, failures)?; + } + } + } + Ok(()) +} + +pub fn parse_tests(path: &Path) -> io::Result { + let name = path + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or("") + .to_string(); + if path.is_dir() { + let mut children = Vec::new(); + for entry in fs::read_dir(path)? { + let entry = entry?; + children.push(parse_tests(&entry.path())?); + } + Ok(TestEntry::Group { name, children }) + } else { + let content = fs::read_to_string(path)?; + Ok(parse_test_content(name, content)) + } +} + +fn parse_test_content(name: String, content: String) -> TestEntry { + let mut children = Vec::new(); + let bytes = content.as_bytes(); + let mut previous_name = String::new(); + let mut previous_header_end = 0; + for header_match in HEADER_REGEX + .find_iter(&bytes) + .map(|m| (m.start(), m.end())) + .chain(Some((bytes.len(), bytes.len()))) + { + let (header_start, header_end) = header_match; + if previous_header_end > 0 { + if let Some(divider_match) = + DIVIDER_REGEX.find(&bytes[previous_header_end..header_start]) + { + let (divider_start, divider_end) = ( + previous_header_end + divider_match.start(), + previous_header_end + divider_match.end(), + ); + if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) { + let input = bytes[previous_header_end..divider_start].to_vec(); + let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string(); + children.push(TestEntry::Example { + name: previous_name, + input, + output, + }); + } + } + } + previous_name = String::from_utf8_lossy(&bytes[header_start..header_end]) + .trim_matches(|c| char::is_whitespace(c) || c == '=') + .to_string(); + previous_header_end = header_end; + } + TestEntry::Group { name, children } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_test_content() { + let entry = parse_test_content( + "the-filename".to_string(), + r#" +=============== +The first test +=============== + +a b c + +--- + +(a + (b c)) + +================ +The second test +================ +d +--- +(d) + "# + .trim() + .to_string(), + ); + + assert_eq!( + entry, + TestEntry::Group { + name: "the-filename".to_string(), + children: vec![ + TestEntry::Example { + name: "The first test".to_string(), + input: "\na b c\n\n".as_bytes().to_vec(), + output: "(a (b c))".to_string(), + }, + TestEntry::Example { + name: "The second test".to_string(), + input: "d\n".as_bytes().to_vec(), + output: "(d)".to_string(), + }, + ] + } + ); + } +}