diff --git a/.appveyor.yml b/.appveyor.yml index de82a7d5..610ac134 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -25,8 +25,8 @@ test_script: - script\regenerate-fixtures.cmd # Run tests - - set TREE_SITTER_TEST=1 - script\test.cmd + - script\benchmark.cmd before_deploy: - move target\release\tree-sitter.exe tree-sitter.exe diff --git a/.travis.yml b/.travis.yml index 722a4dc9..06c71b34 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ script: # Run tests - export TREE_SITTER_STATIC_ANALYSIS=1 - script/test + - script/benchmark branches: only: diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 75efdb18..35b6c7a0 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -8,6 +8,10 @@ edition = "2018" name = "tree-sitter" path = "src/main.rs" +[[bench]] +name = "benchmark" +harness = false + [dependencies] cc = "1.0" ansi_term = "0.11" diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs new file mode 100644 index 00000000..472ab886 --- /dev/null +++ b/cli/benches/benchmark.rs @@ -0,0 +1,186 @@ +use lazy_static::lazy_static; +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; +use std::time::Instant; +use std::{env, fs, usize}; +use tree_sitter::{Language, Parser}; +use tree_sitter_cli::loader::Loader; + +include!("../src/tests/helpers/dirs.rs"); + +lazy_static! 
{ + static ref LANGUAGE_FILTER: Option<String> = + env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok(); + static ref EXAMPLE_FILTER: Option<String> = + env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok(); + static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); + // Maps each grammar directory name to the files in its `examples` directory (empty if there is none). + static ref EXAMPLE_PATHS_BY_LANGUAGE_NAME: BTreeMap<String, Vec<PathBuf>> = { + let mut result = BTreeMap::new(); + let grammar_dirs = fs::read_dir(&(*GRAMMARS_DIR)).unwrap(); + for grammar_dir in grammar_dirs { + let grammar_dir = grammar_dir.unwrap(); + if !grammar_dir.path().is_dir() { + continue; + } + + let language_name = grammar_dir.file_name(); + let language_name = language_name.to_str().unwrap(); + if let Ok(example_files) = fs::read_dir(&grammar_dir.path().join("examples")) { + result.insert( + language_name.to_string(), + example_files + .filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p) + } else { + None + } + }) + .collect(), + ); + } else { + result.insert(language_name.to_string(), Vec::new()); + } + } + + result + }; +} + +/// Benchmarks the fixture grammars: parses each language's own example files +/// ("normal") and the example files of all other languages ("error"), printing +/// per-file, per-language and overall speeds to stderr. +fn main() { + let mut parser = Parser::new(); + // Column width for example file names; 0 instead of a panic when no example files exist. + let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_NAME + .iter() + .flat_map(|(_, paths)| paths.iter()) + .map(|p| p.file_name().unwrap().to_str().unwrap().chars().count()) + .max() + .unwrap_or(0); + + let mut all_normal_speeds = Vec::new(); + let mut all_error_speeds = Vec::new(); + + for (language_name, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_NAME.iter() { + // TODO - remove after fixing slow error parsing HTML. 
+ if language_name == "html" { + continue; + } + + if let Some(filter) = LANGUAGE_FILTER.as_ref() { + if language_name != filter.as_str() { + continue; + } + } + + eprintln!("\nLanguage: {}", language_name); + parser.set_language(get_language(language_name)).unwrap(); + + eprintln!(" Normal examples:"); + let mut normal_speeds = Vec::new(); + for example_path in example_paths { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !example_path.to_str().unwrap().contains(filter.as_str()) { + continue; + } + } + + normal_speeds.push(parse(&mut parser, example_path, max_path_length)); + } + + eprintln!(" Error examples (mismatched languages):"); + let mut error_speeds = Vec::new(); + for (other_language_name, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_NAME.iter() { + if other_language_name != language_name { + for example_path in example_paths { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !example_path.to_str().unwrap().contains(filter.as_str()) { + continue; + } + } + + error_speeds.push(parse(&mut parser, example_path, max_path_length)); + } + } + } + + if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) { + eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); + eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); + } + + if let Some((average_error, worst_error)) = aggregate(&error_speeds) { + eprintln!(" Average Speed (errors): {} bytes/ms", average_error); + eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error); + } + + all_normal_speeds.extend(normal_speeds); + all_error_speeds.extend(error_speeds); + } + + eprintln!("\nOverall"); + if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { + eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); + eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); + } + + if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) { + eprintln!(" Average Speed (errors): {} bytes/ms", average_error); + 
+ eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error); + } + eprintln!(); +} + +/// Summarizes parse speeds (bytes/ms) as `(average, worst)`. +/// +/// `worst` is the slowest, i.e. smallest, speed. Returns `None` when `speeds` +/// is empty. +fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> { + if speeds.is_empty() { + return None; + } + let mut total = 0; + let mut worst = usize::MAX; + for &speed in speeds { + total += speed; + if speed < worst { + worst = speed; + } + } + Some((total / speeds.len(), worst)) +} + +/// Parses the file at `example_path` with `parser`, prints its name (padded to +/// `max_path_length`), elapsed time and speed to stderr, and returns the speed +/// in bytes per millisecond. Panics if the file cannot be read or parsed. +fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize { + eprint!( + " {:width$}\t", + example_path.file_name().unwrap().to_str().unwrap(), + width = max_path_length + ); + + let source_code = fs::read(example_path).unwrap(); + let time = Instant::now(); + let _tree = parser + .parse_utf8(&mut |byte, _| &source_code[byte..], None) + .expect("Incompatible language version"); + let duration = time.elapsed(); + let duration_ms = + duration.as_secs() as f64 * 1000.0 + duration.subsec_nanos() as f64 / 1000000.0; + let speed = (source_code.len() as f64 / duration_ms) as usize; + eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed); + speed +} + +/// Loads the language `name` from the `src` directory of its grammar under +/// `GRAMMARS_DIR`, panicking if loading fails. 
+fn get_language(name: &str) -> Language { + TEST_LOADER + .load_language_at_path(name, &GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR) + .unwrap() +} diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index 38f56cc3..ef4b3e5e 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -5,6 +5,7 @@ use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::generate::nfa::{CharacterSet, NfaCursor}; use crate::generate::rules::Symbol; use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; +use log::info; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, VecDeque}; diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 9f3307dd..b450bb75 100644 --- 
a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -1,6 +1,7 @@ use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; use crate::generate::rules::Associativity; use crate::generate::rules::{Symbol, SymbolType}; +use lazy_static::lazy_static; use smallbitvec::SmallBitVec; use std::cmp::Ordering; use std::fmt; diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index bb9b26eb..9b012afe 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -4,6 +4,7 @@ use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use crate::generate::rules::{AliasMap, Symbol}; use crate::generate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; use hashbrown::{HashMap, HashSet}; +use log::info; pub(crate) fn minimize_parse_table( parse_table: &mut ParseTable, diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 36f6770b..df19f9e0 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -17,6 +17,7 @@ use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGram use crate::generate::nfa::{CharacterSet, NfaCursor}; use crate::generate::rules::{AliasMap, Symbol, SymbolType}; use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; +use log::info; pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 9e954298..397fd677 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -3,6 +3,7 @@ use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; use crate::error::{Error, Result}; +use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; use std::fs; use 
std::io::Write; @@ -62,7 +63,6 @@ pub fn generate_parser_in_directory( Ok(()) } -#[cfg(test)] pub fn generate_parser_for_grammar(grammar_json: &String) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new()) @@ -141,6 +141,7 @@ fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> if path.exists() { Ok(()) } else { - fs::write(path, f().as_ref()).map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) + fs::write(path, f().as_ref()) + .map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) } } diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index e77dce9b..cf2005ad 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -1,6 +1,7 @@ use super::grammars::{InputGrammar, Variable, VariableType}; use super::rules::Rule; use crate::error::Result; +use serde_derive::Deserialize; use serde_json::{Map, Value}; #[derive(Deserialize)] diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 8e0f12fe..9e2cf9fe 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -3,6 +3,7 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{LexicalGrammar, LexicalVariable}; use crate::generate::nfa::{CharacterSet, Nfa, NfaState}; use crate::generate::rules::Rule; +use lazy_static::lazy_static; use regex::Regex; use regex_syntax::ast::{ parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange, diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index 4df4d67d..f5861159 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -1,6 +1,8 @@ use crate::error::{Error, Result}; +use log::info; use rsass; use rsass::sass::Value; +use 
serde_derive::Serialize; use std::cmp::Ordering; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; diff --git a/cli/src/lib.rs b/cli/src/lib.rs new file mode 100644 index 00000000..9038b5b8 --- /dev/null +++ b/cli/src/lib.rs @@ -0,0 +1,10 @@ +pub mod error; +pub mod generate; +pub mod loader; +pub mod logger; +pub mod parse; +pub mod test; +pub mod util; + +#[cfg(test)] +mod tests; diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 6dd4e4db..6c6d2c5c 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -1,5 +1,6 @@ use libloading::{Library, Symbol}; use regex::{Regex, RegexBuilder}; +use serde_derive::Deserialize; use std::collections::HashMap; use std::fs; use std::io; diff --git a/cli/src/logger.rs b/cli/src/logger.rs index 18df763d..6abe6470 100644 --- a/cli/src/logger.rs +++ b/cli/src/logger.rs @@ -23,7 +23,7 @@ impl Log for Logger { fn flush(&self) {} } -pub(crate) fn init() { +pub fn init() { log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap(); log::set_max_level(LevelFilter::Info); } diff --git a/cli/src/main.rs b/cli/src/main.rs index 0bf4f01a..3c0b057e 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,31 +1,10 @@ -#[macro_use] -extern crate lazy_static; -#[macro_use] -extern crate log; -#[macro_use] -extern crate serde_derive; -extern crate hashbrown; -extern crate regex; -extern crate rsass; -extern crate serde_json; - -mod error; -mod generate; -mod loader; -mod logger; -mod parse; -mod test; -mod util; - -#[cfg(test)] -mod tests; - -use self::loader::Loader; use clap::{App, AppSettings, Arg, SubCommand}; use std::env; use std::fs; use std::path::Path; use std::process::exit; +use tree_sitter_cli::loader::Loader; +use tree_sitter_cli::{error, generate, logger, parse, test}; use std::usize; fn main() { diff --git a/cli/src/test.rs b/cli/src/test.rs index d6a2a7ce..c8330af9 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -2,6 +2,7 @@ use super::error::Result; use 
super::util; use ansi_term::Colour; use difference::{Changeset, Difference}; +use lazy_static::lazy_static; use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; use regex::Regex; use std::char; @@ -38,7 +39,10 @@ pub enum TestEntry { impl Default for TestEntry { fn default() -> Self { - TestEntry::Group { name: String::new(), children: Vec::new() } + TestEntry::Group { + name: String::new(), + children: Vec::new(), + } } } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 449669e3..1ee3ddc1 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -5,6 +5,7 @@ use super::helpers::scope_sequence::ScopeSequence; use crate::generate; use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; +use lazy_static::lazy_static; use std::{env, fs, time, usize}; use tree_sitter::{InputEdit, LogType, Node, Parser, Point, Tree}; @@ -373,7 +374,10 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec) { assert!(start_byte <= end_byte); assert!(start_point <= end_point); - assert_eq!(start_byte, line_offsets[start_point.row] + start_point.column); + assert_eq!( + start_byte, + line_offsets[start_point.row] + start_point.column + ); assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column); let mut last_child_end_byte = start_byte; diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index e3cdae27..ae246c40 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -1,6 +1,7 @@ #![cfg(test)] #![allow(dead_code)] +use lazy_static::lazy_static; use spin::Mutex; use std::collections::HashMap; use std::os::raw::{c_ulong, c_void}; @@ -46,10 +47,7 @@ pub fn stop_recording() { .map(|e| e.1) .collect::>(); allocation_indices.sort_unstable(); - panic!( - "Leaked allocation indices: {:?}", - allocation_indices - ); + panic!("Leaked allocation indices: {:?}", allocation_indices); } } diff --git 
a/cli/src/tests/helpers/dirs.rs b/cli/src/tests/helpers/dirs.rs new file mode 100644 index 00000000..4bf345d8 --- /dev/null +++ b/cli/src/tests/helpers/dirs.rs @@ -0,0 +1,15 @@ +// Directory paths used by the test helpers and the benchmark. This file is pulled in +// with `include!`, so `lazy_static!`, `PathBuf` and `fs` must already be in scope. +lazy_static! { + // `CARGO_MANIFEST_DIR` is the `cli` crate directory; its parent is the repository root. + static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned(); + static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); + static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); + static ref GRAMMARS_DIR: PathBuf = FIXTURES_DIR.join("grammars"); + // The directory is created the first time this static is accessed. + static ref SCRATCH_DIR: PathBuf = { + let result = ROOT_DIR.join("target").join("scratch"); + fs::create_dir_all(&result).unwrap(); + result + }; +} diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 639b1004..981f0ab6 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -1,18 +1,12 @@ use crate::loader::Loader; +use lazy_static::lazy_static; use std::fs; use std::path::{Path, PathBuf}; use tree_sitter::Language; +include!("./dirs.rs"); + lazy_static! 
{ - static ref ROOT_DIR: PathBuf = [env!("CARGO_MANIFEST_DIR"), ".."].iter().collect(); - static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); - static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); - static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars"); - static ref SCRATCH_DIR: PathBuf = { - let result = ROOT_DIR.join("target").join("scratch"); - fs::create_dir_all(&result).unwrap(); - result - }; static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); } diff --git a/cli/src/tests/parser_api_test.rs b/cli/src/tests/parser_api_test.rs index 9584ac4e..e46d9b55 100644 --- a/cli/src/tests/parser_api_test.rs +++ b/cli/src/tests/parser_api_test.rs @@ -1,4 +1,5 @@ use super::helpers::fixtures::get_language; +use serde_derive::Deserialize; use std::thread; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, PropertySheet}; diff --git a/cli/src/util.rs b/cli/src/util.rs index 004d3b06..e880bea1 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -8,18 +8,18 @@ use tree_sitter::Parser; const HTML_HEADER: &[u8] = b"\n\n\n"; #[cfg(windows)] -pub(crate) struct LogSession(); +pub struct LogSession(); #[cfg(unix)] -pub(crate) struct LogSession(PathBuf, Option, Option); +pub struct LogSession(PathBuf, Option, Option); #[cfg(windows)] -pub(crate) fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { +pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { Ok(LogSession()) } #[cfg(unix)] -pub(crate) fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { +pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { use std::io::Write; let mut dot_file = std::fs::File::create(path)?; diff --git a/script/benchmark b/script/benchmark index e24c6b58..9b4ec3f0 100755 --- a/script/benchmark +++ b/script/benchmark @@ -6,7 +6,7 @@ function usage { cat <<-EOF USAGE - $0 [-Ld] [-l language-name] [-f example-file-name] + $0 [-h] [-l 
language-name] [-e example-file-name] OPTIONS @@ -14,63 +14,24 @@ OPTIONS -l run only the benchmarks for the given language - -f run only the benchmarks that parse the file with the given name - - -d run tests in a debugger (either lldb or gdb) - - -L run benchmarks with parse logging turned on - - -b run make under the scan-build static analyzer + -e run only the benchmarks that parse the example file with the given name EOF } -if [ "$(uname -s)" == "Darwin" ]; then - export LINK="clang++ -fsanitize=address" -fi - -mode=normal -export BUILDTYPE=Release -cmd=out/$BUILDTYPE/benchmarks -run_scan_build= - -while getopts "bdhf:l:SL" option; do +while getopts "hl:e:" option; do case ${option} in h) usage exit ;; - d) - mode=debug - ;; - f) - export TREE_SITTER_BENCHMARK_FILE_NAME=${OPTARG} + e) + export TREE_SITTER_BENCHMARK_EXAMPLE_FILTER=${OPTARG} ;; l) - export TREE_SITTER_BENCHMARK_LANGUAGE=${OPTARG} - ;; - L) - export TREE_SITTER_BENCHMARK_LOG=1 - ;; - b) - run_scan_build=true + export TREE_SITTER_BENCHMARK_LANGUAGE_FILTER=${OPTARG} ;; esac done -if [[ -n "$run_scan_build" ]]; then - . script/util/scan-build.sh - scan_build make -j2 benchmarks -else - make -j2 benchmarks -fi - -case $mode in - debug) - lldb $cmd - ;; - - normal) - exec $cmd - ;; -esac +cargo bench diff --git a/script/benchmark.cmd b/script/benchmark.cmd new file mode 100644 index 00000000..f5608d9d --- /dev/null +++ b/script/benchmark.cmd @@ -0,0 +1,3 @@ +@echo off + +cargo bench