Add benchmark script

* Structure `cli` crate as both a library and an executable, so that
benchmarks can import code from the crate.
* Import macros in the Rust 2018 style.
This commit is contained in:
Max Brunsfeld 2019-02-01 14:39:37 -08:00
parent e26cbb62a5
commit 4cac85fec4
25 changed files with 244 additions and 92 deletions

View file

@ -25,8 +25,8 @@ test_script:
- script\regenerate-fixtures.cmd
# Run tests
- set TREE_SITTER_TEST=1
- script\test.cmd
- script\benchmark.cmd
before_deploy:
- move target\release\tree-sitter.exe tree-sitter.exe

View file

@ -15,6 +15,7 @@ script:
# Run tests
- export TREE_SITTER_STATIC_ANALYSIS=1
- script/test
- script/benchmark
branches:
only:

View file

@ -8,6 +8,10 @@ edition = "2018"
name = "tree-sitter"
path = "src/main.rs"
[[bench]]
name = "benchmark"
harness = false
[dependencies]
cc = "1.0"
ansi_term = "0.11"

172
cli/benches/benchmark.rs Normal file
View file

@ -0,0 +1,172 @@
use lazy_static::lazy_static;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::{env, fs, usize};
use tree_sitter::{Language, Parser};
use tree_sitter_cli::loader::Loader;
include!("../src/tests/helpers/dirs.rs");
// Lazily-initialized globals shared across the benchmark run.
lazy_static! {
    // Optional filters read from the environment: when set, only the matching
    // language / example file name is benchmarked.
    static ref LANGUAGE_FILTER: Option<String> =
        env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok();
    static ref EXAMPLE_FILTER: Option<String> =
        env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok();
    // Loader that compiles/loads grammars; SCRATCH_DIR and GRAMMARS_DIR come
    // from the include!d `dirs.rs` helper.
    static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone());
    // Map from language name -> paths of its example files, built by scanning
    // each grammar directory's `examples` subdirectory. Languages without an
    // `examples` directory map to an empty Vec. A BTreeMap keeps iteration
    // order deterministic (alphabetical by language name).
    static ref EXAMPLE_PATHS_BY_LANGUAGE_NAME: BTreeMap<String, Vec<PathBuf>> = {
        let mut result = BTreeMap::new();
        let grammar_dirs = fs::read_dir(&(*GRAMMARS_DIR)).unwrap();
        for grammar_dir in grammar_dirs {
            let grammar_dir = grammar_dir.unwrap();
            if !grammar_dir.path().is_dir() {
                continue;
            }
            let language_name = grammar_dir.file_name();
            let language_name = language_name.to_str().unwrap();
            if let Ok(example_files) = fs::read_dir(&grammar_dir.path().join("examples")) {
                result.insert(
                    language_name.to_string(),
                    example_files
                        .filter_map(|p| {
                            // Keep only regular files; skip nested directories.
                            let p = p.unwrap().path();
                            if p.is_file() {
                                Some(p)
                            } else {
                                None
                            }
                        })
                        .collect(),
                );
            } else {
                result.insert(language_name.to_string(), Vec::new());
            }
        }
        result
    };
}
/// Entry point for the benchmark harness (run via `cargo bench`).
///
/// For each fixture language, parses each of its own example files
/// ("normal" examples), then parses every other language's example files
/// ("error" examples, which exercise error recovery), printing per-file and
/// aggregate speeds (bytes/ms) to stderr.
fn main() {
    let mut parser = Parser::new();
    // Width of the longest example file name, used to align per-file output.
    // NOTE(review): `.max().unwrap()` panics if no grammar has any examples —
    // presumed acceptable for a fixture-driven benchmark.
    let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_NAME
        .iter()
        .flat_map(|(_, paths)| paths.iter())
        .map(|p| p.file_name().unwrap().to_str().unwrap().chars().count())
        .max()
        .unwrap();
    let mut all_normal_speeds = Vec::new();
    let mut all_error_speeds = Vec::new();
    for (language_name, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_NAME.iter() {
        // TODO - remove after fixing slow error parsing HTML.
        if language_name == "html" {
            continue;
        }
        // Honor TREE_SITTER_BENCHMARK_LANGUAGE_FILTER, when set.
        if let Some(filter) = LANGUAGE_FILTER.as_ref() {
            if language_name != filter.as_str() {
                continue;
            }
        }
        eprintln!("\nLanguage: {}", language_name);
        parser.set_language(get_language(language_name)).unwrap();
        // Normal examples: sources actually written in this language.
        eprintln!(" Normal examples:");
        let mut normal_speeds = Vec::new();
        for example_path in example_paths {
            // Honor TREE_SITTER_BENCHMARK_EXAMPLE_FILTER, when set.
            if let Some(filter) = EXAMPLE_FILTER.as_ref() {
                if !example_path.to_str().unwrap().contains(filter.as_str()) {
                    continue;
                }
            }
            normal_speeds.push(parse(&mut parser, example_path, max_path_length));
        }
        // Error examples: other languages' sources parsed with this language's
        // grammar, to measure error-recovery performance.
        eprintln!(" Error examples (mismatched languages):");
        let mut error_speeds = Vec::new();
        for (other_language_name, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_NAME.iter() {
            if other_language_name != language_name {
                for example_path in example_paths {
                    if let Some(filter) = EXAMPLE_FILTER.as_ref() {
                        if !example_path.to_str().unwrap().contains(filter.as_str()) {
                            continue;
                        }
                    }
                    error_speeds.push(parse(&mut parser, example_path, max_path_length));
                }
            }
        }
        // Per-language summaries (skipped when the filter excluded everything).
        if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) {
            eprintln!(" Average Speed (normal): {} bytes/ms", average_normal);
            eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal);
        }
        if let Some((average_error, worst_error)) = aggregate(&error_speeds) {
            eprintln!(" Average Speed (errors): {} bytes/ms", average_error);
            eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error);
        }
        all_normal_speeds.extend(normal_speeds);
        all_error_speeds.extend(error_speeds);
    }
    // Overall summaries across all languages.
    eprintln!("\nOverall");
    if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) {
        eprintln!(" Average Speed (normal): {} bytes/ms", average_normal);
        eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal);
    }
    if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) {
        eprintln!(" Average Speed (errors): {} bytes/ms", average_error);
        eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error);
    }
    eprintln!("");
}
/// Summarizes a set of parse speeds (bytes/ms).
///
/// Returns `None` for an empty slice; otherwise returns
/// `(average_speed, worst_speed)`, where the "worst" speed is the minimum.
///
/// Note: the original accumulated the minimum into a variable named `max`
/// (initialized to `usize::MAX`), and took `&Vec<(usize)>` with redundant
/// parentheses; this version uses an idiomatic slice parameter (call sites
/// passing `&Vec<usize>` still work via deref coercion) and iterator
/// combinators with honest names.
fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> {
    if speeds.is_empty() {
        return None;
    }
    let total: usize = speeds.iter().sum();
    // The slice is non-empty here, so `min()` is always `Some`.
    let worst = *speeds.iter().min().unwrap();
    Some((total / speeds.len(), worst))
}
/// Parses one example file and reports its throughput.
///
/// Prints the file name (padded to `max_path_length` columns), the elapsed
/// wall-clock time, and the resulting parse speed to stderr, then returns
/// that speed in bytes per millisecond.
fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize {
    let file_name = example_path.file_name().unwrap().to_str().unwrap();
    eprint!(" {:width$}\t", file_name, width = max_path_length);
    let source_code = fs::read(example_path).unwrap();
    let start = Instant::now();
    let _tree = parser
        .parse_utf8(&mut |byte, _| &source_code[byte..], None)
        .expect("Incompatible language version");
    let elapsed = start.elapsed();
    // Convert the elapsed Duration to fractional milliseconds.
    let elapsed_ms = elapsed.as_secs() as f64 * 1000.0 + elapsed.subsec_nanos() as f64 / 1000000.0;
    let speed = (source_code.len() as f64 / elapsed_ms) as usize;
    eprintln!("time {} ms\tspeed {} bytes/ms", elapsed_ms as usize, speed);
    speed
}
/// Compiles (if needed) and loads the named fixture grammar, returning its
/// `Language`.
fn get_language(name: &str) -> Language {
    // GRAMMARS_DIR and HEADER_DIR come from the include!d `dirs.rs` helper;
    // TEST_LOADER caches compiled grammars in SCRATCH_DIR.
    TEST_LOADER
        .load_language_at_path(name, &GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR)
        .unwrap()
}

View file

@ -5,6 +5,7 @@ use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::rules::Symbol;
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
use log::info;
use std::collections::hash_map::Entry;
use std::collections::{BTreeMap, HashMap, VecDeque};

View file

@ -1,6 +1,7 @@
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::generate::rules::Associativity;
use crate::generate::rules::{Symbol, SymbolType};
use lazy_static::lazy_static;
use smallbitvec::SmallBitVec;
use std::cmp::Ordering;
use std::fmt;

View file

@ -4,6 +4,7 @@ use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use crate::generate::rules::{AliasMap, Symbol};
use crate::generate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
use hashbrown::{HashMap, HashSet};
use log::info;
pub(crate) fn minimize_parse_table(
parse_table: &mut ParseTable,

View file

@ -17,6 +17,7 @@ use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGram
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
use log::info;
pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,

View file

@ -3,6 +3,7 @@ use self::parse_grammar::parse_grammar;
use self::prepare_grammar::prepare_grammar;
use self::render::render_c_code;
use crate::error::{Error, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use std::fs;
use std::io::Write;
@ -62,7 +63,6 @@ pub fn generate_parser_in_directory(
Ok(())
}
#[cfg(test)]
pub fn generate_parser_for_grammar(grammar_json: &String) -> Result<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())
@ -141,6 +141,7 @@ fn ensure_file<T: AsRef<[u8]>>(path: &PathBuf, f: impl Fn() -> T) -> Result<()>
if path.exists() {
Ok(())
} else {
fs::write(path, f().as_ref()).map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e)))
fs::write(path, f().as_ref())
.map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e)))
}
}

View file

@ -1,6 +1,7 @@
use super::grammars::{InputGrammar, Variable, VariableType};
use super::rules::Rule;
use crate::error::Result;
use serde_derive::Deserialize;
use serde_json::{Map, Value};
#[derive(Deserialize)]

View file

@ -3,6 +3,7 @@ use crate::error::{Error, Result};
use crate::generate::grammars::{LexicalGrammar, LexicalVariable};
use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
use crate::generate::rules::Rule;
use lazy_static::lazy_static;
use regex::Regex;
use regex_syntax::ast::{
parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange,

View file

@ -1,6 +1,8 @@
use crate::error::{Error, Result};
use log::info;
use rsass;
use rsass::sass::Value;
use serde_derive::Serialize;
use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};

10
cli/src/lib.rs Normal file
View file

@ -0,0 +1,10 @@
//! Library interface for the `tree-sitter` CLI crate, exposing its modules so
//! that benchmarks (and other external consumers) can reuse the CLI's code.
pub mod error;
pub mod generate;
pub mod loader;
pub mod logger;
pub mod parse;
pub mod test;
pub mod util;
// Internal test suite; compiled only under `cargo test`.
#[cfg(test)]
mod tests;

View file

@ -1,5 +1,6 @@
use libloading::{Library, Symbol};
use regex::{Regex, RegexBuilder};
use serde_derive::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::io;

View file

@ -23,7 +23,7 @@ impl Log for Logger {
fn flush(&self) {}
}
pub(crate) fn init() {
pub fn init() {
log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap();
log::set_max_level(LevelFilter::Info);
}

View file

@ -1,31 +1,10 @@
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
#[macro_use]
extern crate serde_derive;
extern crate hashbrown;
extern crate regex;
extern crate rsass;
extern crate serde_json;
mod error;
mod generate;
mod loader;
mod logger;
mod parse;
mod test;
mod util;
#[cfg(test)]
mod tests;
use self::loader::Loader;
use clap::{App, AppSettings, Arg, SubCommand};
use std::env;
use std::fs;
use std::path::Path;
use std::process::exit;
use tree_sitter_cli::loader::Loader;
use tree_sitter_cli::{error, generate, logger, parse, test};
use std::usize;
fn main() {

View file

@ -2,6 +2,7 @@ use super::error::Result;
use super::util;
use ansi_term::Colour;
use difference::{Changeset, Difference};
use lazy_static::lazy_static;
use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
use regex::Regex;
use std::char;
@ -38,7 +39,10 @@ pub enum TestEntry {
impl Default for TestEntry {
fn default() -> Self {
TestEntry::Group { name: String::new(), children: Vec::new() }
TestEntry::Group {
name: String::new(),
children: Vec::new(),
}
}
}

View file

@ -5,6 +5,7 @@ use super::helpers::scope_sequence::ScopeSequence;
use crate::generate;
use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry};
use crate::util;
use lazy_static::lazy_static;
use std::{env, fs, time, usize};
use tree_sitter::{InputEdit, LogType, Node, Parser, Point, Tree};
@ -373,7 +374,10 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
assert!(start_byte <= end_byte);
assert!(start_point <= end_point);
assert_eq!(start_byte, line_offsets[start_point.row] + start_point.column);
assert_eq!(
start_byte,
line_offsets[start_point.row] + start_point.column
);
assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);
let mut last_child_end_byte = start_byte;

View file

@ -1,6 +1,7 @@
#![cfg(test)]
#![allow(dead_code)]
use lazy_static::lazy_static;
use spin::Mutex;
use std::collections::HashMap;
use std::os::raw::{c_ulong, c_void};
@ -46,10 +47,7 @@ pub fn stop_recording() {
.map(|e| e.1)
.collect::<Vec<_>>();
allocation_indices.sort_unstable();
panic!(
"Leaked allocation indices: {:?}",
allocation_indices
);
panic!("Leaked allocation indices: {:?}", allocation_indices);
}
}

View file

@ -0,0 +1,11 @@
// Shared directory constants, include!d by both the test helpers and the
// benchmark so they agree on fixture locations.
lazy_static! {
    // Repository root: the parent of the `cli` crate's manifest directory.
    static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
    static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
    static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
    static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
    // Scratch directory for compiled grammar artifacts; created on first use.
    static ref SCRATCH_DIR: PathBuf = {
        let result = ROOT_DIR.join("target").join("scratch");
        fs::create_dir_all(&result).unwrap();
        result
    };
}

View file

@ -1,18 +1,12 @@
use crate::loader::Loader;
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use tree_sitter::Language;
include!("./dirs.rs");
lazy_static! {
static ref ROOT_DIR: PathBuf = [env!("CARGO_MANIFEST_DIR"), ".."].iter().collect();
static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
static ref SCRATCH_DIR: PathBuf = {
let result = ROOT_DIR.join("target").join("scratch");
fs::create_dir_all(&result).unwrap();
result
};
static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone());
}

View file

@ -1,4 +1,5 @@
use super::helpers::fixtures::get_language;
use serde_derive::Deserialize;
use std::thread;
use tree_sitter::{InputEdit, Language, LogType, Parser, Point, PropertySheet};

View file

@ -8,18 +8,18 @@ use tree_sitter::Parser;
const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n";
#[cfg(windows)]
pub(crate) struct LogSession();
pub struct LogSession();
#[cfg(unix)]
pub(crate) struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
#[cfg(windows)]
pub(crate) fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result<LogSession> {
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result<LogSession> {
Ok(LogSession())
}
#[cfg(unix)]
pub(crate) fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession> {
pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession> {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;

View file

@ -6,7 +6,7 @@ function usage {
cat <<-EOF
USAGE
$0 [-Ld] [-l language-name] [-f example-file-name]
$0 [-h] [-l language-name] [-e example-file-name]
OPTIONS
@ -14,63 +14,24 @@ OPTIONS
-l run only the benchmarks for the given language
-f run only the benchmarks that parse the file with the given name
-d run tests in a debugger (either lldb or gdb)
-L run benchmarks with parse logging turned on
-b run make under the scan-build static analyzer
-e run only the benchmarks that parse the example file with the given name
EOF
}
if [ "$(uname -s)" == "Darwin" ]; then
export LINK="clang++ -fsanitize=address"
fi
mode=normal
export BUILDTYPE=Release
cmd=out/$BUILDTYPE/benchmarks
run_scan_build=
while getopts "bdhf:l:SL" option; do
while getopts "hl:e:" option; do
case ${option} in
h)
usage
exit
;;
d)
mode=debug
;;
f)
export TREE_SITTER_BENCHMARK_FILE_NAME=${OPTARG}
e)
export TREE_SITTER_BENCHMARK_EXAMPLE_FILTER=${OPTARG}
;;
l)
export TREE_SITTER_BENCHMARK_LANGUAGE=${OPTARG}
;;
L)
export TREE_SITTER_BENCHMARK_LOG=1
;;
b)
run_scan_build=true
export TREE_SITTER_BENCHMARK_LANGUAGE_FILTER=${OPTARG}
;;
esac
done
if [[ -n "$run_scan_build" ]]; then
. script/util/scan-build.sh
scan_build make -j2 benchmarks
else
make -j2 benchmarks
fi
case $mode in
debug)
lldb $cmd
;;
normal)
exec $cmd
;;
esac
cargo bench

3
script/benchmark.cmd Normal file
View file

@ -0,0 +1,3 @@
@echo off
REM Run the Rust benchmark harness (cli/benches/benchmark.rs) via Cargo.
cargo bench