feat(cli): show parse times in testing output

This commit is contained in:
Will Lillis 2024-12-23 01:22:37 -05:00 committed by GitHub
parent 8744a4e3f2
commit 5d9870ebee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 138 additions and 22 deletions

View file

@ -24,7 +24,7 @@ use tree_sitter_cli::{
logger,
parse::{self, ParseFileOptions, ParseOutput, ParseTheme},
playground, query, tags,
test::{self, TestOptions},
test::{self, TestOptions, TestStats},
test_highlight, test_tags, util, version, wasm,
};
use tree_sitter_config::Config;
@ -274,6 +274,8 @@ struct Test {
pub config_path: Option<PathBuf>,
#[arg(long, help = "Force showing fields in test diffs")]
pub show_fields: bool,
#[arg(long, help = "Show parsing statistics")]
pub stat: Option<TestStats>,
#[arg(short, long, help = "Force rebuild the parser")]
pub rebuild: bool,
#[arg(long, help = "Show only the pass-fail overview tree")]
@ -936,6 +938,7 @@ impl Test {
fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> {
let config = Config::load(self.config_path)?;
let color = env::var("NO_COLOR").map_or(true, |v| v != "1");
let stat = self.stat.unwrap_or_default();
loader.debug_build(self.debug_build);
loader.force_rebuild(self.rebuild);
@ -959,11 +962,15 @@ impl Test {
parser.set_language(language)?;
let test_dir = current_dir.join("test");
let mut stats = parse::Stats::default();
// Run the corpus tests. Look for them in `test/corpus`.
let test_corpus_dir = test_dir.join("corpus");
if test_corpus_dir.is_dir() {
let mut output = String::new();
let mut rates = Vec::new();
let mut opts = TestOptions {
output: &mut output,
path: test_corpus_dir,
debug: self.debug,
debug_graph: self.debug_graph,
@ -974,11 +981,15 @@ impl Test {
languages: languages.iter().map(|(l, n)| (n.as_str(), l)).collect(),
color,
test_num: 1,
parse_rates: &mut rates,
stat_display: stat,
stats: &mut stats,
show_fields: self.show_fields,
overview_only: self.overview_only,
};
test::run_tests_at_path(&mut parser, &mut opts)?;
println!("\n{stats}");
}
// Check that all of the queries are valid.

View file

@ -1,14 +1,17 @@
use std::{
collections::BTreeMap,
ffi::OsStr,
fmt::Write as _,
fs,
io::{self, Write},
path::{Path, PathBuf},
str,
time::Duration,
};
use anstyle::{AnsiColor, Color, Style};
use anyhow::{anyhow, Context, Result};
use clap::ValueEnum;
use indoc::indoc;
use lazy_static::lazy_static;
use regex::{
@ -16,10 +19,11 @@ use regex::{
Regex,
};
use similar::{ChangeTag, TextDiff};
use tree_sitter::{format_sexp, Language, LogType, Parser, Query};
use tree_sitter::{format_sexp, Language, LogType, Parser, Query, Tree};
use walkdir::WalkDir;
use super::util;
use crate::parse::Stats;
lazy_static! {
static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(
@ -95,7 +99,16 @@ impl Default for TestAttributes {
}
}
#[derive(ValueEnum, Default, Copy, Clone, PartialEq, Eq)]
pub enum TestStats {
All,
#[default]
OutliersAndTotal,
TotalOnly,
}
pub struct TestOptions<'a> {
pub output: &'a mut String,
pub path: PathBuf,
pub debug: bool,
pub debug_graph: bool,
@ -106,6 +119,11 @@ pub struct TestOptions<'a> {
pub languages: BTreeMap<&'a str, &'a Language>,
pub color: bool,
pub test_num: usize,
/// Whether a test ran for the nth line in `output`, the true parse rate, and the adjusted
/// parse rate
pub parse_rates: &'a mut Vec<(bool, Option<(f64, f64)>)>,
pub stat_display: TestStats,
pub stats: &'a mut Stats,
pub show_fields: bool,
pub overview_only: bool,
}
@ -138,6 +156,53 @@ pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result<
&mut has_parse_errors,
)?;
let (count, total_adj_parse_time) = opts
.parse_rates
.iter()
.flat_map(|(_, rates)| rates)
.fold((0usize, 0.0f64), |(count, rate_accum), (_, adj_rate)| {
(count + 1, rate_accum + adj_rate)
});
let avg = total_adj_parse_time / count as f64;
let std_dev = {
let variance = opts
.parse_rates
.iter()
.flat_map(|(_, rates)| rates)
.map(|(_, rate_i)| (rate_i - avg).powi(2))
.sum::<f64>()
/ count as f64;
variance.sqrt()
};
for ((is_test, rates), out_line) in opts.parse_rates.iter().zip(opts.output.lines()) {
let stat_display = if !is_test {
// Test group, no actual parsing took place
String::new()
} else {
match (opts.stat_display, rates) {
(TestStats::TotalOnly, _) | (_, None) => String::new(),
(display, Some((true_rate, adj_rate))) => {
let mut stats = if display == TestStats::All {
format!(" ({true_rate:.3} bytes/ms)")
} else {
String::new()
};
// 3 standard deviations below the mean, aka the "Empirical Rule"
if *adj_rate < 3.0f64.mul_add(-std_dev, avg) {
stats += &paint(
opts.color.then_some(AnsiColor::Red),
&format!(" -- Warning: Slow parse rate ({true_rate:.3} bytes/ms)"),
);
}
stats
}
}
};
println!("{out_line}{stat_display}");
}
parser.stop_printing_dot_graphs();
if failures.is_empty() {
@ -354,24 +419,28 @@ fn run_tests(
attributes_str,
attributes,
} => {
print!("{}", " ".repeat(indent_level as usize));
write!(opts.output, "{}", " ".repeat(indent_level as usize))?;
if attributes.skip {
println!(
writeln!(
opts.output,
"{:>3}. ⌀ {}",
opts.test_num,
paint(opts.color.then_some(AnsiColor::Yellow), &name),
);
)?;
opts.parse_rates.push((true, None));
opts.test_num += 1;
return Ok(true);
}
if !attributes.platform {
println!(
writeln!(
opts.output,
"{:>3}. ⌀ {}",
opts.test_num,
paint(opts.color.then_some(AnsiColor::Magenta), &name),
);
)?;
opts.parse_rates.push((true, None));
opts.test_num += 1;
return Ok(true);
}
@ -384,15 +453,30 @@ fn run_tests(
.ok_or_else(|| anyhow!("Language not found: {language_name}"))?;
parser.set_language(language)?;
}
let start = std::time::Instant::now();
let tree = parser.parse(&input, None).unwrap();
{
let parse_time = start.elapsed();
let true_parse_rate = tree.root_node().byte_range().len() as f64
/ (parse_time.as_nanos() as f64 / 1_000_000.0);
let adj_parse_rate = adjusted_parse_rate(&tree, parse_time);
opts.parse_rates
.push((true, Some((true_parse_rate, adj_parse_rate))));
opts.stats.total_parses += 1;
opts.stats.total_duration += parse_time;
opts.stats.total_bytes += tree.root_node().byte_range().len();
}
if attributes.error {
if tree.root_node().has_error() {
println!(
writeln!(
opts.output,
"{:>3}. ✓ {}",
opts.test_num,
paint(opts.color.then_some(AnsiColor::Green), &name)
);
paint(opts.color.then_some(AnsiColor::Green), &name),
)?;
opts.stats.successful_parses += 1;
if opts.update {
let input = String::from_utf8(input.clone()).unwrap();
let output = format_sexp(&output, 0);
@ -419,11 +503,12 @@ fn run_tests(
divider_delim_len,
));
}
println!(
writeln!(
opts.output,
"{:>3}. ✗ {}",
opts.test_num,
paint(opts.color.then_some(AnsiColor::Red), &name)
);
paint(opts.color.then_some(AnsiColor::Red), &name),
)?;
failures.push((
name.clone(),
tree.root_node().to_sexp(),
@ -441,11 +526,13 @@ fn run_tests(
}
if actual == output {
println!(
writeln!(
opts.output,
"{:>3}. ✓ {}",
opts.test_num,
paint(opts.color.then_some(AnsiColor::Green), &name)
);
paint(opts.color.then_some(AnsiColor::Green), &name),
)?;
opts.stats.successful_parses += 1;
if opts.update {
let input = String::from_utf8(input.clone()).unwrap();
let output = format_sexp(&output, 0);
@ -490,18 +577,20 @@ fn run_tests(
header_delim_len,
divider_delim_len,
));
println!(
writeln!(
opts.output,
"{:>3}. ✓ {}",
opts.test_num,
paint(opts.color.then_some(AnsiColor::Blue), &name),
);
)?;
}
} else {
println!(
writeln!(
opts.output,
"{:>3}. ✗ {}",
opts.test_num,
paint(opts.color.then_some(AnsiColor::Red), &name),
);
)?;
}
failures.push((name.clone(), actual, output.clone()));
@ -583,8 +672,12 @@ fn run_tests(
}
if !has_printed && indent_level > 1 {
has_printed = true;
print!("{}", " ".repeat((indent_level - 1) as usize));
println!("{name}:");
writeln!(
opts.output,
"{}{name}:",
" ".repeat((indent_level - 1) as usize)
)?;
opts.parse_rates.push((false, None));
}
if !run_tests(
parser,
@ -620,6 +713,18 @@ fn count_subtests(test_entry: &TestEntry) -> usize {
}
}
// Parse time is interpreted in ns before converting to ms to avoid truncation issues
// Parse rates often have several outliers, leading to a large standard deviation. Taking
// the log of these rates serves to "flatten" out the distribution, yielding a more
// usable standard deviation for finding statistically significant slow parse rates
// NOTE: This is just a heuristic
#[must_use]
pub fn adjusted_parse_rate(tree: &Tree, parse_time: Duration) -> f64 {
f64::ln(
tree.root_node().byte_range().len() as f64 / (parse_time.as_nanos() as f64 / 1_000_000.0),
)
}
fn write_tests(
file_path: &Path,
corrected_entries: &[(String, String, String, String, usize, usize)],