diff --git a/cli/src/main.rs b/cli/src/main.rs index ec71d855..9b85c71d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -24,7 +24,7 @@ use tree_sitter_cli::{ logger, parse::{self, ParseFileOptions, ParseOutput, ParseTheme}, playground, query, tags, - test::{self, TestOptions}, + test::{self, TestOptions, TestStats}, test_highlight, test_tags, util, version, wasm, }; use tree_sitter_config::Config; @@ -274,6 +274,8 @@ struct Test { pub config_path: Option, #[arg(long, help = "Force showing fields in test diffs")] pub show_fields: bool, + #[arg(long, help = "Show parsing statistics")] + pub stat: Option, #[arg(short, long, help = "Force rebuild the parser")] pub rebuild: bool, #[arg(long, help = "Show only the pass-fail overview tree")] @@ -936,6 +938,7 @@ impl Test { fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { let config = Config::load(self.config_path)?; let color = env::var("NO_COLOR").map_or(true, |v| v != "1"); + let stat = self.stat.unwrap_or_default(); loader.debug_build(self.debug_build); loader.force_rebuild(self.rebuild); @@ -959,11 +962,15 @@ impl Test { parser.set_language(language)?; let test_dir = current_dir.join("test"); + let mut stats = parse::Stats::default(); // Run the corpus tests. Look for them in `test/corpus`. let test_corpus_dir = test_dir.join("corpus"); if test_corpus_dir.is_dir() { + let mut output = String::new(); + let mut rates = Vec::new(); let mut opts = TestOptions { + output: &mut output, path: test_corpus_dir, debug: self.debug, debug_graph: self.debug_graph, @@ -974,11 +981,15 @@ impl Test { languages: languages.iter().map(|(l, n)| (n.as_str(), l)).collect(), color, test_num: 1, + parse_rates: &mut rates, + stat_display: stat, + stats: &mut stats, show_fields: self.show_fields, overview_only: self.overview_only, }; test::run_tests_at_path(&mut parser, &mut opts)?; + println!("\n{stats}"); } // Check that all of the queries are valid. diff --git a/cli/src/test.rs b/cli/src/test.rs index 98220093..11c3bfce 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -1,14 +1,17 @@ use std::{ collections::BTreeMap, ffi::OsStr, + fmt::Write as _, fs, io::{self, Write}, path::{Path, PathBuf}, str, + time::Duration, }; use anstyle::{AnsiColor, Color, Style}; use anyhow::{anyhow, Context, Result}; +use clap::ValueEnum; use indoc::indoc; use lazy_static::lazy_static; use regex::{ @@ -16,10 +19,11 @@ use regex::{ Regex, }; use similar::{ChangeTag, TextDiff}; -use tree_sitter::{format_sexp, Language, LogType, Parser, Query}; +use tree_sitter::{format_sexp, Language, LogType, Parser, Query, Tree}; use walkdir::WalkDir; use super::util; +use crate::parse::Stats; lazy_static! { static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new( @@ -95,7 +99,16 @@ impl Default for TestAttributes { } } +#[derive(ValueEnum, Default, Copy, Clone, PartialEq, Eq)] +pub enum TestStats { + All, + #[default] + OutliersAndTotal, + TotalOnly, +} + pub struct TestOptions<'a> { + pub output: &'a mut String, pub path: PathBuf, pub debug: bool, pub debug_graph: bool, @@ -106,6 +119,11 @@ pub struct TestOptions<'a> { pub languages: BTreeMap<&'a str, &'a Language>, pub color: bool, pub test_num: usize, + /// Whether a test ran for the nth line in `output`, the true parse rate, and the adjusted + /// parse rate + pub parse_rates: &'a mut Vec<(bool, Option<(f64, f64)>)>, + pub stat_display: TestStats, + pub stats: &'a mut Stats, pub show_fields: bool, pub overview_only: bool, } @@ -138,6 +156,53 @@ pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result< &mut has_parse_errors, )?; + let (count, total_adj_parse_time) = opts + .parse_rates + .iter() + .flat_map(|(_, rates)| rates) + .fold((0usize, 0.0f64), |(count, rate_accum), (_, adj_rate)| { + (count + 1, rate_accum + adj_rate) + }); + + let avg = total_adj_parse_time / count as f64; + let std_dev = { + let variance = opts + .parse_rates + .iter() + .flat_map(|(_, rates)| rates) + .map(|(_, rate_i)| (rate_i - avg).powi(2)) + .sum::() + / count as f64; + variance.sqrt() + }; + + for ((is_test, rates), out_line) in opts.parse_rates.iter().zip(opts.output.lines()) { + let stat_display = if !is_test { + // Test group, no actual parsing took place + String::new() + } else { + match (opts.stat_display, rates) { + (TestStats::TotalOnly, _) | (_, None) => String::new(), + (display, Some((true_rate, adj_rate))) => { + let mut stats = if display == TestStats::All { + format!(" ({true_rate:.3} bytes/ms)") + } else { + String::new() + }; + // 3 standard deviations below the mean, aka the "Empirical Rule" + if *adj_rate < 3.0f64.mul_add(-std_dev, avg) { + stats += &paint( + opts.color.then_some(AnsiColor::Red), + &format!(" -- Warning: Slow parse rate ({true_rate:.3} bytes/ms)"), + ); + } + stats + } + } + }; + println!("{out_line}{stat_display}"); + } + parser.stop_printing_dot_graphs(); if failures.is_empty() { @@ -354,24 +419,28 @@ fn run_tests( attributes_str, attributes, } => { - print!("{}", " ".repeat(indent_level as usize)); + write!(opts.output, "{}", " ".repeat(indent_level as usize))?; if attributes.skip { - println!( + writeln!( + opts.output, "{:>3}. ⌀ {}", opts.test_num, paint(opts.color.then_some(AnsiColor::Yellow), &name), - ); + )?; + opts.parse_rates.push((true, None)); opts.test_num += 1; return Ok(true); } if !attributes.platform { - println!( + writeln!( + opts.output, "{:>3}. ⌀ {}", opts.test_num, paint(opts.color.then_some(AnsiColor::Magenta), &name), - ); + )?; + opts.parse_rates.push((true, None)); opts.test_num += 1; return Ok(true); } @@ -384,15 +453,30 @@ fn run_tests( .ok_or_else(|| anyhow!("Language not found: {language_name}"))?; parser.set_language(language)?; } + let start = std::time::Instant::now(); let tree = parser.parse(&input, None).unwrap(); + { + let parse_time = start.elapsed(); + let true_parse_rate = tree.root_node().byte_range().len() as f64 + / (parse_time.as_nanos() as f64 / 1_000_000.0); + let adj_parse_rate = adjusted_parse_rate(&tree, parse_time); + + opts.parse_rates + .push((true, Some((true_parse_rate, adj_parse_rate)))); + opts.stats.total_parses += 1; + opts.stats.total_duration += parse_time; + opts.stats.total_bytes += tree.root_node().byte_range().len(); + } if attributes.error { if tree.root_node().has_error() { - println!( + writeln!( + opts.output, "{:>3}. ✓ {}", opts.test_num, - paint(opts.color.then_some(AnsiColor::Green), &name) - ); + paint(opts.color.then_some(AnsiColor::Green), &name), + )?; + opts.stats.successful_parses += 1; if opts.update { let input = String::from_utf8(input.clone()).unwrap(); let output = format_sexp(&output, 0); @@ -419,11 +503,12 @@ fn run_tests( divider_delim_len, )); } - println!( + writeln!( + opts.output, "{:>3}. ✗ {}", opts.test_num, - paint(opts.color.then_some(AnsiColor::Red), &name) - ); + paint(opts.color.then_some(AnsiColor::Red), &name), + )?; failures.push(( name.clone(), tree.root_node().to_sexp(), @@ -441,11 +526,13 @@ fn run_tests( } if actual == output { - println!( + writeln!( + opts.output, "{:>3}. ✓ {}", opts.test_num, - paint(opts.color.then_some(AnsiColor::Green), &name) - ); + paint(opts.color.then_some(AnsiColor::Green), &name), + )?; + opts.stats.successful_parses += 1; if opts.update { let input = String::from_utf8(input.clone()).unwrap(); let output = format_sexp(&output, 0); @@ -490,18 +577,20 @@ fn run_tests( header_delim_len, divider_delim_len, )); - println!( + writeln!( + opts.output, "{:>3}. ✓ {}", opts.test_num, paint(opts.color.then_some(AnsiColor::Blue), &name), - ); + )?; } } else { - println!( + writeln!( + opts.output, "{:>3}. ✗ {}", opts.test_num, paint(opts.color.then_some(AnsiColor::Red), &name), - ); + )?; } failures.push((name.clone(), actual, output.clone())); @@ -583,8 +672,12 @@ fn run_tests( } if !has_printed && indent_level > 1 { has_printed = true; - print!("{}", " ".repeat((indent_level - 1) as usize)); - println!("{name}:"); + writeln!( + opts.output, + "{}{name}:", + " ".repeat((indent_level - 1) as usize) + )?; + opts.parse_rates.push((false, None)); } if !run_tests( parser, @@ -620,6 +713,18 @@ fn count_subtests(test_entry: &TestEntry) -> usize { } } +// Parse time is interpreted in ns before converting to ms to avoid truncation issues +// Parse rates often have several outliers, leading to a large standard deviation. Taking +// the log of these rates serves to "flatten" out the distribution, yielding a more +// usable standard deviation for finding statistically significant slow parse rates +// NOTE: This is just a heuristic +#[must_use] +pub fn adjusted_parse_rate(tree: &Tree, parse_time: Duration) -> f64 { + f64::ln( + tree.root_node().byte_range().len() as f64 / (parse_time.as_nanos() as f64 / 1_000_000.0), + ) +} + fn write_tests( file_path: &Path, corrected_entries: &[(String, String, String, String, usize, usize)],