Improve randomized testing setup

* Allow iterations to be specified via an env var
* Randomly decide the edit count, with a maximum
  specified via an env var.
* Accept a single seed env var instead of separate env vars for the starting seed and the trial
* Remove some noisy output
This commit is contained in:
Max Brunsfeld 2022-03-02 16:45:54 -08:00
parent 4bf5149a18
commit 7170ec7c96
7 changed files with 140 additions and 181 deletions

View file

@ -4,7 +4,8 @@ use super::helpers::{
fixtures::{fixtures_dir, get_language, get_test_language},
random::Rand,
scope_sequence::ScopeSequence,
EXAMPLE_FILTER, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED, SEED, TRIAL_FILTER,
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED,
START_SEED,
};
use crate::{
generate,
@ -15,9 +16,6 @@ use crate::{
use std::fs;
use tree_sitter::{LogType, Node, Parser, Tree};
const EDIT_COUNT: usize = 3;
const TRIAL_COUNT: usize = 10;
#[test]
fn test_bash_corpus() {
test_language_corpus("bash");
@ -79,18 +77,8 @@ fn test_rust_corpus() {
}
fn test_language_corpus(language_name: &str) {
if let Some(language_filter) = LANGUAGE_FILTER.as_ref() {
if language_filter != language_name {
return;
}
}
let grammars_dir = fixtures_dir().join("grammars");
let error_corpus_dir = fixtures_dir().join("error_corpus");
let mut failure_count = 0;
let language = get_language(language_name);
let mut corpus_dir = grammars_dir.join(language_name).join("corpus");
if !corpus_dir.is_dir() {
corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
@ -102,38 +90,39 @@ fn test_language_corpus(language_name: &str) {
let mut tests = flatten_tests(main_tests);
tests.extend(flatten_tests(error_tests));
let language = get_language(language_name);
let mut failure_count = 0;
for (example_name, input, expected_output, has_fields) in tests {
println!(" {} example - {}", language_name, example_name);
let trial = 0;
if TRIAL_FILTER.map_or(true, |t| t == trial) {
let passed = allocations::record(|| {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let tree = parser.parse(&input, None).unwrap();
let mut actual_output = tree.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
if actual_output == expected_output {
true
} else {
println!(
"Incorrect initial parse for {} - {}",
language_name, example_name,
);
print_diff_key();
print_diff(&actual_output, &expected_output);
println!("");
false
}
});
let passed = allocations::record(|| {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
if !passed {
failure_count += 1;
continue;
let tree = parser.parse(&input, None).unwrap();
let mut actual_output = tree.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
if actual_output != expected_output {
println!(
"Incorrect initial parse for {} - {}",
language_name, example_name,
);
print_diff_key();
print_diff(&actual_output, &expected_output);
println!("");
return false;
}
true
});
if !passed {
failure_count += 1;
continue;
}
let mut parser = Parser::new();
@ -141,88 +130,86 @@ fn test_language_corpus(language_name: &str) {
let tree = parser.parse(&input, None).unwrap();
drop(parser);
for trial in 1..=TRIAL_COUNT {
if TRIAL_FILTER.map_or(true, |filter| filter == trial) {
let mut rand = Rand::new(*SEED + trial);
for trial in 0..*ITERATION_COUNT {
let seed = *START_SEED + trial;
let passed = allocations::record(|| {
let mut rand = Rand::new(seed);
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let mut tree = tree.clone();
let mut input = input.clone();
let passed = allocations::record(|| {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let mut tree = tree.clone();
let mut input = input.clone();
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
// Perform a random series of edits and reparse.
let mut undo_stack = Vec::new();
for _ in 0..EDIT_COUNT {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit);
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
// Check that the new tree is consistent.
check_consistent_sizes(&tree2, &input);
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
println!(
"\nUnexpected scope change in trial {}\n{}\n\n",
trial, message
);
return false;
}
// Undo all of the edits and re-parse again.
while let Some(edit) = undo_stack.pop() {
perform_edit(&mut tree2, &mut input, &edit);
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
// Verify that the final tree matches the expectation from the corpus.
let mut actual_output = tree3.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
if actual_output != expected_output {
println!(
"Incorrect parse for {} - {} - trial {}",
language_name, example_name, trial
);
print_diff_key();
print_diff(&actual_output, &expected_output);
println!("");
return false;
}
// Check that the edited tree is consistent.
check_consistent_sizes(&tree3, &input);
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
eprintln!(
"Unexpected scope change in trial {}\n{}\n\n",
trial, message
);
return false;
}
true
});
if !passed {
failure_count += 1;
break;
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
// Perform a random series of edits and reparse.
let mut undo_stack = Vec::new();
for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit);
}
// println!(" seed: {}", seed);
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
// Check that the new tree is consistent.
check_consistent_sizes(&tree2, &input);
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
println!(
"\nUnexpected scope change in seed {}\n{}\n\n",
seed, message
);
return false;
}
// Undo all of the edits and re-parse again.
while let Some(edit) = undo_stack.pop() {
perform_edit(&mut tree2, &mut input, &edit);
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
// Verify that the final tree matches the expectation from the corpus.
let mut actual_output = tree3.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
if actual_output != expected_output {
println!(
"Incorrect parse for {} - {} - seed {}",
language_name, example_name, seed
);
print_diff_key();
print_diff(&actual_output, &expected_output);
println!("");
return false;
}
// Check that the edited tree is consistent.
check_consistent_sizes(&tree3, &input);
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
eprintln!("Unexpected scope change in seed {}\n{}\n\n", seed, message);
return false;
}
true
});
if !passed {
failure_count += 1;
break;
}
}
}

View file

@ -72,10 +72,7 @@ pub fn get_random_edit(rand: &mut Rand, input: &Vec<u8>) -> Edit {
}
} else if choice < 5 {
// Delete text from the end
let mut deleted_length = rand.unsigned(10);
if deleted_length > input.len() {
deleted_length = input.len();
}
let deleted_length = rand.unsigned(30).min(input.len());
Edit {
position: input.len() - deleted_length,
deleted_length,

View file

@ -9,25 +9,22 @@ use lazy_static::lazy_static;
use std::{env, time, usize};
lazy_static! {
pub static ref SEED: usize = {
let seed = env::var("TREE_SITTER_TEST_SEED")
.map(|s| usize::from_str_radix(&s, 10).unwrap())
.unwrap_or(
time::SystemTime::now()
.duration_since(time::UNIX_EPOCH)
.unwrap()
.as_secs() as usize,
);
eprintln!("\n\nRandom seed: {}\n", seed);
seed
};
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_TEST_ENABLE_LOG").is_ok();
pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_TEST_ENABLE_LOG_GRAPHS").is_ok();
pub static ref LANGUAGE_FILTER: Option<String> =
env::var("TREE_SITTER_TEST_LANGUAGE_FILTER").ok();
pub static ref EXAMPLE_FILTER: Option<String> =
env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok();
pub static ref TRIAL_FILTER: Option<usize> = env::var("TREE_SITTER_TEST_TRIAL_FILTER")
.map(|s| usize::from_str_radix(&s, 10).unwrap())
.ok();
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_LOG_GRAPHS").is_ok();
pub static ref LANGUAGE_FILTER: Option<String> = env::var("TREE_SITTER_LANGUAGE").ok();
pub static ref EXAMPLE_FILTER: Option<String> = env::var("TREE_SITTER_EXAMPLE").ok();
}
lazy_static! {
    // Base seed for the randomized tests. Taken from `TREE_SITTER_SEED` when that
    // variable holds a valid integer; otherwise falls back to the current Unix
    // time in whole seconds.
    pub static ref START_SEED: usize = int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
        let since_epoch = time::SystemTime::now()
            .duration_since(time::UNIX_EPOCH)
            .unwrap();
        since_epoch.as_secs() as usize
    });

    // Edit-count setting, overridable via `TREE_SITTER_EDITS` (default 3).
    pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);

    // Iteration-count setting, overridable via `TREE_SITTER_ITERATIONS` (default 10).
    pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
}
/// Reads the environment variable `name` and parses its value as a `usize`.
///
/// Returns `None` when the variable cannot be read (unset or not valid
/// Unicode) or when its value does not parse as a `usize`.
fn int_env_var(name: &'static str) -> Option<usize> {
    match env::var(name) {
        Ok(raw) => raw.parse::<usize>().ok(),
        Err(_) => None,
    }
}

View file

@ -1,5 +1,7 @@
use rand::distributions::Alphanumeric;
use rand::prelude::{Rng, SeedableRng, StdRng};
use rand::{
distributions::Alphanumeric,
prelude::{Rng, SeedableRng, StdRng},
};
const OPERATORS: &[char] = &[
'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.',

View file

@ -3518,18 +3518,10 @@ fn test_query_random() {
for i in 0..100 {
let seed = (start_seed + i) as u64;
let mut rand = StdRng::seed_from_u64(seed);
let (pattern_ast, range) = Pattern::random_pattern_in_tree(&pattern_tree, &mut rand);
let (pattern_ast, _) = Pattern::random_pattern_in_tree(&pattern_tree, &mut rand);
let pattern = pattern_ast.to_string();
let expected_matches = pattern_ast.matches_in_tree(&test_tree);
eprintln!(
"seed: {}\nsource_range: {:?}\npattern:\n{}\nexpected match count: {}\n",
seed,
range,
pattern,
expected_matches.len(),
);
let query = Query::new(language, &pattern).unwrap();
let mut actual_matches = cursor
.matches(

View file

@ -34,7 +34,7 @@ EOF
export RUST_BACKTRACE=full
mode=normal
test_flags="-p tree-sitter-cli"
test_flags=""
while getopts "adDghl:e:s:t:" option; do
case ${option} in
@ -56,23 +56,17 @@ while getopts "adDghl:e:s:t:" option; do
fi
test_flags="${test_flags} --target ${current_target}"
;;
l)
export TREE_SITTER_TEST_LANGUAGE_FILTER=${OPTARG}
;;
e)
export TREE_SITTER_TEST_EXAMPLE_FILTER=${OPTARG}
;;
t)
export TREE_SITTER_TEST_TRIAL_FILTER=${OPTARG}
export TREE_SITTER_EXAMPLE=${OPTARG}
;;
s)
export TREE_SITTER_TEST_SEED=${OPTARG}
export TREE_SITTER_SEED=${OPTARG}
;;
d)
export TREE_SITTER_TEST_ENABLE_LOG=1
export TREE_SITTER_LOG=1
;;
D)
export TREE_SITTER_TEST_ENABLE_LOG_GRAPHS=1
export TREE_SITTER_LOG_GRAPHS=1
;;
g)
mode=debug
@ -82,22 +76,12 @@ done
shift $(expr $OPTIND - 1)
top_level_filter=$1
if [[ \
-n $TREE_SITTER_TEST_LANGUAGE_FILTER || \
-n $TREE_SITTER_TEST_EXAMPLE_FILTER || \
-n $TREE_SITTER_TEST_TRIAL_FILTER \
]]; then
: ${top_level_filter:=corpus}
fi
if [[ "${mode}" == "debug" ]]; then
test_binary=$(
cargo test $test_flags --no-run --message-format=json 2> /dev/null |\
jq -rs 'map(select(.target.name == "tree-sitter-cli" and .executable))[0].executable'
)
lldb "${test_binary}" -- $top_level_filter
lldb "${test_binary}" -- $1
else
cargo test $test_flags --jobs 1 $top_level_filter -- --nocapture
cargo test $test_flags $1 -- --nocapture
fi

View file

@ -3,5 +3,5 @@
setlocal
set RUST_TEST_THREADS=1
set RUST_BACKTRACE=full
cargo test -p tree-sitter-cli "%~1" -- --nocapture
cargo test "%~1"
endlocal