feat: add fuzz subcommand

This commit is contained in:
Amaan Qureshi 2024-04-15 22:41:54 -04:00
parent 7f4a57817d
commit e553578696
24 changed files with 827 additions and 360 deletions

View file

@ -1,23 +1,26 @@
use std::{collections::HashMap, env, fs};
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use tree_sitter::Parser;
use tree_sitter_proc_macro::test_with_seed;
use super::helpers::{
allocations,
edits::{get_random_edit, invert_edit},
fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
new_seed,
random::Rand,
scope_sequence::ScopeSequence,
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED,
START_SEED,
};
use crate::{
fuzz::{
corpus_test::{
check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
},
edits::{get_random_edit, invert_edit},
flatten_tests, new_seed,
random::Rand,
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_GRAPH_ENABLED,
START_SEED,
},
generate,
parse::perform_edit,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
util,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
tests::{
allocations,
helpers::fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
},
};
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
@ -79,7 +82,7 @@ fn test_corpus_for_json(seed: usize) {
#[ignore]
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php(seed: usize) {
test_language_corpus("php", seed, None, Some("php"));
test_language_corpus("php", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
@ -107,7 +110,7 @@ fn test_corpus_for_tsx(seed: usize) {
test_language_corpus("typescript", seed, None, Some("tsx"));
}
fn test_language_corpus(
pub fn test_language_corpus(
language_name: &str,
start_seed: usize,
skipped: Option<&[&str]>,
@ -120,17 +123,23 @@ fn test_language_corpus(
let template_corpus_dir = fixtures_dir().join("template_corpus");
let corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
println!("Testing {language_name} corpus @ {}", corpus_dir.display());
let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt"));
let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt"));
let main_tests = parse_tests(&corpus_dir).unwrap();
let error_tests = parse_tests(&error_corpus_file).unwrap_or_default();
let template_tests = parse_tests(&template_corpus_file).unwrap_or_default();
let mut tests = flatten_tests(main_tests);
tests.extend(flatten_tests(error_tests));
tests.extend(flatten_tests(template_tests).into_iter().map(|mut t| {
t.template_delimiters = Some(("<%", "%>"));
t
}));
let mut tests = flatten_tests(main_tests, EXAMPLE_FILTER.as_ref());
tests.extend(flatten_tests(error_tests, EXAMPLE_FILTER.as_ref()));
tests.extend(
flatten_tests(template_tests, EXAMPLE_FILTER.as_ref())
.into_iter()
.map(|mut t| {
t.template_delimiters = Some(("<%", "%>"));
t
}),
);
tests.retain(|t| t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir)));
@ -185,7 +194,8 @@ fn test_language_corpus(
}
true
});
})
.unwrap();
if !passed {
failure_count += 1;
@ -279,7 +289,7 @@ fn test_language_corpus(
}
true
});
}).unwrap();
if !passed {
failure_count += 1;
@ -367,7 +377,7 @@ fn test_feature_corpus_files() {
let c_code = generate_result.unwrap().1;
let language = get_test_language(language_name, &c_code, Some(&test_path));
let test = parse_tests(&corpus_path).unwrap();
let tests = flatten_tests(test);
let tests = flatten_tests(test, EXAMPLE_FILTER.as_ref());
if !tests.is_empty() {
eprintln!("test language: {language_name:?}");
@ -393,7 +403,8 @@ fn test_feature_corpus_files() {
println!();
false
}
});
})
.unwrap();
if !passed {
failure_count += 1;
@ -405,202 +416,3 @@ fn test_feature_corpus_files() {
assert!(failure_count == 0, "{failure_count} corpus tests failed");
}
/// Asserts that every node in `tree` is internally consistent with `input`.
///
/// For each node this verifies that its byte offsets agree with its row/column
/// positions, that children appear in order and stay inside their parent, that
/// the reported named-child count matches the children actually present, and
/// that a node is flagged as changed whenever one of its children is.
///
/// Panics (via `assert!`) on the first inconsistency found.
fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
    /// Validates `node` and, recursively, all of its descendants.
    /// `line_offsets[row]` is the byte offset at which `row` begins.
    fn check(node: Node, line_offsets: &[usize]) {
        let (start_byte, end_byte) = (node.start_byte(), node.end_byte());
        let (start_point, end_point) = (node.start_position(), node.end_position());

        assert!(start_byte <= end_byte);
        assert!(start_point <= end_point);
        // A byte offset must equal the start of its row plus its column.
        assert_eq!(
            start_byte,
            line_offsets[start_point.row] + start_point.column
        );
        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);

        let mut prev_end_byte = start_byte;
        let mut prev_end_point = start_point;
        let mut any_child_changed = false;
        let mut named_children_seen = 0;

        let child_total = node.child_count();
        for child in (0..child_total).map(|index| node.child(index).unwrap()) {
            // Siblings must not overlap and must appear in document order.
            assert!(child.start_byte() >= prev_end_byte);
            assert!(child.start_position() >= prev_end_point);
            check(child, line_offsets);
            any_child_changed |= child.has_changes();
            if child.is_named() {
                named_children_seen += 1;
            }
            prev_end_byte = child.end_byte();
            prev_end_point = child.end_position();
        }

        assert_eq!(named_children_seen, node.named_child_count());

        // The parent must extend at least as far as its last child.
        if node.child_count() > 0 {
            assert!(end_byte >= prev_end_byte);
            assert!(end_point >= prev_end_point);
        }

        // Change flags must propagate upwards.
        if any_child_changed {
            assert!(node.has_changes());
        }
    }

    // Row 0 starts at offset 0; every other row starts just after a `\n`.
    let line_offsets: Vec<usize> = std::iter::once(0)
        .chain(input.iter().enumerate().filter_map(|(index, &byte)| {
            if byte == b'\n' {
                Some(index + 1)
            } else {
                None
            }
        }))
        .collect();

    check(tree.root_node(), &line_offsets);
}
/// Verifies that the changed ranges reported between `old_tree` and `new_tree`
/// are plausible and account for every scope difference between the two trees.
///
/// Returns `Err` with a description when a changed range ends beyond the end
/// of both trees, or when `ScopeSequence::check_changes` finds a difference
/// that no changed range covers.
fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
    let changed_ranges: Vec<_> = old_tree.changed_ranges(new_tree).collect();
    let old_scopes = ScopeSequence::new(old_tree);
    let new_scopes = ScopeSequence::new(new_tree);

    // Only the furthest end of the two root nodes matters for the bounds check.
    let old_root = old_tree.root_node().range();
    let new_root = new_tree.root_node().range();
    let max_end_byte = old_root.end_byte.max(new_root.end_byte);
    let max_end_point = old_root.end_point.max(new_root.end_point);

    let out_of_bounds = changed_ranges
        .iter()
        .find(|range| range.end_byte > max_end_byte || range.end_point > max_end_point);
    if let Some(range) = out_of_bounds {
        return Err(format!(
            "changed range extends outside of the old and new trees {range:?}",
        ));
    }

    old_scopes.check_changes(&new_scopes, input, &changed_ranges)
}
/// Restricts `parser` to the regions of `input` enclosed by template delimiters.
///
/// When `delimiters` is `Some((start, end))`, every span that begins right
/// after an occurrence of `start` and runs up to (but not including) the next
/// occurrence of `end` becomes an included range. A span whose `end` is never
/// found extends to the end of `input`. When `delimiters` is `None`, the
/// included ranges are reset to an empty list.
///
/// Delimiters of any non-zero length are supported; an empty delimiter never
/// matches.
///
/// Panics if the parser rejects the computed ranges.
fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
    /// Returns the offset of the first occurrence of `needle` in `haystack`.
    /// An empty needle never matches (and `windows(0)` would panic).
    fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        if needle.is_empty() {
            return None;
        }
        haystack
            .windows(needle.len())
            .position(|window| window == needle)
    }

    let Some((start, end)) = delimiters else {
        parser.set_included_ranges(&[]).unwrap();
        return;
    };

    let mut ranges = Vec::new();
    let mut ix = 0;
    while ix < input.len() {
        let Some(found) = find(&input[ix..], start.as_bytes()) else {
            break;
        };
        // The included range begins just past the opening delimiter.
        let start_ix = ix + found + start.len();
        let end_ix = find(&input[start_ix..], end.as_bytes())
            .map_or(input.len(), |found| start_ix + found);
        // Resume the search at the closing delimiter.
        ix = end_ix;
        ranges.push(Range {
            start_byte: start_ix,
            end_byte: end_ix,
            start_point: point_for_offset(input, start_ix),
            end_point: point_for_offset(input, end_ix),
        });
    }
    parser.set_included_ranges(&ranges).unwrap();
}
fn point_for_offset(text: &[u8], offset: usize) -> Point {
let mut point = Point::default();
for byte in &text[..offset] {
if *byte == b'\n' {
point.row += 1;
point.column = 0;
} else {
point.column += 1;
}
}
point
}
/// Creates a `Parser`, attaching logging according to the global log flags.
///
/// When `LOG_ENABLED` is set, log messages are printed to stderr. Otherwise,
/// when `LOG_GRAPH_ENABLED` is set, graph logging is started for
/// `log_filename` and the resulting session is stored in `session`.
fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
    let mut parser = Parser::new();

    if *LOG_ENABLED {
        parser.set_logger(Some(Box::new(|log_type, msg| match log_type {
            LogType::Lex => eprintln!(" {msg}"),
            _ => eprintln!("{msg}"),
        })));
        return parser;
    }

    // Graph logging is only considered when plain logging is off.
    if *LOG_GRAPH_ENABLED {
        let graph_session = util::log_graphs(&mut parser, log_filename, false).unwrap();
        *session = Some(graph_session);
    }

    parser
}
/// One corpus example extracted from a `TestEntry` tree by `flatten_tests`.
struct FlattenedTest {
/// Example name, prefixed with the names of its enclosing groups joined by " - ".
name: String,
/// Raw bytes of the example's input.
input: Vec<u8>,
/// The example's `output` string, copied from the `TestEntry::Example`.
output: String,
/// Copied from the example's `attributes.languages`.
languages: Vec<Box<str>>,
/// Copied from the example's `has_fields` flag.
has_fields: bool,
/// Optional (start, end) delimiter pair; `flatten_tests` always sets this to `None`.
template_delimiters: Option<(&'static str, &'static str)>,
}
/// Flattens a nested `TestEntry` tree into a list of individual examples.
///
/// Each example's name is prefixed with the names of its enclosing groups
/// (other than the root group), joined by " - ". When `EXAMPLE_FILTER` is set,
/// examples whose full name does not contain the filter string are dropped.
fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
    /// Prepends `prefix` and a " - " separator to `name` unless `prefix` is empty.
    fn qualify(prefix: &str, name: String) -> String {
        if prefix.is_empty() {
            name
        } else {
            format!("{prefix} - {name}")
        }
    }

    /// Appends every example reachable from `entry` to `flattened`.
    fn collect(
        entry: TestEntry,
        is_root: bool,
        prefix: &str,
        flattened: &mut Vec<FlattenedTest>,
    ) {
        match entry {
            TestEntry::Example {
                name,
                input,
                output,
                has_fields,
                attributes,
                ..
            } => {
                let name = qualify(prefix, name);
                let excluded = EXAMPLE_FILTER
                    .as_ref()
                    .map_or(false, |filter| !name.contains(filter.as_str()));
                if !excluded {
                    flattened.push(FlattenedTest {
                        name,
                        input,
                        output,
                        has_fields,
                        languages: attributes.languages,
                        template_delimiters: None,
                    });
                }
            }
            TestEntry::Group { name, children, .. } => {
                // The root group's own name is passed down without a prefix.
                let name = if is_root { name } else { qualify(prefix, name) };
                for child in children {
                    collect(child, false, &name, flattened);
                }
            }
        }
    }

    let mut flattened = Vec::new();
    collect(test, true, "", &mut flattened);
    flattened
}

View file

@ -1,8 +1,5 @@
use std::{ops::Range, str};
use super::random::Rand;
use crate::parse::Edit;
#[derive(Debug)]
pub struct ReadRecorder<'a> {
content: &'a [u8],
@ -50,55 +47,3 @@ impl<'a> ReadRecorder<'a> {
result
}
}
/// Builds the edit that undoes `edit`.
///
/// `input` is the text *before* `edit` was applied. The inverse sits at the
/// same position, deletes as many bytes as `edit` inserted, and re-inserts the
/// bytes that `edit` removed. Panics if the deleted span lies outside `input`.
pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
    let start = edit.position;
    let end = start + edit.deleted_length;
    let restored = input[start..end].to_vec();
    Edit {
        position: start,
        deleted_length: edit.inserted_text.len(),
        inserted_text: restored,
    }
}
/// Produces a random edit that is valid for `input`.
///
/// One of four edit shapes is chosen from a random draw made with
/// `rand.unsigned(10)`:
/// - draw below 2: append random words at the end of the input;
/// - draw below 5: delete up to 30 bytes from the end of the input;
/// - draw below 8: insert random words at a random position;
/// - otherwise: replace a random span with random words.
pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
    let input_len = input.len();
    match rand.unsigned(10) {
        // Insert text at end
        0 | 1 => {
            let inserted_text = rand.words(3);
            Edit {
                position: input_len,
                deleted_length: 0,
                inserted_text,
            }
        }
        // Delete text from the end
        2..=4 => {
            let deleted_length = rand.unsigned(30).min(input_len);
            Edit {
                position: input_len - deleted_length,
                deleted_length,
                inserted_text: Vec::new(),
            }
        }
        // Insert at a random position
        5..=7 => {
            let position = rand.unsigned(input_len);
            let word_count = 1 + rand.unsigned(3);
            Edit {
                position,
                deleted_length: 0,
                inserted_text: rand.words(word_count),
            }
        }
        // Replace at random position
        _ => {
            let position = rand.unsigned(input_len);
            let deleted_length = rand.unsigned(input_len - position);
            let word_count = 1 + rand.unsigned(3);
            Edit {
                position,
                deleted_length,
                inserted_text: rand.words(word_count),
            }
        }
    }
}

View file

@ -18,7 +18,7 @@ lazy_static! {
static ref TEST_LOADER: Loader = {
let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
loader.use_debug_build(true);
loader.debug_build(true);
}
loader
};

View file

@ -1,35 +1,4 @@
pub(super) mod allocations;
pub(super) mod edits;
pub mod allocations;
pub mod edits;
pub(super) mod fixtures;
pub(super) mod query_helpers;
pub(super) mod random;
pub(super) mod scope_sequence;
use std::env;
use lazy_static::lazy_static;
use rand::Rng;
// Logging switches and name filters, each read from an environment variable.
lazy_static! {
// True when `TREE_SITTER_LOG` can be read from the environment.
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
// True when `TREE_SITTER_LOG_GRAPHS` can be read from the environment.
pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_LOG_GRAPHS").is_ok();
// Value of `TREE_SITTER_LANGUAGE`, if it can be read.
pub static ref LANGUAGE_FILTER: Option<String> = env::var("TREE_SITTER_LANGUAGE").ok();
// Value of `TREE_SITTER_EXAMPLE`, if it can be read.
pub static ref EXAMPLE_FILTER: Option<String> = env::var("TREE_SITTER_EXAMPLE").ok();
}
// Numeric settings: the starting seed and the edit/iteration counts.
lazy_static! {
// Seed obtained from `new_seed()`.
pub static ref START_SEED: usize = new_seed();
// Parsed from `TREE_SITTER_EDITS`; falls back to 3 when unset or unparsable.
pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
// Parsed from `TREE_SITTER_ITERATIONS`; falls back to 10 when unset or unparsable.
pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
}
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable cannot be read or does not parse.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse().ok()
}
/// Returns the seed to use for randomized tests.
///
/// Uses the value of `TREE_SITTER_SEED` when it parses as a `usize`;
/// otherwise draws a fresh random seed from the thread-local generator.
pub fn new_seed() -> usize {
    match int_env_var("TREE_SITTER_SEED") {
        Some(seed) => seed,
        None => rand::thread_rng().gen::<usize>(),
    }
}

View file

@ -1,43 +0,0 @@
use rand::{
distributions::Alphanumeric,
prelude::{Rng, SeedableRng, StdRng},
};
/// Single-character operator tokens that `Rand::words` may emit in place of
/// an alphanumeric word.
const OPERATORS: &[char] = &[
'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%',
];
/// Seedable random source used to generate edits and filler text.
pub struct Rand(StdRng);

impl Rand {
    /// Creates a generator seeded with `seed`.
    pub fn new(seed: usize) -> Self {
        let rng = StdRng::seed_from_u64(seed as u64);
        Self(rng)
    }

    /// Returns a random integer in the inclusive range `0..=max`.
    pub fn unsigned(&mut self, max: usize) -> usize {
        self.0.gen_range(0..=max)
    }

    /// Generates between 0 and `max_count` random "words" as bytes.
    ///
    /// Words are separated by a space or, occasionally, a newline. Each word
    /// is either a single character from `OPERATORS` or a run of zero to eight
    /// alphanumeric characters.
    pub fn words(&mut self, max_count: usize) -> Vec<u8> {
        let mut bytes = Vec::new();
        let word_count = self.unsigned(max_count);
        for word_ix in 0..word_count {
            // Every word after the first is preceded by a separator.
            if word_ix > 0 {
                let separator = if self.unsigned(5) == 0 { b'\n' } else { b' ' };
                bytes.push(separator);
            }

            let emit_operator = self.unsigned(3) == 0;
            if emit_operator {
                let operator = OPERATORS[self.unsigned(OPERATORS.len() - 1)];
                bytes.push(operator as u8);
            } else {
                let length = self.unsigned(8);
                bytes.extend((0..length).map(|_| self.0.sample(Alphanumeric)));
            }
        }
        bytes
    }
}

View file

@ -1,90 +0,0 @@
use tree_sitter::{Point, Range, Tree};
/// A sequence of scope stacks, one entry per byte offset covered by a tree
/// (see `ScopeSequence::new` for how it is filled).
#[derive(Debug)]
pub struct ScopeSequence(Vec<ScopeStack>);
/// The node kinds enclosing a position, outermost first.
type ScopeStack = Vec<&'static str>;
impl ScopeSequence {
/// Builds the scope sequence for `tree` by walking it with a cursor.
///
/// For every byte offset up to the end of the root node, the resulting
/// vector holds the stack of node kinds that was active when the walk
/// passed that offset.
pub fn new(tree: &Tree) -> Self {
let mut result = Self(Vec::new());
let mut scope_stack = Vec::new();
let mut cursor = tree.walk();
// True when the walk is leaving the current node, false when entering it.
let mut visited_children = false;
loop {
let node = cursor.node();
// Bytes before this node's start get the stack as it currently stands.
for _ in result.0.len()..node.start_byte() {
result.0.push(scope_stack.clone());
}
if visited_children {
// Leaving the node: fill the rest of its bytes, then drop its kind.
for _ in result.0.len()..node.end_byte() {
result.0.push(scope_stack.clone());
}
scope_stack.pop();
if cursor.goto_next_sibling() {
visited_children = false;
} else if !cursor.goto_parent() {
// No sibling and no parent: the root has been fully processed.
break;
}
} else {
// Entering the node: record its kind, then descend if it has children.
scope_stack.push(cursor.node().kind());
if !cursor.goto_first_child() {
visited_children = true;
}
}
}
result
}
/// Checks that every offset where `self` and `other` disagree is covered
/// by one of `known_changed_ranges`.
///
/// Offsets holding `\r` or `\n` in `text` are ignored. Returns `Err` with
/// a formatted report (position, byte offset, offending line, both scope
/// stacks, and the known ranges) for the first uncovered difference.
///
/// NOTE(review): `text[i]` is indexed directly for every offset up to the
/// longer of the two sequences, so this appears to assume `text` is at
/// least that long — confirm against callers.
pub fn check_changes(
&self,
other: &Self,
text: &[u8],
known_changed_ranges: &[Range],
) -> Result<(), String> {
// Row/column of offset `i`, advanced at the bottom of each iteration.
let mut position = Point { row: 0, column: 0 };
for i in 0..(self.0.len().max(other.0.len())) {
// `get` yields `None` past the end of the shorter sequence.
let stack = &self.0.get(i);
let other_stack = &other.0.get(i);
if *stack != *other_stack && ![b'\r', b'\n'].contains(&text[i]) {
// A range covers the position when start <= position < end (by point).
let containing_range = known_changed_ranges
.iter()
.find(|range| range.start_point <= position && position < range.end_point);
if containing_range.is_none() {
// Extract the line containing offset `i` for the error report.
let line = &text[(i - position.column)..]
.split(|c| *c == b'\n')
.next()
.unwrap();
return Err(format!(
concat!(
"Position: {}\n",
"Byte offset: {}\n",
"Line: {}\n",
"{}^\n",
"Old scopes: {:?}\n",
"New scopes: {:?}\n",
"Invalidated ranges: {:?}",
),
position,
i,
String::from_utf8_lossy(line),
String::from(" ").repeat(position.column + "Line: ".len()),
stack,
other_stack,
known_changed_ranges,
));
}
}
if text[i] == b'\n' {
position.row += 1;
position.column = 0;
} else {
position.column += 1;
}
}
Ok(())
}
}

View file

@ -17,3 +17,10 @@ mod tree_test;
#[cfg(feature = "wasm")]
mod wasm_language_test;
pub use crate::fuzz::{
allocations,
edits::{get_random_edit, invert_edit},
random::Rand,
ITERATION_COUNT,
};

View file

@ -1,9 +1,9 @@
use tree_sitter::{Node, Parser, Point, Tree};
use super::helpers::{
edits::get_random_edit,
fixtures::{fixtures_dir, get_language, get_test_language},
random::Rand,
use super::{
get_random_edit,
helpers::fixtures::{fixtures_dir, get_language, get_test_language},
Rand,
};
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},

View file

@ -8,13 +8,14 @@ use tree_sitter_proc_macro::retry;
use super::helpers::{
allocations,
edits::{invert_edit, ReadRecorder},
edits::ReadRecorder,
fixtures::{get_language, get_test_language},
};
use crate::{
fuzz::edits::Edit,
generate::{generate_parser_for_grammar, load_grammar_file},
parse::{perform_edit, Edit},
tests::helpers::fixtures::fixtures_dir,
parse::perform_edit,
tests::{helpers::fixtures::fixtures_dir, invert_edit},
};
#[test]

View file

@ -13,11 +13,13 @@ use super::helpers::{
allocations,
fixtures::{get_language, get_test_language},
query_helpers::{assert_query_matches, Match, Pattern},
ITERATION_COUNT,
};
use crate::{
generate::generate_parser_for_grammar,
tests::helpers::query_helpers::{collect_captures, collect_matches},
tests::{
helpers::query_helpers::{collect_captures, collect_matches},
ITERATION_COUNT,
},
};
lazy_static! {

View file

@ -2,8 +2,8 @@ use std::str;
use tree_sitter::{InputEdit, Parser, Point, Range, Tree};
use super::helpers::{edits::invert_edit, fixtures::get_language};
use crate::parse::{perform_edit, Edit};
use super::helpers::fixtures::get_language;
use crate::{fuzz::edits::Edit, parse::perform_edit, tests::invert_edit};
#[test]
fn test_tree_edit() {