feat: add fuzz subcommand
This commit is contained in:
parent
7f4a57817d
commit
e553578696
24 changed files with 827 additions and 360 deletions
122
cli/src/fuzz/allocations.rs
Normal file
122
cli/src/fuzz/allocations.rs
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
use std::{
|
||||
collections::HashMap,
|
||||
os::raw::c_void,
|
||||
sync::{
|
||||
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
|
||||
Mutex,
|
||||
},
|
||||
};
|
||||
|
||||
#[ctor::ctor]
|
||||
unsafe fn initialize_allocation_recording() {
|
||||
tree_sitter::set_allocator(
|
||||
Some(ts_record_malloc),
|
||||
Some(ts_record_calloc),
|
||||
Some(ts_record_realloc),
|
||||
Some(ts_record_free),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Hash)]
|
||||
struct Allocation(*const c_void);
|
||||
unsafe impl Send for Allocation {}
|
||||
unsafe impl Sync for Allocation {}
|
||||
|
||||
#[derive(Default)]
|
||||
struct AllocationRecorder {
|
||||
enabled: AtomicBool,
|
||||
allocation_count: AtomicUsize,
|
||||
outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
|
||||
}
|
||||
|
||||
thread_local! {
|
||||
static RECORDER: AllocationRecorder = AllocationRecorder::default();
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn malloc(size: usize) -> *mut c_void;
|
||||
fn calloc(count: usize, size: usize) -> *mut c_void;
|
||||
fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
|
||||
fn free(ptr: *mut c_void);
|
||||
}
|
||||
|
||||
pub fn record<T>(f: impl FnOnce() -> T) -> Result<T, String> {
|
||||
RECORDER.with(|recorder| {
|
||||
recorder.enabled.store(true, SeqCst);
|
||||
recorder.allocation_count.store(0, SeqCst);
|
||||
recorder.outstanding_allocations.lock().unwrap().clear();
|
||||
});
|
||||
|
||||
let value = f();
|
||||
|
||||
let outstanding_allocation_indices = RECORDER.with(|recorder| {
|
||||
recorder.enabled.store(false, SeqCst);
|
||||
recorder.allocation_count.store(0, SeqCst);
|
||||
recorder
|
||||
.outstanding_allocations
|
||||
.lock()
|
||||
.unwrap()
|
||||
.drain()
|
||||
.map(|e| e.1)
|
||||
.collect::<Vec<_>>()
|
||||
});
|
||||
if !outstanding_allocation_indices.is_empty() {
|
||||
return Err(format!(
|
||||
"Leaked allocation indices: {outstanding_allocation_indices:?}",
|
||||
));
|
||||
}
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
fn record_alloc(ptr: *mut c_void) {
|
||||
RECORDER.with(|recorder| {
|
||||
if recorder.enabled.load(SeqCst) {
|
||||
let count = recorder.allocation_count.fetch_add(1, SeqCst);
|
||||
recorder
|
||||
.outstanding_allocations
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(Allocation(ptr), count);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn record_dealloc(ptr: *mut c_void) {
|
||||
RECORDER.with(|recorder| {
|
||||
if recorder.enabled.load(SeqCst) {
|
||||
recorder
|
||||
.outstanding_allocations
|
||||
.lock()
|
||||
.unwrap()
|
||||
.remove(&Allocation(ptr));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
|
||||
let result = malloc(size);
|
||||
record_alloc(result);
|
||||
result
|
||||
}
|
||||
|
||||
unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
|
||||
let result = calloc(count, size);
|
||||
record_alloc(result);
|
||||
result
|
||||
}
|
||||
|
||||
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
|
||||
let result = realloc(ptr, size);
|
||||
if ptr.is_null() {
|
||||
record_alloc(result);
|
||||
} else if ptr != result {
|
||||
record_dealloc(ptr);
|
||||
record_alloc(result);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
|
||||
record_dealloc(ptr);
|
||||
free(ptr);
|
||||
}
|
||||
147
cli/src/fuzz/corpus_test.rs
Normal file
147
cli/src/fuzz/corpus_test.rs
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
|
||||
|
||||
use super::{scope_sequence::ScopeSequence, LOG_ENABLED, LOG_GRAPH_ENABLED};
|
||||
use crate::util;
|
||||
|
||||
pub fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
|
||||
fn check(node: Node, line_offsets: &[usize]) {
|
||||
let start_byte = node.start_byte();
|
||||
let end_byte = node.end_byte();
|
||||
let start_point = node.start_position();
|
||||
let end_point = node.end_position();
|
||||
|
||||
assert!(start_byte <= end_byte);
|
||||
assert!(start_point <= end_point);
|
||||
assert_eq!(
|
||||
start_byte,
|
||||
line_offsets[start_point.row] + start_point.column
|
||||
);
|
||||
assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);
|
||||
|
||||
let mut last_child_end_byte = start_byte;
|
||||
let mut last_child_end_point = start_point;
|
||||
let mut some_child_has_changes = false;
|
||||
let mut actual_named_child_count = 0;
|
||||
for i in 0..node.child_count() {
|
||||
let child = node.child(i).unwrap();
|
||||
assert!(child.start_byte() >= last_child_end_byte);
|
||||
assert!(child.start_position() >= last_child_end_point);
|
||||
check(child, line_offsets);
|
||||
if child.has_changes() {
|
||||
some_child_has_changes = true;
|
||||
}
|
||||
if child.is_named() {
|
||||
actual_named_child_count += 1;
|
||||
}
|
||||
last_child_end_byte = child.end_byte();
|
||||
last_child_end_point = child.end_position();
|
||||
}
|
||||
|
||||
assert_eq!(actual_named_child_count, node.named_child_count());
|
||||
|
||||
if node.child_count() > 0 {
|
||||
assert!(end_byte >= last_child_end_byte);
|
||||
assert!(end_point >= last_child_end_point);
|
||||
}
|
||||
|
||||
if some_child_has_changes {
|
||||
assert!(node.has_changes());
|
||||
}
|
||||
}
|
||||
|
||||
let mut line_offsets = vec![0];
|
||||
for (i, c) in input.iter().enumerate() {
|
||||
if *c == b'\n' {
|
||||
line_offsets.push(i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
check(tree.root_node(), &line_offsets);
|
||||
}
|
||||
|
||||
pub fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
|
||||
let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
|
||||
let old_scope_sequence = ScopeSequence::new(old_tree);
|
||||
let new_scope_sequence = ScopeSequence::new(new_tree);
|
||||
|
||||
let old_range = old_tree.root_node().range();
|
||||
let new_range = new_tree.root_node().range();
|
||||
|
||||
let byte_range =
|
||||
old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
|
||||
let point_range = old_range.start_point.min(new_range.start_point)
|
||||
..old_range.end_point.max(new_range.end_point);
|
||||
|
||||
for range in &changed_ranges {
|
||||
if range.end_byte > byte_range.end || range.end_point > point_range.end {
|
||||
return Err(format!(
|
||||
"changed range extends outside of the old and new trees {range:?}",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
|
||||
}
|
||||
|
||||
pub fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
|
||||
if let Some((start, end)) = delimiters {
|
||||
let mut ranges = Vec::new();
|
||||
let mut ix = 0;
|
||||
while ix < input.len() {
|
||||
let Some(mut start_ix) = input[ix..]
|
||||
.windows(2)
|
||||
.position(|win| win == start.as_bytes())
|
||||
else {
|
||||
break;
|
||||
};
|
||||
start_ix += ix + start.len();
|
||||
let end_ix = input[start_ix..]
|
||||
.windows(2)
|
||||
.position(|win| win == end.as_bytes())
|
||||
.map_or(input.len(), |ix| start_ix + ix);
|
||||
ix = end_ix;
|
||||
ranges.push(Range {
|
||||
start_byte: start_ix,
|
||||
end_byte: end_ix,
|
||||
start_point: point_for_offset(input, start_ix),
|
||||
end_point: point_for_offset(input, end_ix),
|
||||
});
|
||||
}
|
||||
|
||||
parser.set_included_ranges(&ranges).unwrap();
|
||||
} else {
|
||||
parser.set_included_ranges(&[]).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn point_for_offset(text: &[u8], offset: usize) -> Point {
|
||||
let mut point = Point::default();
|
||||
for byte in &text[..offset] {
|
||||
if *byte == b'\n' {
|
||||
point.row += 1;
|
||||
point.column = 0;
|
||||
} else {
|
||||
point.column += 1;
|
||||
}
|
||||
}
|
||||
point
|
||||
}
|
||||
|
||||
pub fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
|
||||
let mut parser = Parser::new();
|
||||
|
||||
if *LOG_ENABLED {
|
||||
parser.set_logger(Some(Box::new(|log_type, msg| {
|
||||
if log_type == LogType::Lex {
|
||||
eprintln!(" {msg}");
|
||||
} else {
|
||||
eprintln!("{msg}");
|
||||
}
|
||||
})));
|
||||
}
|
||||
if *LOG_GRAPH_ENABLED {
|
||||
*session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap());
|
||||
}
|
||||
|
||||
parser
|
||||
}
|
||||
60
cli/src/fuzz/edits.rs
Normal file
60
cli/src/fuzz/edits.rs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
use super::random::Rand;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Edit {
|
||||
pub position: usize,
|
||||
pub deleted_length: usize,
|
||||
pub inserted_text: Vec<u8>,
|
||||
}
|
||||
|
||||
pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
|
||||
let position = edit.position;
|
||||
let removed_content = &input[position..(position + edit.deleted_length)];
|
||||
Edit {
|
||||
position,
|
||||
deleted_length: edit.inserted_text.len(),
|
||||
inserted_text: removed_content.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
|
||||
let choice = rand.unsigned(10);
|
||||
if choice < 2 {
|
||||
// Insert text at end
|
||||
let inserted_text = rand.words(3);
|
||||
Edit {
|
||||
position: input.len(),
|
||||
deleted_length: 0,
|
||||
inserted_text,
|
||||
}
|
||||
} else if choice < 5 {
|
||||
// Delete text from the end
|
||||
let deleted_length = rand.unsigned(30).min(input.len());
|
||||
Edit {
|
||||
position: input.len() - deleted_length,
|
||||
deleted_length,
|
||||
inserted_text: vec![],
|
||||
}
|
||||
} else if choice < 8 {
|
||||
// Insert at a random position
|
||||
let position = rand.unsigned(input.len());
|
||||
let word_count = 1 + rand.unsigned(3);
|
||||
let inserted_text = rand.words(word_count);
|
||||
Edit {
|
||||
position,
|
||||
deleted_length: 0,
|
||||
inserted_text,
|
||||
}
|
||||
} else {
|
||||
// Replace at random position
|
||||
let position = rand.unsigned(input.len());
|
||||
let deleted_length = rand.unsigned(input.len() - position);
|
||||
let word_count = 1 + rand.unsigned(3);
|
||||
let inserted_text = rand.words(word_count);
|
||||
Edit {
|
||||
position,
|
||||
deleted_length,
|
||||
inserted_text,
|
||||
}
|
||||
}
|
||||
}
|
||||
349
cli/src/fuzz/mod.rs
Normal file
349
cli/src/fuzz/mod.rs
Normal file
|
|
@ -0,0 +1,349 @@
|
|||
use std::{collections::HashMap, env, fs, path::Path};
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use rand::Rng;
|
||||
use regex::Regex;
|
||||
use tree_sitter::{Language, Parser};
|
||||
|
||||
pub mod allocations;
|
||||
pub mod corpus_test;
|
||||
pub mod edits;
|
||||
pub mod random;
|
||||
pub mod scope_sequence;
|
||||
|
||||
use crate::{
|
||||
fuzz::{
|
||||
corpus_test::{
|
||||
check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
|
||||
},
|
||||
edits::{get_random_edit, invert_edit},
|
||||
random::Rand,
|
||||
},
|
||||
parse::perform_edit,
|
||||
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
|
||||
pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_LOG_GRAPHS").is_ok();
|
||||
pub static ref LANGUAGE_FILTER: Option<String> = env::var("TREE_SITTER_LANGUAGE").ok();
|
||||
pub static ref EXAMPLE_FILTER: Option<Regex> = regex_env_var("TREE_SITTER_EXAMPLE");
|
||||
pub static ref START_SEED: usize = new_seed();
|
||||
pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
|
||||
pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
|
||||
}
|
||||
|
||||
fn int_env_var(name: &'static str) -> Option<usize> {
|
||||
env::var(name).ok().and_then(|e| e.parse().ok())
|
||||
}
|
||||
|
||||
fn regex_env_var(name: &'static str) -> Option<Regex> {
|
||||
env::var(name).ok().and_then(|e| Regex::new(&e).ok())
|
||||
}
|
||||
|
||||
pub fn new_seed() -> usize {
|
||||
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
|
||||
let mut rng = rand::thread_rng();
|
||||
rng.gen::<usize>()
|
||||
})
|
||||
}
|
||||
|
||||
pub struct FuzzOptions {
|
||||
pub skipped: Option<Vec<String>>,
|
||||
pub subdir: Option<String>,
|
||||
pub edits: usize,
|
||||
pub iterations: usize,
|
||||
pub filter: Option<Regex>,
|
||||
pub log_graphs: bool,
|
||||
pub log: bool,
|
||||
}
|
||||
|
||||
pub fn fuzz_language_corpus(
|
||||
language: &Language,
|
||||
language_name: &str,
|
||||
start_seed: usize,
|
||||
grammar_dir: &Path,
|
||||
options: &mut FuzzOptions,
|
||||
) {
|
||||
let subdir = options.subdir.take().unwrap_or_default();
|
||||
|
||||
let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
|
||||
|
||||
if !corpus_dir.exists() || !corpus_dir.is_dir() {
|
||||
eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
|
||||
return;
|
||||
}
|
||||
|
||||
if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
|
||||
eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
|
||||
return;
|
||||
}
|
||||
|
||||
fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
|
||||
match entry {
|
||||
TestEntry::Example { attributes, .. } => {
|
||||
attributes.languages[0].is_empty()
|
||||
|| attributes
|
||||
.languages
|
||||
.iter()
|
||||
.any(|lang| lang.as_ref() == language_name)
|
||||
}
|
||||
TestEntry::Group {
|
||||
ref mut children, ..
|
||||
} => {
|
||||
children.retain_mut(|child| retain(child, language_name));
|
||||
!children.is_empty()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut main_tests = parse_tests(&corpus_dir).unwrap();
|
||||
match main_tests {
|
||||
TestEntry::Group {
|
||||
ref mut children, ..
|
||||
} => {
|
||||
children.retain_mut(|child| retain(child, language_name));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
let tests = flatten_tests(main_tests, options.filter.as_ref());
|
||||
|
||||
let mut skipped = options.skipped.as_ref().map(|x| {
|
||||
x.iter()
|
||||
.map(|x| (x.as_str(), 0))
|
||||
.collect::<HashMap<&str, usize>>()
|
||||
});
|
||||
|
||||
let mut failure_count = 0;
|
||||
|
||||
let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
|
||||
let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
|
||||
|
||||
if log_seed {
|
||||
println!(" start seed: {start_seed}");
|
||||
}
|
||||
|
||||
println!();
|
||||
for (test_index, test) in tests.iter().enumerate() {
|
||||
let test_name = format!("{language_name} - {}", test.name);
|
||||
if let Some(skipped) = skipped.as_mut() {
|
||||
if let Some(counter) = skipped.get_mut(test_name.as_str()) {
|
||||
println!(" {test_index}. {test_name} - SKIPPED");
|
||||
*counter += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
println!(" {test_index}. {test_name}");
|
||||
|
||||
let passed = allocations::record(|| {
|
||||
let mut log_session = None;
|
||||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
set_included_ranges(&mut parser, &test.input, test.template_delimiters);
|
||||
|
||||
let tree = parser.parse(&test.input, None).unwrap();
|
||||
let mut actual_output = tree.root_node().to_sexp();
|
||||
if !test.has_fields {
|
||||
actual_output = strip_sexp_fields(&actual_output);
|
||||
}
|
||||
|
||||
if actual_output != test.output {
|
||||
println!("Incorrect initial parse for {test_name}");
|
||||
print_diff_key();
|
||||
print_diff(&actual_output, &test.output, true);
|
||||
println!();
|
||||
return false;
|
||||
}
|
||||
|
||||
true
|
||||
})
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("Error: {e}");
|
||||
false
|
||||
});
|
||||
|
||||
if !passed {
|
||||
failure_count += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&test.input, None).unwrap();
|
||||
drop(parser);
|
||||
|
||||
for trial in 0..options.iterations {
|
||||
let seed = start_seed + trial;
|
||||
let passed = allocations::record(|| {
|
||||
let mut rand = Rand::new(seed);
|
||||
let mut log_session = None;
|
||||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
let mut tree = tree.clone();
|
||||
let mut input = test.input.clone();
|
||||
|
||||
if options.log_graphs {
|
||||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
}
|
||||
|
||||
// Perform a random series of edits and reparse.
|
||||
let mut undo_stack = Vec::new();
|
||||
for _ in 0..=rand.unsigned(*EDIT_COUNT) {
|
||||
let edit = get_random_edit(&mut rand, &input);
|
||||
undo_stack.push(invert_edit(&input, &edit));
|
||||
perform_edit(&mut tree, &mut input, &edit).unwrap();
|
||||
}
|
||||
|
||||
if log_seed {
|
||||
println!(" {test_index}.{trial:<2} seed: {seed}");
|
||||
}
|
||||
|
||||
if dump_edits {
|
||||
fs::create_dir_all("fuzz").unwrap();
|
||||
fs::write(
|
||||
Path::new("fuzz")
|
||||
.join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
|
||||
&input,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
if options.log_graphs {
|
||||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
}
|
||||
|
||||
set_included_ranges(&mut parser, &input, test.template_delimiters);
|
||||
let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
|
||||
|
||||
// Check that the new tree is consistent.
|
||||
check_consistent_sizes(&tree2, &input);
|
||||
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
|
||||
println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Undo all of the edits and re-parse again.
|
||||
while let Some(edit) = undo_stack.pop() {
|
||||
perform_edit(&mut tree2, &mut input, &edit).unwrap();
|
||||
}
|
||||
if options.log_graphs {
|
||||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
}
|
||||
|
||||
set_included_ranges(&mut parser, &test.input, test.template_delimiters);
|
||||
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
|
||||
|
||||
// Verify that the final tree matches the expectation from the corpus.
|
||||
let mut actual_output = tree3.root_node().to_sexp();
|
||||
if !test.has_fields {
|
||||
actual_output = strip_sexp_fields(&actual_output);
|
||||
}
|
||||
|
||||
if actual_output != test.output {
|
||||
println!("Incorrect parse for {test_name} - seed {seed}");
|
||||
print_diff_key();
|
||||
print_diff(&actual_output, &test.output, true);
|
||||
println!();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that the edited tree is consistent.
|
||||
check_consistent_sizes(&tree3, &input);
|
||||
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
|
||||
println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
true
|
||||
}).unwrap_or_else(|e| {
|
||||
eprintln!("Error: {e}");
|
||||
false
|
||||
});
|
||||
|
||||
if !passed {
|
||||
failure_count += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if failure_count != 0 {
|
||||
eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
|
||||
}
|
||||
|
||||
if let Some(skipped) = skipped.as_mut() {
|
||||
skipped.retain(|_, v| *v == 0);
|
||||
|
||||
if !skipped.is_empty() {
|
||||
println!("Non matchable skip definitions:");
|
||||
for k in skipped.keys() {
|
||||
println!(" {k}");
|
||||
}
|
||||
panic!("Non matchable skip definitions needs to be removed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FlattenedTest {
|
||||
pub name: String,
|
||||
pub input: Vec<u8>,
|
||||
pub output: String,
|
||||
pub languages: Vec<Box<str>>,
|
||||
pub has_fields: bool,
|
||||
pub template_delimiters: Option<(&'static str, &'static str)>,
|
||||
}
|
||||
|
||||
pub fn flatten_tests(test: TestEntry, filter: Option<&Regex>) -> Vec<FlattenedTest> {
|
||||
fn helper(
|
||||
test: TestEntry,
|
||||
filter: Option<&Regex>,
|
||||
is_root: bool,
|
||||
prefix: &str,
|
||||
result: &mut Vec<FlattenedTest>,
|
||||
) {
|
||||
match test {
|
||||
TestEntry::Example {
|
||||
mut name,
|
||||
input,
|
||||
output,
|
||||
has_fields,
|
||||
attributes,
|
||||
..
|
||||
} => {
|
||||
if !prefix.is_empty() {
|
||||
name.insert_str(0, " - ");
|
||||
name.insert_str(0, prefix);
|
||||
}
|
||||
if let Some(filter) = filter {
|
||||
if filter.find(&name).is_none() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
result.push(FlattenedTest {
|
||||
name,
|
||||
input,
|
||||
output,
|
||||
has_fields,
|
||||
languages: attributes.languages,
|
||||
template_delimiters: None,
|
||||
});
|
||||
}
|
||||
TestEntry::Group {
|
||||
mut name, children, ..
|
||||
} => {
|
||||
if !is_root && !prefix.is_empty() {
|
||||
name.insert_str(0, " - ");
|
||||
name.insert_str(0, prefix);
|
||||
}
|
||||
for child in children {
|
||||
helper(child, filter, false, &name, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut result = Vec::new();
|
||||
helper(test, filter, true, "", &mut result);
|
||||
result
|
||||
}
|
||||
|
|
@ -258,7 +258,7 @@ impl Generator {
|
|||
let constant_name = if let Some(symbol) = symbol {
|
||||
format!("{}_character_set_{}", self.symbol_ids[symbol], count)
|
||||
} else {
|
||||
format!("extras_character_set_{}", count)
|
||||
format!("extras_character_set_{count}")
|
||||
};
|
||||
self.large_character_set_info.push(LargeCharacterSetInfo {
|
||||
constant_name,
|
||||
|
|
@ -369,12 +369,12 @@ impl Generator {
|
|||
for symbol in &self.parse_table.symbols {
|
||||
if *symbol != Symbol::end() {
|
||||
self.symbol_order.insert(*symbol, i);
|
||||
add_line!(self, "{} = {},", self.symbol_ids[symbol], i);
|
||||
add_line!(self, "{} = {i},", self.symbol_ids[symbol]);
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
for alias in &self.unique_aliases {
|
||||
add_line!(self, "{} = {},", self.alias_ids[alias], i);
|
||||
add_line!(self, "{} = {i},", self.alias_ids[alias]);
|
||||
i += 1;
|
||||
}
|
||||
dedent!(self);
|
||||
|
|
@ -393,7 +393,7 @@ impl Generator {
|
|||
alias.value.as_str()
|
||||
}),
|
||||
);
|
||||
add_line!(self, "[{}] = \"{}\",", self.symbol_ids[symbol], name);
|
||||
add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol]);
|
||||
}
|
||||
for alias in &self.unique_aliases {
|
||||
add_line!(
|
||||
|
|
@ -450,12 +450,7 @@ impl Generator {
|
|||
indent!(self);
|
||||
add_line!(self, "[0] = NULL,");
|
||||
for field_name in &self.field_names {
|
||||
add_line!(
|
||||
self,
|
||||
"[{}] = \"{}\",",
|
||||
self.field_id(field_name),
|
||||
field_name
|
||||
);
|
||||
add_line!(self, "[{}] = \"{field_name}\",", self.field_id(field_name));
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
|
|
@ -473,7 +468,7 @@ impl Generator {
|
|||
indent!(self);
|
||||
if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
|
||||
add_line!(self, ".visible = true,");
|
||||
add_line!(self, ".named = {},", is_named);
|
||||
add_line!(self, ".named = {is_named},");
|
||||
} else {
|
||||
match self.metadata_for_symbol(*symbol).1 {
|
||||
VariableType::Named => {
|
||||
|
|
@ -529,11 +524,11 @@ impl Generator {
|
|||
continue;
|
||||
}
|
||||
|
||||
add_line!(self, "[{}] = {{", i);
|
||||
add_line!(self, "[{i}] = {{");
|
||||
indent!(self);
|
||||
for (j, alias) in production_info.alias_sequence.iter().enumerate() {
|
||||
if let Some(alias) = alias {
|
||||
add_line!(self, "[{}] = {},", j, self.alias_ids[alias]);
|
||||
add_line!(self, "[{j}] = {},", self.alias_ids[alias]);
|
||||
}
|
||||
}
|
||||
dedent!(self);
|
||||
|
|
@ -1044,9 +1039,8 @@ impl Generator {
|
|||
for i in 0..self.syntax_grammar.external_tokens.len() {
|
||||
add_line!(
|
||||
self,
|
||||
"{} = {},",
|
||||
"{} = {i},",
|
||||
self.external_token_id(&self.syntax_grammar.external_tokens[i]),
|
||||
i
|
||||
);
|
||||
}
|
||||
dedent!(self);
|
||||
|
|
@ -1133,7 +1127,7 @@ impl Generator {
|
|||
.enumerate()
|
||||
.take(self.large_state_count)
|
||||
{
|
||||
add_line!(self, "[{}] = {{", i);
|
||||
add_line!(self, "[{i}] = {{");
|
||||
indent!(self);
|
||||
|
||||
// Ensure the entries are in a deterministic order, since they are
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#![doc = include_str!("../README.md")]
|
||||
|
||||
pub mod fuzz;
|
||||
pub mod generate;
|
||||
pub mod highlight;
|
||||
pub mod logger;
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ use glob::glob;
|
|||
use regex::Regex;
|
||||
use tree_sitter::{ffi, Parser, Point};
|
||||
use tree_sitter_cli::{
|
||||
fuzz::{
|
||||
fuzz_language_corpus, FuzzOptions, EDIT_COUNT, ITERATION_COUNT, LOG_ENABLED,
|
||||
LOG_GRAPH_ENABLED, START_SEED,
|
||||
},
|
||||
generate::{self, lookup_package_json_for_path},
|
||||
highlight, logger,
|
||||
parse::{self, ParseFileOptions, ParseOutput},
|
||||
|
|
@ -36,6 +40,7 @@ enum Commands {
|
|||
BuildWasm(BuildWasm),
|
||||
Parse(Parse),
|
||||
Test(Test),
|
||||
Fuzz(Fuzz),
|
||||
Query(Query),
|
||||
Highlight(Highlight),
|
||||
Tags(Tags),
|
||||
|
|
@ -249,6 +254,25 @@ struct Test {
|
|||
pub config_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Args)]
|
||||
#[command(about = "Fuzz a parser", alias = "f")]
|
||||
struct Fuzz {
|
||||
#[arg(long, short, help = "List of test names to skip")]
|
||||
pub skip: Option<Vec<String>>,
|
||||
#[arg(long, help = "Subdirectory to the language")]
|
||||
pub subdir: Option<String>,
|
||||
#[arg(long, short, help = "Maximum number of edits to perform per fuzz test")]
|
||||
pub edits: Option<usize>,
|
||||
#[arg(long, short, help = "Number of fuzzing iterations to run per test")]
|
||||
pub iterations: Option<usize>,
|
||||
#[arg(long, short, help = "Regex pattern to filter tests")]
|
||||
pub filter: Option<Regex>,
|
||||
#[arg(long, short, help = "Enable logging of graphs and input")]
|
||||
pub log_graphs: bool,
|
||||
#[arg(long, short, help = "Enable parser logging")]
|
||||
pub log: bool,
|
||||
}
|
||||
|
||||
#[derive(Args)]
|
||||
#[command(about = "Search files using a syntax tree query", alias = "q")]
|
||||
struct Query {
|
||||
|
|
@ -457,7 +481,7 @@ fn run() -> Result<()> {
|
|||
if let Some(path) = generate_options.libdir {
|
||||
loader = loader::Loader::with_parser_lib_path(PathBuf::from(path));
|
||||
}
|
||||
loader.use_debug_build(generate_options.debug_build);
|
||||
loader.debug_build(generate_options.debug_build);
|
||||
loader.languages_at_path(¤t_dir)?;
|
||||
}
|
||||
}
|
||||
|
|
@ -507,7 +531,7 @@ fn run() -> Result<()> {
|
|||
(false, false) => &[],
|
||||
};
|
||||
|
||||
loader.use_debug_build(build_options.debug);
|
||||
loader.debug_build(build_options.debug);
|
||||
|
||||
let config = Config::load(None)?;
|
||||
let loader_config = config.get()?;
|
||||
|
|
@ -560,7 +584,7 @@ fn run() -> Result<()> {
|
|||
let cancellation_flag = util::cancel_on_signal();
|
||||
let mut parser = Parser::new();
|
||||
|
||||
loader.use_debug_build(parse_options.debug_build);
|
||||
loader.debug_build(parse_options.debug_build);
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
if parse_options.wasm {
|
||||
|
|
@ -656,7 +680,7 @@ fn run() -> Result<()> {
|
|||
Commands::Test(test_options) => {
|
||||
let config = Config::load(test_options.config_path)?;
|
||||
|
||||
loader.use_debug_build(test_options.debug_build);
|
||||
loader.debug_build(test_options.debug_build);
|
||||
|
||||
let mut parser = Parser::new();
|
||||
|
||||
|
|
@ -730,6 +754,33 @@ fn run() -> Result<()> {
|
|||
}
|
||||
}
|
||||
|
||||
Commands::Fuzz(fuzz_options) => {
|
||||
loader.sanitize_build(true);
|
||||
|
||||
let languages = loader.languages_at_path(¤t_dir)?;
|
||||
let (language, language_name) = &languages
|
||||
.first()
|
||||
.ok_or_else(|| anyhow!("No language found"))?;
|
||||
|
||||
let mut fuzz_options = FuzzOptions {
|
||||
skipped: fuzz_options.skip,
|
||||
subdir: fuzz_options.subdir,
|
||||
edits: fuzz_options.edits.unwrap_or(*EDIT_COUNT),
|
||||
iterations: fuzz_options.iterations.unwrap_or(*ITERATION_COUNT),
|
||||
filter: fuzz_options.filter,
|
||||
log_graphs: fuzz_options.log_graphs || *LOG_GRAPH_ENABLED,
|
||||
log: fuzz_options.log || *LOG_ENABLED,
|
||||
};
|
||||
|
||||
fuzz_language_corpus(
|
||||
language,
|
||||
language_name,
|
||||
*START_SEED,
|
||||
¤t_dir,
|
||||
&mut fuzz_options,
|
||||
);
|
||||
}
|
||||
|
||||
Commands::Query(query_options) => {
|
||||
let config = Config::load(query_options.config_path)?;
|
||||
let paths = collect_paths(query_options.paths_file.as_deref(), query_options.paths)?;
|
||||
|
|
|
|||
|
|
@ -10,13 +10,7 @@ use anyhow::{anyhow, Context, Result};
|
|||
use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree};
|
||||
|
||||
use super::util;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Edit {
|
||||
pub position: usize,
|
||||
pub deleted_length: usize,
|
||||
pub inserted_text: Vec<u8>,
|
||||
}
|
||||
use crate::fuzz::edits::Edit;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Stats {
|
||||
|
|
|
|||
|
|
@ -1,23 +1,26 @@
|
|||
use std::{collections::HashMap, env, fs};
|
||||
|
||||
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter_proc_macro::test_with_seed;
|
||||
|
||||
use super::helpers::{
|
||||
allocations,
|
||||
edits::{get_random_edit, invert_edit},
|
||||
fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
|
||||
new_seed,
|
||||
random::Rand,
|
||||
scope_sequence::ScopeSequence,
|
||||
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED,
|
||||
START_SEED,
|
||||
};
|
||||
use crate::{
|
||||
fuzz::{
|
||||
corpus_test::{
|
||||
check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
|
||||
},
|
||||
edits::{get_random_edit, invert_edit},
|
||||
flatten_tests, new_seed,
|
||||
random::Rand,
|
||||
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_GRAPH_ENABLED,
|
||||
START_SEED,
|
||||
},
|
||||
generate,
|
||||
parse::perform_edit,
|
||||
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
|
||||
util,
|
||||
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
|
||||
tests::{
|
||||
allocations,
|
||||
helpers::fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
|
||||
},
|
||||
};
|
||||
|
||||
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
|
||||
|
|
@ -79,7 +82,7 @@ fn test_corpus_for_json(seed: usize) {
|
|||
#[ignore]
|
||||
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
|
||||
fn test_corpus_for_php(seed: usize) {
|
||||
test_language_corpus("php", seed, None, Some("php"));
|
||||
test_language_corpus("php", seed, None, None);
|
||||
}
|
||||
|
||||
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
|
||||
|
|
@ -107,7 +110,7 @@ fn test_corpus_for_tsx(seed: usize) {
|
|||
test_language_corpus("typescript", seed, None, Some("tsx"));
|
||||
}
|
||||
|
||||
fn test_language_corpus(
|
||||
pub fn test_language_corpus(
|
||||
language_name: &str,
|
||||
start_seed: usize,
|
||||
skipped: Option<&[&str]>,
|
||||
|
|
@ -120,17 +123,23 @@ fn test_language_corpus(
|
|||
let template_corpus_dir = fixtures_dir().join("template_corpus");
|
||||
let corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
|
||||
|
||||
println!("Testing {language_name} corpus @ {}", corpus_dir.display());
|
||||
|
||||
let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt"));
|
||||
let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt"));
|
||||
let main_tests = parse_tests(&corpus_dir).unwrap();
|
||||
let error_tests = parse_tests(&error_corpus_file).unwrap_or_default();
|
||||
let template_tests = parse_tests(&template_corpus_file).unwrap_or_default();
|
||||
let mut tests = flatten_tests(main_tests);
|
||||
tests.extend(flatten_tests(error_tests));
|
||||
tests.extend(flatten_tests(template_tests).into_iter().map(|mut t| {
|
||||
t.template_delimiters = Some(("<%", "%>"));
|
||||
t
|
||||
}));
|
||||
let mut tests = flatten_tests(main_tests, EXAMPLE_FILTER.as_ref());
|
||||
tests.extend(flatten_tests(error_tests, EXAMPLE_FILTER.as_ref()));
|
||||
tests.extend(
|
||||
flatten_tests(template_tests, EXAMPLE_FILTER.as_ref())
|
||||
.into_iter()
|
||||
.map(|mut t| {
|
||||
t.template_delimiters = Some(("<%", "%>"));
|
||||
t
|
||||
}),
|
||||
);
|
||||
|
||||
tests.retain(|t| t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir)));
|
||||
|
||||
|
|
@ -185,7 +194,8 @@ fn test_language_corpus(
|
|||
}
|
||||
|
||||
true
|
||||
});
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
if !passed {
|
||||
failure_count += 1;
|
||||
|
|
@ -279,7 +289,7 @@ fn test_language_corpus(
|
|||
}
|
||||
|
||||
true
|
||||
});
|
||||
}).unwrap();
|
||||
|
||||
if !passed {
|
||||
failure_count += 1;
|
||||
|
|
@ -367,7 +377,7 @@ fn test_feature_corpus_files() {
|
|||
let c_code = generate_result.unwrap().1;
|
||||
let language = get_test_language(language_name, &c_code, Some(&test_path));
|
||||
let test = parse_tests(&corpus_path).unwrap();
|
||||
let tests = flatten_tests(test);
|
||||
let tests = flatten_tests(test, EXAMPLE_FILTER.as_ref());
|
||||
|
||||
if !tests.is_empty() {
|
||||
eprintln!("test language: {language_name:?}");
|
||||
|
|
@ -393,7 +403,8 @@ fn test_feature_corpus_files() {
|
|||
println!();
|
||||
false
|
||||
}
|
||||
});
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
if !passed {
|
||||
failure_count += 1;
|
||||
|
|
@ -405,202 +416,3 @@ fn test_feature_corpus_files() {
|
|||
|
||||
assert!(failure_count == 0, "{failure_count} corpus tests failed");
|
||||
}
|
||||
|
||||
/// Verify structural invariants of `tree` against the `input` it was parsed
/// from: byte offsets agree with (row, column) positions, children lie inside
/// their parent and do not overlap, named-child counts are accurate, and
/// change flags propagate upward. Panics (via assert) on any violation.
fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
    // Recursively validate one node; `line_offsets[row]` is the byte offset
    // of the start of line `row` in the input.
    fn check(node: Node, line_offsets: &[usize]) {
        let start_byte = node.start_byte();
        let end_byte = node.end_byte();
        let start_point = node.start_position();
        let end_point = node.end_position();

        // A node's extent must be non-inverted, and its byte offsets must be
        // consistent with its row/column positions.
        assert!(start_byte <= end_byte);
        assert!(start_point <= end_point);
        assert_eq!(
            start_byte,
            line_offsets[start_point.row] + start_point.column
        );
        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);

        let mut last_child_end_byte = start_byte;
        let mut last_child_end_point = start_point;
        let mut some_child_has_changes = false;
        let mut actual_named_child_count = 0;
        for i in 0..node.child_count() {
            let child = node.child(i).unwrap();
            // Children appear in order and never overlap each other or
            // start before their parent.
            assert!(child.start_byte() >= last_child_end_byte);
            assert!(child.start_position() >= last_child_end_point);
            check(child, line_offsets);
            if child.has_changes() {
                some_child_has_changes = true;
            }
            if child.is_named() {
                actual_named_child_count += 1;
            }
            last_child_end_byte = child.end_byte();
            last_child_end_point = child.end_position();
        }

        // The cached named-child count must match what iteration observed.
        assert_eq!(actual_named_child_count, node.named_child_count());

        // The parent must extend at least as far as its last child.
        if node.child_count() > 0 {
            assert!(end_byte >= last_child_end_byte);
            assert!(end_point >= last_child_end_point);
        }

        // Change flags must propagate from children to their parent.
        if some_child_has_changes {
            assert!(node.has_changes());
        }
    }

    // Precompute the byte offset of each line start so that row/column
    // positions can be converted to byte offsets in O(1).
    let mut line_offsets = vec![0];
    for (i, c) in input.iter().enumerate() {
        if *c == b'\n' {
            line_offsets.push(i + 1);
        }
    }

    check(tree.root_node(), &line_offsets);
}
|
||||
|
||||
fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
|
||||
let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
|
||||
let old_scope_sequence = ScopeSequence::new(old_tree);
|
||||
let new_scope_sequence = ScopeSequence::new(new_tree);
|
||||
|
||||
let old_range = old_tree.root_node().range();
|
||||
let new_range = new_tree.root_node().range();
|
||||
|
||||
let byte_range =
|
||||
old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
|
||||
let point_range = old_range.start_point.min(new_range.start_point)
|
||||
..old_range.end_point.max(new_range.end_point);
|
||||
|
||||
for range in &changed_ranges {
|
||||
if range.end_byte > byte_range.end || range.end_point > point_range.end {
|
||||
return Err(format!(
|
||||
"changed range extends outside of the old and new trees {range:?}",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
|
||||
}
|
||||
|
||||
fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
|
||||
if let Some((start, end)) = delimiters {
|
||||
let mut ranges = Vec::new();
|
||||
let mut ix = 0;
|
||||
while ix < input.len() {
|
||||
let Some(mut start_ix) = input[ix..]
|
||||
.windows(2)
|
||||
.position(|win| win == start.as_bytes())
|
||||
else {
|
||||
break;
|
||||
};
|
||||
start_ix += ix + start.len();
|
||||
let end_ix = input[start_ix..]
|
||||
.windows(2)
|
||||
.position(|win| win == end.as_bytes())
|
||||
.map_or(input.len(), |ix| start_ix + ix);
|
||||
ix = end_ix;
|
||||
ranges.push(Range {
|
||||
start_byte: start_ix,
|
||||
end_byte: end_ix,
|
||||
start_point: point_for_offset(input, start_ix),
|
||||
end_point: point_for_offset(input, end_ix),
|
||||
});
|
||||
}
|
||||
|
||||
parser.set_included_ranges(&ranges).unwrap();
|
||||
} else {
|
||||
parser.set_included_ranges(&[]).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn point_for_offset(text: &[u8], offset: usize) -> Point {
|
||||
let mut point = Point::default();
|
||||
for byte in &text[..offset] {
|
||||
if *byte == b'\n' {
|
||||
point.row += 1;
|
||||
point.column = 0;
|
||||
} else {
|
||||
point.column += 1;
|
||||
}
|
||||
}
|
||||
point
|
||||
}
|
||||
|
||||
/// Construct a `Parser`, wiring up logging according to the global flags:
/// with `LOG_ENABLED` set, lex/parse messages go to stderr (lex messages
/// indented); otherwise with `LOG_GRAPH_ENABLED` set, a graph-logging
/// session writing to `log_filename` is stored in `session` so it lives as
/// long as the caller needs it.
fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
    let mut parser = Parser::new();

    if *LOG_ENABLED {
        parser.set_logger(Some(Box::new(|log_type, msg| {
            // Indent lexer messages so they read as nested under parse steps.
            if log_type == LogType::Lex {
                eprintln!("  {msg}");
            } else {
                eprintln!("{msg}");
            }
        })));
    } else if *LOG_GRAPH_ENABLED {
        // The caller owns the session; dropping it would end graph logging.
        *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap());
    }

    parser
}
|
||||
|
||||
/// A single corpus example flattened out of the nested `TestEntry` tree,
/// with its enclosing group names folded into `name`.
struct FlattenedTest {
    // Full test name, e.g. "group - subgroup - example".
    name: String,
    // Source text to parse.
    input: Vec<u8>,
    // Expected S-expression output.
    output: String,
    // Languages this example applies to; an empty first entry means
    // "any language" (see the `retain` in `test_language_corpus`).
    languages: Vec<Box<str>>,
    // Presumably whether the expected output includes field names —
    // TODO confirm against `strip_sexp_fields` usage.
    has_fields: bool,
    // When set, only text between these delimiters is parsed
    // (used by the template corpus, e.g. ("<%", "%>")).
    template_delimiters: Option<(&'static str, &'static str)>,
}
|
||||
|
||||
fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
|
||||
fn helper(test: TestEntry, is_root: bool, prefix: &str, result: &mut Vec<FlattenedTest>) {
|
||||
match test {
|
||||
TestEntry::Example {
|
||||
mut name,
|
||||
input,
|
||||
output,
|
||||
has_fields,
|
||||
attributes,
|
||||
..
|
||||
} => {
|
||||
if !prefix.is_empty() {
|
||||
name.insert_str(0, " - ");
|
||||
name.insert_str(0, prefix);
|
||||
}
|
||||
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
|
||||
if !name.contains(filter.as_str()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
result.push(FlattenedTest {
|
||||
name,
|
||||
input,
|
||||
output,
|
||||
has_fields,
|
||||
languages: attributes.languages,
|
||||
template_delimiters: None,
|
||||
});
|
||||
}
|
||||
TestEntry::Group {
|
||||
mut name, children, ..
|
||||
} => {
|
||||
if !is_root && !prefix.is_empty() {
|
||||
name.insert_str(0, " - ");
|
||||
name.insert_str(0, prefix);
|
||||
}
|
||||
for child in children {
|
||||
helper(child, false, &name, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut result = Vec::new();
|
||||
helper(test, true, "", &mut result);
|
||||
result
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,5 @@
|
|||
use std::{ops::Range, str};
|
||||
|
||||
use super::random::Rand;
|
||||
use crate::parse::Edit;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ReadRecorder<'a> {
|
||||
content: &'a [u8],
|
||||
|
|
@ -50,55 +47,3 @@ impl<'a> ReadRecorder<'a> {
|
|||
result
|
||||
}
|
||||
}
|
||||
|
||||
pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
|
||||
let position = edit.position;
|
||||
let removed_content = &input[position..(position + edit.deleted_length)];
|
||||
Edit {
|
||||
position,
|
||||
deleted_length: edit.inserted_text.len(),
|
||||
inserted_text: removed_content.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
|
||||
let choice = rand.unsigned(10);
|
||||
if choice < 2 {
|
||||
// Insert text at end
|
||||
let inserted_text = rand.words(3);
|
||||
Edit {
|
||||
position: input.len(),
|
||||
deleted_length: 0,
|
||||
inserted_text,
|
||||
}
|
||||
} else if choice < 5 {
|
||||
// Delete text from the end
|
||||
let deleted_length = rand.unsigned(30).min(input.len());
|
||||
Edit {
|
||||
position: input.len() - deleted_length,
|
||||
deleted_length,
|
||||
inserted_text: vec![],
|
||||
}
|
||||
} else if choice < 8 {
|
||||
// Insert at a random position
|
||||
let position = rand.unsigned(input.len());
|
||||
let word_count = 1 + rand.unsigned(3);
|
||||
let inserted_text = rand.words(word_count);
|
||||
Edit {
|
||||
position,
|
||||
deleted_length: 0,
|
||||
inserted_text,
|
||||
}
|
||||
} else {
|
||||
// Replace at random position
|
||||
let position = rand.unsigned(input.len());
|
||||
let deleted_length = rand.unsigned(input.len() - position);
|
||||
let word_count = 1 + rand.unsigned(3);
|
||||
let inserted_text = rand.words(word_count);
|
||||
Edit {
|
||||
position,
|
||||
deleted_length,
|
||||
inserted_text,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ lazy_static! {
|
|||
static ref TEST_LOADER: Loader = {
|
||||
let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
|
||||
if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
|
||||
loader.use_debug_build(true);
|
||||
loader.debug_build(true);
|
||||
}
|
||||
loader
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,35 +1,4 @@
|
|||
pub(super) mod allocations;
|
||||
pub(super) mod edits;
|
||||
pub mod allocations;
|
||||
pub mod edits;
|
||||
pub(super) mod fixtures;
|
||||
pub(super) mod query_helpers;
|
||||
pub(super) mod random;
|
||||
pub(super) mod scope_sequence;
|
||||
|
||||
use std::env;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use rand::Rng;
|
||||
|
||||
lazy_static! {
    // Enable raw parser logging when `TREE_SITTER_LOG` is set.
    pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
    // Enable graph (dot) logging when `TREE_SITTER_LOG_GRAPHS` is set.
    pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_LOG_GRAPHS").is_ok();
    // Restrict fuzzing/tests to a single language, by name.
    pub static ref LANGUAGE_FILTER: Option<String> = env::var("TREE_SITTER_LANGUAGE").ok();
    // Restrict to examples whose full name contains this substring.
    pub static ref EXAMPLE_FILTER: Option<String> = env::var("TREE_SITTER_EXAMPLE").ok();
}
|
||||
|
||||
lazy_static! {
    // Initial fuzzing seed; fixed for the whole run (see `new_seed`).
    pub static ref START_SEED: usize = new_seed();
    // Edits per iteration, overridable via `TREE_SITTER_EDITS` (default 3).
    pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
    // Iterations per test, overridable via `TREE_SITTER_ITERATIONS` (default 10).
    pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
}
|
||||
|
||||
/// Read the environment variable `name` and parse it as a `usize`,
/// yielding `None` when it is unset or not a valid integer.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse().ok()
}
|
||||
|
||||
pub fn new_seed() -> usize {
|
||||
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
|
||||
let mut rng = rand::thread_rng();
|
||||
rng.gen::<usize>()
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,3 +17,10 @@ mod tree_test;
|
|||
|
||||
#[cfg(feature = "wasm")]
|
||||
mod wasm_language_test;
|
||||
|
||||
pub use crate::fuzz::{
|
||||
allocations,
|
||||
edits::{get_random_edit, invert_edit},
|
||||
random::Rand,
|
||||
ITERATION_COUNT,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use tree_sitter::{Node, Parser, Point, Tree};
|
||||
|
||||
use super::helpers::{
|
||||
edits::get_random_edit,
|
||||
fixtures::{fixtures_dir, get_language, get_test_language},
|
||||
random::Rand,
|
||||
use super::{
|
||||
get_random_edit,
|
||||
helpers::fixtures::{fixtures_dir, get_language, get_test_language},
|
||||
Rand,
|
||||
};
|
||||
use crate::{
|
||||
generate::{generate_parser_for_grammar, load_grammar_file},
|
||||
|
|
|
|||
|
|
@ -8,13 +8,14 @@ use tree_sitter_proc_macro::retry;
|
|||
|
||||
use super::helpers::{
|
||||
allocations,
|
||||
edits::{invert_edit, ReadRecorder},
|
||||
edits::ReadRecorder,
|
||||
fixtures::{get_language, get_test_language},
|
||||
};
|
||||
use crate::{
|
||||
fuzz::edits::Edit,
|
||||
generate::{generate_parser_for_grammar, load_grammar_file},
|
||||
parse::{perform_edit, Edit},
|
||||
tests::helpers::fixtures::fixtures_dir,
|
||||
parse::perform_edit,
|
||||
tests::{helpers::fixtures::fixtures_dir, invert_edit},
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -13,11 +13,13 @@ use super::helpers::{
|
|||
allocations,
|
||||
fixtures::{get_language, get_test_language},
|
||||
query_helpers::{assert_query_matches, Match, Pattern},
|
||||
ITERATION_COUNT,
|
||||
};
|
||||
use crate::{
|
||||
generate::generate_parser_for_grammar,
|
||||
tests::helpers::query_helpers::{collect_captures, collect_matches},
|
||||
tests::{
|
||||
helpers::query_helpers::{collect_captures, collect_matches},
|
||||
ITERATION_COUNT,
|
||||
},
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@ use std::str;
|
|||
|
||||
use tree_sitter::{InputEdit, Parser, Point, Range, Tree};
|
||||
|
||||
use super::helpers::{edits::invert_edit, fixtures::get_language};
|
||||
use crate::parse::{perform_edit, Edit};
|
||||
use super::helpers::fixtures::get_language;
|
||||
use crate::{fuzz::edits::Edit, parse::perform_edit, tests::invert_edit};
|
||||
|
||||
#[test]
|
||||
fn test_tree_edit() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue