feat: add fuzz subcommand

2024-04-15 22:41:54 -04:00 · 2024-04-15 22:41:54 -04:00 · e553578696
commit e553578696
parent 7f4a57817d
24 changed files with 827 additions and 360 deletions
--- a/cli/src/fuzz/allocations.rs
+++ b/cli/src/fuzz/allocations.rs
@ -0,0 +1,122 @@
+use std::{
+    collections::HashMap,
+    os::raw::c_void,
+    sync::{
+        atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
+        Mutex,
+    },
+};
+
+// Installs the recording wrappers defined in this file as tree-sitter's
+// allocator hooks. The `#[ctor::ctor]` attribute registers this function as a
+// constructor, so it is expected to run automatically rather than being called
+// explicitly from this module.
+#[ctor::ctor]
+unsafe fn initialize_allocation_recording() {
+    tree_sitter::set_allocator(
+        Some(ts_record_malloc),
+        Some(ts_record_calloc),
+        Some(ts_record_realloc),
+        Some(ts_record_free),
+    );
+}
+
+// Newtype around a raw allocation address so it can be used as a `HashMap`
+// key. Within this file the pointer is only hashed and compared, never
+// dereferenced.
+#[derive(Debug, PartialEq, Eq, Hash)]
+struct Allocation(*const c_void);
+// Raw pointers are neither `Send` nor `Sync` by default; these impls opt the
+// key type in manually.
+unsafe impl Send for Allocation {}
+unsafe impl Sync for Allocation {}
+
+// Bookkeeping for one recording session (see `record`).
+#[derive(Default)]
+struct AllocationRecorder {
+    // Whether allocations and frees are currently being tracked.
+    enabled: AtomicBool,
+    // Number of allocations seen so far; used as the index of the next one.
+    allocation_count: AtomicUsize,
+    // Live allocations, mapped to the index at which each was made.
+    outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
+}
+
+// Each thread gets its own recorder, created lazily with default
+// (disabled, empty) state.
+thread_local! {
+    static RECORDER: AllocationRecorder = AllocationRecorder::default();
+}
+
+// Declarations of the C allocator functions that the `ts_record_*` wrappers
+// below forward to.
+extern "C" {
+    fn malloc(size: usize) -> *mut c_void;
+    fn calloc(count: usize, size: usize) -> *mut c_void;
+    fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
+    fn free(ptr: *mut c_void);
+}
+
+/// Runs `f` with allocation recording switched on for the current thread.
+///
+/// The recorder is reset before `f` runs. Afterwards recording is switched
+/// off and any allocation that was recorded but never released is reported.
+///
+/// # Errors
+///
+/// Returns `Err` with the indices of the leaked allocations when at least one
+/// allocation made during `f` is still outstanding; otherwise returns the
+/// value produced by `f`.
+pub fn record<T>(f: impl FnOnce() -> T) -> Result<T, String> {
+    // Begin the session from a clean slate.
+    RECORDER.with(|state| {
+        state.enabled.store(true, SeqCst);
+        state.allocation_count.store(0, SeqCst);
+        let mut outstanding = state.outstanding_allocations.lock().unwrap();
+        outstanding.clear();
+    });
+
+    let value = f();
+
+    // Stop recording and collect whatever is still registered as live.
+    let outstanding_allocation_indices: Vec<usize> = RECORDER.with(|state| {
+        state.enabled.store(false, SeqCst);
+        state.allocation_count.store(0, SeqCst);
+        let mut outstanding = state.outstanding_allocations.lock().unwrap();
+        let indices = outstanding.drain().map(|(_, index)| index).collect();
+        indices
+    });
+
+    if outstanding_allocation_indices.is_empty() {
+        Ok(value)
+    } else {
+        Err(format!(
+            "Leaked allocation indices: {outstanding_allocation_indices:?}",
+        ))
+    }
+}
+
+/// Registers `ptr` as a live allocation when recording is enabled, tagging it
+/// with the running allocation index.
+fn record_alloc(ptr: *mut c_void) {
+    RECORDER.with(|state| {
+        if !state.enabled.load(SeqCst) {
+            return;
+        }
+        let index = state.allocation_count.fetch_add(1, SeqCst);
+        let mut outstanding = state.outstanding_allocations.lock().unwrap();
+        outstanding.insert(Allocation(ptr), index);
+    });
+}
+
+/// Removes `ptr` from the set of live allocations when recording is enabled.
+/// Pointers that were never registered are ignored.
+fn record_dealloc(ptr: *mut c_void) {
+    RECORDER.with(|state| {
+        if !state.enabled.load(SeqCst) {
+            return;
+        }
+        let mut outstanding = state.outstanding_allocations.lock().unwrap();
+        outstanding.remove(&Allocation(ptr));
+    });
+}
+
+// `malloc` wrapper installed as tree-sitter's allocator: forwards to the C
+// `malloc` and registers the returned pointer with the recorder.
+unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
+    let result = malloc(size);
+    record_alloc(result);
+    result
+}
+
+// `calloc` wrapper installed as tree-sitter's allocator: forwards to the C
+// `calloc` and registers the returned pointer with the recorder.
+unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
+    let result = calloc(count, size);
+    record_alloc(result);
+    result
+}
+
+// `realloc` wrapper installed as tree-sitter's allocator. Forwards to the C
+// `realloc`, then updates the recorder:
+//   - a null `ptr` is treated as a brand-new allocation;
+//   - if the returned address differs from `ptr`, the old address is
+//     unregistered and the new one registered;
+//   - if the same address comes back, the existing record is left untouched.
+unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
+    let result = realloc(ptr, size);
+    if ptr.is_null() {
+        record_alloc(result);
+    } else if ptr != result {
+        record_dealloc(ptr);
+        record_alloc(result);
+    }
+    result
+}
+
+// `free` wrapper installed as tree-sitter's allocator: unregisters `ptr` from
+// the recorder and then forwards to the C `free`.
+unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
+    record_dealloc(ptr);
+    free(ptr);
+}
--- a/cli/src/fuzz/corpus_test.rs
+++ b/cli/src/fuzz/corpus_test.rs
@ -0,0 +1,147 @@
+use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
+
+use super::{scope_sequence::ScopeSequence, LOG_ENABLED, LOG_GRAPH_ENABLED};
+use crate::util;
+
+/// Asserts that the byte offsets and row/column positions of every node in
+/// `tree` agree with each other and with `input`.
+///
+/// # Panics
+///
+/// Panics (via `assert!`/`assert_eq!`) on the first inconsistency found.
+pub fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
+    // Recursively validates `node` and all of its descendants.
+    // `line_offsets[row]` is the byte offset at which that row starts.
+    fn check(node: Node, line_offsets: &[usize]) {
+        let start_byte = node.start_byte();
+        let end_byte = node.end_byte();
+        let start_point = node.start_position();
+        let end_point = node.end_position();
+
+        // A node must not end before it starts, and its byte offsets must
+        // equal "start of row + column" for both endpoints.
+        assert!(start_byte <= end_byte);
+        assert!(start_point <= end_point);
+        assert_eq!(
+            start_byte,
+            line_offsets[start_point.row] + start_point.column
+        );
+        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);
+
+        let mut last_child_end_byte = start_byte;
+        let mut last_child_end_point = start_point;
+        let mut some_child_has_changes = false;
+        let mut actual_named_child_count = 0;
+        for i in 0..node.child_count() {
+            let child = node.child(i).unwrap();
+            // Children must appear in order and must not overlap.
+            assert!(child.start_byte() >= last_child_end_byte);
+            assert!(child.start_position() >= last_child_end_point);
+            check(child, line_offsets);
+            if child.has_changes() {
+                some_child_has_changes = true;
+            }
+            if child.is_named() {
+                actual_named_child_count += 1;
+            }
+            last_child_end_byte = child.end_byte();
+            last_child_end_point = child.end_position();
+        }
+
+        // The reported named-child count must match what iteration found.
+        assert_eq!(actual_named_child_count, node.named_child_count());
+
+        // A parent must extend at least as far as its last child.
+        if node.child_count() > 0 {
+            assert!(end_byte >= last_child_end_byte);
+            assert!(end_point >= last_child_end_point);
+        }
+
+        // A change flag on any child must be reflected on the parent.
+        if some_child_has_changes {
+            assert!(node.has_changes());
+        }
+    }
+
+    // Row 0 starts at offset 0; every later row starts right after a `\n`.
+    let mut line_offsets = vec![0];
+    for (i, c) in input.iter().enumerate() {
+        if *c == b'\n' {
+            line_offsets.push(i + 1);
+        }
+    }
+
+    check(tree.root_node(), &line_offsets);
+}
+
+/// Validates the changed ranges reported between `old_tree` and `new_tree`.
+///
+/// # Errors
+///
+/// Returns `Err` when a changed range ends beyond the furthest end of both
+/// root nodes, or when the scope-sequence comparison of the two trees against
+/// `input` reports a problem.
+pub fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
+    let changed_ranges: Vec<_> = old_tree.changed_ranges(new_tree).collect();
+    let old_scope_sequence = ScopeSequence::new(old_tree);
+    let new_scope_sequence = ScopeSequence::new(new_tree);
+
+    // Only the end of the combined extent is ever compared against.
+    let old_range = old_tree.root_node().range();
+    let new_range = new_tree.root_node().range();
+    let max_end_byte = old_range.end_byte.max(new_range.end_byte);
+    let max_end_point = old_range.end_point.max(new_range.end_point);
+
+    let out_of_bounds = changed_ranges
+        .iter()
+        .find(|r| r.end_byte > max_end_byte || r.end_point > max_end_point);
+    if let Some(range) = out_of_bounds {
+        return Err(format!(
+            "changed range extends outside of the old and new trees {range:?}",
+        ));
+    }
+
+    old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
+}
+
+/// Restricts `parser` to the regions of `input` enclosed by template
+/// delimiters.
+///
+/// With `delimiters == Some((start, end))`, every region that begins right
+/// after an occurrence of `start` and runs up to the next occurrence of `end`
+/// (or to the end of `input` when `end` is not found) becomes an included
+/// range. With `None`, or when no `start` delimiter occurs, the included
+/// ranges are reset to the empty list.
+///
+/// Delimiters of any non-zero length are supported; an empty delimiter never
+/// matches.
+///
+/// # Panics
+///
+/// Panics if the parser rejects the computed ranges.
+pub fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
+    // Offset of the first occurrence of `needle` in `haystack`. The window
+    // size follows the needle, so delimiters are not limited to two bytes.
+    fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        if needle.is_empty() {
+            // `windows(0)` would panic; treat an empty delimiter as "no match".
+            return None;
+        }
+        haystack
+            .windows(needle.len())
+            .position(|win| win == needle)
+    }
+
+    let mut ranges = Vec::new();
+    if let Some((start, end)) = delimiters {
+        let mut ix = 0;
+        while ix < input.len() {
+            let Some(start_match) = find(&input[ix..], start.as_bytes()) else {
+                break;
+            };
+            // The included region begins just past the opening delimiter.
+            let start_ix = ix + start_match + start.len();
+            let end_ix = find(&input[start_ix..], end.as_bytes())
+                .map_or(input.len(), |offset| start_ix + offset);
+            ix = end_ix;
+            ranges.push(Range {
+                start_byte: start_ix,
+                end_byte: end_ix,
+                start_point: point_for_offset(input, start_ix),
+                end_point: point_for_offset(input, end_ix),
+            });
+        }
+    }
+
+    parser.set_included_ranges(&ranges).unwrap();
+}
+
+/// Converts a byte `offset` into `text` to a row/column position.
+///
+/// The row is the number of `\n` bytes before `offset`; the column is the
+/// number of bytes between the last such newline (or the start of `text`) and
+/// `offset`.
+///
+/// # Panics
+///
+/// Panics if `offset` is greater than `text.len()`.
+fn point_for_offset(text: &[u8], offset: usize) -> Point {
+    let prefix = &text[..offset];
+    let row = prefix.iter().filter(|&&byte| byte == b'\n').count();
+    let column = match prefix.iter().rposition(|&byte| byte == b'\n') {
+        Some(newline_ix) => offset - newline_ix - 1,
+        None => offset,
+    };
+
+    let mut point = Point::default();
+    point.row = row;
+    point.column = column;
+    point
+}
+
+/// Creates a new `Parser` with logging configured from the `LOG_ENABLED` and
+/// `LOG_GRAPH_ENABLED` globals.
+///
+/// When graph logging is enabled, the log session returned by
+/// `util::log_graphs` is stored in `session`.
+///
+/// # Panics
+///
+/// Panics if `util::log_graphs` returns an error.
+pub fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
+    let mut parser = Parser::new();
+
+    if *LOG_ENABLED {
+        // Print log messages to stderr; lexer messages get a two-space indent.
+        parser.set_logger(Some(Box::new(|log_type, msg| {
+            if log_type == LogType::Lex {
+                eprintln!("  {msg}");
+            } else {
+                eprintln!("{msg}");
+            }
+        })));
+    }
+    if *LOG_GRAPH_ENABLED {
+        *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap());
+    }
+
+    parser
+}
--- a/cli/src/fuzz/edits.rs
+++ b/cli/src/fuzz/edits.rs
@ -0,0 +1,60 @@
+use super::random::Rand;
+
+/// A single text edit: delete `deleted_length` bytes at `position`, then
+/// insert `inserted_text` there.
+#[derive(Debug)]
+pub struct Edit {
+    /// Byte offset in the input at which the edit applies.
+    pub position: usize,
+    /// Number of bytes removed starting at `position`.
+    pub deleted_length: usize,
+    /// Bytes inserted at `position`.
+    pub inserted_text: Vec<u8>,
+}
+
+/// Builds the edit that undoes `edit`, given the `input` as it was before
+/// `edit` was applied.
+///
+/// The inverse removes exactly as many bytes as `edit` inserted and puts back
+/// the bytes that `edit` deleted.
+///
+/// # Panics
+///
+/// Panics if the deleted span of `edit` does not lie within `input`.
+pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
+    let start = edit.position;
+    let end = start + edit.deleted_length;
+    Edit {
+        position: start,
+        deleted_length: edit.inserted_text.len(),
+        inserted_text: input[start..end].to_vec(),
+    }
+}
+
+/// Produces a random edit for `input`, choosing between four kinds of edit
+/// based on a value drawn from `rand.unsigned(10)`:
+///
+/// - below 2: append three random words at the end;
+/// - below 5: delete a random number of bytes from the end;
+/// - below 8: insert random words at a random position;
+/// - otherwise: replace a random span with random words.
+pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
+    let len = input.len();
+    match rand.unsigned(10) {
+        // Insert text at end
+        0..=1 => {
+            let inserted_text = rand.words(3);
+            Edit {
+                position: len,
+                deleted_length: 0,
+                inserted_text,
+            }
+        }
+        // Delete text from the end
+        2..=4 => {
+            let deleted_length = rand.unsigned(30).min(len);
+            Edit {
+                position: len - deleted_length,
+                deleted_length,
+                inserted_text: Vec::new(),
+            }
+        }
+        // Insert at a random position
+        5..=7 => {
+            let position = rand.unsigned(len);
+            let word_count = 1 + rand.unsigned(3);
+            let inserted_text = rand.words(word_count);
+            Edit {
+                position,
+                deleted_length: 0,
+                inserted_text,
+            }
+        }
+        // Replace at random position
+        _ => {
+            let position = rand.unsigned(len);
+            let deleted_length = rand.unsigned(len - position);
+            let word_count = 1 + rand.unsigned(3);
+            let inserted_text = rand.words(word_count);
+            Edit {
+                position,
+                deleted_length,
+                inserted_text,
+            }
+        }
+    }
+}
--- a/cli/src/fuzz/mod.rs
+++ b/cli/src/fuzz/mod.rs
@ -0,0 +1,349 @@
+use std::{collections::HashMap, env, fs, path::Path};
+
+use lazy_static::lazy_static;
+use rand::Rng;
+use regex::Regex;
+use tree_sitter::{Language, Parser};
+
+pub mod allocations;
+pub mod corpus_test;
+pub mod edits;
+pub mod random;
+pub mod scope_sequence;
+
+use crate::{
+    fuzz::{
+        corpus_test::{
+            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
+        },
+        edits::{get_random_edit, invert_edit},
+        random::Rand,
+    },
+    parse::perform_edit,
+    test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
+};
+
+// Process-wide settings, each read from an environment variable on first use.
+lazy_static! {
+    // True when `TREE_SITTER_LOG` is set (to any value).
+    pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
+    // True when `TREE_SITTER_LOG_GRAPHS` is set (to any value).
+    pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_LOG_GRAPHS").is_ok();
+    // Value of `TREE_SITTER_LANGUAGE`, if set.
+    pub static ref LANGUAGE_FILTER: Option<String> = env::var("TREE_SITTER_LANGUAGE").ok();
+    // `TREE_SITTER_EXAMPLE` compiled as a regex; `None` if unset or invalid.
+    pub static ref EXAMPLE_FILTER: Option<Regex> = regex_env_var("TREE_SITTER_EXAMPLE");
+    // Seed from `new_seed()`: `TREE_SITTER_SEED` if it parses, else random.
+    pub static ref START_SEED: usize = new_seed();
+    // `TREE_SITTER_EDITS` as an integer, defaulting to 3.
+    pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
+    // `TREE_SITTER_ITERATIONS` as an integer, defaulting to 10.
+    pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
+}
+
+/// Reads the environment variable `name` and parses it as a `usize`.
+///
+/// Returns `None` when the variable is unset, is not valid Unicode, or does
+/// not parse as an unsigned integer.
+fn int_env_var(name: &'static str) -> Option<usize> {
+    let raw = env::var(name).ok()?;
+    raw.parse().ok()
+}
+
+/// Reads the environment variable `name` and compiles it as a regex.
+///
+/// Returns `None` when the variable is unset, is not valid Unicode, or is not
+/// a valid regular expression.
+fn regex_env_var(name: &'static str) -> Option<Regex> {
+    let pattern = env::var(name).ok()?;
+    Regex::new(&pattern).ok()
+}
+
+/// Returns the seed to fuzz with: the value of `TREE_SITTER_SEED` when it is
+/// set to a valid integer, otherwise a freshly generated random `usize`.
+pub fn new_seed() -> usize {
+    match int_env_var("TREE_SITTER_SEED") {
+        Some(seed) => seed,
+        None => rand::thread_rng().gen::<usize>(),
+    }
+}
+
+/// Settings for a `fuzz_language_corpus` run.
+pub struct FuzzOptions {
+    /// Names of tests to skip, if any.
+    pub skipped: Option<Vec<String>>,
+    /// Subdirectory of the grammar directory that contains `test/corpus`.
+    pub subdir: Option<String>,
+    /// Maximum number of edits to perform per fuzz test.
+    pub edits: usize,
+    /// Number of fuzzing iterations to run per test.
+    pub iterations: usize,
+    /// Only tests whose name matches this regex are run.
+    pub filter: Option<Regex>,
+    /// Enable logging of graphs and input.
+    pub log_graphs: bool,
+    /// Enable parser logging.
+    pub log: bool,
+}
+
+/// Fuzzes `language` against the corpus tests found in
+/// `<grammar_dir>/<options.subdir>/test/corpus`.
+///
+/// Each corpus example is first parsed as-is and compared with its expected
+/// output while allocations are recorded. Examples that pass then go through
+/// `options.iterations` trials. A trial applies a random series of edits (the
+/// count is drawn via `rand.unsigned(options.edits)`), reparses, checks the
+/// tree for consistency, undoes the edits, reparses again and verifies that
+/// the result matches the expected output once more.
+///
+/// Progress and failures are printed to stdout/stderr. Parser logging is
+/// still controlled by `get_parser` (the `LOG_ENABLED` and `LOG_GRAPH_ENABLED`
+/// globals).
+///
+/// `options.subdir` is consumed (taken) by this call.
+///
+/// # Panics
+///
+/// Panics if the corpus cannot be read or parsed, if a parse or edit step
+/// fails unexpectedly, or if `options.skipped` contains a name that matched
+/// no test.
+pub fn fuzz_language_corpus(
+    language: &Language,
+    language_name: &str,
+    start_seed: usize,
+    grammar_dir: &Path,
+    options: &mut FuzzOptions,
+) {
+    let subdir = options.subdir.take().unwrap_or_default();
+
+    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
+
+    if !corpus_dir.exists() || !corpus_dir.is_dir() {
+        eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
+        return;
+    }
+
+    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
+        eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
+        return;
+    }
+
+    // Keeps examples that either name no language or name `language_name`,
+    // and drops groups that end up empty.
+    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
+        match entry {
+            TestEntry::Example { attributes, .. } => {
+                // `first()` instead of `[0]`: an empty language list means
+                // "no restriction" rather than an out-of-bounds panic.
+                attributes
+                    .languages
+                    .first()
+                    .map_or(true, |lang| lang.is_empty())
+                    || attributes
+                        .languages
+                        .iter()
+                        .any(|lang| lang.as_ref() == language_name)
+            }
+            TestEntry::Group {
+                ref mut children, ..
+            } => {
+                children.retain_mut(|child| retain(child, language_name));
+                !children.is_empty()
+            }
+        }
+    }
+
+    let mut main_tests = parse_tests(&corpus_dir).unwrap();
+    match main_tests {
+        TestEntry::Group {
+            ref mut children, ..
+        } => {
+            children.retain_mut(|child| retain(child, language_name));
+        }
+        _ => unreachable!(),
+    }
+    let tests = flatten_tests(main_tests, options.filter.as_ref());
+
+    // Maps each skip entry to the number of tests it matched, so that entries
+    // matching nothing can be reported at the end.
+    let mut skipped = options.skipped.as_ref().map(|x| {
+        x.iter()
+            .map(|x| (x.as_str(), 0))
+            .collect::<HashMap<&str, usize>>()
+    });
+
+    let mut failure_count = 0;
+
+    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
+    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
+
+    if log_seed {
+        println!("  start seed: {start_seed}");
+    }
+
+    println!();
+    for (test_index, test) in tests.iter().enumerate() {
+        let test_name = format!("{language_name} - {}", test.name);
+        if let Some(skipped) = skipped.as_mut() {
+            if let Some(counter) = skipped.get_mut(test_name.as_str()) {
+                println!("  {test_index}. {test_name} - SKIPPED");
+                *counter += 1;
+                continue;
+            }
+        }
+
+        println!("  {test_index}. {test_name}");
+
+        // Initial parse: the unedited input must produce the expected tree.
+        let passed = allocations::record(|| {
+            let mut log_session = None;
+            let mut parser = get_parser(&mut log_session, "log.html");
+            parser.set_language(language).unwrap();
+            set_included_ranges(&mut parser, &test.input, test.template_delimiters);
+
+            let tree = parser.parse(&test.input, None).unwrap();
+            let mut actual_output = tree.root_node().to_sexp();
+            if !test.has_fields {
+                actual_output = strip_sexp_fields(&actual_output);
+            }
+
+            if actual_output != test.output {
+                println!("Incorrect initial parse for {test_name}");
+                print_diff_key();
+                print_diff(&actual_output, &test.output, true);
+                println!();
+                return false;
+            }
+
+            true
+        })
+        .unwrap_or_else(|e| {
+            eprintln!("Error: {e}");
+            false
+        });
+
+        if !passed {
+            failure_count += 1;
+            continue;
+        }
+
+        // Baseline tree, built outside of allocation recording and cloned by
+        // every trial below.
+        let mut parser = Parser::new();
+        parser.set_language(language).unwrap();
+        let tree = parser.parse(&test.input, None).unwrap();
+        drop(parser);
+
+        for trial in 0..options.iterations {
+            // The start seed may be any `usize`, so avoid overflow here.
+            let seed = start_seed.wrapping_add(trial);
+            let passed = allocations::record(|| {
+                let mut rand = Rand::new(seed);
+                let mut log_session = None;
+                let mut parser = get_parser(&mut log_session, "log.html");
+                parser.set_language(language).unwrap();
+                let mut tree = tree.clone();
+                let mut input = test.input.clone();
+
+                if options.log_graphs {
+                    eprintln!("{}\n", String::from_utf8_lossy(&input));
+                }
+
+                // Perform a random series of edits and reparse. The bound
+                // comes from the caller's options so `--edits` is honoured.
+                let mut undo_stack = Vec::new();
+                for _ in 0..=rand.unsigned(options.edits) {
+                    let edit = get_random_edit(&mut rand, &input);
+                    undo_stack.push(invert_edit(&input, &edit));
+                    perform_edit(&mut tree, &mut input, &edit).unwrap();
+                }
+
+                if log_seed {
+                    println!("   {test_index}.{trial:<2} seed: {seed}");
+                }
+
+                if dump_edits {
+                    fs::create_dir_all("fuzz").unwrap();
+                    fs::write(
+                        Path::new("fuzz")
+                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
+                        &input,
+                    )
+                    .unwrap();
+                }
+
+                if options.log_graphs {
+                    eprintln!("{}\n", String::from_utf8_lossy(&input));
+                }
+
+                set_included_ranges(&mut parser, &input, test.template_delimiters);
+                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
+
+                // Check that the new tree is consistent.
+                check_consistent_sizes(&tree2, &input);
+                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
+                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
+                    return false;
+                }
+
+                // Undo all of the edits and re-parse again.
+                while let Some(edit) = undo_stack.pop() {
+                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
+                }
+                if options.log_graphs {
+                    eprintln!("{}\n", String::from_utf8_lossy(&input));
+                }
+
+                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
+                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
+
+                // Verify that the final tree matches the expectation from the corpus.
+                let mut actual_output = tree3.root_node().to_sexp();
+                if !test.has_fields {
+                    actual_output = strip_sexp_fields(&actual_output);
+                }
+
+                if actual_output != test.output {
+                    println!("Incorrect parse for {test_name} - seed {seed}");
+                    print_diff_key();
+                    print_diff(&actual_output, &test.output, true);
+                    println!();
+                    return false;
+                }
+
+                // Check that the edited tree is consistent.
+                check_consistent_sizes(&tree3, &input);
+                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
+                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
+                    return false;
+                }
+
+                true
+            }).unwrap_or_else(|e| {
+                eprintln!("Error: {e}");
+                false
+            });
+
+            // One failing trial is enough to fail the test; skip the rest.
+            if !passed {
+                failure_count += 1;
+                break;
+            }
+        }
+    }
+
+    if failure_count != 0 {
+        eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
+    }
+
+    if let Some(skipped) = skipped.as_mut() {
+        // Keep only the skip entries that never matched a test.
+        skipped.retain(|_, v| *v == 0);
+
+        if !skipped.is_empty() {
+            println!("Non matchable skip definitions:");
+            for k in skipped.keys() {
+                println!("  {k}");
+            }
+            panic!("Non matchable skip definitions needs to be removed");
+        }
+    }
+}
+
+/// A single corpus example after the nested `TestEntry` tree has been
+/// flattened by `flatten_tests`.
+pub struct FlattenedTest {
+    /// Example name, prefixed with the names of its enclosing groups
+    /// joined by `" - "`.
+    pub name: String,
+    /// Source text to parse.
+    pub input: Vec<u8>,
+    /// Expected S-expression output.
+    pub output: String,
+    /// Languages taken from the example's attributes.
+    pub languages: Vec<Box<str>>,
+    /// Whether the expected output includes field names; when false, fields
+    /// are stripped from the actual output before comparing.
+    pub has_fields: bool,
+    /// Optional `(start, end)` delimiters passed to `set_included_ranges`.
+    /// `flatten_tests` always sets this to `None`.
+    pub template_delimiters: Option<(&'static str, &'static str)>,
+}
+
+/// Flattens a nested `TestEntry` tree into a list of examples.
+///
+/// Each example's name is prefixed with the names of its enclosing groups,
+/// joined by `" - "`; the root group's name is used as-is. When `filter` is
+/// given, examples whose full name does not match it are left out.
+pub fn flatten_tests(test: TestEntry, filter: Option<&Regex>) -> Vec<FlattenedTest> {
+    // Appends the examples under `entry` to `out`. `prefix` is the joined
+    // name of the enclosing groups.
+    fn collect(
+        entry: TestEntry,
+        filter: Option<&Regex>,
+        is_root: bool,
+        prefix: &str,
+        out: &mut Vec<FlattenedTest>,
+    ) {
+        match entry {
+            TestEntry::Example {
+                name,
+                input,
+                output,
+                has_fields,
+                attributes,
+                ..
+            } => {
+                let name = if prefix.is_empty() {
+                    name
+                } else {
+                    format!("{prefix} - {name}")
+                };
+
+                let rejected = filter.map_or(false, |re| re.find(&name).is_none());
+                if rejected {
+                    return;
+                }
+
+                out.push(FlattenedTest {
+                    name,
+                    input,
+                    output,
+                    has_fields,
+                    languages: attributes.languages,
+                    template_delimiters: None,
+                });
+            }
+            TestEntry::Group { name, children, .. } => {
+                let name = if is_root || prefix.is_empty() {
+                    name
+                } else {
+                    format!("{prefix} - {name}")
+                };
+                for child in children {
+                    collect(child, filter, false, &name, out);
+                }
+            }
+        }
+    }
+
+    let mut flattened = Vec::new();
+    collect(test, filter, true, "", &mut flattened);
+    flattened
+}
--- a/cli/src/tests/helpers/random.rs
+++ b/cli/src/tests/helpers/random.rs
--- a/cli/src/tests/helpers/scope_sequence.rs
+++ b/cli/src/tests/helpers/scope_sequence.rs
--- a/cli/src/generate/render.rs
+++ b/cli/src/generate/render.rs
@ -258,7 +258,7 @@ impl Generator {
            let constant_name = if let Some(symbol) = symbol {
                format!("{}_character_set_{}", self.symbol_ids[symbol], count)
            } else {
-                format!("extras_character_set_{}", count)
+                format!("extras_character_set_{count}")
            };
            self.large_character_set_info.push(LargeCharacterSetInfo {
                constant_name,
@ -369,12 +369,12 @@ impl Generator {
        for symbol in &self.parse_table.symbols {
            if *symbol != Symbol::end() {
                self.symbol_order.insert(*symbol, i);
-                add_line!(self, "{} = {},", self.symbol_ids[symbol], i);
+                add_line!(self, "{} = {i},", self.symbol_ids[symbol]);
                i += 1;
            }
        }
        for alias in &self.unique_aliases {
-            add_line!(self, "{} = {},", self.alias_ids[alias], i);
+            add_line!(self, "{} = {i},", self.alias_ids[alias]);
            i += 1;
        }
        dedent!(self);
@ -393,7 +393,7 @@ impl Generator {
                        alias.value.as_str()
                    }),
            );
-            add_line!(self, "[{}] = \"{}\",", self.symbol_ids[symbol], name);
+            add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol]);
        }
        for alias in &self.unique_aliases {
            add_line!(
@ -450,12 +450,7 @@ impl Generator {
        indent!(self);
        add_line!(self, "[0] = NULL,");
        for field_name in &self.field_names {
-            add_line!(
-                self,
-                "[{}] = \"{}\",",
-                self.field_id(field_name),
-                field_name
-            );
+            add_line!(self, "[{}] = \"{field_name}\",", self.field_id(field_name));
        }
        dedent!(self);
        add_line!(self, "}};");
@ -473,7 +468,7 @@ impl Generator {
            indent!(self);
            if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
                add_line!(self, ".visible = true,");
-                add_line!(self, ".named = {},", is_named);
+                add_line!(self, ".named = {is_named},");
            } else {
                match self.metadata_for_symbol(*symbol).1 {
                    VariableType::Named => {
@ -529,11 +524,11 @@ impl Generator {
                continue;
            }

-            add_line!(self, "[{}] = {{", i);
+            add_line!(self, "[{i}] = {{");
            indent!(self);
            for (j, alias) in production_info.alias_sequence.iter().enumerate() {
                if let Some(alias) = alias {
-                    add_line!(self, "[{}] = {},", j, self.alias_ids[alias]);
+                    add_line!(self, "[{j}] = {},", self.alias_ids[alias]);
                }
            }
            dedent!(self);
@ -1044,9 +1039,8 @@ impl Generator {
        for i in 0..self.syntax_grammar.external_tokens.len() {
            add_line!(
                self,
-                "{} = {},",
+                "{} = {i},",
                self.external_token_id(&self.syntax_grammar.external_tokens[i]),
-                i
            );
        }
        dedent!(self);
@ -1133,7 +1127,7 @@ impl Generator {
            .enumerate()
            .take(self.large_state_count)
        {
-            add_line!(self, "[{}] = {{", i);
+            add_line!(self, "[{i}] = {{");
            indent!(self);

            // Ensure the entries are in a deterministic order, since they are
--- a/cli/src/lib.rs
+++ b/cli/src/lib.rs
@ -1,5 +1,6 @@
 #![doc = include_str!("../README.md")]

+pub mod fuzz;
 pub mod generate;
 pub mod highlight;
 pub mod logger;
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@ -11,6 +11,10 @@ use glob::glob;
 use regex::Regex;
 use tree_sitter::{ffi, Parser, Point};
 use tree_sitter_cli::{
+    fuzz::{
+        fuzz_language_corpus, FuzzOptions, EDIT_COUNT, ITERATION_COUNT, LOG_ENABLED,
+        LOG_GRAPH_ENABLED, START_SEED,
+    },
    generate::{self, lookup_package_json_for_path},
    highlight, logger,
    parse::{self, ParseFileOptions, ParseOutput},
@ -36,6 +40,7 @@ enum Commands {
    BuildWasm(BuildWasm),
    Parse(Parse),
    Test(Test),
+    Fuzz(Fuzz),
    Query(Query),
    Highlight(Highlight),
    Tags(Tags),
@ -249,6 +254,25 @@ struct Test {
    pub config_path: Option<PathBuf>,
 }

+// Command-line arguments of the `fuzz` subcommand (alias `f`).
+#[derive(Args)]
+#[command(about = "Fuzz a parser", alias = "f")]
+struct Fuzz {
+    #[arg(long, short, help = "List of test names to skip")]
+    pub skip: Option<Vec<String>>,
+    #[arg(long, help = "Subdirectory to the language")]
+    pub subdir: Option<String>,
+    #[arg(long, short, help = "Maximum number of edits to perform per fuzz test")]
+    pub edits: Option<usize>,
+    #[arg(long, short, help = "Number of fuzzing iterations to run per test")]
+    pub iterations: Option<usize>,
+    #[arg(long, short, help = "Regex pattern to filter tests")]
+    pub filter: Option<Regex>,
+    // Long flag only: a derived short flag would be `-l`, which is already
+    // taken by `--log` below, and short flags must be unique per command.
+    #[arg(long, help = "Enable logging of graphs and input")]
+    pub log_graphs: bool,
+    #[arg(long, short, help = "Enable parser logging")]
+    pub log: bool,
+}
+
 #[derive(Args)]
 #[command(about = "Search files using a syntax tree query", alias = "q")]
 struct Query {
@ -457,7 +481,7 @@ fn run() -> Result<()> {
                if let Some(path) = generate_options.libdir {
                    loader = loader::Loader::with_parser_lib_path(PathBuf::from(path));
                }
-                loader.use_debug_build(generate_options.debug_build);
+                loader.debug_build(generate_options.debug_build);
                loader.languages_at_path(&current_dir)?;
            }
        }
@ -507,7 +531,7 @@ fn run() -> Result<()> {
                    (false, false) => &[],
                };

-                loader.use_debug_build(build_options.debug);
+                loader.debug_build(build_options.debug);

                let config = Config::load(None)?;
                let loader_config = config.get()?;
@ -560,7 +584,7 @@ fn run() -> Result<()> {
            let cancellation_flag = util::cancel_on_signal();
            let mut parser = Parser::new();

-            loader.use_debug_build(parse_options.debug_build);
+            loader.debug_build(parse_options.debug_build);

            #[cfg(feature = "wasm")]
            if parse_options.wasm {
@ -656,7 +680,7 @@ fn run() -> Result<()> {
        Commands::Test(test_options) => {
            let config = Config::load(test_options.config_path)?;

-            loader.use_debug_build(test_options.debug_build);
+            loader.debug_build(test_options.debug_build);

            let mut parser = Parser::new();

@ -730,6 +754,33 @@ fn run() -> Result<()> {
            }
        }

+        Commands::Fuzz(fuzz_options) => {
+            loader.sanitize_build(true);
+
+            let languages = loader.languages_at_path(&current_dir)?;
+            let (language, language_name) = &languages
+                .first()
+                .ok_or_else(|| anyhow!("No language found"))?;
+
+            let mut fuzz_options = FuzzOptions {
+                skipped: fuzz_options.skip,
+                subdir: fuzz_options.subdir,
+                edits: fuzz_options.edits.unwrap_or(*EDIT_COUNT),
+                iterations: fuzz_options.iterations.unwrap_or(*ITERATION_COUNT),
+                filter: fuzz_options.filter,
+                log_graphs: fuzz_options.log_graphs || *LOG_GRAPH_ENABLED,
+                log: fuzz_options.log || *LOG_ENABLED,
+            };
+
+            fuzz_language_corpus(
+                language,
+                language_name,
+                *START_SEED,
+                &current_dir,
+                &mut fuzz_options,
+            );
+        }
+
        Commands::Query(query_options) => {
            let config = Config::load(query_options.config_path)?;
            let paths = collect_paths(query_options.paths_file.as_deref(), query_options.paths)?;
--- a/cli/src/parse.rs
+++ b/cli/src/parse.rs
@ -10,13 +10,7 @@ use anyhow::{anyhow, Context, Result};
 use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree};

 use super::util;
-
-#[derive(Debug)]
-pub struct Edit {
-    pub position: usize,
-    pub deleted_length: usize,
-    pub inserted_text: Vec<u8>,
-}
+use crate::fuzz::edits::Edit;

 #[derive(Debug, Default)]
 pub struct Stats {
--- a/cli/src/tests/corpus_test.rs
+++ b/cli/src/tests/corpus_test.rs
@ -1,23 +1,26 @@
 use std::{collections::HashMap, env, fs};

-use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
+use tree_sitter::Parser;
 use tree_sitter_proc_macro::test_with_seed;

-use super::helpers::{
-    allocations,
-    edits::{get_random_edit, invert_edit},
-    fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
-    new_seed,
-    random::Rand,
-    scope_sequence::ScopeSequence,
-    EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED,
-    START_SEED,
-};
 use crate::{
+    fuzz::{
+        corpus_test::{
+            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
+        },
+        edits::{get_random_edit, invert_edit},
+        flatten_tests, new_seed,
+        random::Rand,
+        EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_GRAPH_ENABLED,
+        START_SEED,
+    },
    generate,
    parse::perform_edit,
-    test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
-    util,
+    test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
+    tests::{
+        allocations,
+        helpers::fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
+    },
 };

 #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
@ -79,7 +82,7 @@ fn test_corpus_for_json(seed: usize) {
 #[ignore]
 #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
 fn test_corpus_for_php(seed: usize) {
-    test_language_corpus("php", seed, None, Some("php"));
+    test_language_corpus("php", seed, None, None);
 }

 #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
@ -107,7 +110,7 @@ fn test_corpus_for_tsx(seed: usize) {
    test_language_corpus("typescript", seed, None, Some("tsx"));
 }

-fn test_language_corpus(
+pub fn test_language_corpus(
    language_name: &str,
    start_seed: usize,
    skipped: Option<&[&str]>,
@ -120,17 +123,23 @@ fn test_language_corpus(
    let template_corpus_dir = fixtures_dir().join("template_corpus");
    let corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");

+    println!("Testing {language_name} corpus @ {}", corpus_dir.display());
+
    let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt"));
    let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt"));
    let main_tests = parse_tests(&corpus_dir).unwrap();
    let error_tests = parse_tests(&error_corpus_file).unwrap_or_default();
    let template_tests = parse_tests(&template_corpus_file).unwrap_or_default();
-    let mut tests = flatten_tests(main_tests);
-    tests.extend(flatten_tests(error_tests));
-    tests.extend(flatten_tests(template_tests).into_iter().map(|mut t| {
-        t.template_delimiters = Some(("<%", "%>"));
-        t
-    }));
+    let mut tests = flatten_tests(main_tests, EXAMPLE_FILTER.as_ref());
+    tests.extend(flatten_tests(error_tests, EXAMPLE_FILTER.as_ref()));
+    tests.extend(
+        flatten_tests(template_tests, EXAMPLE_FILTER.as_ref())
+            .into_iter()
+            .map(|mut t| {
+                t.template_delimiters = Some(("<%", "%>"));
+                t
+            }),
+    );

    tests.retain(|t| t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir)));

@ -185,7 +194,8 @@ fn test_language_corpus(
            }

            true
-        });
+        })
+        .unwrap();

        if !passed {
            failure_count += 1;
@ -279,7 +289,7 @@ fn test_language_corpus(
                }

                true
-            });
+            }).unwrap();

            if !passed {
                failure_count += 1;
@ -367,7 +377,7 @@ fn test_feature_corpus_files() {
            let c_code = generate_result.unwrap().1;
            let language = get_test_language(language_name, &c_code, Some(&test_path));
            let test = parse_tests(&corpus_path).unwrap();
-            let tests = flatten_tests(test);
+            let tests = flatten_tests(test, EXAMPLE_FILTER.as_ref());

            if !tests.is_empty() {
                eprintln!("test language: {language_name:?}");
@ -393,7 +403,8 @@ fn test_feature_corpus_files() {
                        println!();
                        false
                    }
-                });
+                })
+                .unwrap();

                if !passed {
                    failure_count += 1;
@ -405,202 +416,3 @@ fn test_feature_corpus_files() {

    assert!(failure_count == 0, "{failure_count} corpus tests failed");
 }
-
-fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
-    fn check(node: Node, line_offsets: &[usize]) {
-        let start_byte = node.start_byte();
-        let end_byte = node.end_byte();
-        let start_point = node.start_position();
-        let end_point = node.end_position();
-
-        assert!(start_byte <= end_byte);
-        assert!(start_point <= end_point);
-        assert_eq!(
-            start_byte,
-            line_offsets[start_point.row] + start_point.column
-        );
-        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);
-
-        let mut last_child_end_byte = start_byte;
-        let mut last_child_end_point = start_point;
-        let mut some_child_has_changes = false;
-        let mut actual_named_child_count = 0;
-        for i in 0..node.child_count() {
-            let child = node.child(i).unwrap();
-            assert!(child.start_byte() >= last_child_end_byte);
-            assert!(child.start_position() >= last_child_end_point);
-            check(child, line_offsets);
-            if child.has_changes() {
-                some_child_has_changes = true;
-            }
-            if child.is_named() {
-                actual_named_child_count += 1;
-            }
-            last_child_end_byte = child.end_byte();
-            last_child_end_point = child.end_position();
-        }
-
-        assert_eq!(actual_named_child_count, node.named_child_count());
-
-        if node.child_count() > 0 {
-            assert!(end_byte >= last_child_end_byte);
-            assert!(end_point >= last_child_end_point);
-        }
-
-        if some_child_has_changes {
-            assert!(node.has_changes());
-        }
-    }
-
-    let mut line_offsets = vec![0];
-    for (i, c) in input.iter().enumerate() {
-        if *c == b'\n' {
-            line_offsets.push(i + 1);
-        }
-    }
-
-    check(tree.root_node(), &line_offsets);
-}
-
-fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
-    let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
-    let old_scope_sequence = ScopeSequence::new(old_tree);
-    let new_scope_sequence = ScopeSequence::new(new_tree);
-
-    let old_range = old_tree.root_node().range();
-    let new_range = new_tree.root_node().range();
-
-    let byte_range =
-        old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
-    let point_range = old_range.start_point.min(new_range.start_point)
-        ..old_range.end_point.max(new_range.end_point);
-
-    for range in &changed_ranges {
-        if range.end_byte > byte_range.end || range.end_point > point_range.end {
-            return Err(format!(
-                "changed range extends outside of the old and new trees {range:?}",
-            ));
-        }
-    }
-
-    old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
-}
-
-fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
-    if let Some((start, end)) = delimiters {
-        let mut ranges = Vec::new();
-        let mut ix = 0;
-        while ix < input.len() {
-            let Some(mut start_ix) = input[ix..]
-                .windows(2)
-                .position(|win| win == start.as_bytes())
-            else {
-                break;
-            };
-            start_ix += ix + start.len();
-            let end_ix = input[start_ix..]
-                .windows(2)
-                .position(|win| win == end.as_bytes())
-                .map_or(input.len(), |ix| start_ix + ix);
-            ix = end_ix;
-            ranges.push(Range {
-                start_byte: start_ix,
-                end_byte: end_ix,
-                start_point: point_for_offset(input, start_ix),
-                end_point: point_for_offset(input, end_ix),
-            });
-        }
-
-        parser.set_included_ranges(&ranges).unwrap();
-    } else {
-        parser.set_included_ranges(&[]).unwrap();
-    }
-}
-
-fn point_for_offset(text: &[u8], offset: usize) -> Point {
-    let mut point = Point::default();
-    for byte in &text[..offset] {
-        if *byte == b'\n' {
-            point.row += 1;
-            point.column = 0;
-        } else {
-            point.column += 1;
-        }
-    }
-    point
-}
-
-fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
-    let mut parser = Parser::new();
-
-    if *LOG_ENABLED {
-        parser.set_logger(Some(Box::new(|log_type, msg| {
-            if log_type == LogType::Lex {
-                eprintln!("  {msg}");
-            } else {
-                eprintln!("{msg}");
-            }
-        })));
-    } else if *LOG_GRAPH_ENABLED {
-        *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap());
-    }
-
-    parser
-}
-
-struct FlattenedTest {
-    name: String,
-    input: Vec<u8>,
-    output: String,
-    languages: Vec<Box<str>>,
-    has_fields: bool,
-    template_delimiters: Option<(&'static str, &'static str)>,
-}
-
-fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
-    fn helper(test: TestEntry, is_root: bool, prefix: &str, result: &mut Vec<FlattenedTest>) {
-        match test {
-            TestEntry::Example {
-                mut name,
-                input,
-                output,
-                has_fields,
-                attributes,
-                ..
-            } => {
-                if !prefix.is_empty() {
-                    name.insert_str(0, " - ");
-                    name.insert_str(0, prefix);
-                }
-                if let Some(filter) = EXAMPLE_FILTER.as_ref() {
-                    if !name.contains(filter.as_str()) {
-                        return;
-                    }
-                }
-
-                result.push(FlattenedTest {
-                    name,
-                    input,
-                    output,
-                    has_fields,
-                    languages: attributes.languages,
-                    template_delimiters: None,
-                });
-            }
-            TestEntry::Group {
-                mut name, children, ..
-            } => {
-                if !is_root && !prefix.is_empty() {
-                    name.insert_str(0, " - ");
-                    name.insert_str(0, prefix);
-                }
-                for child in children {
-                    helper(child, false, &name, result);
-                }
-            }
-        }
-    }
-    let mut result = Vec::new();
-    helper(test, true, "", &mut result);
-    result
-}
--- a/cli/src/tests/helpers/edits.rs
+++ b/cli/src/tests/helpers/edits.rs
@ -1,8 +1,5 @@
 use std::{ops::Range, str};

-use super::random::Rand;
-use crate::parse::Edit;
-
 #[derive(Debug)]
 pub struct ReadRecorder<'a> {
    content: &'a [u8],
@ -50,55 +47,3 @@ impl<'a> ReadRecorder<'a> {
        result
    }
 }
-
-pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
-    let position = edit.position;
-    let removed_content = &input[position..(position + edit.deleted_length)];
-    Edit {
-        position,
-        deleted_length: edit.inserted_text.len(),
-        inserted_text: removed_content.to_vec(),
-    }
-}
-
-pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
-    let choice = rand.unsigned(10);
-    if choice < 2 {
-        // Insert text at end
-        let inserted_text = rand.words(3);
-        Edit {
-            position: input.len(),
-            deleted_length: 0,
-            inserted_text,
-        }
-    } else if choice < 5 {
-        // Delete text from the end
-        let deleted_length = rand.unsigned(30).min(input.len());
-        Edit {
-            position: input.len() - deleted_length,
-            deleted_length,
-            inserted_text: vec![],
-        }
-    } else if choice < 8 {
-        // Insert at a random position
-        let position = rand.unsigned(input.len());
-        let word_count = 1 + rand.unsigned(3);
-        let inserted_text = rand.words(word_count);
-        Edit {
-            position,
-            deleted_length: 0,
-            inserted_text,
-        }
-    } else {
-        // Replace at random position
-        let position = rand.unsigned(input.len());
-        let deleted_length = rand.unsigned(input.len() - position);
-        let word_count = 1 + rand.unsigned(3);
-        let inserted_text = rand.words(word_count);
-        Edit {
-            position,
-            deleted_length,
-            inserted_text,
-        }
-    }
-}
--- a/cli/src/tests/helpers/fixtures.rs
+++ b/cli/src/tests/helpers/fixtures.rs
@ -18,7 +18,7 @@ lazy_static! {
    static ref TEST_LOADER: Loader = {
        let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
        if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
-            loader.use_debug_build(true);
+            loader.debug_build(true);
        }
        loader
    };
--- a/cli/src/tests/helpers/mod.rs
+++ b/cli/src/tests/helpers/mod.rs
@ -1,35 +1,4 @@
-pub(super) mod allocations;
-pub(super) mod edits;
+pub mod allocations;
+pub mod edits;
 pub(super) mod fixtures;
 pub(super) mod query_helpers;
-pub(super) mod random;
-pub(super) mod scope_sequence;
-
-use std::env;
-
-use lazy_static::lazy_static;
-use rand::Rng;
-
-lazy_static! {
-    pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
-    pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_LOG_GRAPHS").is_ok();
-    pub static ref LANGUAGE_FILTER: Option<String> = env::var("TREE_SITTER_LANGUAGE").ok();
-    pub static ref EXAMPLE_FILTER: Option<String> = env::var("TREE_SITTER_EXAMPLE").ok();
-}
-
-lazy_static! {
-    pub static ref START_SEED: usize = new_seed();
-    pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
-    pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
-}
-
-fn int_env_var(name: &'static str) -> Option<usize> {
-    env::var(name).ok().and_then(|e| e.parse().ok())
-}
-
-pub fn new_seed() -> usize {
-    int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
-        let mut rng = rand::thread_rng();
-        rng.gen::<usize>()
-    })
-}
--- a/cli/src/tests/mod.rs
+++ b/cli/src/tests/mod.rs
@ -17,3 +17,10 @@ mod tree_test;

 #[cfg(feature = "wasm")]
 mod wasm_language_test;
+
+pub use crate::fuzz::{
+    allocations,
+    edits::{get_random_edit, invert_edit},
+    random::Rand,
+    ITERATION_COUNT,
+};
--- a/cli/src/tests/node_test.rs
+++ b/cli/src/tests/node_test.rs
@ -1,9 +1,9 @@
 use tree_sitter::{Node, Parser, Point, Tree};

-use super::helpers::{
-    edits::get_random_edit,
-    fixtures::{fixtures_dir, get_language, get_test_language},
-    random::Rand,
+use super::{
+    get_random_edit,
+    helpers::fixtures::{fixtures_dir, get_language, get_test_language},
+    Rand,
 };
 use crate::{
    generate::{generate_parser_for_grammar, load_grammar_file},
--- a/cli/src/tests/parser_test.rs
+++ b/cli/src/tests/parser_test.rs
@ -8,13 +8,14 @@ use tree_sitter_proc_macro::retry;

 use super::helpers::{
    allocations,
-    edits::{invert_edit, ReadRecorder},
+    edits::ReadRecorder,
    fixtures::{get_language, get_test_language},
 };
 use crate::{
+    fuzz::edits::Edit,
    generate::{generate_parser_for_grammar, load_grammar_file},
-    parse::{perform_edit, Edit},
-    tests::helpers::fixtures::fixtures_dir,
+    parse::perform_edit,
+    tests::{helpers::fixtures::fixtures_dir, invert_edit},
 };

 #[test]
--- a/cli/src/tests/query_test.rs
+++ b/cli/src/tests/query_test.rs
@ -13,11 +13,13 @@ use super::helpers::{
    allocations,
    fixtures::{get_language, get_test_language},
    query_helpers::{assert_query_matches, Match, Pattern},
-    ITERATION_COUNT,
 };
 use crate::{
    generate::generate_parser_for_grammar,
-    tests::helpers::query_helpers::{collect_captures, collect_matches},
+    tests::{
+        helpers::query_helpers::{collect_captures, collect_matches},
+        ITERATION_COUNT,
+    },
 };

 lazy_static! {
--- a/cli/src/tests/tree_test.rs
+++ b/cli/src/tests/tree_test.rs
@ -2,8 +2,8 @@ use std::str;

 use tree_sitter::{InputEdit, Parser, Point, Range, Tree};

-use super::helpers::{edits::invert_edit, fixtures::get_language};
-use crate::parse::{perform_edit, Edit};
+use super::helpers::fixtures::get_language;
+use crate::{fuzz::edits::Edit, parse::perform_edit, tests::invert_edit};

 #[test]
 fn test_tree_edit() {