Reorganize rust crates into a flat crates directory, simplify some CI steps (#4496)

* Move all rust crates (except lib) into crates dir, w/o nesting

* Remove stale path from .gitattributes

* Rename lib.rs files for easier navigation

* Rename mod.rs file for easier navigation

* Fix emscripten-version path

* Fix fixtures dir paths

* Use the default rustfmt settings

* Don't use nightly on CI
This commit is contained in:
Max Brunsfeld 2025-06-06 14:25:37 -07:00 committed by GitHub
parent a6e530b33d
commit 0fdf569571
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
163 changed files with 69 additions and 89 deletions

394
crates/cli/src/fuzz.rs Normal file
View file

@ -0,0 +1,394 @@
use std::{
collections::HashMap,
env, fs,
path::{Path, PathBuf},
sync::LazyLock,
};
use rand::Rng;
use regex::Regex;
use tree_sitter::{Language, Parser};
pub mod allocations;
pub mod corpus_test;
pub mod edits;
pub mod random;
pub mod scope_sequence;
use crate::{
fuzz::{
corpus_test::{
check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
},
edits::{get_random_edit, invert_edit},
random::Rand,
},
parse::perform_edit,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
};
// Fuzzer configuration, read lazily from environment variables so behavior
// can be tuned without recompiling.

/// Enables parser logging when `TREE_SITTER_LOG` is set.
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

/// Enables graph (HTML) logging when `TREE_SITTER_LOG_GRAPHS` is set.
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

/// Restricts fuzzing to a single language via `TREE_SITTER_LANGUAGE`.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

/// Regex of example names to include (`TREE_SITTER_EXAMPLE_INCLUDE`).
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

/// Regex of example names to exclude (`TREE_SITTER_EXAMPLE_EXCLUDE`).
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

/// Initial random seed: `TREE_SITTER_SEED`, or a freshly generated value.
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

/// Bound on random edits per trial (`TREE_SITTER_EDITS`, default 3).
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));

/// Number of edit/undo trials per test (`TREE_SITTER_ITERATIONS`, default 10).
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
/// Reads the environment variable `name` and parses it as a `usize`,
/// yielding `None` when the variable is unset or not a valid integer.
fn int_env_var(name: &'static str) -> Option<usize> {
    match env::var(name) {
        Ok(value) => value.parse().ok(),
        Err(_) => None,
    }
}
/// Reads the environment variable `name` and compiles its value as a regex,
/// yielding `None` when the variable is unset or the pattern is invalid.
fn regex_env_var(name: &'static str) -> Option<Regex> {
    let pattern = env::var(name).ok()?;
    Regex::new(&pattern).ok()
}
/// Returns the fuzzing seed: an explicit `TREE_SITTER_SEED` from the
/// environment when present, otherwise a random seed that is printed to
/// stderr so failing runs can be reproduced.
#[must_use]
pub fn new_seed() -> usize {
    if let Some(seed) = int_env_var("TREE_SITTER_SEED") {
        seed
    } else {
        let seed = rand::thread_rng().gen::<usize>();
        eprintln!("Seed: {seed}");
        seed
    }
}
/// Runtime options for `fuzz_language_corpus`.
pub struct FuzzOptions {
    /// Test names to skip; `take`n by the fuzzer, which panics on entries
    /// that never match any test.
    pub skipped: Option<Vec<String>>,
    /// Optional subdirectory of the grammar dir that contains `test/corpus`.
    pub subdir: Option<PathBuf>,
    /// Number of random edits per trial.
    /// NOTE(review): `fuzz_language_corpus` currently reads the env-derived
    /// `EDIT_COUNT` static rather than this field — confirm intended.
    pub edits: usize,
    /// Number of edit/undo trials per test.
    pub iterations: usize,
    /// Only run tests whose full name matches this pattern.
    pub include: Option<Regex>,
    /// Skip tests whose full name matches (ignored when `include` is set).
    pub exclude: Option<Regex>,
    /// Dump parse graphs during fuzzing.
    pub log_graphs: bool,
    /// Enable parser logging.
    /// NOTE(review): not read in the code visible here — confirm usage.
    pub log: bool,
}
/// Fuzzes `language` against its corpus tests.
///
/// For every corpus example this (1) parses it from scratch and compares the
/// tree against the expected S-expression, then (2) for `options.iterations`
/// trials applies a random series of edits, reparses incrementally, undoes
/// the edits, reparses again, and checks that the tree returns to the
/// expected shape. All parsing runs under `allocations::record` so leaked
/// allocations are caught.
///
/// Panics at the end if any name in `options.skipped` never matched a test,
/// so stale skip definitions get cleaned up.
pub fn fuzz_language_corpus(
    language: &Language,
    language_name: &str,
    start_seed: usize,
    grammar_dir: &Path,
    options: &mut FuzzOptions,
) {
    // Keeps only tests that apply to `language_name`; groups that become
    // empty are dropped entirely.
    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
        match entry {
            TestEntry::Example { attributes, .. } => {
                // NOTE(review): assumes `attributes.languages` is non-empty;
                // an empty first entry appears to mean "any language" —
                // confirm against the test parser.
                attributes.languages[0].is_empty()
                    || attributes
                        .languages
                        .iter()
                        .any(|lang| lang.as_ref() == language_name)
            }
            TestEntry::Group {
                ref mut children, ..
            } => {
                children.retain_mut(|child| retain(child, language_name));
                !children.is_empty()
            }
        }
    }

    let subdir = options.subdir.take().unwrap_or_default();

    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
    if !corpus_dir.exists() || !corpus_dir.is_dir() {
        eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
        return;
    }
    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
        eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
        return;
    }

    let mut main_tests = parse_tests(&corpus_dir).unwrap();
    match main_tests {
        TestEntry::Group {
            ref mut children, ..
        } => {
            children.retain_mut(|child| retain(child, language_name));
        }
        // Parsing a corpus directory always yields a group at the root.
        TestEntry::Example { .. } => unreachable!(),
    }
    let tests = flatten_tests(
        main_tests,
        options.include.as_ref(),
        options.exclude.as_ref(),
    );

    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);

    // Skip-definition name -> number of times it matched; unmatched entries
    // are reported (and panicked on) at the end.
    let mut skipped = options
        .skipped
        .take()
        .unwrap_or_default()
        .into_iter()
        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
        .map(|x| (x, 0))
        .collect::<HashMap<String, usize>>();

    let mut failure_count = 0;

    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();

    if log_seed {
        println!(" start seed: {start_seed}");
    }

    println!();
    for (test_index, test) in tests.iter().enumerate() {
        let test_name = get_test_name(test);
        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
            println!(" {test_index}. {test_name} - SKIPPED");
            *counter += 1;
            continue;
        }

        println!(" {test_index}. {test_name}");

        // Phase 1: parse the pristine input and verify the expected tree,
        // with allocation-leak checking.
        let passed = allocations::record(|| {
            let mut log_session = None;
            let mut parser = get_parser(&mut log_session, "log.html");
            parser.set_language(language).unwrap();
            set_included_ranges(&mut parser, &test.input, test.template_delimiters);

            let tree = parser.parse(&test.input, None).unwrap();

            // Tests marked as error tests only assert that parsing finishes.
            if test.error {
                return true;
            }

            let mut actual_output = tree.root_node().to_sexp();
            if !test.has_fields {
                actual_output = strip_sexp_fields(&actual_output);
            }

            if actual_output != test.output {
                println!("Incorrect initial parse for {test_name}");
                print_diff_key();
                print_diff(&actual_output, &test.output, true);
                println!();
                return false;
            }

            true
        })
        .unwrap_or_else(|e| {
            eprintln!("Error: {e}");
            false
        });

        if !passed {
            failure_count += 1;
            continue;
        }

        // Reparse once outside of the recorder to get a baseline tree that
        // each trial below clones and mutates.
        let mut parser = Parser::new();
        parser.set_language(language).unwrap();
        let tree = parser.parse(&test.input, None).unwrap();
        drop(parser);

        // Phase 2: randomized edit/undo round-trips, one seed per trial.
        for trial in 0..options.iterations {
            let seed = start_seed + trial;
            let passed = allocations::record(|| {
                let mut rand = Rand::new(seed);
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(language).unwrap();
                let mut tree = tree.clone();
                let mut input = test.input.clone();

                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                // Perform a random series of edits and reparse.
                let mut undo_stack = Vec::new();
                for _ in 0..=rand.unsigned(*EDIT_COUNT) {
                    let edit = get_random_edit(&mut rand, &input);
                    undo_stack.push(invert_edit(&input, &edit));
                    perform_edit(&mut tree, &mut input, &edit).unwrap();
                }

                if log_seed {
                    println!(" {test_index}.{trial:<2} seed: {seed}");
                }

                if dump_edits {
                    // Persist the edited input for post-mortem debugging.
                    fs::create_dir_all("fuzz").unwrap();
                    fs::write(
                        Path::new("fuzz")
                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
                        &input,
                    )
                    .unwrap();
                }

                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                set_included_ranges(&mut parser, &input, test.template_delimiters);
                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();

                // Check that the new tree is consistent.
                check_consistent_sizes(&tree2, &input);
                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
                    return false;
                }

                // Undo all of the edits and re-parse again.
                while let Some(edit) = undo_stack.pop() {
                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
                }
                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }
                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();

                // Verify that the final tree matches the expectation from the corpus.
                let mut actual_output = tree3.root_node().to_sexp();
                if !test.has_fields {
                    actual_output = strip_sexp_fields(&actual_output);
                }
                if actual_output != test.output && !test.error {
                    println!("Incorrect parse for {test_name} - seed {seed}");
                    print_diff_key();
                    print_diff(&actual_output, &test.output, true);
                    println!();
                    return false;
                }

                // Check that the edited tree is consistent.
                check_consistent_sizes(&tree3, &input);
                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
                    return false;
                }

                true
            })
            .unwrap_or_else(|e| {
                eprintln!("Error: {e}");
                false
            });

            if !passed {
                failure_count += 1;
                break;
            }
        }
    }

    if failure_count != 0 {
        eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
    }

    // Any skip definition that never matched a test is stale and must be
    // removed from the configuration.
    skipped.retain(|_, v| *v == 0);
    if !skipped.is_empty() {
        println!("Non matchable skip definitions:");
        for k in skipped.keys() {
            println!(" {k}");
        }
        panic!("Non matchable skip definitions needs to be removed");
    }
}
/// A corpus test flattened out of its group hierarchy, ready to fuzz.
pub struct FlattenedTest {
    /// Full name, including ancestor group names joined with " - ".
    pub name: String,
    /// Source text to parse.
    pub input: Vec<u8>,
    /// Expected S-expression for the parse tree.
    pub output: String,
    /// Languages the test applies to (an empty first entry means any).
    pub languages: Vec<Box<str>>,
    /// The test only asserts that parsing terminates, not the tree shape.
    pub error: bool,
    /// The test is marked to be skipped.
    pub skip: bool,
    /// Whether the expected output includes field names.
    pub has_fields: bool,
    /// Start/end delimiters for template-style included ranges; always
    /// `None` for corpus-derived tests (see `flatten_tests`).
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
/// Flattens the corpus test tree into a list of runnable tests, joining
/// nested group names with " - " and applying the name filters.
///
/// When `include` is given it takes precedence and `exclude` is ignored;
/// otherwise tests whose full name matches `exclude` are dropped.
#[must_use]
pub fn flatten_tests(
    test: TestEntry,
    include: Option<&Regex>,
    exclude: Option<&Regex>,
) -> Vec<FlattenedTest> {
    fn helper(
        test: TestEntry,
        include: Option<&Regex>,
        exclude: Option<&Regex>,
        is_root: bool,
        prefix: &str,
        result: &mut Vec<FlattenedTest>,
    ) {
        match test {
            TestEntry::Example {
                mut name,
                input,
                output,
                has_fields,
                attributes,
                ..
            } => {
                // Prepend the accumulated group path to the example name.
                if !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
                }
                // Filters are applied to the fully-qualified name.
                if let Some(include) = include {
                    if !include.is_match(&name) {
                        return;
                    }
                } else if let Some(exclude) = exclude {
                    if exclude.is_match(&name) {
                        return;
                    }
                }
                result.push(FlattenedTest {
                    name,
                    input,
                    output,
                    has_fields,
                    languages: attributes.languages,
                    error: attributes.error,
                    skip: attributes.skip,
                    // Corpus tests carry no template delimiters.
                    template_delimiters: None,
                });
            }
            TestEntry::Group {
                mut name, children, ..
            } => {
                // The root group's own name is not prefixed, but it becomes
                // the prefix for its children.
                if !is_root && !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
                }
                for child in children {
                    helper(child, include, exclude, false, &name, result);
                }
            }
        }
    }
    let mut result = Vec::new();
    helper(test, include, exclude, true, "", &mut result);
    result
}

View file

@ -0,0 +1,122 @@
use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
/// Runs before `main` (via the `ctor` crate) and installs the recording
/// wrappers below as tree-sitter's allocator, so every C-side allocation
/// can be tracked by `record`.
#[ctor::ctor]
unsafe fn initialize_allocation_recording() {
    tree_sitter::set_allocator(
        Some(ts_record_malloc),
        Some(ts_record_calloc),
        Some(ts_record_realloc),
        Some(ts_record_free),
    );
}
/// A raw pointer used purely as a hash-map key identifying one allocation.
/// The pointer is never dereferenced.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Allocation(*const c_void);

// SAFETY: the wrapped pointer is only compared and hashed, never
// dereferenced, so sharing it across threads is sound.
unsafe impl Send for Allocation {}
unsafe impl Sync for Allocation {}

/// Per-thread bookkeeping for allocations made while recording is enabled.
#[derive(Default)]
struct AllocationRecorder {
    // Whether `record` is currently active on this thread.
    enabled: AtomicBool,
    // Monotonic index assigned to each allocation, used in leak reports.
    allocation_count: AtomicUsize,
    // Live allocations: pointer -> allocation index.
    outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}

thread_local! {
    static RECORDER: AllocationRecorder = AllocationRecorder::default();
}
// Bindings to the system C allocator, which the recording wrappers below
// delegate to.
extern "C" {
    fn malloc(size: usize) -> *mut c_void;
    fn calloc(count: usize, size: usize) -> *mut c_void;
    fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
    fn free(ptr: *mut c_void);
}
/// Runs `f` with allocation recording enabled on the current thread, then
/// reports an error if any recorded allocation was never freed.
///
/// Returns `Ok` with `f`'s value when all allocations were balanced, or an
/// `Err` listing the indices (allocation order numbers) of leaked blocks.
pub fn record<T>(f: impl FnOnce() -> T) -> Result<T, String> {
    // Reset this thread's recorder and switch recording on.
    RECORDER.with(|recorder| {
        recorder.enabled.store(true, SeqCst);
        recorder.allocation_count.store(0, SeqCst);
        recorder.outstanding_allocations.lock().unwrap().clear();
    });

    let value = f();

    // Switch recording off and drain whatever is still outstanding.
    let outstanding_allocation_indices = RECORDER.with(|recorder| {
        recorder.enabled.store(false, SeqCst);
        recorder.allocation_count.store(0, SeqCst);
        recorder
            .outstanding_allocations
            .lock()
            .unwrap()
            .drain()
            .map(|e| e.1)
            .collect::<Vec<_>>()
    });
    if !outstanding_allocation_indices.is_empty() {
        return Err(format!(
            "Leaked allocation indices: {outstanding_allocation_indices:?}",
        ));
    }
    Ok(value)
}
/// Notes a new allocation in the current thread's recorder (no-op when
/// recording is disabled).
fn record_alloc(ptr: *mut c_void) {
    RECORDER.with(|recorder| {
        if recorder.enabled.load(SeqCst) {
            let count = recorder.allocation_count.fetch_add(1, SeqCst);
            recorder
                .outstanding_allocations
                .lock()
                .unwrap()
                .insert(Allocation(ptr), count);
        }
    });
}

/// Removes a freed pointer from the current thread's outstanding set
/// (no-op when recording is disabled or the pointer was never recorded).
fn record_dealloc(ptr: *mut c_void) {
    RECORDER.with(|recorder| {
        if recorder.enabled.load(SeqCst) {
            recorder
                .outstanding_allocations
                .lock()
                .unwrap()
                .remove(&Allocation(ptr));
        }
    });
}
// C-ABI shims installed by `initialize_allocation_recording`. Each one
// forwards to the system allocator and updates the per-thread recorder.

unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
    let result = malloc(size);
    record_alloc(result);
    result
}

unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
    let result = calloc(count, size);
    record_alloc(result);
    result
}

unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
    let result = realloc(ptr, size);
    if ptr.is_null() {
        // A null `ptr` makes realloc behave like malloc: a new allocation.
        record_alloc(result);
    } else if !core::ptr::eq(ptr, result) {
        // The block moved: the old pointer dies, the new one is now live.
        record_dealloc(ptr);
        record_alloc(result);
    }
    result
}

unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
    record_dealloc(ptr);
    free(ptr);
}

View file

@ -0,0 +1,147 @@
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use super::{scope_sequence::ScopeSequence, LOG_ENABLED, LOG_GRAPH_ENABLED};
use crate::util;
/// Walks the whole tree asserting structural invariants: byte offsets agree
/// with row/column positions (via `line_offsets`), ranges are not inverted,
/// children are ordered and stay within their parent, the named-child count
/// matches, and `has_changes` propagates from child to parent.
pub fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
    fn check(node: Node, line_offsets: &[usize]) {
        let start_byte = node.start_byte();
        let end_byte = node.end_byte();
        let start_point = node.start_position();
        let end_point = node.end_position();

        // The node's range must be non-inverted, and its byte offsets must
        // agree with its row/column positions.
        assert!(start_byte <= end_byte);
        assert!(start_point <= end_point);
        assert_eq!(
            start_byte,
            line_offsets[start_point.row] + start_point.column
        );
        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);

        let mut last_child_end_byte = start_byte;
        let mut last_child_end_point = start_point;
        let mut some_child_has_changes = false;
        let mut actual_named_child_count = 0;
        for i in 0..node.child_count() {
            let child = node.child(i).unwrap();
            // Children must be in order and non-overlapping.
            assert!(child.start_byte() >= last_child_end_byte);
            assert!(child.start_position() >= last_child_end_point);
            check(child, line_offsets);
            if child.has_changes() {
                some_child_has_changes = true;
            }
            if child.is_named() {
                actual_named_child_count += 1;
            }
            last_child_end_byte = child.end_byte();
            last_child_end_point = child.end_position();
        }

        assert_eq!(actual_named_child_count, node.named_child_count());

        // Children must not extend past the end of their parent.
        if node.child_count() > 0 {
            assert!(end_byte >= last_child_end_byte);
            assert!(end_point >= last_child_end_point);
        }

        // An edited child implies an edited parent.
        if some_child_has_changes {
            assert!(node.has_changes());
        }
    }

    // line_offsets[row] is the byte offset at which `row` begins.
    let mut line_offsets = vec![0];
    for (i, c) in input.iter().enumerate() {
        if *c == b'\n' {
            line_offsets.push(i + 1);
        }
    }

    check(tree.root_node(), &line_offsets);
}
/// Validates the ranges reported by `Tree::changed_ranges` between an old
/// and a new tree: each range must lie inside the union of the two trees'
/// extents, and every position whose scope stack differs between the trees
/// must be covered by a reported range (checked via `ScopeSequence`).
pub fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
    let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
    let old_scope_sequence = ScopeSequence::new(old_tree);
    let new_scope_sequence = ScopeSequence::new(new_tree);

    // Bounding region spanning both trees, in bytes and in points.
    let old_range = old_tree.root_node().range();
    let new_range = new_tree.root_node().range();
    let byte_range =
        old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
    let point_range = old_range.start_point.min(new_range.start_point)
        ..old_range.end_point.max(new_range.end_point);

    // A changed range that extends beyond both trees is always a bug.
    for range in &changed_ranges {
        if range.end_byte > byte_range.end || range.end_point > point_range.end {
            return Err(format!(
                "changed range extends outside of the old and new trees {range:?}",
            ));
        }
    }

    old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
}
/// Configures `parser` to parse only the regions of `input` between each
/// `(start, end)` delimiter pair (template-style parsing), or resets it to
/// whole-document parsing when `delimiters` is `None`.
///
/// An unterminated final region extends to the end of the input.
pub fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
    if let Some((start, end)) = delimiters {
        // `windows(0)` would panic; an empty delimiter can never match,
        // which under the previous code meant "no included ranges".
        if start.is_empty() || end.is_empty() {
            parser.set_included_ranges(&[]).unwrap();
            return;
        }
        let mut ranges = Vec::new();
        let mut ix = 0;
        while ix < input.len() {
            // Find the next opening delimiter. This previously searched
            // with a hard-coded `windows(2)`, which silently never matched
            // delimiters whose byte length is not exactly 2; size the
            // search window from the delimiter itself instead.
            let Some(mut start_ix) = input[ix..]
                .windows(start.len())
                .position(|win| win == start.as_bytes())
            else {
                break;
            };
            start_ix += ix + start.len();
            // The included range runs up to the matching closing delimiter,
            // or to the end of the input when it is unterminated.
            let end_ix = input[start_ix..]
                .windows(end.len())
                .position(|win| win == end.as_bytes())
                .map_or(input.len(), |ix| start_ix + ix);
            ix = end_ix;

            ranges.push(Range {
                start_byte: start_ix,
                end_byte: end_ix,
                start_point: point_for_offset(input, start_ix),
                end_point: point_for_offset(input, end_ix),
            });
        }

        parser.set_included_ranges(&ranges).unwrap();
    } else {
        parser.set_included_ranges(&[]).unwrap();
    }
}
fn point_for_offset(text: &[u8], offset: usize) -> Point {
let mut point = Point::default();
for byte in &text[..offset] {
if *byte == b'\n' {
point.row += 1;
point.column = 0;
} else {
point.column += 1;
}
}
point
}
/// Creates a parser wired up according to the logging environment flags:
/// stderr logging when `TREE_SITTER_LOG` is set, and graph logging to
/// `log_filename` when `TREE_SITTER_LOG_GRAPHS` is set (the log session
/// handle is stored in `session` to keep it alive at the caller).
pub fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
    let mut parser = Parser::new();

    if *LOG_ENABLED {
        parser.set_logger(Some(Box::new(|log_type, msg| {
            // Indent lexer messages to distinguish them from parse messages.
            if log_type == LogType::Lex {
                eprintln!(" {msg}");
            } else {
                eprintln!("{msg}");
            }
        })));
    }

    if *LOG_GRAPH_ENABLED {
        *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap());
    }

    parser
}

View file

@ -0,0 +1,61 @@
use super::random::Rand;
/// A single text mutation: delete `deleted_length` bytes at `position`,
/// then insert `inserted_text` at that same position.
#[derive(Debug)]
pub struct Edit {
    pub position: usize,
    pub deleted_length: usize,
    pub inserted_text: Vec<u8>,
}

/// Builds the edit that undoes `edit` once applied to the post-edit text.
/// `input` must be the text as it was *before* `edit` was applied.
#[must_use]
pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
    let Edit {
        position,
        deleted_length,
        ref inserted_text,
    } = *edit;
    // The inverse deletes what was inserted and restores what was removed.
    Edit {
        position,
        deleted_length: inserted_text.len(),
        inserted_text: input[position..position + deleted_length].to_vec(),
    }
}
/// Generates one random edit for `input`, choosing among four shapes:
/// append at the end, delete from the end, insert at a random position, or
/// replace a random span. Note that `Rand::unsigned(max)` is inclusive of
/// `max`, so positions may equal `input.len()` (an append).
pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
    let choice = rand.unsigned(10);
    if choice < 2 {
        // Insert text at end
        let inserted_text = rand.words(3);
        Edit {
            position: input.len(),
            deleted_length: 0,
            inserted_text,
        }
    } else if choice < 5 {
        // Delete text from the end
        let deleted_length = rand.unsigned(30).min(input.len());
        Edit {
            position: input.len() - deleted_length,
            deleted_length,
            inserted_text: vec![],
        }
    } else if choice < 8 {
        // Insert at a random position
        let position = rand.unsigned(input.len());
        let word_count = 1 + rand.unsigned(3);
        let inserted_text = rand.words(word_count);
        Edit {
            position,
            deleted_length: 0,
            inserted_text,
        }
    } else {
        // Replace at random position
        let position = rand.unsigned(input.len());
        let deleted_length = rand.unsigned(input.len() - position);
        let word_count = 1 + rand.unsigned(3);
        let inserted_text = rand.words(word_count);
        Edit {
            position,
            deleted_length,
            inserted_text,
        }
    }
}

View file

@ -0,0 +1,44 @@
use rand::{
distributions::Alphanumeric,
prelude::{Rng, SeedableRng, StdRng},
};
/// Operator characters mixed into generated text; all are ASCII, so the
/// `as u8` cast in `words` is lossless.
const OPERATORS: &[char] = &[
    '+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%',
];

/// A small deterministic PRNG wrapper that makes fuzzing reproducible from
/// a single integer seed.
pub struct Rand(StdRng);

impl Rand {
    #[must_use]
    pub fn new(seed: usize) -> Self {
        Self(StdRng::seed_from_u64(seed as u64))
    }

    /// Returns a uniformly random value in `0..=max` — inclusive of `max`.
    pub fn unsigned(&mut self, max: usize) -> usize {
        self.0.gen_range(0..=max)
    }

    /// Generates up to `max_count` whitespace-separated "words" (possibly
    /// zero), each either a single operator character or a short (possibly
    /// empty) run of alphanumeric characters.
    pub fn words(&mut self, max_count: usize) -> Vec<u8> {
        let mut result = Vec::new();
        let word_count = self.unsigned(max_count);
        for i in 0..word_count {
            if i > 0 {
                // Occasionally separate words with a newline instead of a space.
                if self.unsigned(5) == 0 {
                    result.push(b'\n');
                } else {
                    result.push(b' ');
                }
            }
            if self.unsigned(3) == 0 {
                let index = self.unsigned(OPERATORS.len() - 1);
                result.push(OPERATORS[index] as u8);
            } else {
                for _ in 0..self.unsigned(8) {
                    result.push(self.0.sample(Alphanumeric));
                }
            }
        }
        result
    }
}

View file

@ -0,0 +1,91 @@
use tree_sitter::{Point, Range, Tree};
/// For every byte of a source text, the stack of node kinds enclosing that
/// byte. Two trees over the same text should produce identical stacks at
/// every position outside of the ranges reported as changed.
#[derive(Debug)]
pub struct ScopeSequence(Vec<ScopeStack>);

/// Node kinds from the root down to the innermost node at one byte.
type ScopeStack = Vec<&'static str>;

impl ScopeSequence {
    /// Builds the per-byte scope stacks by walking `tree` with a cursor.
    #[must_use]
    pub fn new(tree: &Tree) -> Self {
        let mut result = Self(Vec::new());
        let mut scope_stack = Vec::new();

        let mut cursor = tree.walk();
        let mut visited_children = false;
        loop {
            let node = cursor.node();
            // Fill the gap before this node with the enclosing stack.
            for _ in result.0.len()..node.start_byte() {
                result.0.push(scope_stack.clone());
            }
            if visited_children {
                // On the way back up: fill this node's remaining bytes,
                // then pop it and move to a sibling or the parent.
                for _ in result.0.len()..node.end_byte() {
                    result.0.push(scope_stack.clone());
                }
                scope_stack.pop();
                if cursor.goto_next_sibling() {
                    visited_children = false;
                } else if !cursor.goto_parent() {
                    break;
                }
            } else {
                // On the way down: enter this node before descending.
                scope_stack.push(cursor.node().kind());
                if !cursor.goto_first_child() {
                    visited_children = true;
                }
            }
        }

        result
    }

    /// Compares two scope sequences over the same `text`, requiring every
    /// differing (non-newline) byte to fall inside one of
    /// `known_changed_ranges`. On violation, returns a diagnostic message
    /// showing the position, the offending line, and both scope stacks.
    pub fn check_changes(
        &self,
        other: &Self,
        text: &[u8],
        known_changed_ranges: &[Range],
    ) -> Result<(), String> {
        let mut position = Point { row: 0, column: 0 };
        for i in 0..(self.0.len().max(other.0.len())) {
            let stack = &self.0.get(i);
            let other_stack = &other.0.get(i);

            // Line-break bytes are exempt from the comparison.
            if *stack != *other_stack && ![b'\r', b'\n'].contains(&text[i]) {
                let containing_range = known_changed_ranges
                    .iter()
                    .find(|range| range.start_point <= position && position < range.end_point);
                if containing_range.is_none() {
                    // Extract the full line containing the mismatch for the
                    // error report; the caret below points at the column.
                    let line = &text[(i - position.column)..]
                        .split(|c| *c == b'\n')
                        .next()
                        .unwrap();
                    return Err(format!(
                        concat!(
                            "Position: {}\n",
                            "Byte offset: {}\n",
                            "Line: {}\n",
                            "{}^\n",
                            "Old scopes: {:?}\n",
                            "New scopes: {:?}\n",
                            "Invalidated ranges: {:?}",
                        ),
                        position,
                        i,
                        String::from_utf8_lossy(line),
                        String::from(" ").repeat(position.column + "Line: ".len()),
                        stack,
                        other_stack,
                        known_changed_ranges,
                    ));
                }
            }

            // Track row/column while scanning.
            if text[i] == b'\n' {
                position.row += 1;
                position.column = 0;
            } else {
                position.column += 1;
            }
        }
        Ok(())
    }
}

510
crates/cli/src/highlight.rs Normal file
View file

@ -0,0 +1,510 @@
use std::{
collections::{BTreeMap, HashSet},
fmt::Write,
fs,
io::{self, Write as _},
path::{self, Path, PathBuf},
str,
sync::{atomic::AtomicUsize, Arc},
time::Instant,
};
use ansi_colours::{ansi256_from_rgb, rgb_from_ansi256};
use anstyle::{Ansi256Color, AnsiColor, Color, Effects, RgbColor};
use anyhow::Result;
use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};
use serde_json::{json, Value};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
use tree_sitter_loader::Loader;
/// Opening HTML scaffolding (doctype, title, and base CSS) written before
/// the per-theme `<style>` rules.
pub const HTML_HEAD_HEADER: &str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
<style>
body {
font-family: monospace
}
.line-number {
user-select: none;
text-align: right;
color: rgba(27,31,35,.3);
padding: 0 10px;
}
.line {
white-space: pre;
}
</style>";

/// Closes the head and opens the body; written after the theme styles.
pub const HTML_BODY_HEADER: &str = "
</head>
<body>
";

/// Closing tags written after the highlighted table.
pub const HTML_FOOTER: &str = "
</body>
";
/// A single highlight style in both of its output forms: an ANSI terminal
/// style and (optionally) an equivalent CSS declaration string.
#[derive(Debug, Default)]
pub struct Style {
    pub ansi: anstyle::Style,
    pub css: Option<String>,
}

/// A highlight theme: `styles[i]` is the style for the capture named
/// `highlight_names[i]` (the two vectors are index-aligned).
#[derive(Debug)]
pub struct Theme {
    pub styles: Vec<Style>,
    pub highlight_names: Vec<String>,
}

/// Wrapper matching the on-disk configuration layout (a `theme` key).
#[derive(Default, Deserialize, Serialize)]
pub struct ThemeConfig {
    #[serde(default)]
    pub theme: Theme,
}
impl Theme {
    /// Loads a theme from a JSON file. A file that fails to parse yields
    /// the default theme rather than an error; only I/O failures are
    /// propagated.
    pub fn load(path: &path::Path) -> io::Result<Self> {
        let json = fs::read_to_string(path)?;
        Ok(serde_json::from_str(&json).unwrap_or_default())
    }

    /// The style used for text with no highlight capture (no color or
    /// effects).
    #[must_use]
    pub fn default_style(&self) -> Style {
        Style::default()
    }
}
impl<'de> Deserialize<'de> for Theme {
    // A theme document is a JSON object mapping highlight names to style
    // values; the two vectors are kept index-aligned. A document that is
    // not an object deserializes to an empty theme rather than an error.
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let mut styles = Vec::new();
        let mut highlight_names = Vec::new();
        if let Ok(colors) = BTreeMap::<String, Value>::deserialize(deserializer) {
            highlight_names.reserve(colors.len());
            styles.reserve(colors.len());
            for (name, style_value) in colors {
                let mut style = Style::default();
                parse_style(&mut style, style_value);
                highlight_names.push(name);
                styles.push(style);
            }
        }
        Ok(Self {
            styles,
            highlight_names,
        })
    }
}
impl Serialize for Theme {
    // Serializes back to the same JSON shape `Deserialize` accepts: a map
    // from highlight name to either a bare color value or an object with
    // `color` / `bold` / `italic` / `underline` keys.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut map = serializer.serialize_map(Some(self.styles.len()))?;
        for (name, style) in self.highlight_names.iter().zip(&self.styles) {
            let style = &style.ansi;
            // Map the foreground color back to its JSON representation.
            let color = style.get_fg_color().map(|color| match color {
                Color::Ansi(color) => match color {
                    AnsiColor::Black => json!("black"),
                    AnsiColor::Blue => json!("blue"),
                    AnsiColor::Cyan => json!("cyan"),
                    AnsiColor::Green => json!("green"),
                    AnsiColor::Magenta => json!("purple"),
                    AnsiColor::Red => json!("red"),
                    AnsiColor::White => json!("white"),
                    AnsiColor::Yellow => json!("yellow"),
                    // Bright ANSI variants are never produced by
                    // `parse_color`, so they cannot occur here.
                    _ => unreachable!(),
                },
                Color::Ansi256(Ansi256Color(n)) => json!(n),
                Color::Rgb(RgbColor(r, g, b)) => json!(format!("#{r:x?}{g:x?}{b:x?}")),
            });
            let effects = style.get_effects();
            if effects.contains(Effects::BOLD)
                || effects.contains(Effects::ITALIC)
                || effects.contains(Effects::UNDERLINE)
            {
                // Any text effect forces the object form.
                let mut style_json = BTreeMap::new();
                if let Some(color) = color {
                    style_json.insert("color", color);
                }
                if effects.contains(Effects::BOLD) {
                    style_json.insert("bold", Value::Bool(true));
                }
                if effects.contains(Effects::ITALIC) {
                    style_json.insert("italic", Value::Bool(true));
                }
                if effects.contains(Effects::UNDERLINE) {
                    style_json.insert("underline", Value::Bool(true));
                }
                map.serialize_entry(&name, &style_json)?;
            } else if let Some(color) = color {
                map.serialize_entry(&name, &color)?;
            } else {
                map.serialize_entry(&name, &Value::Null)?;
            }
        }
        map.end()
    }
}
impl Default for Theme {
    // The built-in theme: ANSI-256 palette indices (with occasional bold /
    // italic / underline) for the standard highlight capture names.
    fn default() -> Self {
        serde_json::from_value(json!({
            "attribute": {"color": 124, "italic": true},
            "comment": {"color": 245, "italic": true},
            "constant": 94,
            "constant.builtin": {"color": 94, "bold": true},
            "constructor": 136,
            "embedded": null,
            "function": 26,
            "function.builtin": {"color": 26, "bold": true},
            "keyword": 56,
            "module": 136,
            "number": {"color": 94, "bold": true},
            "operator": {"color": 239, "bold": true},
            "property": 124,
            "property.builtin": {"color": 124, "bold": true},
            "punctuation": 239,
            "punctuation.bracket": 239,
            "punctuation.delimiter": 239,
            "punctuation.special": 239,
            "string": 28,
            "string.special": 30,
            "tag": 18,
            "type": 23,
            "type.builtin": {"color": 23, "bold": true},
            "variable": 252,
            "variable.builtin": {"color": 252, "bold": true},
            "variable.parameter": {"color": 252, "underline": true}
        }))
        .unwrap()
    }
}
/// Applies a JSON style value to `style` in place.
///
/// Accepts either an object with `color`/`bold`/`italic`/`underline` keys
/// or a bare color value; anything else clears the CSS representation.
/// The CSS string is computed before the truecolor downgrade below, so it
/// keeps the original RGB color even when the terminal style is reduced to
/// an ANSI-256 approximation.
fn parse_style(style: &mut Style, json: Value) {
    if let Value::Object(entries) = json {
        for (property_name, value) in entries {
            match property_name.as_str() {
                "bold" => {
                    if value == Value::Bool(true) {
                        style.ansi = style.ansi.bold();
                    }
                }
                "italic" => {
                    if value == Value::Bool(true) {
                        style.ansi = style.ansi.italic();
                    }
                }
                "underline" => {
                    if value == Value::Bool(true) {
                        style.ansi = style.ansi.underline();
                    }
                }
                "color" => {
                    if let Some(color) = parse_color(value) {
                        style.ansi = style.ansi.fg_color(Some(color));
                    }
                }
                // Unknown keys are ignored.
                _ => {}
            }
        }
        style.css = Some(style_to_css(style.ansi));
    } else if let Some(color) = parse_color(json) {
        style.ansi = style.ansi.fg_color(Some(color));
        style.css = Some(style_to_css(style.ansi));
    } else {
        style.css = None;
    }

    // For terminal output only, approximate RGB colors with the closest
    // ANSI-256 palette entry when the terminal lacks truecolor support.
    if let Some(Color::Rgb(RgbColor(red, green, blue))) = style.ansi.get_fg_color() {
        if !terminal_supports_truecolor() {
            let ansi256 = Color::Ansi256(Ansi256Color(ansi256_from_rgb((red, green, blue))));
            style.ansi = style.ansi.fg_color(Some(ansi256));
        }
    }
}
/// Converts a JSON theme value into a terminal color: an ANSI-256 palette
/// index (number), a named ANSI color, or a `#rrggbb` hex string.
/// Returns `None` for anything unrecognized.
fn parse_color(json: Value) -> Option<Color> {
    match json {
        // NOTE: values above 255 are truncated by `as u8`, matching the
        // previous behavior.
        Value::Number(n) => n.as_u64().map(|n| Color::Ansi256(Ansi256Color(n as u8))),
        Value::String(s) => match s.to_lowercase().as_str() {
            "black" => Some(Color::Ansi(AnsiColor::Black)),
            "blue" => Some(Color::Ansi(AnsiColor::Blue)),
            "cyan" => Some(Color::Ansi(AnsiColor::Cyan)),
            "green" => Some(Color::Ansi(AnsiColor::Green)),
            "purple" => Some(Color::Ansi(AnsiColor::Magenta)),
            "red" => Some(Color::Ansi(AnsiColor::Red)),
            "white" => Some(Color::Ansi(AnsiColor::White)),
            "yellow" => Some(Color::Ansi(AnsiColor::Yellow)),
            // Anything else is treated as `#rrggbb` hex notation; the
            // manual `if let … else None` was replaced with `Option::map`
            // (clippy `manual_map`).
            s => {
                hex_string_to_rgb(s).map(|(red, green, blue)| Color::Rgb(RgbColor(red, green, blue)))
            }
        },
        _ => None,
    }
}
/// Parses a `#rrggbb` hex color string into its RGB components.
///
/// Returns `None` when the string lacks a leading `#`, is too short, or
/// contains non-hex digits; characters after the six hex digits are
/// ignored, matching the previous behavior. Uses checked `str::get`
/// slicing so multi-byte (non-ASCII) input returns `None` instead of
/// panicking on a char-boundary violation, as the previous `&s[1..3]`
/// byte-index slicing could.
fn hex_string_to_rgb(s: &str) -> Option<(u8, u8, u8)> {
    let hex = s.strip_prefix('#')?;
    let red = u8::from_str_radix(hex.get(0..2)?, 16).ok()?;
    let green = u8::from_str_radix(hex.get(2..4)?, 16).ok()?;
    let blue = u8::from_str_radix(hex.get(4..6)?, 16).ok()?;
    Some((red, green, blue))
}
/// Renders the text effects and foreground color of `style` as a string of
/// CSS declarations (e.g. `font-weight: bold;color: #ff0000`).
fn style_to_css(style: anstyle::Style) -> String {
    let mut css = String::new();
    let effects = style.get_effects();
    if effects.contains(Effects::UNDERLINE) {
        css.push_str("text-decoration: underline;");
    }
    if effects.contains(Effects::BOLD) {
        css.push_str("font-weight: bold;");
    }
    if effects.contains(Effects::ITALIC) {
        css.push_str("font-style: italic;");
    }
    if let Some(color) = style.get_fg_color() {
        write_color(&mut css, color);
    }
    css
}
/// Appends a CSS `color: …` declaration for `color` to `buffer`.
///
/// ANSI-256 palette indices are expanded to their RGB hex values so the
/// CSS matches what a terminal would display.
fn write_color(buffer: &mut String, color: Color) {
    match color {
        Color::Ansi(color) => match color {
            AnsiColor::Black => write!(buffer, "color: black").unwrap(),
            AnsiColor::Red => write!(buffer, "color: red").unwrap(),
            AnsiColor::Green => write!(buffer, "color: green").unwrap(),
            AnsiColor::Yellow => write!(buffer, "color: yellow").unwrap(),
            AnsiColor::Blue => write!(buffer, "color: blue").unwrap(),
            AnsiColor::Magenta => write!(buffer, "color: purple").unwrap(),
            AnsiColor::Cyan => write!(buffer, "color: cyan").unwrap(),
            AnsiColor::White => write!(buffer, "color: white").unwrap(),
            // Bright variants are never produced by `parse_color`.
            _ => unreachable!(),
        },
        Color::Ansi256(Ansi256Color(n)) => {
            let (r, g, b) = rgb_from_ansi256(n);
            write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap();
        }
        Color::Rgb(RgbColor(r, g, b)) => write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap(),
    }
}
/// Reports whether the terminal advertises 24-bit color support through
/// the conventional `COLORTERM` environment variable.
fn terminal_supports_truecolor() -> bool {
    matches!(
        std::env::var("COLORTERM").as_deref(),
        Ok("truecolor" | "24bit")
    )
}
/// Options controlling the output and behavior of `highlight`.
pub struct HighlightOptions {
    /// Theme supplying per-capture styles.
    pub theme: Theme,
    /// Report non-standard capture names before highlighting.
    pub check: bool,
    /// Optional allow-list file of extra capture names used by `check`.
    pub captures_path: Option<PathBuf>,
    /// Emit `style='…'` attributes instead of CSS classes in HTML output.
    pub inline_styles: bool,
    /// Produce an HTML page instead of ANSI terminal output.
    pub html: bool,
    /// Suppress headers and page scaffolding.
    pub quiet: bool,
    /// Print elapsed highlighting time to stderr.
    pub print_time: bool,
    /// Shared flag handed to the highlighter so callers can cancel it.
    pub cancellation_flag: Arc<AtomicUsize>,
}
/// Highlights the file at `path` with `config` and writes the result to
/// stdout, either as ANSI-styled text or (with `opts.html`) as an HTML
/// table with one row per line.
///
/// When `opts.check` is set, first reports to stderr any capture names in
/// `config` outside the standard set (or outside the allow-list file at
/// `opts.captures_path`). `print_name` controls whether `name` is printed
/// as a header before the output.
///
/// # Errors
/// Returns an error if the source or captures file can't be read, if
/// highlighting fails, or on a stdout write failure.
pub fn highlight(
    loader: &Loader,
    path: &Path,
    name: &str,
    config: &HighlightConfiguration,
    print_name: bool,
    opts: &HighlightOptions,
) -> Result<()> {
    if opts.check {
        let names = if let Some(path) = opts.captures_path.as_deref() {
            // The captures file lists one capture name per line; blank
            // lines and `;`-comment lines are skipped, trailing `;`
            // comments and surrounding quotes are stripped.
            let file = fs::read_to_string(path)?;
            let capture_names = file
                .lines()
                .filter_map(|line| {
                    if line.trim().is_empty() || line.trim().starts_with(';') {
                        return None;
                    }
                    line.split(';').next().map(|s| s.trim().trim_matches('"'))
                })
                .collect::<HashSet<_>>();
            config.nonconformant_capture_names(&capture_names)
        } else {
            config.nonconformant_capture_names(&HashSet::new())
        };
        if names.is_empty() {
            eprintln!("All highlight captures conform to standards.");
        } else {
            eprintln!(
                "Non-standard highlight {} detected:",
                if names.len() > 1 {
                    "captures"
                } else {
                    "capture"
                }
            );
            for name in names {
                eprintln!("* {name}");
            }
        }
    }

    let source = fs::read(path)?;
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    let time = Instant::now();
    let mut highlighter = Highlighter::new();
    let events =
        highlighter.highlight(config, &source, Some(&opts.cancellation_flag), |string| {
            loader.highlight_config_for_injection_string(string)
        })?;
    let theme = &opts.theme;

    if !opts.quiet && print_name {
        writeln!(&mut stdout, "{name}")?;
    }

    if opts.html {
        if !opts.quiet {
            // Emit the page skeleton plus one CSS rule per themed capture.
            writeln!(&mut stdout, "{HTML_HEAD_HEADER}")?;
            writeln!(&mut stdout, " <style>")?;
            let names = theme.highlight_names.iter();
            let styles = theme.styles.iter();
            for (name, style) in names.zip(styles) {
                if let Some(css) = &style.css {
                    writeln!(&mut stdout, " .{name} {{ {css}; }}")?;
                }
            }
            writeln!(&mut stdout, " </style>")?;
            writeln!(&mut stdout, "{HTML_BODY_HEADER}")?;
        }

        let mut renderer = HtmlRenderer::new();
        renderer.render(events, &source, &move |highlight, output| {
            // `highlight.0` indexes the theme's parallel style arrays.
            if opts.inline_styles {
                output.extend(b"style='");
                output.extend(
                    theme.styles[highlight.0]
                        .css
                        .as_ref()
                        .map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes()),
                );
                output.extend(b"'");
            } else {
                // Dotted capture names become space-separated CSS classes.
                output.extend(b"class='");
                let mut parts = theme.highlight_names[highlight.0].split('.').peekable();
                while let Some(part) = parts.next() {
                    output.extend(part.as_bytes());
                    if parts.peek().is_some() {
                        output.extend(b" ");
                    }
                }
                output.extend(b"'");
            }
        })?;
        if !opts.quiet {
            writeln!(&mut stdout, "<table>")?;
            for (i, line) in renderer.lines().enumerate() {
                writeln!(
                    &mut stdout,
                    "<tr><td class=line-number>{}</td><td class=line>{line}</td></tr>",
                    i + 1,
                )?;
            }
            writeln!(&mut stdout, "</table>")?;
            writeln!(&mut stdout, "{HTML_FOOTER}")?;
        }
    } else {
        // ANSI output: keep a stack of styles so that nested highlights
        // restore the enclosing style when they end.
        let mut style_stack = vec![theme.default_style().ansi];
        for event in events {
            match event? {
                HighlightEvent::HighlightStart(highlight) => {
                    style_stack.push(theme.styles[highlight.0].ansi);
                }
                HighlightEvent::HighlightEnd => {
                    style_stack.pop();
                }
                HighlightEvent::Source { start, end } => {
                    let style = style_stack.last().unwrap();
                    write!(&mut stdout, "{style}").unwrap();
                    stdout.write_all(&source[start..end])?;
                    // `{style:#}` renders the style's reset sequence.
                    write!(&mut stdout, "{style:#}").unwrap();
                }
            }
        }
    }

    if opts.print_time {
        eprintln!("Time: {}ms", time.elapsed().as_millis());
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use std::env;

    use super::*;

    const JUNGLE_GREEN: &str = "#26A69A";
    const DARK_CYAN: &str = "#00AF87";

    // Exercises `parse_style`'s RGB-to-ANSI-256 downgrade, which depends on
    // the COLORTERM environment variable; the original value is restored
    // at the end so other tests aren't affected.
    #[test]
    fn test_parse_style() {
        let original_environment_variable = env::var("COLORTERM");

        let mut style = Style::default();
        assert_eq!(style.ansi.get_fg_color(), None);
        assert_eq!(style.css, None);

        // darkcyan is an ANSI color and is preserved
        env::set_var("COLORTERM", "");
        parse_style(&mut style, Value::String(DARK_CYAN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Ansi256(Ansi256Color(36)))
        );
        assert_eq!(style.css, Some("color: #00af87".to_string()));

        // junglegreen is not an ANSI color and is preserved when the terminal supports it
        env::set_var("COLORTERM", "truecolor");
        parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Rgb(RgbColor(38, 166, 154)))
        );
        assert_eq!(style.css, Some("color: #26a69a".to_string()));

        // junglegreen gets approximated as cadetblue when the terminal does not support it
        env::set_var("COLORTERM", "");
        parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Ansi256(Ansi256Color(72)))
        );
        assert_eq!(style.css, Some("color: #26a69a".to_string()));

        // Restore the caller's COLORTERM.
        if let Ok(environment_variable) = original_environment_variable {
            env::set_var("COLORTERM", environment_variable);
        } else {
            env::remove_var("COLORTERM");
        }
    }
}

1052
crates/cli/src/init.rs Normal file

File diff suppressed because it is too large Load diff

187
crates/cli/src/input.rs Normal file
View file

@ -0,0 +1,187 @@
use std::{
fs,
io::{Read, Write},
path::{Path, PathBuf},
sync::{
atomic::{AtomicUsize, Ordering},
mpsc, Arc,
},
};
use anyhow::{anyhow, bail, Context, Result};
use glob::glob;
use crate::test::{parse_tests, TestEntry};
/// The resolved input for a CLI command: a list of file paths, the contents
/// of a single corpus test, or raw bytes read from stdin.
pub enum CliInput {
    /// Operate on this explicit list of files.
    Paths(Vec<PathBuf>),
    /// Operate on one corpus test, selected by number.
    Test {
        /// The test's name from its corpus file.
        name: String,
        /// The test's input source code.
        contents: Vec<u8>,
        /// Languages declared in the test's attributes (may be empty).
        languages: Vec<Box<str>>,
    },
    /// Bytes read from standard input.
    Stdin(Vec<u8>),
}
/// Resolve the input for a CLI command, in decreasing order of precedence:
///
/// 1. `paths_file` — a file listing one path per line;
/// 2. `test_number` — a corpus test selected by its 1-based number;
/// 3. `paths` — explicit paths and/or glob patterns, where a leading `!`
///    removes previously-matched paths;
/// 4. otherwise, bytes read from stdin (cancellable via `cancellation_flag`,
///    which a ctrl-c handler sets to 1).
pub fn get_input(
    paths_file: Option<&Path>,
    paths: Option<Vec<PathBuf>>,
    test_number: Option<u32>,
    cancellation_flag: &Arc<AtomicUsize>,
) -> Result<CliInput> {
    // Highest precedence: a file that lists one path per line.
    if let Some(paths_file) = paths_file {
        return Ok(CliInput::Paths(
            fs::read_to_string(paths_file)
                .with_context(|| format!("Failed to read paths file {}", paths_file.display()))?
                .trim()
                .lines()
                .map(PathBuf::from)
                .collect::<Vec<_>>(),
        ));
    }

    // Next: a single corpus test, looked up by number under `test/corpus`.
    if let Some(test_number) = test_number {
        let current_dir = std::env::current_dir().unwrap();
        let test_dir = current_dir.join("test").join("corpus");
        if !test_dir.exists() {
            return Err(anyhow!(
                "Test corpus directory not found in current directory, see https://tree-sitter.github.io/tree-sitter/creating-parsers/5-writing-tests"
            ));
        }

        let test_entry = parse_tests(&test_dir)?;
        let mut test_num = 0;
        // Test numbers are 1-based on the command line; `max(1) - 1` converts
        // to the 0-based index used during traversal (and tolerates 0).
        let Some((name, contents, languages)) =
            get_test_info(&test_entry, test_number.max(1) - 1, &mut test_num)
        else {
            return Err(anyhow!("Failed to fetch contents of test #{test_number}"));
        };

        return Ok(CliInput::Test {
            name,
            contents,
            languages,
        });
    }

    // Next: explicit paths and glob patterns.
    if let Some(paths) = paths {
        let mut result = Vec::new();

        // A negated path removes an earlier match instead of adding one.
        let mut incorporate_path = |path: PathBuf, positive| {
            if positive {
                result.push(path);
            } else if let Some(index) = result.iter().position(|p| *p == path) {
                result.remove(index);
            }
        };

        for mut path in paths {
            let mut positive = true;
            // NOTE(review): `Path::starts_with` compares whole path
            // components, so this only treats the argument as negated when
            // its first component is exactly "!" — confirm that a bare
            // "!foo" (single component "!foo") is intended to pass through
            // un-negated.
            if path.starts_with("!") {
                positive = false;
                path = path.strip_prefix("!").unwrap().to_path_buf();
            }

            if path.exists() {
                incorporate_path(path, positive);
            } else {
                // Not an existing file: treat the argument as a glob pattern.
                let Some(path_str) = path.to_str() else {
                    bail!("Invalid path: {}", path.display());
                };
                let paths = glob(path_str)
                    .with_context(|| format!("Invalid glob pattern {}", path.display()))?;
                for path in paths {
                    incorporate_path(path?, positive);
                }
            }
        }

        if result.is_empty() {
            return Err(anyhow!(
                "No files were found at or matched by the provided pathname/glob"
            ));
        }

        return Ok(CliInput::Paths(result));
    }

    // Fallback: read stdin on a background thread so the main thread can keep
    // polling the ctrl-c cancellation flag while the read blocks.
    let reader_flag = cancellation_flag.clone();
    let (tx, rx) = mpsc::channel();

    // Spawn a thread to read from stdin, until ctrl-c or EOF is received
    std::thread::spawn(move || {
        let mut input = Vec::new();
        let stdin = std::io::stdin();
        let mut handle = stdin.lock();

        // Read in chunks, so we can check the ctrl-c flag
        loop {
            if reader_flag.load(Ordering::Relaxed) == 1 {
                break;
            }

            let mut buffer = [0; 1024];
            match handle.read(&mut buffer) {
                Ok(0) | Err(_) => break,
                Ok(n) => input.extend_from_slice(&buffer[..n]),
            }
        }

        // Signal to the main thread that we're done
        tx.send(input).ok();
    });

    loop {
        // If we've received a ctrl-c signal, exit
        // (the bare "\n" makes the CLI exit with no extra message text).
        if cancellation_flag.load(Ordering::Relaxed) == 1 {
            bail!("\n");
        }

        // If we're done receiving input from stdin, return it
        if let Ok(input) = rx.try_recv() {
            return Ok(CliInput::Stdin(input));
        }

        std::thread::sleep(std::time::Duration::from_millis(50));
    }
}
#[allow(clippy::type_complexity)]
/// Depth-first search for the `target_test`-th example (0-based) in a test
/// tree, counting visited examples in `test_num`. Returns the example's
/// name, input bytes, and declared languages when found.
pub fn get_test_info(
    test_entry: &TestEntry,
    target_test: u32,
    test_num: &mut u32,
) -> Option<(String, Vec<u8>, Vec<Box<str>>)> {
    match test_entry {
        TestEntry::Example {
            name,
            input,
            attributes,
            ..
        } => {
            if *test_num == target_test {
                // Found the requested example.
                Some((name.clone(), input.clone(), attributes.languages.clone()))
            } else {
                // Not the one we want; count it and keep searching.
                *test_num += 1;
                None
            }
        }
        // Recurse into groups, stopping at the first child that yields a hit.
        TestEntry::Group { children, .. } => children
            .iter()
            .find_map(|child| get_test_info(child, target_test, test_num)),
    }
}
/// Writes `contents` to a temporary file and returns the path to that file.
pub fn get_tmp_source_file(contents: &[u8]) -> Result<PathBuf> {
let parse_path = std::env::temp_dir().join(".tree-sitter-temp");
let mut parse_file = std::fs::File::create(&parse_path)?;
parse_file.write_all(contents)?;
Ok(parse_path)
}

30
crates/cli/src/logger.rs Normal file
View file

@ -0,0 +1,30 @@
use log::{LevelFilter, Log, Metadata, Record};
// A minimal logger that forwards every record to stderr (see the `Log`
// impl below).
#[allow(dead_code)]
struct Logger {
    // Reserved for filtering output by module path; currently never read
    // (hence the `dead_code` allow above).
    pub filter: Option<String>,
}
impl Log for Logger {
    /// Every record is enabled; no level or target filtering is performed.
    fn enabled(&self, _metadata: &Metadata) -> bool {
        true
    }

    /// Write the record to stderr as `[module::path] message`, with the
    /// CLI crate's own prefix stripped from the module path.
    fn log(&self, record: &Record) {
        let module = record
            .module_path()
            .unwrap_or_default()
            .trim_start_matches("rust_tree_sitter_cli::");
        eprintln!("[{}] {}", module, record.args());
    }

    /// stderr is unbuffered here, so there is nothing to flush.
    fn flush(&self) {}
}
/// Install the global logger at `Info` level.
///
/// Panics (via `unwrap`) if a global logger has already been installed,
/// since `set_boxed_logger` returns an error in that case.
pub fn init() {
    log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap();
    log::set_max_level(LevelFilter::Info);
}

1803
crates/cli/src/main.rs Normal file

File diff suppressed because it is too large Load diff

1110
crates/cli/src/parse.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,410 @@
<head>
<meta charset="utf-8">
<title>tree-sitter THE_LANGUAGE_NAME</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/6.65.7/codemirror.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.19.0/clusterize.min.css">
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png"
sizes="32x32" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png"
sizes="16x16" />
</head>
<body>
<div id="playground-container" style="visibility: hidden;">
<header>
<div class="header-item">
<span class="language-name">Language: THE_LANGUAGE_NAME</span>
</div>
<div class="header-item">
<input id="logging-checkbox" type="checkbox">
<label for="logging-checkbox">log</label>
</div>
<div class="header-item">
<input id="anonymous-nodes-checkbox" type="checkbox">
<label for="anonymous-nodes-checkbox">show anonymous nodes</label>
</div>
<div class="header-item">
<input id="query-checkbox" type="checkbox">
<label for="query-checkbox">query</label>
</div>
<div class="header-item">
<input id="accessibility-checkbox" type="checkbox">
<label for="accessibility-checkbox">accessibility</label>
</div>
<div class="header-item">
<label for="update-time">parse time: </label>
<span id="update-time"></span>
</div>
<div class="header-item">
<a href="https://tree-sitter.github.io/tree-sitter/7-playground.html#about">(?)</a>
</div>
<select id="language-select" style="display: none;">
<option value="parser">Parser</option>
</select>
<div class="header-item">
<button id="theme-toggle" class="theme-toggle" aria-label="Toggle theme">
<svg class="sun-icon" viewBox="0 0 24 24" width="16" height="16">
<path fill="currentColor"
d="M12 17.5a5.5 5.5 0 1 0 0-11 5.5 5.5 0 0 0 0 11zm0 1.5a7 7 0 1 1 0-14 7 7 0 0 1 0 14zm0-16a1 1 0 0 1 1 1v2a1 1 0 1 1-2 0V4a1 1 0 0 1 1-1zm0 15a1 1 0 0 1 1 1v2a1 1 0 1 1-2 0v-2a1 1 0 0 1 1-1zm9-9a1 1 0 0 1-1 1h-2a1 1 0 1 1 0-2h2a1 1 0 0 1 1 1zM4 12a1 1 0 0 1-1 1H1a1 1 0 1 1 0-2h2a1 1 0 0 1 1 1z" />
</svg>
<svg class="moon-icon" viewBox="0 0 24 24" width="16" height="16">
<path fill="currentColor"
d="M12.1 22c-5.5 0-10-4.5-10-10s4.5-10 10-10c.2 0 .3 0 .5.1-1.3 1.4-2 3.2-2 5.2 0 4.1 3.4 7.5 7.5 7.5 2 0 3.8-.7 5.2-2 .1.2.1.3.1.5 0 5.4-4.5 9.7-10 9.7z" />
</svg>
</button>
</div>
</header>
<main>
<div id="input-pane">
<div class="panel-header">Code</div>
<div id="code-container">
<textarea id="code-input"></textarea>
</div>
<div id="query-container" style="visibility: hidden; position: absolute;">
<div class="panel-header">Query</div>
<textarea id="query-input"></textarea>
</div>
</div>
<div id="output-container-scroll">
<div class="panel-header">Tree</div>
<pre id="output-container" class="highlight"></pre>
</div>
</main>
</div>
<script src="https://code.jquery.com/jquery-3.3.1.min.js" crossorigin="anonymous">
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/codemirror/6.65.7/codemirror.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.19.0/clusterize.min.js"></script>
<script>LANGUAGE_BASE_URL = "";</script>
<script type="module" src="playground.js"></script>
<script type="module">
import * as TreeSitter from './web-tree-sitter.js';
window.TreeSitter = TreeSitter;
setTimeout(() => window.initializePlayground({local: true}), 1)
</script>
<style>
/* Base Variables */
:root {
--light-bg: #f9f9f9;
--light-border: #e0e0e0;
--light-text: #333;
--light-hover-border: #c1c1c1;
--light-scrollbar-track: #f1f1f1;
--light-scrollbar-thumb: #c1c1c1;
--light-scrollbar-thumb-hover: #a8a8a8;
--dark-bg: #1d1f21;
--dark-border: #2d2d2d;
--dark-text: #c5c8c6;
--dark-panel-bg: #252526;
--dark-code-bg: #1e1e1e;
--dark-scrollbar-track: #25282c;
--dark-scrollbar-thumb: #4a4d51;
--dark-scrollbar-thumb-hover: #5a5d61;
--primary-color: #0550ae;
--primary-color-alpha: rgba(5, 80, 174, 0.1);
--primary-color-alpha-dark: rgba(121, 192, 255, 0.1);
--selection-color: rgba(39, 95, 255, 0.3);
}
/* Theme Colors */
[data-theme="dark"] {
--bg-color: var(--dark-bg);
--border-color: var(--dark-border);
--text-color: var(--dark-text);
--panel-bg: var(--dark-panel-bg);
--code-bg: var(--dark-code-bg);
}
[data-theme="light"] {
--bg-color: var(--light-bg);
--border-color: var(--light-border);
--text-color: var(--light-text);
--panel-bg: white;
--code-bg: white;
}
/* Base Styles */
body {
margin: 0;
padding: 0;
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
background-color: var(--bg-color);
color: var(--text-color);
}
/* Layout */
#playground-container {
width: 100%;
height: 100vh;
display: flex;
flex-direction: column;
background-color: var(--bg-color);
}
header {
padding: 16px 24px;
border-bottom: 1px solid var(--border-color);
display: flex;
align-items: center;
gap: 20px;
background-color: var(--panel-bg);
font-size: 14px;
}
.header-item {
display: flex;
align-items: center;
gap: 8px;
}
.language-name {
font-weight: 600;
}
main {
flex: 1;
display: flex;
overflow: hidden;
}
#input-pane {
width: 50%;
display: flex;
flex-direction: column;
border-right: 1px solid var(--border-color);
background-color: var(--panel-bg);
overflow: hidden;
}
#code-container {
flex: 1;
min-height: 0;
position: relative;
border-bottom: 1px solid var(--border-color);
display: flex;
flex-direction: column;
}
#query-container:not([style*="visibility: hidden"]) {
flex: 1;
min-height: 0;
display: flex;
flex-direction: column;
}
#query-container .panel-header {
flex: 0 0 auto;
}
#query-container .CodeMirror {
flex: 1;
position: relative;
min-height: 0;
}
#output-container-scroll {
width: 50%;
overflow: auto;
background-color: var(--panel-bg);
padding: 0;
display: flex;
flex-direction: column;
}
#output-container {
font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
line-height: 1.5;
margin: 0;
padding: 16px;
}
.panel-header {
padding: 8px 16px;
font-weight: 600;
font-size: 14px;
border-bottom: 1px solid var(--border-color);
background-color: var(--panel-bg);
}
.CodeMirror {
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
height: 100%;
font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
font-size: 14px;
line-height: 1.6;
background-color: var(--code-bg) !important;
color: var(--text-color) !important;
}
.query-error {
text-decoration: underline red dashed;
-webkit-text-decoration: underline red dashed;
}
/* Scrollbars */
::-webkit-scrollbar {
width: 8px;
height: 8px;
}
::-webkit-scrollbar-track {
border-radius: 4px;
background: var(--light-scrollbar-track);
}
::-webkit-scrollbar-thumb {
border-radius: 4px;
background: var(--light-scrollbar-thumb);
}
::-webkit-scrollbar-thumb:hover {
background: var(--light-scrollbar-thumb-hover);
}
[data-theme="dark"] {
::-webkit-scrollbar-track {
background: var(--dark-scrollbar-track) !important;
}
::-webkit-scrollbar-thumb {
background: var(--dark-scrollbar-thumb) !important;
}
::-webkit-scrollbar-thumb:hover {
background: var(--dark-scrollbar-thumb-hover) !important;
}
}
/* Theme Toggle */
.theme-toggle {
background: none;
border: 1px solid var(--border-color);
border-radius: 4px;
padding: 6px;
cursor: pointer;
color: var(--text-color);
}
.theme-toggle:hover {
background-color: var(--primary-color-alpha);
}
[data-theme="light"] .moon-icon,
[data-theme="dark"] .sun-icon {
display: none;
}
/* Form Elements */
input[type="checkbox"] {
margin-right: 6px;
vertical-align: middle;
}
label {
font-size: 14px;
margin-right: 16px;
cursor: pointer;
}
#output-container a {
cursor: pointer;
text-decoration: none;
color: #040404;
padding: 2px;
}
#output-container a:hover {
text-decoration: underline;
}
#output-container a.node-link.named {
color: #0550ae;
}
#output-container a.node-link.anonymous {
color: #116329;
}
#output-container a.node-link.anonymous:before {
content: '"';
}
#output-container a.node-link.anonymous:after {
content: '"';
}
#output-container a.node-link.error {
color: #cf222e;
}
#output-container a.highlighted {
background-color: #d9d9d9;
color: red;
border-radius: 3px;
text-decoration: underline;
}
/* Dark Theme Node Colors */
[data-theme="dark"] {
& #output-container a {
color: #d4d4d4;
}
& #output-container a.node-link.named {
color: #79c0ff;
}
& #output-container a.node-link.anonymous {
color: #7ee787;
}
& #output-container a.node-link.error {
color: #ff7b72;
}
& #output-container a.highlighted {
background-color: #373b41;
color: red;
}
& .CodeMirror {
background-color: var(--dark-code-bg) !important;
color: var(--dark-text) !important;
}
& .CodeMirror-gutters {
background-color: var(--dark-panel-bg) !important;
border-color: var(--dark-border) !important;
}
& .CodeMirror-cursor {
border-color: var(--dark-text) !important;
}
& .CodeMirror-selected {
background-color: rgba(255, 255, 255, 0.1) !important;
}
}
</style>
</body>

View file

@ -0,0 +1,144 @@
use std::{
borrow::Cow,
env, fs,
net::TcpListener,
path::{Path, PathBuf},
str::{self, FromStr as _},
};
use anyhow::{anyhow, Context, Result};
use tiny_http::{Header, Response, Server};
use super::wasm;
// Defines a function that loads a playground asset. When `tree_sitter_dir`
// is provided, the asset is always read from that checkout at runtime. The
// fallback depends on the build: with the TREE_SITTER_EMBED_WASM_BINDING
// cfg the asset is embedded into the binary at compile time; without it an
// empty slice is returned, which callers treat as "not available" and
// redirect to the hosted copy instead.
macro_rules! optional_resource {
    ($name:tt, $path:tt) => {
        #[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
        fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
            if let Some(tree_sitter_dir) = tree_sitter_dir {
                Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
            } else {
                Cow::Borrowed(include_bytes!(concat!("../../", $path)))
            }
        }

        #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
        fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
            if let Some(tree_sitter_dir) = tree_sitter_dir {
                Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
            } else {
                Cow::Borrowed(&[])
            }
        }
    };
}
optional_resource!(get_playground_js, "docs/src/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/web-tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/web-tree-sitter.wasm");

/// Load the playground HTML page: read it from the tree-sitter checkout when
/// `tree_sitter_dir` is set, otherwise use the copy embedded at compile
/// time.
///
/// NOTE(review): the checkout-relative path still reads `cli/src/…` — confirm
/// it matches the repository layout after the move to `crates/cli`.
fn get_main_html(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
    tree_sitter_dir.map_or(
        Cow::Borrowed(include_bytes!("playground.html")),
        |tree_sitter_dir| {
            Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap())
        },
    )
}
/// Serve the playground for the grammar at `grammar_path` over HTTP,
/// blocking on the request loop for the lifetime of the process.
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
    let server = get_server()?;
    let (grammar_name, language_wasm) = wasm::load_language_wasm_file(grammar_path)?;
    let url = format!("http://{}", server.server_addr());
    println!("Started playground on: {url}");
    if open_in_browser && webbrowser::open(&url).is_err() {
        eprintln!("Failed to open '{url}' in a web browser");
    }

    // When TREE_SITTER_BASE_DIR is set, all assets are read from that
    // checkout at startup instead of using the compiled-in copies.
    let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
    // Substitute the grammar name into the page template once, up front.
    let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_deref()))
        .unwrap()
        .replace("THE_LANGUAGE_NAME", &grammar_name)
        .into_bytes();
    let playground_js = get_playground_js(tree_sitter_dir.as_deref());
    let lib_js = get_lib_js(tree_sitter_dir.as_deref());
    let lib_wasm = get_lib_wasm(tree_sitter_dir.as_deref());

    let html_header = Header::from_str("Content-Type: text/html").unwrap();
    let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
    let wasm_header = Header::from_str("Content-Type: application/wasm").unwrap();

    for request in server.incoming_requests() {
        let res = match request.url() {
            "/" => response(&main_html, &html_header),
            "/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header),
            // Empty asset bytes mean the asset wasn't embedded in this build;
            // fall back to redirecting to the hosted copy.
            "/playground.js" => {
                if playground_js.is_empty() {
                    redirect("https://tree-sitter.github.io/tree-sitter/assets/js/playground.js")
                } else {
                    response(&playground_js, &js_header)
                }
            }
            "/web-tree-sitter.js" => {
                if lib_js.is_empty() {
                    redirect("https://tree-sitter.github.io/web-tree-sitter.js")
                } else {
                    response(&lib_js, &js_header)
                }
            }
            "/web-tree-sitter.wasm" => {
                if lib_wasm.is_empty() {
                    redirect("https://tree-sitter.github.io/web-tree-sitter.wasm")
                } else {
                    response(&lib_wasm, &wasm_header)
                }
            }
            _ => response(b"Not found", &html_header).with_status_code(404),
        };
        request
            .respond(res)
            .with_context(|| "Failed to write HTTP response")?;
    }

    Ok(())
}
/// Build an empty HTTP 302 response that redirects the client to `url`.
fn redirect(url: &str) -> Response<&[u8]> {
    let location = Header::from_bytes("Location", url.as_bytes()).unwrap();
    Response::empty(302)
        .with_data("".as_bytes(), Some(0))
        .with_header(location)
}
/// Build an HTTP 200 response carrying `data` with the given content-type
/// header attached.
fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> {
    let length = data.len();
    Response::empty(200)
        .with_header(header.clone())
        .with_data(data, Some(length))
}
/// Create the playground's HTTP server, honoring two environment variables:
/// TREE_SITTER_PLAYGROUND_ADDR (default `127.0.0.1`) and
/// TREE_SITTER_PLAYGROUND_PORT (default: the first free port in 8000..12000).
fn get_server() -> Result<Server> {
    let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned());
    // `port` is Option<Result<u16>>: an unset env var becomes `None`, while a
    // set-but-unparsable one keeps the inner error so `port?` below can
    // report "Invalid port specification".
    let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
        .map(|v| {
            v.parse::<u16>()
                .with_context(|| "Invalid port specification")
        })
        .ok();
    let listener = match port {
        Some(port) => {
            bind_to(&addr, port?).with_context(|| "Failed to bind to the specified port")?
        }
        None => get_listener_on_available_port(&addr)
            .with_context(|| "Failed to find a free port to bind to it")?,
    };
    let server =
        Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?;
    Ok(server)
}
/// Scan ports 8000..12000 on `addr` and return the first listener that
/// binds successfully, or `None` if every port in the range is taken.
fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> {
    for port in 8000..12000 {
        if let Some(listener) = bind_to(addr, port) {
            return Some(listener);
        }
    }
    None
}
/// Try to bind a TCP listener to `addr:port`, returning `None` on failure
/// (e.g. the port is already in use).
fn bind_to(addr: &str, port: u16) -> Option<TcpListener> {
    // `(&str, u16)` implements `ToSocketAddrs`, so the pair can be passed
    // directly without allocating an intermediate "addr:port" string.
    TcpListener::bind((addr, port)).ok()
}

148
crates/cli/src/query.rs Normal file
View file

@ -0,0 +1,148 @@
use std::{
fs,
io::{self, Write},
ops::Range,
path::Path,
time::Instant,
};
use anstyle::AnsiColor;
use anyhow::{Context, Result};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
use crate::{
query_testing::{self, to_utf8_point},
test::paint,
};
/// Run the query in `query_path` against the file at `path`, printing each
/// match (or each capture in source order, with `ordered_captures`) to
/// stdout. With `should_test`, the collected captures are instead checked
/// against the file's inline assertion comments.
#[allow(clippy::too_many_arguments)]
pub fn query_file_at_path(
    language: &Language,
    path: &Path,
    name: &str,
    query_path: &Path,
    ordered_captures: bool,
    byte_range: Option<Range<usize>>,
    point_range: Option<Range<Point>>,
    should_test: bool,
    quiet: bool,
    print_time: bool,
    stdin: bool,
) -> Result<()> {
    // Lock stdout once instead of per write.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();

    let query_source = fs::read_to_string(query_path)
        .with_context(|| format!("Error reading query file {}", query_path.display()))?;
    let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?;

    // Optionally restrict the query to a byte range or point range.
    let mut query_cursor = QueryCursor::new();
    if let Some(range) = byte_range {
        query_cursor.set_byte_range(range);
    }
    if let Some(range) = point_range {
        query_cursor.set_point_range(range);
    }

    let mut parser = Parser::new();
    parser.set_language(language)?;

    // Captures collected for assertion checking below.
    let mut results = Vec::new();

    if !should_test && !stdin {
        writeln!(&mut stdout, "{name}")?;
    }

    let source_code =
        fs::read(path).with_context(|| format!("Error reading source file {}", path.display()))?;
    let tree = parser.parse(&source_code, None).unwrap();

    let start = Instant::now();
    if ordered_captures {
        // Stream captures in source order.
        let mut captures = query_cursor.captures(&query, tree.root_node(), source_code.as_slice());
        while let Some((mat, capture_index)) = captures.next() {
            let capture = mat.captures[*capture_index];
            let capture_name = &query.capture_names()[capture.index as usize];
            if !quiet && !should_test {
                writeln!(
                    &mut stdout,
                    " pattern: {:>2}, capture: {} - {capture_name}, start: {}, end: {}, text: `{}`",
                    mat.pattern_index,
                    capture.index,
                    capture.node.start_position(),
                    capture.node.end_position(),
                    capture.node.utf8_text(&source_code).unwrap_or("")
                )?;
            }
            results.push(query_testing::CaptureInfo {
                name: (*capture_name).to_string(),
                start: to_utf8_point(capture.node.start_position(), source_code.as_slice()),
                end: to_utf8_point(capture.node.end_position(), source_code.as_slice()),
            });
        }
    } else {
        // Stream whole matches, then print each match's captures.
        let mut matches = query_cursor.matches(&query, tree.root_node(), source_code.as_slice());
        while let Some(m) = matches.next() {
            if !quiet && !should_test {
                writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
            }
            for capture in m.captures {
                let start = capture.node.start_position();
                let end = capture.node.end_position();
                let capture_name = &query.capture_names()[capture.index as usize];
                if !quiet && !should_test {
                    // Single-line captures include the matched text; multi-line
                    // captures print only their span.
                    if end.row == start.row {
                        writeln!(
                            &mut stdout,
                            " capture: {} - {capture_name}, start: {start}, end: {end}, text: `{}`",
                            capture.index,
                            capture.node.utf8_text(&source_code).unwrap_or("")
                        )?;
                    } else {
                        writeln!(
                            &mut stdout,
                            " capture: {capture_name}, start: {start}, end: {end}",
                        )?;
                    }
                }
                results.push(query_testing::CaptureInfo {
                    name: (*capture_name).to_string(),
                    start: to_utf8_point(capture.node.start_position(), source_code.as_slice()),
                    end: to_utf8_point(capture.node.end_position(), source_code.as_slice()),
                });
            }
        }
    }
    if query_cursor.did_exceed_match_limit() {
        writeln!(
            &mut stdout,
            " WARNING: Query exceeded maximum number of in-progress captures!"
        )?;
    }
    if should_test {
        let path_name = if stdin {
            "stdin"
        } else {
            // NOTE(review): `path` is already a `&Path`, so `Path::new(&path)`
            // is a redundant (but harmless) re-wrap.
            Path::new(&path).file_name().unwrap().to_str().unwrap()
        };
        match query_testing::assert_expected_captures(&results, path, &mut parser, language) {
            Ok(assertion_count) => {
                println!(
                    " ✓ {} ({} assertions)",
                    paint(Some(AnsiColor::Green), path_name),
                    assertion_count
                );
            }
            Err(e) => {
                println!("{}", paint(Some(AnsiColor::Red), path_name));
                return Err(e);
            }
        }
    }
    if print_time {
        writeln!(&mut stdout, "{:?}", start.elapsed())?;
    }
    Ok(())
}

View file

@ -0,0 +1,254 @@
use std::{fs, path::Path, sync::LazyLock};
use anyhow::{anyhow, Result};
use bstr::{BStr, ByteSlice};
use regex::Regex;
use tree_sitter::{Language, Parser, Point};
// Matches a capture name in an assertion comment: word characters plus
// `_`, `-`, and `.` (e.g. `function.method`).
static CAPTURE_NAME_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new("[\\w_\\-.]+").unwrap());
/// A (row, column) position whose column is measured in graphemes (see
/// `to_utf8_point`) rather than bytes, for comparison against editor-style
/// positions in assertion comments.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Utf8Point {
    pub row: usize,
    pub column: usize,
}

impl std::fmt::Display for Utf8Point {
    // Render as `(row, column)` for error messages.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "({}, {})", self.row, self.column)
    }
}

impl Utf8Point {
    #[must_use]
    pub const fn new(row: usize, column: usize) -> Self {
        Self { row, column }
    }
}
/// Convert a byte-column `Point` into a `Utf8Point` whose column counts
/// graphemes on that line.
///
/// NOTE(review): panics (`.nth(...).unwrap()`) if `point.row` is beyond the
/// last line of `source`; callers appear to pass points produced by parsing
/// `source` itself, which keeps the row in range — confirm.
#[must_use]
pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point {
    if point.column == 0 {
        return Utf8Point::new(point.row, 0);
    }

    let bstr = BStr::new(source);
    let line = bstr.lines_with_terminator().nth(point.row).unwrap();
    let mut utf8_column = 0;

    // Count graphemes up to and including the one that contains the byte
    // offset `point.column`.
    for (_, grapheme_end, _) in line.grapheme_indices() {
        utf8_column += 1;
        if grapheme_end >= point.column {
            break;
        }
    }

    Utf8Point {
        row: point.row,
        column: utf8_column,
    }
}
/// A capture produced by running a query: its name and grapheme-based span.
#[derive(Debug, Eq, PartialEq)]
pub struct CaptureInfo {
    pub name: String,
    pub start: Utf8Point,
    pub end: Utf8Point,
}

/// One assertion parsed from a positional comment (`^` / `<-` arrows
/// followed by an expected capture name).
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
    /// The position in the source that the assertion points at.
    pub position: Utf8Point,
    /// The number of `^` arrows, i.e. how many columns the assertion spans.
    pub length: usize,
    /// True when the assertion is marked with `!` after the arrows.
    pub negative: bool,
    pub expected_capture_name: String,
}
impl Assertion {
    /// Construct an assertion at (`row`, `col`) spanning `length` columns.
    #[must_use]
    pub const fn new(
        row: usize,
        col: usize,
        length: usize,
        negative: bool,
        expected_capture_name: String,
    ) -> Self {
        Self {
            position: Utf8Point::new(row, col),
            length,
            negative,
            expected_capture_name,
        }
    }
}
/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return a vector of (position, expected highlight name)
/// pairs.
///
/// # Errors
///
/// Fails when an assertion can't be mapped to any line of code above it.
pub fn parse_position_comments(
    parser: &mut Parser,
    language: &Language,
    source: &[u8],
) -> Result<Vec<Assertion>> {
    let mut result = Vec::new();
    // Spans of comment nodes that contained assertions, used for the row
    // adjustment pass below.
    let mut assertion_ranges = Vec::new();

    // Parse the code.
    parser.set_included_ranges(&[]).unwrap();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();

    // Walk the tree, finding comment nodes that contain assertions.
    let mut ascending = false;
    let mut cursor = tree.root_node().walk();
    loop {
        if ascending {
            let node = cursor.node();

            // Find every comment node (any node kind containing "comment").
            if node.kind().to_lowercase().contains("comment") {
                if let Ok(text) = node.utf8_text(source) {
                    let mut position = node.start_position();
                    // Assertions on the first row have nothing above them to
                    // refer to, so they are skipped.
                    if position.row > 0 {
                        // Find the arrow character ("^" or "<-") in the comment. A left arrow
                        // refers to the column where the comment node starts. An up arrow refers
                        // to its own column.
                        let mut has_left_caret = false;
                        let mut has_arrow = false;
                        let mut negative = false;
                        let mut arrow_end = 0;
                        let mut arrow_count = 1;
                        for (i, c) in text.char_indices() {
                            arrow_end = i + 1;
                            if c == '-' && has_left_caret {
                                has_arrow = true;
                                break;
                            }
                            if c == '^' {
                                has_arrow = true;
                                position.column += i;
                                // Continue counting remaining arrows and update their end column
                                for (_, c) in text[arrow_end..].char_indices() {
                                    if c != '^' {
                                        arrow_end += arrow_count - 1;
                                        break;
                                    }
                                    arrow_count += 1;
                                }
                                break;
                            }
                            has_left_caret = c == '<';
                        }

                        // find any ! after arrows but before capture name
                        if has_arrow {
                            for (i, c) in text[arrow_end..].char_indices() {
                                if c == '!' {
                                    negative = true;
                                    arrow_end += i + 1;
                                    break;
                                } else if !c.is_whitespace() {
                                    break;
                                }
                            }
                        }

                        // If the comment node contains an arrow and a highlight name, record the
                        // highlight name and the position.
                        if let (true, Some(mat)) =
                            (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
                        {
                            assertion_ranges.push((node.start_position(), node.end_position()));
                            result.push(Assertion {
                                position: to_utf8_point(position, source),
                                length: arrow_count,
                                negative,
                                expected_capture_name: mat.as_str().to_string(),
                            });
                        }
                    }
                }
            }

            // Continue walking the tree.
            if cursor.goto_next_sibling() {
                ascending = false;
            } else if !cursor.goto_parent() {
                break;
            }
        } else if !cursor.goto_first_child() {
            ascending = true;
        }
    }

    // Adjust the row number in each assertion's position to refer to the line of
    // code *above* the assertion. There can be multiple lines of assertion comments and empty
    // lines, so the positions may have to be decremented by more than one row.
    let mut i = 0;
    let lines = source.lines_with_terminator().collect::<Vec<_>>();
    for assertion in &mut result {
        let original_position = assertion.position;
        loop {
            let on_assertion_line = assertion_ranges[i..]
                .iter()
                .any(|(start, _)| start.row == assertion.position.row);
            let on_empty_line = lines[assertion.position.row].len() <= assertion.position.column;
            if on_assertion_line || on_empty_line {
                if assertion.position.row > 0 {
                    assertion.position.row -= 1;
                } else {
                    return Err(anyhow!(
                        "Error: could not find a line that corresponds to the assertion `{}` located at {original_position}",
                        assertion.expected_capture_name
                    ));
                }
            } else {
                // Skip past assertion ranges that lie entirely above this
                // row; `i` only ever moves forward because assertions are
                // visited in tree (source) order.
                while i < assertion_ranges.len()
                    && assertion_ranges[i].0.row < assertion.position.row
                {
                    i += 1;
                }
                break;
            }
        }
    }

    // The assertions can end up out of order due to the line adjustments.
    result.sort_unstable_by_key(|a| a.position);
    Ok(result)
}
/// Parse the assertion comments in the file at `path` and check each one
/// against the actual captures in `infos`. Returns the number of parsed
/// assertions on success.
pub fn assert_expected_captures(
    infos: &[CaptureInfo],
    path: &Path,
    parser: &mut Parser,
    language: &Language,
) -> Result<usize> {
    let contents = fs::read_to_string(path)?;
    let pairs = parse_position_comments(parser, language, contents.as_bytes())?;

    for assertion in &pairs {
        // Find a capture whose span contains the asserted position, taking
        // the assertion's arrow length into account for the end column.
        if let Some(found) = &infos.iter().find(|p| {
            assertion.position >= p.start
                && (assertion.position.row < p.end.row
                    || assertion.position.column + assertion.length - 1 < p.end.column)
        }) {
            // NOTE(review): captures literally named "name" are exempted
            // from the comparison here — confirm that special case is
            // intentional.
            if assertion.expected_capture_name != found.name && found.name != "name" {
                return Err(anyhow!(
                    "Assertion failed: at {}, found {}, expected {}",
                    found.start,
                    found.name,
                    assertion.expected_capture_name,
                ));
            }
        } else {
            return Err(anyhow!(
                "Assertion failed: could not match {} at row {}, column {}",
                assertion.expected_capture_name,
                assertion.position.row,
                assertion.position.column + assertion.length - 1,
            ));
        }
    }
    Ok(pairs.len())
}

78
crates/cli/src/tags.rs Normal file
View file

@ -0,0 +1,78 @@
use std::{
fs,
io::{self, Write},
path::Path,
str,
sync::{atomic::AtomicUsize, Arc},
time::Instant,
};
use anyhow::Result;
use tree_sitter_tags::{TagsConfiguration, TagsContext};
/// Options controlling tag generation and output.
pub struct TagsOptions {
    // Not read by `generate_tags` in this file; presumably consumed by the
    // caller when resolving the tags configuration — confirm.
    pub scope: Option<String>,
    // Suppress per-tag output lines.
    pub quiet: bool,
    // Print elapsed time after generating tags.
    pub print_time: bool,
    // Set to 1 (e.g. by a ctrl-c handler) to cancel tag generation.
    pub cancellation_flag: Arc<AtomicUsize>,
}
/// Generate and print tags for the file at `path` using `config`.
///
/// With `indent`, the file's `name` is printed first and every tag line is
/// prefixed with a tab (used when multiple files are processed).
pub fn generate_tags(
    path: &Path,
    name: &str,
    config: &TagsConfiguration,
    indent: bool,
    opts: &TagsOptions,
) -> Result<()> {
    let mut context = TagsContext::new();
    // Lock stdout once instead of per write.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();

    let indent_str = if indent {
        if !opts.quiet {
            writeln!(&mut stdout, "{name}")?;
        }
        "\t"
    } else {
        ""
    };

    let source = fs::read(path)?;
    let start = Instant::now();
    for tag in context
        .generate_tags(config, &source, Some(&opts.cancellation_flag))?
        .0
    {
        let tag = tag?;
        if !opts.quiet {
            // Columns: tag name | syntax type | def/ref | span | source line.
            write!(
                &mut stdout,
                "{indent_str}{:<10}\t | {:<8}\t{} {} - {} `{}`",
                str::from_utf8(&source[tag.name_range]).unwrap_or(""),
                &config.syntax_type_name(tag.syntax_type_id),
                if tag.is_definition { "def" } else { "ref" },
                tag.span.start,
                tag.span.end,
                str::from_utf8(&source[tag.line_range]).unwrap_or(""),
            )?;
            // Append docs, truncated to their first 120 bytes.
            if let Some(docs) = tag.docs {
                if docs.len() > 120 {
                    write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?;
                } else {
                    write!(&mut stdout, "\t{:?}", &docs)?;
                }
            }
            writeln!(&mut stdout)?;
        }
    }

    if opts.print_time {
        writeln!(
            &mut stdout,
            "{indent_str}time: {}ms",
            start.elapsed().as_millis(),
        )?;
    }

    Ok(())
}

View file

@ -0,0 +1,46 @@
root = true
[*]
charset = utf-8
[*.{json,toml,yml,gyp}]
indent_style = space
indent_size = 2
[*.js]
indent_style = space
indent_size = 2
[*.scm]
indent_style = space
indent_size = 2
[*.{c,cc,h}]
indent_style = space
indent_size = 4
[*.rs]
indent_style = space
indent_size = 4
[*.{py,pyi}]
indent_style = space
indent_size = 4
[*.swift]
indent_style = space
indent_size = 4
[*.go]
indent_style = tab
indent_size = 8
[Makefile]
indent_style = tab
indent_size = 8
[parser.c]
indent_size = 2
[{alloc,array,parser}.h]
indent_size = 2

View file

@ -0,0 +1,16 @@
/* Public C header template for a generated grammar. The UPPER_PARSER_NAME
 * and PARSER_NAME placeholders are substituted when the bindings are
 * generated. */
#ifndef TREE_SITTER_UPPER_PARSER_NAME_H_
#define TREE_SITTER_UPPER_PARSER_NAME_H_

/* Opaque language type from the tree-sitter runtime. */
typedef struct TSLanguage TSLanguage;

#ifdef __cplusplus
extern "C" {
#endif

/* Returns the language object for this grammar. */
const TSLanguage *tree_sitter_PARSER_NAME(void);

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_UPPER_PARSER_NAME_H_

View file

@ -0,0 +1,10 @@
prefix=@CMAKE_INSTALL_PREFIX@
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: tree-sitter-PARSER_NAME
Description: @PROJECT_DESCRIPTION@
URL: @PROJECT_HOMEPAGE_URL@
Version: @PROJECT_VERSION@
Libs: -L${libdir} -ltree-sitter-PARSER_NAME
Cflags: -I${includedir}

View file

@ -0,0 +1,42 @@
"""PARSER_DESCRIPTION"""
from importlib.resources import files as _files
from ._binding import language
def _get_query(name, file):
query = _files(f"{__package__}.queries") / file
globals()[name] = query.read_text()
return globals()[name]
def __getattr__(name):
# NOTE: uncomment these to include any queries that this grammar contains:
# if name == "HIGHLIGHTS_QUERY":
# return _get_query("HIGHLIGHTS_QUERY", "highlights.scm")
# if name == "INJECTIONS_QUERY":
# return _get_query("INJECTIONS_QUERY", "injections.scm")
# if name == "LOCALS_QUERY":
# return _get_query("LOCALS_QUERY", "locals.scm")
# if name == "TAGS_QUERY":
# return _get_query("TAGS_QUERY", "tags.scm")
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
__all__ = [
"language",
# "HIGHLIGHTS_QUERY",
# "INJECTIONS_QUERY",
# "LOCALS_QUERY",
# "TAGS_QUERY",
]
def __dir__():
return sorted(__all__ + [
"__all__", "__builtins__", "__cached__", "__doc__", "__file__",
"__loader__", "__name__", "__package__", "__path__", "__spec__",
])

View file

@ -0,0 +1,10 @@
from typing import Final
# NOTE: uncomment these to include any queries that this grammar contains:
# HIGHLIGHTS_QUERY: Final[str]
# INJECTIONS_QUERY: Final[str]
# LOCALS_QUERY: Final[str]
# TAGS_QUERY: Final[str]
def language() -> object: ...

View file

@ -0,0 +1,34 @@
[package]
name = "tree-sitter-PARSER_NAME"
description = "PARSER_DESCRIPTION"
version = "PARSER_VERSION"
authors = ["PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL"]
license = "PARSER_LICENSE"
readme = "README.md"
keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"]
categories = ["parser-implementations", "parsing", "text-editors"]
repository = "PARSER_URL"
edition = "2021"
autoexamples = false
build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
"tree-sitter.json",
"LICENSE",
]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter-language = "0.1"
[build-dependencies]
cc = "1.2"
[dev-dependencies]
tree-sitter = "RUST_BINDING_VERSION"

View file

@ -0,0 +1,15 @@
package tree_sitter_LOWER_PARSER_NAME
// #cgo CFLAGS: -std=c11 -fPIC
// #include "../../src/parser.c"
// #if __has_include("../../src/scanner.c")
// #include "../../src/scanner.c"
// #endif
import "C"
import "unsafe"
// Get the tree-sitter Language for this grammar.
func Language() unsafe.Pointer {
return unsafe.Pointer(C.tree_sitter_LOWER_PARSER_NAME())
}

View file

@ -0,0 +1,35 @@
{
"targets": [
{
"target_name": "tree_sitter_PARSER_NAME_binding",
"dependencies": [
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
],
"include_dirs": [
"src",
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
],
"variables": {
"has_scanner": "<!(node -p \"fs.existsSync('src/scanner.c')\")"
},
"conditions": [
["has_scanner=='true'", {
"sources+": ["src/scanner.c"],
}],
["OS!='win'", {
"cflags_c": [
"-std=c11",
],
}, { # OS == "win"
"cflags_c": [
"/std:c11",
"/utf-8",
],
}],
],
}
]
}

View file

@ -0,0 +1,15 @@
package tree_sitter_LOWER_PARSER_NAME_test
import (
"testing"
tree_sitter "github.com/tree-sitter/go-tree-sitter"
tree_sitter_LOWER_PARSER_NAME "PARSER_URL_STRIPPED/bindings/go"
)
func TestCanLoadGrammar(t *testing.T) {
language := tree_sitter.NewLanguage(tree_sitter_LOWER_PARSER_NAME.Language())
if language == nil {
t.Errorf("Error loading TITLE_PARSER_NAME grammar")
}
}

View file

@ -0,0 +1,9 @@
const assert = require("node:assert");
const { test } = require("node:test");
const Parser = require("tree-sitter");
test("can load grammar", () => {
const parser = new Parser();
assert.doesNotThrow(() => parser.setLanguage(require(".")));
});

View file

@ -0,0 +1,21 @@
fn main() {
    // The generated parser (and optional hand-written scanner) live in `src`.
    let src_dir = std::path::Path::new("src");

    let mut c_config = cc::Build::new();
    c_config.std("c11").include(src_dir);

    // MSVC needs an explicit flag to treat the sources as UTF-8.
    #[cfg(target_env = "msvc")]
    c_config.flag("-utf-8");

    let parser_path = src_dir.join("parser.c");
    c_config.file(&parser_path);
    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

    // Compile the external scanner only if the grammar provides one.
    let scanner_path = src_dir.join("scanner.c");
    if scanner_path.exists() {
        c_config.file(&scanner_path);
        println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
    }

    c_config.compile("tree-sitter-KEBAB_PARSER_NAME");
}

View file

@ -0,0 +1,79 @@
const std = @import("std");
pub fn build(b: *std.Build) !void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const shared = b.option(bool, "build-shared", "Build a shared library") orelse true;
const reuse_alloc = b.option(bool, "reuse-allocator", "Reuse the library allocator") orelse false;
const lib: *std.Build.Step.Compile = if (shared) b.addSharedLibrary(.{
.name = "tree-sitter-PARSER_NAME",
.pic = true,
.target = target,
.optimize = optimize,
.link_libc = true,
}) else b.addStaticLibrary(.{
.name = "tree-sitter-PARSER_NAME",
.target = target,
.optimize = optimize,
.link_libc = true,
});
lib.addCSourceFile(.{
.file = b.path("src/parser.c"),
.flags = &.{"-std=c11"},
});
if (hasScanner(b.build_root.handle)) {
lib.addCSourceFile(.{
.file = b.path("src/scanner.c"),
.flags = &.{"-std=c11"},
});
}
if (reuse_alloc) {
lib.root_module.addCMacro("TREE_SITTER_REUSE_ALLOCATOR", "");
}
if (optimize == .Debug) {
lib.root_module.addCMacro("TREE_SITTER_DEBUG", "");
}
lib.addIncludePath(b.path("src"));
b.installArtifact(lib);
b.installFile("src/node-types.json", "node-types.json");
b.installDirectory(.{ .source_dir = b.path("queries"), .install_dir = .prefix, .install_subdir = "queries", .include_extensions = &.{"scm"} });
const module = b.addModule("tree-sitter-PARSER_NAME", .{
.root_source_file = b.path("bindings/zig/root.zig"),
.target = target,
.optimize = optimize,
});
module.linkLibrary(lib);
const ts_dep = b.dependency("tree-sitter", .{});
const ts_mod = ts_dep.module("tree-sitter");
module.addImport("tree-sitter", ts_mod);
//
// Tests
//
const tests = b.addTest(.{
.root_source_file = b.path("bindings/zig/root.zig"),
.target = target,
.optimize = optimize,
});
tests.linkLibrary(lib);
tests.root_module.addImport("tree-sitter", ts_mod);
const run_tests = b.addRunArtifact(tests);
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_tests.step);
}
inline fn hasScanner(dir: std.fs.Dir) bool {
dir.access("src/scanner.c", .{}) catch return false;
return true;
}

View file

@ -0,0 +1,17 @@
.{
.name = "tree-sitter-PARSER_NAME",
.version = "PARSER_VERSION",
.dependencies = .{ .@"tree-sitter" = .{
.url = "https://github.com/tree-sitter/zig-tree-sitter/archive/refs/tags/v0.25.0.tar.gz",
.hash = "12201a8d5e840678bbbf5128e605519c4024af422295d68e2ba2090e675328e5811d",
} },
.paths = .{
"build.zig",
"build.zig.zon",
"bindings/zig",
"src",
"queries",
"LICENSE",
"README.md",
},
}

View file

@ -0,0 +1,66 @@
cmake_minimum_required(VERSION 3.13)
project(tree-sitter-KEBAB_PARSER_NAME
VERSION "PARSER_VERSION"
DESCRIPTION "PARSER_DESCRIPTION"
HOMEPAGE_URL "PARSER_URL"
LANGUAGES C)
option(BUILD_SHARED_LIBS "Build using shared libraries" ON)
option(TREE_SITTER_REUSE_ALLOCATOR "Reuse the library allocator" OFF)
set(TREE_SITTER_ABI_VERSION ABI_VERSION_MAX CACHE STRING "Tree-sitter ABI version")
if(NOT ${TREE_SITTER_ABI_VERSION} MATCHES "^[0-9]+$")
unset(TREE_SITTER_ABI_VERSION CACHE)
message(FATAL_ERROR "TREE_SITTER_ABI_VERSION must be an integer")
endif()
include(GNUInstallDirs)
find_program(TREE_SITTER_CLI tree-sitter DOC "Tree-sitter CLI")
add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json"
COMMAND "${TREE_SITTER_CLI}" generate src/grammar.json
--abi=${TREE_SITTER_ABI_VERSION}
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMENT "Generating parser.c")
add_library(tree-sitter-KEBAB_PARSER_NAME src/parser.c)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/src/scanner.c)
target_sources(tree-sitter-KEBAB_PARSER_NAME PRIVATE src/scanner.c)
endif()
target_include_directories(tree-sitter-KEBAB_PARSER_NAME
PRIVATE src
INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/bindings/c>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
target_compile_definitions(tree-sitter-KEBAB_PARSER_NAME PRIVATE
$<$<BOOL:${TREE_SITTER_REUSE_ALLOCATOR}>:TREE_SITTER_REUSE_ALLOCATOR>
$<$<CONFIG:Debug>:TREE_SITTER_DEBUG>)
set_target_properties(tree-sitter-KEBAB_PARSER_NAME
PROPERTIES
C_STANDARD 11
POSITION_INDEPENDENT_CODE ON
SOVERSION "${TREE_SITTER_ABI_VERSION}.${PROJECT_VERSION_MAJOR}"
DEFINE_SYMBOL "")
configure_file(bindings/c/tree-sitter-KEBAB_PARSER_NAME.pc.in
"${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-KEBAB_PARSER_NAME.pc" @ONLY)
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bindings/c/tree_sitter"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
FILES_MATCHING PATTERN "*.h")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-KEBAB_PARSER_NAME.pc"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
install(TARGETS tree-sitter-KEBAB_PARSER_NAME
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")
file(GLOB QUERIES queries/*.scm)
install(FILES ${QUERIES}
DESTINATION "${CMAKE_INSTALL_DATADIR}/tree-sitter/queries/KEBAB_PARSER_NAME")
add_custom_target(ts-test "${TREE_SITTER_CLI}" test
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMENT "tree-sitter test")

View file

@ -0,0 +1,41 @@
* text=auto eol=lf
# Generated source files
src/*.json linguist-generated
src/parser.c linguist-generated
src/tree_sitter/* linguist-generated
# C bindings
bindings/c/** linguist-generated
CMakeLists.txt linguist-generated
Makefile linguist-generated
# Rust bindings
bindings/rust/* linguist-generated
Cargo.toml linguist-generated
Cargo.lock linguist-generated
# Node.js bindings
bindings/node/* linguist-generated
binding.gyp linguist-generated
package.json linguist-generated
package-lock.json linguist-generated
# Python bindings
bindings/python/** linguist-generated
setup.py linguist-generated
pyproject.toml linguist-generated
# Go bindings
bindings/go/* linguist-generated
go.mod linguist-generated
go.sum linguist-generated
# Swift bindings
bindings/swift/** linguist-generated
Package.swift linguist-generated
Package.resolved linguist-generated
# Zig bindings
build.zig linguist-generated
build.zig.zon linguist-generated

View file

@ -0,0 +1,50 @@
# Rust artifacts
target/
Cargo.lock
# Node artifacts
build/
prebuilds/
node_modules/
package-lock.json
# Swift artifacts
.build/
Package.resolved
# Go artifacts
_obj/
# Python artifacts
.venv/
dist/
*.egg-info
*.whl
# C artifacts
*.a
*.so
*.so.*
*.dylib
*.dll
*.pc
*.exp
*.lib
# Zig artifacts
.zig-cache/
zig-cache/
zig-out/
# Example dirs
/examples/*/
# Grammar volatiles
*.wasm
*.obj
*.o
# Archives
*.tar.gz
*.tgz
*.zip

View file

@ -0,0 +1,5 @@
module PARSER_URL_STRIPPED
go 1.22
require github.com/tree-sitter/go-tree-sitter v0.24.0

View file

@ -0,0 +1,17 @@
/**
* @file PARSER_DESCRIPTION
* @author PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL
* @license PARSER_LICENSE
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
module.exports = grammar({
name: "LOWER_PARSER_NAME",
rules: {
// TODO: add the actual grammar rules
source_file: $ => "hello"
}
});

27
crates/cli/src/templates/index.d.ts vendored Normal file
View file

@ -0,0 +1,27 @@
type BaseNode = {
type: string;
named: boolean;
};
type ChildNode = {
multiple: boolean;
required: boolean;
types: BaseNode[];
};
type NodeInfo =
| (BaseNode & {
subtypes: BaseNode[];
})
| (BaseNode & {
fields: { [name: string]: ChildNode };
children: ChildNode[];
});
type Language = {
language: unknown;
nodeTypeInfo: NodeInfo[];
};
declare const language: Language;
export = language;

View file

@ -0,0 +1,11 @@
const root = require("path").join(__dirname, "..", "..");
module.exports =
typeof process.versions.bun === "string"
// Support `bun build --compile` by being statically analyzable enough to find the .node file at build-time
? require(`../../prebuilds/${process.platform}-${process.arch}/tree-sitter-KEBAB_PARSER_NAME.node`)
: require("node-gyp-build")(root);
try {
module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

View file

@ -0,0 +1,19 @@
#include <napi.h>
typedef struct TSLanguage TSLanguage;
extern "C" TSLanguage *tree_sitter_PARSER_NAME();
// "tree-sitter", "language" hashed with BLAKE2
const napi_type_tag LANGUAGE_TYPE_TAG = {
0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};
Napi::Object Init(Napi::Env env, Napi::Object exports) {
auto language = Napi::External<TSLanguage>::New(env, tree_sitter_PARSER_NAME());
language.TypeTag(&LANGUAGE_TYPE_TAG);
exports["language"] = language;
return exports;
}
NODE_API_MODULE(tree_sitter_PARSER_NAME_binding, Init)

View file

@ -0,0 +1,51 @@
//! This crate provides CAMEL_PARSER_NAME language support for the [tree-sitter] parsing library.
//!
//! Typically, you will use the [`LANGUAGE`] constant to add this language to a
//! tree-sitter [`Parser`], and then use the parser to parse some code:
//!
//! ```
//! let code = r#"
//! "#;
//! let mut parser = tree_sitter::Parser::new();
//! let language = tree_sitter_PARSER_NAME::LANGUAGE;
//! parser
//! .set_language(&language.into())
//! .expect("Error loading TITLE_PARSER_NAME parser");
//! let tree = parser.parse(code, None).unwrap();
//! assert!(!tree.root_node().has_error());
//! ```
//!
//! [`Parser`]: https://docs.rs/tree-sitter/RUST_BINDING_VERSION/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter_language::LanguageFn;
extern "C" {
fn tree_sitter_PARSER_NAME() -> *const ();
}
/// The tree-sitter [`LanguageFn`] for this grammar.
pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_PARSER_NAME) };
/// The content of the [`node-types.json`] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
// NOTE: uncomment these to include any queries that this grammar contains:
// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
#[test]
fn test_can_load_grammar() {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&super::LANGUAGE.into())
.expect("Error loading TITLE_PARSER_NAME parser");
}
}

View file

@ -0,0 +1,99 @@
LANGUAGE_NAME := tree-sitter-KEBAB_PARSER_NAME
HOMEPAGE_URL := PARSER_URL
VERSION := PARSER_VERSION
# repository
SRC_DIR := src
TS ?= tree-sitter
# install directory layout
PREFIX ?= /usr/local
DATADIR ?= $(PREFIX)/share
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# source/object files
PARSER := $(SRC_DIR)/parser.c
EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c))
OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS))
# flags
ARFLAGS ?= rcs
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
# ABI versioning
SONAME_MAJOR = $(shell sed -n 's/\#define LANGUAGE_VERSION //p' $(PARSER))
SONAME_MINOR = $(word 1,$(subst ., ,$(VERSION)))
# OS-specific bits
ifeq ($(shell uname),Darwin)
SOEXT = dylib
SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT)
SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT)
LINKSHARED = -dynamiclib -Wl,-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SOEXTVER),-rpath,@executable_path/../Frameworks
else ifneq ($(findstring mingw32,$(shell $(CC) -dumpmachine)),)
SOEXT = dll
LINKSHARED += -s -shared -Wl,--out-implib,$(@:dll=lib)
lib$(LANGUAGE_NAME).lib: lib$(LANGUAGE_NAME).$(SOEXT)
else
SOEXT = so
SOEXTVER_MAJOR = $(SOEXT).$(SONAME_MAJOR)
SOEXTVER = $(SOEXT).$(SONAME_MAJOR).$(SONAME_MINOR)
LINKSHARED = -shared -Wl,-soname,lib$(LANGUAGE_NAME).$(SOEXTVER)
endif
ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc
lib$(LANGUAGE_NAME).a: $(OBJS)
$(AR) $(ARFLAGS) $@ $^
lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS)
$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ifneq ($(STRIP),)
$(STRIP) $@
endif
$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \
-e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR:$(PREFIX)/%=%)|' \
-e 's|@CMAKE_INSTALL_INCLUDEDIR@|$(INCLUDEDIR:$(PREFIX)/%=%)|' \
-e 's|@PROJECT_DESCRIPTION@|$(DESCRIPTION)|' \
-e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \
-e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@
$(PARSER): $(SRC_DIR)/grammar.json
$(TS) generate $^
install: all
install -d '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
install -m644 bindings/c/tree_sitter/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h
install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a
install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER)
ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR)
ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT)
ifneq ($(wildcard queries/*.scm),)
install -m644 queries/*.scm '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME
endif
uninstall:
$(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \
'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \
'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
$(RM) -r '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME
clean:
$(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) lib$(LANGUAGE_NAME).lib
test:
$(TS) test
.PHONY: all install uninstall clean test

View file

@ -0,0 +1,53 @@
{
"name": "tree-sitter-PARSER_NAME",
"version": "PARSER_VERSION",
"description": "PARSER_DESCRIPTION",
"repository": "PARSER_URL",
"funding": "FUNDING_URL",
"license": "PARSER_LICENSE",
"author": {
"name": "PARSER_AUTHOR_NAME",
"email": "PARSER_AUTHOR_EMAIL",
"url": "PARSER_AUTHOR_URL"
},
"main": "bindings/node",
"types": "bindings/node",
"keywords": [
"incremental",
"parsing",
"tree-sitter",
"LOWER_PARSER_NAME"
],
"files": [
"grammar.js",
"tree-sitter.json",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"src/**",
"*.wasm"
],
"dependencies": {
"node-addon-api": "^8.2.1",
"node-gyp-build": "^4.8.2"
},
"devDependencies": {
"prebuildify": "^6.0.1",
"tree-sitter-cli": "^CLI_VERSION"
},
"peerDependencies": {
"tree-sitter": "^0.21.1"
},
"peerDependenciesMeta": {
"tree-sitter": {
"optional": true
}
},
"scripts": {
"install": "node-gyp-build",
"prestart": "tree-sitter build --wasm",
"start": "tree-sitter playground",
"test": "node --test bindings/node/*_test.js"
}
}

View file

@ -0,0 +1,41 @@
// swift-tools-version:5.3
import Foundation
import PackageDescription
var sources = ["src/parser.c"]
if FileManager.default.fileExists(atPath: "src/scanner.c") {
sources.append("src/scanner.c")
}
let package = Package(
name: "PARSER_CLASS_NAME",
products: [
.library(name: "PARSER_CLASS_NAME", targets: ["PARSER_CLASS_NAME"]),
],
dependencies: [
.package(url: "https://github.com/tree-sitter/swift-tree-sitter", from: "0.8.0"),
],
targets: [
.target(
name: "PARSER_CLASS_NAME",
dependencies: [],
path: ".",
sources: sources,
resources: [
.copy("queries")
],
publicHeadersPath: "bindings/swift",
cSettings: [.headerSearchPath("src")]
),
.testTarget(
name: "PARSER_CLASS_NAMETests",
dependencies: [
"SwiftTreeSitter",
"PARSER_CLASS_NAME",
],
path: "bindings/swift/PARSER_CLASS_NAMETests"
)
],
cLanguageStandard: .c11
)

View file

@ -0,0 +1,35 @@
#include <Python.h>
typedef struct TSLanguage TSLanguage;
TSLanguage *tree_sitter_LOWER_PARSER_NAME(void);
static PyObject* _binding_language(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) {
return PyCapsule_New(tree_sitter_LOWER_PARSER_NAME(), "tree_sitter.Language", NULL);
}
static struct PyModuleDef_Slot slots[] = {
#ifdef Py_GIL_DISABLED
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
#endif
{0, NULL}
};
static PyMethodDef methods[] = {
{"language", _binding_language, METH_NOARGS,
"Get the tree-sitter language for this grammar."},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_binding",
.m_doc = NULL,
.m_size = 0,
.m_methods = methods,
.m_slots = slots,
};
PyMODINIT_FUNC PyInit__binding(void) {
return PyModuleDef_Init(&module);
}

View file

@ -0,0 +1,30 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "tree-sitter-PARSER_NAME"
description = "PARSER_DESCRIPTION"
version = "PARSER_VERSION"
keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"]
classifiers = [
"Intended Audience :: Developers",
"Topic :: Software Development :: Compilers",
"Topic :: Text Processing :: Linguistic",
"Typing :: Typed",
]
authors = [{ name = "PARSER_AUTHOR_NAME", email = "PARSER_AUTHOR_EMAIL" }]
requires-python = ">=3.10"
license.text = "PARSER_LICENSE"
readme = "README.md"
[project.urls]
Homepage = "PARSER_URL"
Funding = "FUNDING_URL"
[project.optional-dependencies]
core = ["tree-sitter~=0.24"]
[tool.cibuildwheel]
build = "cp310-*"
build-frontend = "build"

View file

@ -0,0 +1,19 @@
const testing = @import("std").testing;
const ts = @import("tree-sitter");
const Language = ts.Language;
const Parser = ts.Parser;
pub extern fn tree_sitter_PARSER_NAME() callconv(.C) *const Language;
pub export fn language() *const Language {
return tree_sitter_PARSER_NAME();
}
test "can load grammar" {
const parser = Parser.create();
defer parser.destroy();
try testing.expectEqual(parser.setLanguage(language()), void{});
try testing.expectEqual(parser.getLanguage(), tree_sitter_PARSER_NAME());
}

View file

@ -0,0 +1,77 @@
from os import path
from platform import system
from sysconfig import get_config_var
from setuptools import Extension, find_packages, setup
from setuptools.command.build import build
from setuptools.command.egg_info import egg_info
from wheel.bdist_wheel import bdist_wheel
sources = [
"bindings/python/tree_sitter_LOWER_PARSER_NAME/binding.c",
"src/parser.c",
]
if path.exists("src/scanner.c"):
sources.append("src/scanner.c")
macros: list[tuple[str, str | None]] = [
("PY_SSIZE_T_CLEAN", None),
("TREE_SITTER_HIDE_SYMBOLS", None),
]
if limited_api := not get_config_var("Py_GIL_DISABLED"):
macros.append(("Py_LIMITED_API", "0x030A0000"))
if system() != "Windows":
cflags = ["-std=c11", "-fvisibility=hidden"]
else:
cflags = ["/std:c11", "/utf-8"]
class Build(build):
def run(self):
if path.isdir("queries"):
dest = path.join(self.build_lib, "tree_sitter_PARSER_NAME", "queries")
self.copy_tree("queries", dest)
super().run()
class BdistWheel(bdist_wheel):
def get_tag(self):
python, abi, platform = super().get_tag()
if python.startswith("cp"):
python, abi = "cp310", "abi3"
return python, abi, platform
class EggInfo(egg_info):
def find_sources(self):
super().find_sources()
self.filelist.recursive_include("queries", "*.scm")
self.filelist.include("src/tree_sitter/*.h")
setup(
packages=find_packages("bindings/python"),
package_dir={"": "bindings/python"},
package_data={
"tree_sitter_LOWER_PARSER_NAME": ["*.pyi", "py.typed"],
"tree_sitter_LOWER_PARSER_NAME.queries": ["*.scm"],
},
ext_package="tree_sitter_LOWER_PARSER_NAME",
ext_modules=[
Extension(
name="_binding",
sources=sources,
extra_compile_args=cflags,
define_macros=macros,
include_dirs=["src"],
py_limited_api=limited_api,
)
],
cmdclass={
"build": Build,
"bdist_wheel": BdistWheel,
"egg_info": EggInfo,
},
zip_safe=False
)

View file

@ -0,0 +1,12 @@
from unittest import TestCase
import tree_sitter
import tree_sitter_LOWER_PARSER_NAME
class TestLanguage(TestCase):
def test_can_load_grammar(self):
try:
tree_sitter.Language(tree_sitter_LOWER_PARSER_NAME.language())
except Exception:
self.fail("Error loading TITLE_PARSER_NAME grammar")

View file

@ -0,0 +1,12 @@
import XCTest
import SwiftTreeSitter
import PARSER_CLASS_NAME
final class PARSER_CLASS_NAMETests: XCTestCase {
func testCanLoadGrammar() throws {
let parser = Parser()
let language = Language(language: tree_sitter_LOWER_PARSER_NAME())
XCTAssertNoThrow(try parser.setLanguage(language),
"Error loading TITLE_PARSER_NAME grammar")
}
}

1584
crates/cli/src/test.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,276 @@
use std::{fs, path::Path};
use anstyle::AnsiColor;
use anyhow::{anyhow, Result};
use tree_sitter::Point;
use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter};
use tree_sitter_loader::{Config, Loader};
use super::{
query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
test::paint,
util,
};
/// A single failed highlighting assertion: where it failed, which highlight
/// was expected, and which highlights were actually found at that position.
#[derive(Debug)]
pub struct Failure {
    row: usize,
    column: usize,
    expected_highlight: String,
    actual_highlights: Vec<String>,
}

impl std::error::Error for Failure {}

impl std::fmt::Display for Failure {
    /// Renders the failure as one human-readable line, e.g.
    /// `Failure - row: 1, column: 2, expected highlight 'x', actual highlights: 'a', 'b'`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            f,
            "Failure - row: {}, column: {}, expected highlight '{}', actual highlights: ",
            self.row, self.column, self.expected_highlight
        )?;
        if self.actual_highlights.is_empty() {
            return write!(f, "none.");
        }
        let found = self
            .actual_highlights
            .iter()
            .map(|name| format!("'{name}'"))
            .collect::<Vec<_>>()
            .join(", ");
        write!(f, "{found}")
    }
}
/// Runs all highlighting assertion tests beneath `directory`, printing a
/// per-file pass/fail report to stdout.
///
/// Thin wrapper around [`test_highlights_indented`] that prints the section
/// header and starts the recursion at indent level 2.
pub fn test_highlights(
    loader: &Loader,
    loader_config: &Config,
    highlighter: &mut Highlighter,
    directory: &Path,
    use_color: bool,
) -> Result<()> {
    println!("syntax highlighting:");
    test_highlights_indented(loader, loader_config, highlighter, directory, use_color, 2)
}
/// Recursive worker for [`test_highlights`].
///
/// Walks `directory`: each non-empty subdirectory is printed as a section
/// header and recursed into with one extra indent level, while each file is
/// highlighted and checked against its inline assertions, printing a ✓/✗ line.
///
/// Returns `Err` (with an empty message — details were already printed) if
/// any file in the subtree failed, so callers can propagate overall failure.
fn test_highlights_indented(
    loader: &Loader,
    loader_config: &Config,
    highlighter: &mut Highlighter,
    directory: &Path,
    use_color: bool,
    indent_level: usize,
) -> Result<()> {
    let mut failed = false;
    for highlight_test_file in fs::read_dir(directory)? {
        let highlight_test_file = highlight_test_file?;
        let test_file_path = highlight_test_file.path();
        let test_file_name = highlight_test_file.file_name();
        // Two spaces of output indentation per nesting level.
        print!(
            "{indent:indent_level$}",
            indent = "",
            indent_level = indent_level * 2
        );
        if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
            // Non-empty directory: print its name as a header and recurse.
            println!("{}:", test_file_name.to_string_lossy());
            if test_highlights_indented(
                loader,
                loader_config,
                highlighter,
                &test_file_path,
                use_color,
                indent_level + 1,
            )
            .is_err()
            {
                failed = true;
            }
        } else {
            // Regular file: resolve its language and highlight configuration,
            // then run the assertions embedded in the file's comments.
            let (language, language_config) = loader
                .language_configuration_for_file_name(&test_file_path)?
                .ok_or_else(|| {
                    anyhow!(
                        "{}",
                        util::lang_not_found_for_path(test_file_path.as_path(), loader_config)
                    )
                })?;
            let highlight_config = language_config
                .highlight_config(language, None)?
                .ok_or_else(|| anyhow!("No highlighting config found for {test_file_path:?}"))?;
            match test_highlight(
                loader,
                highlighter,
                highlight_config,
                fs::read(&test_file_path)?.as_slice(),
            ) {
                Ok(assertion_count) => {
                    println!(
                        "✓ {} ({assertion_count} assertions)",
                        paint(
                            use_color.then_some(AnsiColor::Green),
                            test_file_name.to_string_lossy().as_ref()
                        ),
                    );
                }
                Err(e) => {
                    println!(
                        "✗ {}",
                        paint(
                            use_color.then_some(AnsiColor::Red),
                            test_file_name.to_string_lossy().as_ref()
                        )
                    );
                    // Print the failure detail indented under the file name.
                    println!(
                        "{indent:indent_level$} {e}",
                        indent = "",
                        indent_level = indent_level * 2
                    );
                    failed = true;
                }
            }
        }
    }
    if failed {
        // Details were printed above; an empty error just signals failure.
        Err(anyhow!(""))
    } else {
        Ok(())
    }
}
/// Checks each highlighting [`Assertion`] against the `highlights` actually
/// produced for a file, returning the number of assertions on success.
///
/// Both slices are expected to be ordered by position (the skip-ahead below
/// relies on this). On the first assertion no highlight satisfies, returns a
/// [`Failure`] listing the highlights that actually covered that position.
pub fn iterate_assertions(
    assertions: &[Assertion],
    highlights: &[(Utf8Point, Utf8Point, Highlight)],
    highlight_names: &[String],
) -> Result<usize> {
    // Iterate through all of the highlighting assertions, checking each one against the
    // actual highlights.
    let mut i = 0;
    let mut actual_highlights = Vec::new();
    for Assertion {
        position,
        length,
        negative,
        expected_capture_name: expected_highlight,
    } in assertions
    {
        let mut passed = false;
        let mut end_column = position.column + length - 1;
        actual_highlights.clear();

        // The assertions are ordered by position, so skip past all of the highlights that
        // end at or before this assertion's position.
        'highlight_loop: while let Some(highlight) = highlights.get(i) {
            if highlight.1 <= *position {
                i += 1;
                continue;
            }

            // Iterate through all of the highlights that start at or before this assertion's
            // position, looking for one that matches the assertion.
            let mut j = i;
            while let (false, Some(highlight)) = (passed, highlights.get(j)) {
                end_column = position.column + length - 1;
                if highlight.0.row >= position.row && highlight.0.column > end_column {
                    break 'highlight_loop;
                }

                // If the highlight matches the assertion, or if the highlight doesn't
                // match the assertion but it's negative, this test passes. Otherwise,
                // add this highlight to the list of actual highlights that span the
                // assertion's position, in order to generate an error message in the event
                // of a failure.
                let highlight_name = &highlight_names[(highlight.2).0];
                if (*highlight_name == *expected_highlight) == *negative {
                    actual_highlights.push(highlight_name);
                } else {
                    passed = true;
                    break 'highlight_loop;
                }
                j += 1;
            }
        }
        if !passed {
            return Err(Failure {
                row: position.row,
                column: end_column,
                expected_highlight: expected_highlight.clone(),
                actual_highlights: actual_highlights.into_iter().cloned().collect(),
            }
            .into());
        }
    }
    Ok(assertions.len())
}
/// Highlights `source`, parses the inline position-comment assertions out of
/// it, and verifies those assertions against the computed highlights.
///
/// Returns the number of assertions checked on success.
pub fn test_highlight(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<usize> {
    // Highlight the file, and parse out all of the highlighting assertions.
    let highlight_names = loader.highlight_names();
    let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?;
    let assertions =
        parse_position_comments(highlighter.parser(), &highlight_config.language, source)?;
    iterate_assertions(&assertions, &highlights, &highlight_names)
}
/// Runs the highlighter over `source` and returns the innermost active
/// highlight for each highlighted span, as `(start, end, highlight)` triples
/// in UTF-8 points.
///
/// The source is lossily converted to UTF-8, and the byte offsets carried by
/// `HighlightEvent::Source` are translated to row/column positions by walking
/// the text's characters in lockstep with the event stream.
pub fn get_highlight_positions(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<Vec<(Utf8Point, Utf8Point, Highlight)>> {
    // Current row/column/byte position while walking the source text.
    let mut row = 0;
    let mut column = 0;
    let mut byte_offset = 0;
    // Whether the previously consumed character was a newline; the row
    // increment is deferred until the *next* character is consumed.
    let mut was_newline = false;
    let mut result = Vec::new();
    // Stack of currently-open highlights; the innermost is the last element.
    let mut highlight_stack = Vec::new();
    let source = String::from_utf8_lossy(source);
    let mut char_indices = source.char_indices();
    for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
        loader.highlight_config_for_injection_string(string)
    })? {
        match event? {
            HighlightEvent::HighlightStart(h) => highlight_stack.push(h),
            HighlightEvent::HighlightEnd => {
                highlight_stack.pop();
            }
            HighlightEvent::Source { start, end } => {
                // Walk the characters covered by this source event, capturing
                // the point at which the event's byte range begins.
                let mut start_position = Point::new(row, column);
                while byte_offset < end {
                    if byte_offset <= start {
                        start_position = Point::new(row, column);
                    }
                    if let Some((i, c)) = char_indices.next() {
                        if was_newline {
                            row += 1;
                            column = 0;
                        } else {
                            // Advance by the byte width of the previous char.
                            column += i - byte_offset;
                        }
                        was_newline = c == '\n';
                        byte_offset = i;
                    } else {
                        break;
                    }
                }
                // Record the span only while some highlight is active; the
                // most recently opened (innermost) highlight wins.
                if let Some(highlight) = highlight_stack.last() {
                    let utf8_start_position = to_utf8_point(start_position, source.as_bytes());
                    let utf8_end_position =
                        to_utf8_point(Point::new(row, column), source.as_bytes());
                    result.push((utf8_start_position, utf8_end_position, *highlight));
                }
            }
        }
    }
    Ok(result)
}

229
crates/cli/src/test_tags.rs Normal file
View file

@ -0,0 +1,229 @@
use std::{fs, path::Path};
use anstyle::AnsiColor;
use anyhow::{anyhow, Result};
use tree_sitter_loader::{Config, Loader};
use tree_sitter_tags::{TagsConfiguration, TagsContext};
use super::{
query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
test::paint,
util,
};
/// A single failed tag assertion: where it failed, which tag was expected,
/// and which tags were actually found at that position.
#[derive(Debug)]
pub struct Failure {
    row: usize,
    column: usize,
    expected_tag: String,
    actual_tags: Vec<String>,
}

impl std::error::Error for Failure {}

impl std::fmt::Display for Failure {
    /// Renders the failure as one human-readable line, e.g.
    /// `Failure - row: 1, column: 2, expected tag: 'x', actual tag: 'a', 'b'`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            f,
            "Failure - row: {}, column: {}, expected tag: '{}', actual tag: ",
            self.row, self.column, self.expected_tag
        )?;
        if self.actual_tags.is_empty() {
            return write!(f, "none.");
        }
        let found = self
            .actual_tags
            .iter()
            .map(|tag| format!("'{tag}'"))
            .collect::<Vec<_>>()
            .join(", ");
        write!(f, "{found}")
    }
}
/// Runs every tag test under `directory`, printing a `tags:` header first.
///
/// Thin entry point: all of the real work happens in [`test_tags_indented`],
/// which is started at an indent depth of 2.
pub fn test_tags(
    loader: &Loader,
    loader_config: &Config,
    tags_context: &mut TagsContext,
    directory: &Path,
    use_color: bool,
) -> Result<()> {
    println!("tags:");
    let initial_indent = 2;
    test_tags_indented(
        loader,
        loader_config,
        tags_context,
        directory,
        use_color,
        initial_indent,
    )
}
/// Recursively runs tag tests for every file under `directory`, printing a
/// ✓/✗ line per file at `indent_level` and descending into non-empty
/// subdirectories one level deeper.
///
/// Returns `Err` (with an empty message; the details were already printed)
/// if any file in the subtree failed.
pub fn test_tags_indented(
    loader: &Loader,
    loader_config: &Config,
    tags_context: &mut TagsContext,
    directory: &Path,
    use_color: bool,
    indent_level: usize,
) -> Result<()> {
    let mut failed = false;
    for tag_test_file in fs::read_dir(directory)? {
        let tag_test_file = tag_test_file?;
        let test_file_path = tag_test_file.path();
        let test_file_name = tag_test_file.file_name();
        // Two spaces of indentation per level.
        print!(
            "{indent:indent_level$}",
            indent = "",
            indent_level = indent_level * 2
        );
        if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
            println!("{}:", test_file_name.to_string_lossy());
            if test_tags_indented(
                loader,
                loader_config,
                tags_context,
                &test_file_path,
                use_color,
                indent_level + 1,
            )
            .is_err()
            {
                failed = true;
            }
        } else {
            // Resolve the language and its tags query from the file name.
            let (language, language_config) = loader
                .language_configuration_for_file_name(&test_file_path)?
                .ok_or_else(|| {
                    anyhow!(
                        "{}",
                        util::lang_not_found_for_path(test_file_path.as_path(), loader_config)
                    )
                })?;
            let tags_config = language_config
                .tags_config(language)?
                .ok_or_else(|| anyhow!("No tags config found for {test_file_path:?}"))?;
            match test_tag(
                tags_context,
                tags_config,
                fs::read(&test_file_path)?.as_slice(),
            ) {
                Ok(assertion_count) => {
                    println!(
                        "✓ {} ({assertion_count} assertions)",
                        paint(
                            use_color.then_some(AnsiColor::Green),
                            test_file_name.to_string_lossy().as_ref()
                        ),
                    );
                }
                Err(e) => {
                    println!(
                        "✗ {}",
                        paint(
                            use_color.then_some(AnsiColor::Red),
                            test_file_name.to_string_lossy().as_ref()
                        )
                    );
                    println!(
                        "{indent:indent_level$} {e}",
                        indent = "",
                        indent_level = indent_level * 2
                    );
                    failed = true;
                }
            }
        }
    }
    if failed {
        Err(anyhow!(""))
    } else {
        Ok(())
    }
}
/// Checks every position-comment assertion in `source` against the tags the
/// tagger actually produces, returning the number of assertions on success
/// or a [`Failure`] describing the first mismatch.
pub fn test_tag(
    tags_context: &mut TagsContext,
    tags_config: &TagsConfiguration,
    source: &[u8],
) -> Result<usize> {
    let tags = get_tag_positions(tags_context, tags_config, source)?;
    let assertions = parse_position_comments(tags_context.parser(), &tags_config.language, source)?;
    // Iterate through all of the assertions, checking against the actual tags.
    // `i` advances monotonically: tags and assertions are both in source order.
    let mut i = 0;
    let mut actual_tags = Vec::<&String>::new();
    for Assertion {
        position,
        length,
        negative,
        expected_capture_name: expected_tag,
    } in &assertions
    {
        let mut passed = false;
        let mut end_column = position.column + length - 1;
        'tag_loop: while let Some(tag) = tags.get(i) {
            // Skip tags that end before this assertion's position.
            if tag.1 <= *position {
                i += 1;
                continue;
            }
            // Iterate through all of the tags that start at or before this assertion's
            // position, looking for one that matches the assertion
            let mut j = i;
            while let (false, Some(tag)) = (passed, tags.get(j)) {
                end_column = position.column + length - 1;
                if tag.0.column > end_column {
                    break 'tag_loop;
                }
                let tag_name = &tag.2;
                // For a positive assertion, a name mismatch is recorded as an
                // "actual" tag; for a negative one, a match is.
                if (*tag_name == *expected_tag) == *negative {
                    actual_tags.push(tag_name);
                } else {
                    passed = true;
                    break 'tag_loop;
                }
                j += 1;
                if tag == tags.last().unwrap() {
                    break 'tag_loop;
                }
            }
        }
        if !passed {
            return Err(Failure {
                row: position.row,
                column: end_column,
                expected_tag: expected_tag.clone(),
                actual_tags: actual_tags.into_iter().cloned().collect(),
            }
            .into());
        }
    }
    Ok(assertions.len())
}
/// Runs the tagger over `source` and converts each produced tag into a
/// `(start, end, "definition.X" | "reference.X")` triple, with both
/// endpoints expressed as UTF-8 code-point positions.
pub fn get_tag_positions(
    tags_context: &mut TagsContext,
    tags_config: &TagsConfiguration,
    source: &[u8],
) -> Result<Vec<(Utf8Point, Utf8Point, String)>> {
    let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?;
    let mut tag_positions = Vec::new();
    // Per-tag errors are skipped; only successfully-produced tags are kept.
    for tag in tags_iter.flatten() {
        let tag_postfix = tags_config.syntax_type_name(tag.syntax_type_id).to_string();
        let tag_name = if tag.is_definition {
            format!("definition.{tag_postfix}")
        } else {
            format!("reference.{tag_postfix}")
        };
        tag_positions.push((
            to_utf8_point(tag.span.start, source),
            to_utf8_point(tag.span.end, source),
            tag_name,
        ));
    }
    Ok(tag_positions)
}

34
crates/cli/src/tests.rs Normal file
View file

@ -0,0 +1,34 @@
mod async_context_test;
mod corpus_test;
mod detect_language;
mod helpers;
mod highlight_test;
mod language_test;
mod node_test;
mod parser_hang_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;
#[cfg(feature = "wasm")]
mod wasm_language_test;
use tree_sitter_generate::GenerateResult;
pub use crate::fuzz::{
allocations,
edits::{get_random_edit, invert_edit},
random::Rand,
ITERATION_COUNT,
};
/// This is a simple wrapper around [`tree_sitter_generate::generate_parser_for_grammar`], because
/// our tests do not need to pass in a version number, only the grammar JSON.
///
/// The version is pinned to `(0, 0, 0)` so generated output is stable across
/// test runs.
fn generate_parser(grammar_json: &str) -> GenerateResult<(String, String)> {
    tree_sitter_generate::generate_parser_for_grammar(grammar_json, Some((0, 0, 0)))
}

View file

@ -0,0 +1,278 @@
use std::{
future::Future,
pin::{pin, Pin},
ptr,
task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker},
};
use tree_sitter::Parser;
use super::helpers::fixtures::get_language;
// Verifies that a `Node` captured by value and by reference can be used across
// await points, both in async closures ("fabrics") and plain async blocks.
// `pended` counts how many times the task suspended (one per `yield_now`).
#[test]
fn test_node_in_fut() {
    let (ret, pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("bash");
        parser.set_language(&language).unwrap();
        let tree = parser.parse("#", None).unwrap();
        let root = tree.root_node();
        let root_ref = &root;
        let fut_val_fn = || async {
            yield_now().await;
            root.child(0).unwrap().kind()
        };
        yield_now().await;
        let fut_ref_fn = || async {
            yield_now().await;
            root_ref.child(0).unwrap().kind()
        };
        let f1 = fut_val_fn().await;
        let f2 = fut_ref_fn().await;
        assert_eq!(f1, f2);
        let fut_val = async {
            yield_now().await;
            root.child(0).unwrap().kind()
        };
        let fut_ref = async {
            yield_now().await;
            root_ref.child(0).unwrap().kind()
        };
        let f1 = fut_val.await;
        let f2 = fut_ref.await;
        assert_eq!(f1, f2);
        f1
    })
    .join();
    // "#" parses to a single comment node in bash; 5 yield_now calls ran.
    assert_eq!(ret, "comment");
    assert_eq!(pended, 5);
}
// Verifies that a `Node` and a `&mut TreeCursor` can both be held across await
// points within a single task, including after the inner futures complete.
#[test]
fn test_node_and_cursor_ref_in_fut() {
    let ((), pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("c");
        parser.set_language(&language).unwrap();
        let tree = parser.parse("#", None).unwrap();
        let root = tree.root_node();
        let root_ref = &root;
        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;
        cursor_ref.goto_first_child();
        let fut_val = async {
            yield_now().await;
            let _ = root.to_sexp();
        };
        yield_now().await;
        let fut_ref = async {
            yield_now().await;
            let _ = root_ref.to_sexp();
            cursor_ref.goto_first_child();
        };
        fut_val.await;
        fut_ref.await;
        // Cursor is still usable after being borrowed by the futures.
        cursor_ref.goto_first_child();
    })
    .join();
    assert_eq!(pended, 3);
}
// Like the test above, but the futures come from closures ("fabrics") so the
// value-capturing one can be awaited more than once; the ref-capturing one is
// `async move` and consumes its captures.
#[test]
fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
    let ((), pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("javascript");
        parser.set_language(&language).unwrap();
        let tree = parser.parse("#", None).unwrap();
        let root = tree.root_node();
        let root_ref = &root;
        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;
        cursor_ref.goto_first_child();
        let fut_val = || async {
            yield_now().await;
            let _ = root.to_sexp();
        };
        yield_now().await;
        let fut_ref = || async move {
            yield_now().await;
            let _ = root_ref.to_sexp();
            cursor_ref.goto_first_child();
        };
        fut_val().await;
        fut_val().await;
        fut_ref().await;
    })
    .join();
    assert_eq!(pended, 4);
}
// Verifies that cloned `Tree`s can be moved into nested spawned tasks, each
// creating its own node/cursor, while the outer task keeps using its cursor.
// `ret` sums the pend counts of the two inner tasks (one yield each).
#[test]
fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
    let (ret, pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("rust");
        parser.set_language(&language).unwrap();
        let tree = parser.parse("#", None).unwrap();
        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;
        cursor_ref.goto_first_child();
        let fut_val = || {
            let tree = tree.clone();
            async move {
                let root = tree.root_node();
                let mut cursor = tree.walk();
                let cursor_ref = &mut cursor;
                yield_now().await;
                let _ = root.to_sexp();
                cursor_ref.goto_first_child();
            }
        };
        yield_now().await;
        let fut_ref = || {
            let tree = tree.clone();
            async move {
                let root = tree.root_node();
                let root_ref = &root;
                let mut cursor = tree.walk();
                let cursor_ref = &mut cursor;
                yield_now().await;
                let _ = root_ref.to_sexp();
                cursor_ref.goto_first_child();
            }
        };
        let ((), p1) = tokio_like_spawn(fut_val()).await.unwrap();
        let ((), p2) = tokio_like_spawn(fut_ref()).await.unwrap();
        cursor_ref.goto_first_child();
        fut_val().await;
        fut_val().await;
        fut_ref().await;
        cursor_ref.goto_first_child();
        p1 + p2
    })
    .join();
    assert_eq!(pended, 4);
    assert_eq!(ret, 2);
}
/// Drives `future` to completion on the current thread by busy-polling it
/// with a no-op waker — a stand-in for `tokio::spawn` that needs no runtime.
///
/// The returned handle resolves to the future's output paired with the number
/// of times it returned `Pending` before completing.
fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
where
    T: Future + Send + 'static,
    T::Output: Send + 'static,
{
    let waker = noop_waker();
    let mut cx = task::Context::from_waker(&waker);
    let mut future = pin!(future);
    let mut pend_count = 0;
    let output = loop {
        if let Poll::Ready(value) = future.as_mut().poll(&mut cx) {
            break value;
        }
        // Not ready yet: count the suspension and immediately poll again.
        pend_count += 1;
    };
    JoinHandle::new((output, pend_count))
}
/// Suspends the current async fn exactly once, resuming on the next poll.
async fn yield_now() {
    /// A future that is `Pending` on its first poll and `Ready` afterwards.
    struct YieldOnce {
        polled_before: bool,
    }
    impl Future for YieldOnce {
        type Output = ();
        fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
            // Request an immediate re-poll so executors without timers still
            // make progress.
            cx.waker().wake_by_ref();
            if !self.polled_before {
                self.polled_before = true;
                Poll::Pending
            } else {
                Poll::Ready(())
            }
        }
    }
    YieldOnce {
        polled_before: false,
    }
    .await;
}
/// Builds a `Waker` whose wake operations are all no-ops, for polling futures
/// by hand in tests where no real executor exists.
pub const fn noop_waker() -> Waker {
    // All vtable entries ignore their data pointer, which is always null.
    fn wake_noop(_: *const ()) {}
    fn clone_noop(_: *const ()) -> RawWaker {
        NOOP_RAW
    }
    const NOOP_VTABLE: RawWakerVTable =
        RawWakerVTable::new(clone_noop, wake_noop, wake_noop, wake_noop);
    const NOOP_RAW: RawWaker = RawWaker::new(ptr::null(), &NOOP_VTABLE);
    // SAFETY: every vtable function is a no-op (or returns another no-op
    // waker), so the RawWaker contract is trivially upheld.
    unsafe { Waker::from_raw(NOOP_RAW) }
}
/// Minimal stand-in for an executor's join handle: the result is stored
/// inline and handed out exactly once, either via [`JoinHandle::join`] or by
/// awaiting the handle.
struct JoinHandle<T> {
    value: Option<T>,
}
impl<T> JoinHandle<T> {
    #[must_use]
    const fn new(value: T) -> Self {
        Self { value: Some(value) }
    }
    /// Takes the stored result; panics if it was already taken.
    fn join(&mut self) -> T {
        self.value.take().unwrap()
    }
}
impl<T: Unpin> Future for JoinHandle<T> {
    type Output = std::result::Result<T, ()>;
    fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
        // Resolves immediately; panics on a second poll, like join().
        Poll::Ready(Ok(self.get_mut().join()))
    }
}

View file

@ -0,0 +1,439 @@
use std::{collections::HashMap, env, fs};
use tree_sitter::Parser;
use tree_sitter_proc_macro::test_with_seed;
use crate::{
fuzz::{
corpus_test::{
check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
},
edits::{get_random_edit, invert_edit},
flatten_tests, new_seed,
random::Rand,
EDIT_COUNT, EXAMPLE_EXCLUDE, EXAMPLE_INCLUDE, ITERATION_COUNT, LANGUAGE_FILTER,
LOG_GRAPH_ENABLED, START_SEED,
},
parse::perform_edit,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
tests::{
allocations,
helpers::fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
},
};
// One fuzz entry point per bundled grammar. Each parses the language's corpus
// and applies randomized edits driven by `seed`; `retry=10` re-runs with a
// fresh seed from `new_seed` on failure. See `test_language_corpus` below.
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash_language(seed: usize) {
    test_language_corpus(
        "bash",
        seed,
        Some(&[
            // Fragile tests where edit customization changes
            // lead to significant parse tree structure changes.
            "bash - corpus - commands - Nested Heredocs",
            "bash - corpus - commands - Quoted Heredocs",
            "bash - corpus - commands - Heredocs with weird characters",
        ]),
        None,
    );
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_c_language(seed: usize) {
    test_language_corpus("c", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_cpp_language(seed: usize) {
    test_language_corpus("cpp", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_embedded_template_language(seed: usize) {
    test_language_corpus("embedded-template", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_go_language(seed: usize) {
    test_language_corpus("go", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_html_language(seed: usize) {
    test_language_corpus("html", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_java_language(seed: usize) {
    test_language_corpus(
        "java",
        seed,
        Some(&["java - corpus - expressions - switch with unnamed pattern variable"]),
        None,
    );
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_javascript_language(seed: usize) {
    test_language_corpus("javascript", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_json_language(seed: usize) {
    test_language_corpus("json", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php_language(seed: usize) {
    test_language_corpus("php", seed, None, Some("php"));
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_python_language(seed: usize) {
    test_language_corpus("python", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_ruby_language(seed: usize) {
    test_language_corpus("ruby", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_rust_language(seed: usize) {
    test_language_corpus("rust", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_typescript_language(seed: usize) {
    test_language_corpus("typescript", seed, None, Some("typescript"));
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_tsx_language(seed: usize) {
    test_language_corpus("typescript", seed, None, Some("tsx"));
}
/// Fuzzes one grammar against its corpus: parses every example, then for each
/// of `ITERATION_COUNT` trials applies random edits, reparses incrementally,
/// undoes the edits, and verifies the tree returns to the expected
/// S-expression while sizes and changed ranges stay consistent.
///
/// `skipped` names fragile test entries to bypass (each must match at least
/// once); `language_dir` selects a sub-grammar directory (e.g. "tsx").
pub fn test_language_corpus(
    language_name: &str,
    start_seed: usize,
    skipped: Option<&[&str]>,
    language_dir: Option<&str>,
) {
    // Honor the TREE_SITTER_LANGUAGE env filter.
    if let Some(filter) = LANGUAGE_FILTER.as_ref() {
        if language_name != filter {
            return;
        }
    }
    let language_dir = language_dir.unwrap_or_default();
    let grammars_dir = fixtures_dir().join("grammars");
    let error_corpus_dir = fixtures_dir().join("error_corpus");
    let template_corpus_dir = fixtures_dir().join("template_corpus");
    let corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
    println!("Testing {language_name} corpus @ {}", corpus_dir.display());
    let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt"));
    let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt"));
    // Error and template corpora are optional per language.
    let main_tests = parse_tests(&corpus_dir).unwrap();
    let error_tests = parse_tests(&error_corpus_file).unwrap_or_default();
    let template_tests = parse_tests(&template_corpus_file).unwrap_or_default();
    let mut tests = flatten_tests(
        main_tests,
        EXAMPLE_INCLUDE.as_ref(),
        EXAMPLE_EXCLUDE.as_ref(),
    );
    tests.extend(flatten_tests(
        error_tests,
        EXAMPLE_INCLUDE.as_ref(),
        EXAMPLE_EXCLUDE.as_ref(),
    ));
    // Template examples are parsed only between <% ... %> delimiters.
    tests.extend(
        flatten_tests(
            template_tests,
            EXAMPLE_INCLUDE.as_ref(),
            EXAMPLE_EXCLUDE.as_ref(),
        )
        .into_iter()
        .map(|mut t| {
            t.template_delimiters = Some(("<%", "%>"));
            t
        }),
    );
    tests.retain(|t| t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir)));
    // Count how often each skip entry matches so stale entries can be flagged.
    let mut skipped = skipped.map(|x| x.iter().map(|x| (*x, 0)).collect::<HashMap<&str, usize>>());
    let language_path = if language_dir.is_empty() {
        language_name.to_string()
    } else {
        format!("{language_name}/{language_dir}")
    };
    let language = get_language(&language_path);
    let mut failure_count = 0;
    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
    if log_seed {
        println!("  start seed: {start_seed}");
    }
    println!();
    for (test_index, test) in tests.iter().enumerate() {
        let test_name = format!("{language_name} - {}", test.name);
        if let Some(skipped) = skipped.as_mut() {
            if let Some(counter) = skipped.get_mut(test_name.as_str()) {
                println!("  {test_index}. {test_name} - SKIPPED");
                *counter += 1;
                continue;
            }
        }
        println!("  {test_index}. {test_name}");
        // First, a plain (non-incremental) parse must match the corpus,
        // with allocation-leak checking enabled.
        let passed = allocations::record(|| {
            let mut log_session = None;
            let mut parser = get_parser(&mut log_session, "log.html");
            parser.set_language(&language).unwrap();
            set_included_ranges(&mut parser, &test.input, test.template_delimiters);
            let tree = parser.parse(&test.input, None).unwrap();
            let mut actual_output = tree.root_node().to_sexp();
            if !test.has_fields {
                actual_output = strip_sexp_fields(&actual_output);
            }
            if actual_output != test.output {
                println!("Incorrect initial parse for {test_name}");
                print_diff_key();
                print_diff(&actual_output, &test.output, true);
                println!();
                return false;
            }
            true
        })
        .unwrap();
        if !passed {
            failure_count += 1;
            continue;
        }
        let mut parser = Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(&test.input, None).unwrap();
        drop(parser);
        for trial in 0..*ITERATION_COUNT {
            let seed = start_seed + trial;
            let passed = allocations::record(|| {
                let mut rand = Rand::new(seed);
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(&language).unwrap();
                let mut tree = tree.clone();
                let mut input = test.input.clone();
                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }
                // Perform a random series of edits and reparse.
                let mut undo_stack = Vec::new();
                for _ in 0..=rand.unsigned(*EDIT_COUNT) {
                    let edit = get_random_edit(&mut rand, &input);
                    undo_stack.push(invert_edit(&input, &edit));
                    perform_edit(&mut tree, &mut input, &edit).unwrap();
                }
                if log_seed {
                    println!("   {test_index}.{trial:<2} seed: {seed}");
                }
                if dump_edits {
                    fs::write(
                        SCRATCH_BASE_DIR
                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
                        &input,
                    )
                    .unwrap();
                }
                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }
                set_included_ranges(&mut parser, &input, test.template_delimiters);
                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
                // Check that the new tree is consistent.
                check_consistent_sizes(&tree2, &input);
                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
                    return false;
                }
                // Undo all of the edits and re-parse again.
                while let Some(edit) = undo_stack.pop() {
                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
                }
                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }
                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
                // Verify that the final tree matches the expectation from the corpus.
                let mut actual_output = tree3.root_node().to_sexp();
                if !test.has_fields {
                    actual_output = strip_sexp_fields(&actual_output);
                }
                if actual_output != test.output {
                    println!("Incorrect parse for {test_name} - seed {seed}");
                    print_diff_key();
                    print_diff(&actual_output, &test.output, true);
                    println!();
                    return false;
                }
                // Check that the edited tree is consistent.
                check_consistent_sizes(&tree3, &input);
                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
                    return false;
                }
                true
            }).unwrap();
            if !passed {
                failure_count += 1;
                break;
            }
        }
    }
    assert!(
        failure_count == 0,
        "{failure_count} {language_name} corpus tests failed"
    );
    // Every skip entry must have matched at least one test name.
    if let Some(skipped) = skipped.as_mut() {
        skipped.retain(|_, v| *v == 0);
        if !skipped.is_empty() {
            println!("Non matchable skip definitions:");
            for k in skipped.keys() {
                println!("  {k}");
            }
            panic!("Non matchable skip definitions needs to be removed");
        }
    }
}
/// Walks the `test_grammars` fixture directory, generating a parser for each
/// grammar: grammars with an `expected_error.txt` must fail generation with
/// exactly that message, while all others must generate successfully and pass
/// every example in their `corpus.txt`.
#[test]
fn test_feature_corpus_files() {
    let test_grammars_dir = fixtures_dir().join("test_grammars");
    let mut failure_count = 0;
    for entry in fs::read_dir(test_grammars_dir).unwrap() {
        let entry = entry.unwrap();
        if !entry.metadata().unwrap().is_dir() {
            continue;
        }
        let language_name = entry.file_name();
        let language_name = language_name.to_str().unwrap();
        if let Some(filter) = LANGUAGE_FILTER.as_ref() {
            if language_name != filter {
                continue;
            }
        }
        let test_path = entry.path();
        // Grammars may be provided as JS or as pre-processed JSON.
        let mut grammar_path = test_path.join("grammar.js");
        if !grammar_path.exists() {
            grammar_path = test_path.join("grammar.json");
        }
        let error_message_path = test_path.join("expected_error.txt");
        let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
        let generate_result =
            tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0)));
        if error_message_path.exists() {
            if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {
                continue;
            }
            eprintln!("test language: {language_name:?}");
            // Normalize line endings so the comparison is OS-independent.
            let expected_message = fs::read_to_string(&error_message_path)
                .unwrap()
                .replace("\r\n", "\n");
            if let Err(e) = generate_result {
                let actual_message = e.to_string().replace("\r\n", "\n");
                if expected_message != actual_message {
                    eprintln!(
                        "Unexpected error message.\n\nExpected:\n\n{expected_message}\nActual:\n\n{actual_message}\n",
                    );
                    failure_count += 1;
                }
            } else {
                eprintln!("Expected error message but got none for test grammar '{language_name}'",);
                failure_count += 1;
            }
        } else {
            if let Err(e) = &generate_result {
                eprintln!("Unexpected error for test grammar '{language_name}':\n{e}",);
                failure_count += 1;
                continue;
            }
            let corpus_path = test_path.join("corpus.txt");
            let c_code = generate_result.unwrap().1;
            let language = get_test_language(language_name, &c_code, Some(&test_path));
            let test = parse_tests(&corpus_path).unwrap();
            let tests = flatten_tests(test, EXAMPLE_INCLUDE.as_ref(), EXAMPLE_EXCLUDE.as_ref());
            if !tests.is_empty() {
                eprintln!("test language: {language_name:?}");
            }
            for test in tests {
                eprintln!("  example: {:?}", test.name);
                // Parse under allocation-leak checking and compare S-expressions.
                let passed = allocations::record(|| {
                    let mut log_session = None;
                    let mut parser = get_parser(&mut log_session, "log.html");
                    parser.set_language(&language).unwrap();
                    let tree = parser.parse(&test.input, None).unwrap();
                    let mut actual_output = tree.root_node().to_sexp();
                    if !test.has_fields {
                        actual_output = strip_sexp_fields(&actual_output);
                    }
                    if actual_output == test.output {
                        true
                    } else {
                        print_diff_key();
                        print_diff(&actual_output, &test.output, true);
                        println!();
                        false
                    }
                })
                .unwrap();
                if !passed {
                    failure_count += 1;
                }
            }
        }
    }
    assert!(failure_count == 0, "{failure_count} corpus tests failed");
}

View file

@ -0,0 +1,254 @@
use std::{fs, path::Path};
use tree_sitter_loader::Loader;
use crate::tests::helpers::fixtures::scratch_dir;
// Checks that a grammar's `first-line-regex` is used to detect the language of
// a file whose extension matches no grammar, and that an explicit `file-types`
// match on another grammar takes precedence over the regex.
#[test]
fn detect_language_by_first_line_regex() {
    let strace_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "strace",
      "path": ".",
      "scope": "source.strace",
      "file-types": [
        "strace"
      ],
      "first-line-regex": "[0-9:.]* *execve"
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "strace",
    );
    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let config = loader
        .find_language_configurations_at_path(strace_dir.path(), false)
        .unwrap();
    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(config[0].scope.as_ref().unwrap(), "source.strace");
    let file_name = strace_dir.path().join("strace.log");
    fs::write(&file_name, "execve\nworld").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.strace".into())
    );
    let file_name = strace_dir.path().join("strace.log");
    fs::write(&file_name, "447845 execve\nworld").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.strace".into())
    );
    // The regex must match the *first* line only.
    let file_name = strace_dir.path().join("strace.log");
    fs::write(&file_name, "hello\nexecve").unwrap();
    assert!(get_lang_scope(&loader, &file_name).is_none());
    let file_name = strace_dir.path().join("strace.log");
    fs::write(&file_name, "").unwrap();
    assert!(get_lang_scope(&loader, &file_name).is_none());
    let dummy_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "dummy",
      "scope": "source.dummy",
      "path": ".",
      "file-types": [
        "dummy"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "dummy",
    );
    // file-type takes precedence over first-line-regex
    loader
        .find_language_configurations_at_path(dummy_dir.path(), false)
        .unwrap();
    let file_name = dummy_dir.path().join("strace.dummy");
    fs::write(&file_name, "execve").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.dummy".into())
    );
}
// Checks that multi-part "double barrel" extensions like `.blade.php` are
// matched as a whole in `file-types`.
// NOTE(review): "langauge" in the function name is a typo for "language";
// renaming is left out here to keep this change documentation-only.
#[test]
fn detect_langauge_by_double_barrel_file_extension() {
    let blade_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "blade",
      "path": ".",
      "scope": "source.blade",
      "file-types": [
        "blade.php"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "blade",
    );
    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let config = loader
        .find_language_configurations_at_path(blade_dir.path(), false)
        .unwrap();
    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(config[0].scope.as_ref().unwrap(), "source.blade");
    let file_name = blade_dir.path().join("foo.blade.php");
    fs::write(&file_name, "").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.blade".into())
    );
}
// Checks that a bare dotfile name like `.gitignore` can be listed directly in
// `file-types` and detected even though the file has no stem before the dot.
#[test]
fn detect_language_without_filename() {
    let gitignore_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "gitignore",
      "path": ".",
      "scope": "source.gitignore",
      "file-types": [
        ".gitignore"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "gitignore",
    );
    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let config = loader
        .find_language_configurations_at_path(gitignore_dir.path(), false)
        .unwrap();
    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(config[0].scope.as_ref().unwrap(), "source.gitignore");
    let file_name = gitignore_dir.path().join(".gitignore");
    fs::write(&file_name, "").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.gitignore".into())
    );
}
// Checks that a full extension-less file name like `ssh_config` can be listed
// in `file-types` and detected by exact name match.
#[test]
fn detect_language_without_file_extension() {
    let ssh_config_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "ssh_config",
      "path": ".",
      "scope": "source.ssh_config",
      "file-types": [
        "ssh_config"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "ssh_config",
    );
    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let config = loader
        .find_language_configurations_at_path(ssh_config_dir.path(), false)
        .unwrap();
    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(config[0].scope.as_ref().unwrap(), "source.ssh_config");
    let file_name = ssh_config_dir.path().join("ssh_config");
    fs::write(&file_name, "").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.ssh_config".into())
    );
}
/// Creates a throw-away grammar directory containing the given
/// `tree-sitter.json`, a minimal `src/grammar.json`, a stub `src/parser.c`
/// exporting `tree_sitter_<name>`, and the real `parser.h`, so the loader can
/// treat it as a compilable grammar.
fn tree_sitter_dir(tree_sitter_json: &str, name: &str) -> tempfile::TempDir {
    let temp_dir = tempfile::tempdir().unwrap();
    fs::write(temp_dir.path().join("tree-sitter.json"), tree_sitter_json).unwrap();
    fs::create_dir_all(temp_dir.path().join("src/tree_sitter")).unwrap();
    fs::write(
        temp_dir.path().join("src/grammar.json"),
        format!(r#"{{"name":"{name}"}}"#),
    )
    .unwrap();
    // Stub parser: exports the expected symbol but returns nothing usable.
    fs::write(
        temp_dir.path().join("src/parser.c"),
        format!(
            r#"
#include "tree_sitter/parser.h"
#ifdef _WIN32
#define TS_PUBLIC __declspec(dllexport)
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif
TS_PUBLIC const TSLanguage *tree_sitter_{name}() {{}}
"#
        ),
    )
    .unwrap();
    fs::write(
        temp_dir.path().join("src/tree_sitter/parser.h"),
        include_str!("../../../../lib/src/parser.h"),
    )
    .unwrap();
    temp_dir
}
// If we manage to get the language scope, it means we correctly detected the file-type
fn get_lang_scope(loader: &Loader, file_name: &Path) -> Option<String> {
    // Prefer detection by file name; fall back to the first-line regex.
    match loader.language_configuration_for_file_name(file_name) {
        Ok(Some((_, config))) => config.scope.clone(),
        Ok(None) => match loader.language_configuration_for_first_line_regex(file_name) {
            Ok(Some((_, config))) => config.scope.clone(),
            _ => None,
        },
        Err(_) => None,
    }
}

View file

@ -0,0 +1,4 @@
pub mod allocations;
pub mod edits;
pub(super) mod fixtures;
pub(super) mod query_helpers;

View file

@ -0,0 +1,121 @@
use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
/// Installs the allocation-recording hooks into tree-sitter's allocator before
/// `main` runs (`ctor` makes this a pre-main constructor), so every
/// tree-sitter allocation in this test binary goes through the recorder.
#[ctor::ctor]
unsafe fn initialize_allocation_recording() {
    tree_sitter::set_allocator(
        Some(ts_record_malloc),
        Some(ts_record_calloc),
        Some(ts_record_realloc),
        Some(ts_record_free),
    );
}
/// Wrapper around a raw allocation pointer so it can serve as a hash-map key.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Allocation(*const c_void);
// SAFETY: the pointer is only used as an opaque identity (hashed and
// compared), never dereferenced, so sharing it across threads is sound.
unsafe impl Send for Allocation {}
unsafe impl Sync for Allocation {}
/// Per-thread allocation bookkeeping used by [`record`].
#[derive(Default)]
struct AllocationRecorder {
    // Whether allocations on this thread are currently being tracked.
    enabled: AtomicBool,
    // Monotonic index assigned to each allocation while recording.
    allocation_count: AtomicUsize,
    // Live allocations mapped to the index they were created with.
    outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}
thread_local! {
    static RECORDER: AllocationRecorder = AllocationRecorder::default();
}
// The real C allocator entry points, which the recording wrappers below
// delegate to.
extern "C" {
    fn malloc(size: usize) -> *mut c_void;
    fn calloc(count: usize, size: usize) -> *mut c_void;
    fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
    fn free(ptr: *mut c_void);
}
/// Runs `f` while recording every allocation made through the tree-sitter
/// allocator on this thread, and panics if any allocation made inside `f`
/// was never freed.
pub fn record<T>(f: impl FnOnce() -> T) -> T {
    // Arm the recorder with a clean slate before running the closure.
    RECORDER.with(|state| {
        state.enabled.store(true, SeqCst);
        state.allocation_count.store(0, SeqCst);
        state.outstanding_allocations.lock().unwrap().clear();
    });
    let result = f();
    // Disarm, then collect the indices of allocations that were never freed.
    let outstanding_allocation_indices = RECORDER.with(|state| {
        state.enabled.store(false, SeqCst);
        state.allocation_count.store(0, SeqCst);
        let mut live = state.outstanding_allocations.lock().unwrap();
        live.drain().map(|(_, index)| index).collect::<Vec<_>>()
    });
    assert!(
        outstanding_allocation_indices.is_empty(),
        "Leaked allocation indices: {outstanding_allocation_indices:?}"
    );
    result
}
/// Notes `ptr` as a live allocation if recording is active on this thread.
fn record_alloc(ptr: *mut c_void) {
    RECORDER.with(|state| {
        if !state.enabled.load(SeqCst) {
            return;
        }
        // Tag the pointer with a monotonically increasing allocation index.
        let index = state.allocation_count.fetch_add(1, SeqCst);
        state
            .outstanding_allocations
            .lock()
            .unwrap()
            .insert(Allocation(ptr), index);
    });
}
/// Removes `ptr` from the live-allocation table if recording is active.
fn record_dealloc(ptr: *mut c_void) {
    RECORDER.with(|state| {
        if !state.enabled.load(SeqCst) {
            return;
        }
        state
            .outstanding_allocations
            .lock()
            .unwrap()
            .remove(&Allocation(ptr));
    });
}
/// `malloc` wrapper that records the new allocation.
unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
    let result = malloc(size);
    record_alloc(result);
    result
}
/// `calloc` wrapper that records the new allocation.
unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
    let result = calloc(count, size);
    record_alloc(result);
    result
}
/// `realloc` wrapper that keeps the live-allocation table in sync: a null
/// `ptr` acts like `malloc`; a moved block is recorded as a free of the old
/// pointer plus a new allocation; an in-place resize leaves the table as-is.
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
    let result = realloc(ptr, size);
    if ptr.is_null() {
        record_alloc(result);
    } else if !core::ptr::eq(ptr, result) {
        record_dealloc(ptr);
        record_alloc(result);
    }
    result
}
/// `free` wrapper that forgets the allocation before releasing it.
unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
    record_dealloc(ptr);
    free(ptr);
}

View file

@ -0,0 +1,65 @@
/// Repository root: two levels above this crate's manifest directory.
pub static ROOT_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .parent()
        .unwrap()
        .parent()
        .unwrap()
        .to_owned()
});
/// `<repo>/test/fixtures` — grammars, corpora, and test grammars.
pub static FIXTURES_DIR: LazyLock<PathBuf> =
    LazyLock::new(|| ROOT_DIR.join("test").join("fixtures"));
/// `<repo>/lib/include` — the public C headers.
pub static HEADER_DIR: LazyLock<PathBuf> = LazyLock::new(|| ROOT_DIR.join("lib").join("include"));
/// `<repo>/test/fixtures/grammars` — the bundled real-world grammars.
pub static GRAMMARS_DIR: LazyLock<PathBuf> =
    LazyLock::new(|| ROOT_DIR.join("test").join("fixtures").join("grammars"));
/// `<repo>/target/scratch` — created eagerly on first access.
pub static SCRATCH_BASE_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    let result = ROOT_DIR.join("target").join("scratch");
    fs::create_dir_all(&result).unwrap();
    result
});
#[cfg(feature = "wasm")]
pub static WASM_DIR: LazyLock<PathBuf> = LazyLock::new(|| ROOT_DIR.join("target").join("release"));
/// Per-target scratch subdirectory, keyed by arch/OS/vendor/env/endianness so
/// artifacts from different toolchains don't collide.
pub static SCRATCH_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    // https://doc.rust-lang.org/reference/conditional-compilation.html
    let vendor = if cfg!(target_vendor = "apple") {
        "apple"
    } else if cfg!(target_vendor = "fortanix") {
        "fortanix"
    } else if cfg!(target_vendor = "pc") {
        "pc"
    } else {
        "unknown"
    };
    let env = if cfg!(target_env = "gnu") {
        "gnu"
    } else if cfg!(target_env = "msvc") {
        "msvc"
    } else if cfg!(target_env = "musl") {
        "musl"
    } else if cfg!(target_env = "sgx") {
        "sgx"
    } else {
        "unknown"
    };
    let endian = if cfg!(target_endian = "little") {
        "little"
    } else if cfg!(target_endian = "big") {
        "big"
    } else {
        "unknown"
    };
    let machine = format!(
        "{}-{}-{vendor}-{env}-{endian}",
        std::env::consts::ARCH,
        std::env::consts::OS
    );
    let result = SCRATCH_BASE_DIR.join(machine);
    fs::create_dir_all(&result).unwrap();
    result
});

View file

@ -0,0 +1,49 @@
use std::{ops::Range, str};
/// Wraps a byte buffer and records which byte offsets have been read,
/// so tests can verify exactly what portions of a document were consumed.
#[derive(Debug)]
pub struct ReadRecorder<'a> {
    content: &'a [u8],
    // Sorted, deduplicated byte offsets that have been read so far.
    indices_read: Vec<usize>,
}

impl<'a> ReadRecorder<'a> {
    #[must_use]
    pub const fn new(content: &'a [u8]) -> Self {
        Self {
            content,
            indices_read: Vec::new(),
        }
    }

    /// Returns the single byte at `offset` (or an empty slice past the end),
    /// recording the offset so that `strings_read` can report coverage.
    pub fn read(&mut self, offset: usize) -> &'a [u8] {
        if offset < self.content.len() {
            // Insert at the binary-search position to keep `indices_read`
            // sorted and free of duplicates.
            if let Err(i) = self.indices_read.binary_search(&offset) {
                self.indices_read.insert(i, offset);
            }
            &self.content[offset..(offset + 1)]
        } else {
            &[]
        }
    }

    /// Returns each contiguous run of read bytes as a UTF-8 string,
    /// in ascending offset order.
    pub fn strings_read(&self) -> Vec<&'a str> {
        let mut result = Vec::new();
        let mut last_range = Option::<Range<usize>>::None;
        for &index in &self.indices_read {
            match &mut last_range {
                // Extends the current contiguous run.
                Some(range) if range.end == index => range.end += 1,
                // Gap: flush the finished run and start a new run at `index`.
                // (Previously this reset `last_range` to `None`, silently
                // dropping the first byte of every run after a gap.)
                Some(range) => {
                    result.push(str::from_utf8(&self.content[range.clone()]).unwrap());
                    last_range = Some(index..(index + 1));
                }
                None => last_range = Some(index..(index + 1)),
            }
        }
        if let Some(range) = last_range {
            result.push(str::from_utf8(&self.content[range]).unwrap());
        }
        result
    }
}

View file

@ -0,0 +1,140 @@
use std::{
env, fs,
path::{Path, PathBuf},
sync::LazyLock,
};
use anyhow::Context;
use tree_sitter::Language;
use tree_sitter_generate::{load_grammar_file, ALLOC_HEADER, ARRAY_HEADER};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::{CompileConfig, Loader};
use tree_sitter_tags::TagsConfiguration;
use crate::tests::generate_parser;
include!("./dirs.rs");
// Shared loader that compiles fixture grammars into SCRATCH_DIR; built once
// per process. Debug builds are enabled via TREE_SITTER_GRAMMAR_DEBUG.
static TEST_LOADER: LazyLock<Loader> = LazyLock::new(|| {
    let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
    if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
        loader.debug_build(true);
    }
    loader
});
// Accessor for the process-wide fixture loader.
pub fn test_loader() -> &'static Loader {
    &TEST_LOADER
}
// Accessor for the checked-in fixtures directory.
pub fn fixtures_dir() -> &'static Path {
    &FIXTURES_DIR
}
// Accessor for the per-target scratch directory.
pub fn scratch_dir() -> &'static Path {
    &SCRATCH_DIR
}
// Compiles (or reuses a cached build of) the fixture grammar `name` and
// returns the loaded language.
pub fn get_language(name: &str) -> Language {
    let src_dir = GRAMMARS_DIR.join(name).join("src");
    let mut config = CompileConfig::new(&src_dir, None, None);
    config.header_paths.push(&HEADER_DIR);
    TEST_LOADER.load_language_at_path(config).unwrap()
}
// Generates a parser from a test grammar's `grammar.js` and loads it,
// including its `scanner.c` if the grammar directory has one.
pub fn get_test_fixture_language(name: &str) -> Language {
    let grammar_dir_path = fixtures_dir().join("test_grammars").join(name);
    let grammar_json = load_grammar_file(&grammar_dir_path.join("grammar.js"), None).unwrap();
    let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
    get_test_language(&parser_name, &parser_code, Some(&grammar_dir_path))
}
// Path to a fixture grammar's `queries` directory.
pub fn get_language_queries_path(language_name: &str) -> PathBuf {
    GRAMMARS_DIR.join(language_name).join("queries")
}
/// Builds a configured `HighlightConfiguration` for the fixture grammar
/// `language_name`, reading its query files from the grammar's `queries` dir.
/// `injection_query_filename` selects an optional injections query; locals
/// are optional and default to empty when `locals.scm` is absent.
pub fn get_highlight_config(
    language_name: &str,
    injection_query_filename: Option<&str>,
    highlight_names: &[String],
) -> HighlightConfiguration {
    let language = get_language(language_name);
    let queries_path = get_language_queries_path(language_name);
    let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap();
    let injections_query = match injection_query_filename {
        Some(filename) => fs::read_to_string(queries_path.join(filename)).unwrap(),
        None => String::new(),
    };
    let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default();
    let mut config = HighlightConfiguration::new(
        language,
        language_name,
        &highlights_query,
        &injections_query,
        &locals_query,
    )
    .unwrap();
    config.configure(highlight_names);
    config
}
// Builds a `TagsConfiguration` for the fixture grammar, using its `tags.scm`
// and (optionally) `locals.scm` query files.
pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
    let language = get_language(language_name);
    let queries_path = get_language_queries_path(language_name);
    let tags_query = fs::read_to_string(queries_path.join("tags.scm")).unwrap();
    // Locals are optional; fall back to an empty query when the file is absent.
    let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default();
    TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}
// Writes a generated parser (and optional scanner) into a per-grammar scratch
// directory, lays down the tree-sitter support headers, and compiles/loads the
// resulting language. Files are only rewritten when their content changed, so
// the loader's build cache stays warm across runs.
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
    let src_dir = scratch_dir().join("src").join(name);
    fs::create_dir_all(&src_dir).unwrap();
    // Avoid touching parser.c when unchanged to preserve build caching.
    let parser_path = src_dir.join("parser.c");
    if !fs::read_to_string(&parser_path).is_ok_and(|content| content == parser_code) {
        fs::write(&parser_path, parser_code).unwrap();
    }
    // Copy the grammar's external scanner into the scratch dir, if present.
    let scanner_path = if let Some(path) = path {
        let scanner_path = path.join("scanner.c");
        if scanner_path.exists() {
            let scanner_code = fs::read_to_string(&scanner_path).unwrap();
            let scanner_copy_path = src_dir.join("scanner.c");
            if !fs::read_to_string(&scanner_copy_path).is_ok_and(|content| content == scanner_code)
            {
                fs::write(&scanner_copy_path, scanner_code).unwrap();
            }
            Some(scanner_copy_path)
        } else {
            None
        }
    } else {
        None
    };
    // Provide the headers generated parsers #include as "tree_sitter/...".
    let header_path = src_dir.join("tree_sitter");
    fs::create_dir_all(&header_path).unwrap();
    for (file, content) in [
        ("alloc.h", ALLOC_HEADER),
        ("array.h", ARRAY_HEADER),
        ("parser.h", tree_sitter::PARSER_HEADER),
    ] {
        let file = header_path.join(file);
        fs::write(&file, content)
            .with_context(|| format!("Failed to write {:?}", file.file_name().unwrap()))
            .unwrap();
    }
    // The loader rebuilds only when one of these sources is newer than the lib.
    let paths_to_check = if let Some(scanner_path) = &scanner_path {
        vec![parser_path, scanner_path.clone()]
    } else {
        vec![parser_path]
    };
    let mut config = CompileConfig::new(&src_dir, Some(&paths_to_check), None);
    config.header_paths = vec![&HEADER_DIR];
    config.name = name.to_string();
    TEST_LOADER.load_language_at_path_with_name(config).unwrap()
}

View file

@ -0,0 +1,363 @@
use std::{cmp::Ordering, fmt::Write, ops::Range};
use rand::prelude::Rng;
use streaming_iterator::{IntoStreamingIterator, StreamingIterator};
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};
// A randomly generated tree-query pattern, mirroring the S-expression query
// syntax: optional node kind, named/anonymous flag, optional field name,
// optional capture, and ordered child patterns.
#[derive(Debug)]
pub struct Pattern {
    kind: Option<&'static str>,
    named: bool,
    field: Option<&'static str>,
    capture: Option<String>,
    children: Vec<Pattern>,
}
// A query match: the captured (name, node) pairs, plus the last node matched
// (used only for ordering; cleared before deduplication).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Match<'a, 'tree> {
    pub captures: Vec<(&'a str, Node<'tree>)>,
    pub last_node: Option<Node<'tree>>,
}
// Pool of capture names used by randomly generated patterns.
const CAPTURE_NAMES: &[&str] = &[
    "one", "two", "three", "four", "five", "six", "seven", "eight",
];
impl Pattern {
    /// Generates a random pattern anchored at a random node of `tree`,
    /// returning the pattern and the range of positions it was built from.
    pub fn random_pattern_in_tree(tree: &Tree, rng: &mut impl Rng) -> (Self, Range<Point>) {
        let mut cursor = tree.walk();
        // Descend to the node at a random byte offset and depth.
        let mut max_depth = 0;
        let byte_offset = rng.gen_range(0..cursor.node().end_byte());
        while cursor.goto_first_child_for_byte(byte_offset).is_some() {
            max_depth += 1;
        }
        let depth = rng.gen_range(0..=max_depth);
        for _ in 0..depth {
            cursor.goto_parent();
        }
        // Build a pattern that matches that node.
        // Sometimes include subsequent siblings of the node.
        let pattern_start = cursor.node().start_position();
        let mut roots = vec![Self::random_pattern_for_node(&mut cursor, rng)];
        while roots.len() < 5 && cursor.goto_next_sibling() {
            if rng.gen_bool(0.2) {
                roots.push(Self::random_pattern_for_node(&mut cursor, rng));
            }
        }
        let pattern_end = cursor.node().end_position();
        // Wrap the sibling patterns in an anonymous parenthesized list.
        let mut pattern = Self {
            kind: None,
            named: true,
            field: None,
            capture: None,
            children: roots,
        };
        // NOTE(review): when there are multiple roots and the first is an
        // anonymous `_`, this pops only the *last* root — presumably
        // intentional for fuzzing, but worth confirming.
        if pattern.children.len() == 1 ||
            // In a parenthesized list of sibling patterns, the first
            // sibling can't be an anonymous `_` wildcard.
            (pattern.children[0].kind == Some("_") && !pattern.children[0].named)
        {
            pattern = pattern.children.pop().unwrap();
        }
        // In a parenthesized list of sibling patterns, the first
        // sibling can't have a field name.
        else {
            pattern.children[0].field = None;
        }
        (pattern, pattern_start..pattern_end)
    }
    /// Builds a random pattern for the node under `cursor`, randomly choosing
    /// kind vs. wildcard, field, capture, and a subset of child patterns.
    fn random_pattern_for_node(cursor: &mut TreeCursor, rng: &mut impl Rng) -> Self {
        let node = cursor.node();
        // Sometimes specify the node's type, sometimes use a wildcard.
        let (kind, named) = if rng.gen_bool(0.9) {
            (Some(node.kind()), node.is_named())
        } else {
            (Some("_"), node.is_named() && rng.gen_bool(0.8))
        };
        // Sometimes specify the node's field.
        let field = if rng.gen_bool(0.75) {
            cursor.field_name()
        } else {
            None
        };
        // Sometimes capture the node.
        let capture = if rng.gen_bool(0.7) {
            Some(CAPTURE_NAMES[rng.gen_range(0..CAPTURE_NAMES.len())].to_string())
        } else {
            None
        };
        // Walk the children and include child patterns for some of them.
        // NOTE(review): the first child is always skipped because
        // `goto_next_sibling` runs before any child is considered — confirm
        // that this is intended randomness rather than an oversight.
        let mut children = Vec::new();
        if named && cursor.goto_first_child() {
            let max_children = rng.gen_range(0..4);
            while cursor.goto_next_sibling() {
                if rng.gen_bool(0.6) {
                    let child_ast = Self::random_pattern_for_node(cursor, rng);
                    children.push(child_ast);
                    if children.len() >= max_children {
                        break;
                    }
                }
            }
            cursor.goto_parent();
        }
        Self {
            kind,
            named,
            field,
            capture,
            children,
        }
    }
    /// Renders this pattern as query S-expression syntax into `string`,
    /// indenting nested children by two spaces per level.
    fn write_to_string(&self, string: &mut String, indent: usize) {
        if let Some(field) = self.field {
            write!(string, "{field}: ").unwrap();
        }
        if self.named {
            string.push('(');
            let mut has_contents = false;
            if let Some(kind) = &self.kind {
                write!(string, "{kind}").unwrap();
                has_contents = true;
            }
            for child in &self.children {
                let indent = indent + 2;
                if has_contents {
                    string.push('\n');
                    string.push_str(&" ".repeat(indent));
                }
                child.write_to_string(string, indent);
                has_contents = true;
            }
            string.push(')');
        } else if self.kind == Some("_") {
            string.push('_');
        } else {
            // Anonymous leaf: render as a quoted literal, escaping quotes.
            write!(string, "\"{}\"", self.kind.unwrap().replace('\"', "\\\"")).unwrap();
        }
        if let Some(capture) = &self.capture {
            write!(string, " @{capture}").unwrap();
        }
    }
    /// Returns all matches of this pattern anywhere in `tree`, sorted and
    /// deduplicated — the reference result the real query engine is checked
    /// against.
    pub fn matches_in_tree<'tree>(&self, tree: &'tree Tree) -> Vec<Match<'_, 'tree>> {
        let mut matches = Vec::new();
        // Compute the matches naively: walk the tree and
        // retry the entire pattern for each node.
        let mut cursor = tree.walk();
        let mut ascending = false;
        loop {
            if ascending {
                if cursor.goto_next_sibling() {
                    ascending = false;
                } else if !cursor.goto_parent() {
                    break;
                }
            } else {
                let matches_here = self.match_node(&mut cursor);
                matches.extend_from_slice(&matches_here);
                if !cursor.goto_first_child() {
                    ascending = true;
                }
            }
        }
        // `last_node` only exists for ordering; clear it before deduping so
        // matches with identical captures compare equal.
        matches.sort_unstable();
        matches.iter_mut().for_each(|m| m.last_node = None);
        matches.dedup();
        matches
    }
    /// Tries to match this pattern against the node under `cursor`, returning
    /// every distinct combination of child-pattern-to-child-node assignments.
    pub fn match_node<'tree>(&self, cursor: &mut TreeCursor<'tree>) -> Vec<Match<'_, 'tree>> {
        let node = cursor.node();
        // If a kind is specified, check that it matches the node.
        if let Some(kind) = self.kind {
            if kind == "_" {
                if self.named && !node.is_named() {
                    return Vec::new();
                }
            } else if kind != node.kind() || self.named != node.is_named() {
                return Vec::new();
            }
        }
        // If a field is specified, check that it matches the node.
        if let Some(field) = self.field {
            if cursor.field_name() != Some(field) {
                return Vec::new();
            }
        }
        // Create a match for the current node.
        let mat = Match {
            captures: self
                .capture
                .as_ref()
                .map_or_else(Vec::new, |name| vec![(name.as_str(), node)]),
            last_node: Some(node),
        };
        // If there are no child patterns to match, then return this single match.
        if self.children.is_empty() {
            return vec![mat];
        }
        // Find every matching combination of child patterns and child nodes.
        // `match_states` holds partial matches paired with the index of the
        // next child pattern they still need to satisfy.
        let mut finished_matches = Vec::<Match>::new();
        if cursor.goto_first_child() {
            let mut match_states = vec![(0, mat)];
            loop {
                let mut new_match_states = Vec::new();
                for (pattern_index, mat) in &match_states {
                    let child_pattern = &self.children[*pattern_index];
                    let child_matches = child_pattern.match_node(cursor);
                    for child_match in child_matches {
                        let mut combined_match = mat.clone();
                        combined_match.last_node = child_match.last_node;
                        combined_match
                            .captures
                            .extend_from_slice(&child_match.captures);
                        if pattern_index + 1 < self.children.len() {
                            new_match_states.push((*pattern_index + 1, combined_match));
                        } else {
                            // All child patterns satisfied; merge with any
                            // existing match that has the same captures.
                            let mut existing = false;
                            for existing_match in &mut finished_matches {
                                if existing_match.captures == combined_match.captures {
                                    if child_pattern.capture.is_some() {
                                        existing_match.last_node = combined_match.last_node;
                                    }
                                    existing = true;
                                }
                            }
                            if !existing {
                                finished_matches.push(combined_match);
                            }
                        }
                    }
                }
                match_states.extend_from_slice(&new_match_states);
                if !cursor.goto_next_sibling() {
                    break;
                }
            }
            cursor.goto_parent();
        }
        finished_matches
    }
}
impl std::fmt::Display for Pattern {
    /// Renders the pattern as query S-expression text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut rendered = String::new();
        self.write_to_string(&mut rendered, 0);
        f.write_str(&rendered)
    }
}
impl PartialOrd for Match<'_, '_> {
    // Delegates to `Ord`, as required for consistency.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Match<'_, '_> {
    // Tree-sitter returns matches in the order that they terminate
    // during a depth-first walk of the tree. If multiple matches
    // terminate on the same node, those matches are produced in the
    // order that their captures were discovered.
    fn cmp(&self, other: &Self) -> Ordering {
        // Primary key: the node each match terminated on (when both are set).
        if let Some((last_node_a, last_node_b)) = self.last_node.zip(other.last_node) {
            let cmp = compare_depth_first(last_node_a, last_node_b);
            if cmp.is_ne() {
                return cmp;
            }
        }
        // Secondary key: pairwise capture order, then capture count.
        for (a, b) in self.captures.iter().zip(other.captures.iter()) {
            let cmp = compare_depth_first(a.1, b.1);
            if !cmp.is_eq() {
                return cmp;
            }
        }
        self.captures.len().cmp(&other.captures.len())
    }
}
/// Orders two nodes by depth-first discovery: earlier start byte first;
/// on ties, the longer (enclosing) node first.
fn compare_depth_first(a: Node, b: Node) -> Ordering {
    let (range_a, range_b) = (a.byte_range(), b.byte_range());
    range_a
        .start
        .cmp(&range_b.start)
        .then_with(|| range_b.end.cmp(&range_a.end))
}
/// Parses `source` with `language`, runs `query` over the whole tree, and
/// asserts that the collected (pattern index, captures) pairs equal `expected`.
///
/// # Panics
/// Panics if parsing fails, the results differ, or the cursor hit its match
/// limit (which would make the comparison meaningless).
pub fn assert_query_matches(
    language: &Language,
    query: &Query,
    source: &str,
    expected: &[(usize, Vec<(&str, &str)>)],
) {
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();
    let mut cursor = QueryCursor::new();
    let matches = cursor.matches(query, tree.root_node(), source.as_bytes());
    pretty_assertions::assert_eq!(expected, collect_matches(matches, query, source));
    // Idiomatic boolean assertion (was `assert_eq!(false, ...)`).
    assert!(!cursor.did_exceed_match_limit());
}
// Drains a streaming iterator of query matches into plain
// (pattern index, formatted captures) pairs for easy assertion.
pub fn collect_matches<'a>(
    mut matches: impl StreamingIterator<Item = QueryMatch<'a, 'a>>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
    let mut result = Vec::new();
    // StreamingIterator has no `for` support; drain with `while let`.
    while let Some(m) = matches.next() {
        result.push((
            m.pattern_index,
            format_captures(m.captures.iter().into_streaming_iter_ref(), query, source),
        ));
    }
    result
}
// Formats the single indexed capture of each (match, capture-index) pair.
pub fn collect_captures<'a>(
    captures: impl StreamingIterator<Item = (QueryMatch<'a, 'a>, usize)>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(&'a str, &'a str)> {
    format_captures(captures.map(|(m, i)| m.captures[*i]), query, source)
}
// Maps each capture to (capture name, captured source text).
fn format_captures<'a>(
    mut captures: impl StreamingIterator<Item = QueryCapture<'a>>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(&'a str, &'a str)> {
    let mut result = Vec::new();
    while let Some(capture) = captures.next() {
        result.push((
            query.capture_names()[capture.index as usize],
            capture.node.utf8_text(source.as_bytes()).unwrap(),
        ));
    }
    result
}

View file

@ -0,0 +1,786 @@
use std::{
ffi::CString,
fs,
os::raw::c_char,
ptr, slice, str,
sync::{
atomic::{AtomicUsize, Ordering},
LazyLock,
},
};
use tree_sitter_highlight::{
c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer,
};
use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
// Lazily-built highlight configurations for the fixture grammars used below.
static JS_HIGHLIGHT: LazyLock<HighlightConfiguration> =
    LazyLock::new(|| get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES));
static JSDOC_HIGHLIGHT: LazyLock<HighlightConfiguration> =
    LazyLock::new(|| get_highlight_config("jsdoc", None, &HIGHLIGHT_NAMES));
static HTML_HIGHLIGHT: LazyLock<HighlightConfiguration> =
    LazyLock::new(|| get_highlight_config("html", Some("injections.scm"), &HIGHLIGHT_NAMES));
static EJS_HIGHLIGHT: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
    get_highlight_config(
        "embedded-template",
        Some("injections-ejs.scm"),
        &HIGHLIGHT_NAMES,
    )
});
static RUST_HIGHLIGHT: LazyLock<HighlightConfiguration> =
    LazyLock::new(|| get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES));
// The recognized highlight names; indices into this list are the
// `Highlight` values produced by the configurations above.
static HIGHLIGHT_NAMES: LazyLock<Vec<String>> = LazyLock::new(|| {
    [
        "attribute",
        "boolean",
        "carriage-return",
        "comment",
        "constant",
        "constant.builtin",
        "constructor",
        "embedded",
        "function",
        "function.builtin",
        "keyword",
        "module",
        "number",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.special",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.parameter",
    ]
    .iter()
    .copied()
    .map(String::from)
    .collect()
});
// HTML attribute text (`class=...`) emitted for each highlight index.
static HTML_ATTRS: LazyLock<Vec<String>> = LazyLock::new(|| {
    HIGHLIGHT_NAMES
        .iter()
        .map(|s| format!("class={s}"))
        .collect()
});
// Plain JS source: keywords, operators, punctuation, and variables
// all resolve to the expected highlight names.
#[test]
fn test_highlighting_javascript() {
    let source = "const a = function(b) { return b + c; }";
    assert_eq!(
        &to_token_vector(source, &JS_HIGHLIGHT).unwrap(),
        &[vec![
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("a", vec!["function"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("function", vec!["keyword"]),
            ("(", vec!["punctuation.bracket"]),
            ("b", vec!["variable"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("{", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("return", vec!["keyword"]),
            (" ", vec![]),
            ("b", vec!["variable"]),
            (" ", vec![]),
            ("+", vec!["operator"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("}", vec!["punctuation.bracket"]),
        ]]
    );
}
// HTML injected into a JS template literal: inner tokens carry both the
// outer ("string") and inner highlight names.
#[test]
fn test_highlighting_injected_html_in_javascript() {
    let source = ["const s = html `<div>${a < b}</div>`;"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
        &[vec![
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("s", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("html", vec!["function"]),
            (" ", vec![]),
            ("`", vec!["string"]),
            ("<", vec!["string", "punctuation.bracket"]),
            ("div", vec!["string", "tag"]),
            (">", vec!["string", "punctuation.bracket"]),
            ("${", vec!["string", "embedded", "punctuation.special"]),
            ("a", vec!["string", "embedded", "variable"]),
            (" ", vec!["string", "embedded"]),
            ("<", vec!["string", "embedded", "operator"]),
            (" ", vec!["string", "embedded"]),
            ("b", vec!["string", "embedded", "variable"]),
            ("}", vec!["string", "embedded", "punctuation.special"]),
            ("</", vec!["string", "punctuation.bracket"]),
            ("div", vec!["string", "tag"]),
            (">", vec!["string", "punctuation.bracket"]),
            ("`", vec!["string"]),
            (";", vec!["punctuation.delimiter"]),
        ]]
    );
}
// JS injected into an HTML <script> tag, single-line case.
#[test]
fn test_highlighting_injected_javascript_in_html_mini() {
    let source = "<script>const x = new Thing();</script>";
    assert_eq!(
        &to_token_vector(source, &HTML_HIGHLIGHT).unwrap(),
        &[vec![
            ("<", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("x", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("new", vec!["keyword"]),
            (" ", vec![]),
            ("Thing", vec!["constructor"]),
            ("(", vec!["punctuation.bracket"]),
            (")", vec!["punctuation.bracket"]),
            (";", vec!["punctuation.delimiter"]),
            ("</", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ],]
    );
}
// Multi-line HTML with an injected <script>; tokens are grouped per line.
#[test]
fn test_highlighting_injected_javascript_in_html() {
    let source = [
        "<body>",
        "  <script>",
        "    const x = new Thing();",
        "  </script>",
        "</body>",
    ]
    .join("\n");
    assert_eq!(
        &to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(),
        &[
            vec![
                ("<", vec!["punctuation.bracket"]),
                ("body", vec!["tag"]),
                (">", vec!["punctuation.bracket"]),
            ],
            vec![
                ("  ", vec![]),
                ("<", vec!["punctuation.bracket"]),
                ("script", vec!["tag"]),
                (">", vec!["punctuation.bracket"]),
            ],
            vec![
                ("    ", vec![]),
                ("const", vec!["keyword"]),
                (" ", vec![]),
                ("x", vec!["variable"]),
                (" ", vec![]),
                ("=", vec!["operator"]),
                (" ", vec![]),
                ("new", vec!["keyword"]),
                (" ", vec![]),
                ("Thing", vec!["constructor"]),
                ("(", vec!["punctuation.bracket"]),
                (")", vec!["punctuation.bracket"]),
                (";", vec!["punctuation.delimiter"]),
            ],
            vec![
                ("  ", vec![]),
                ("</", vec!["punctuation.bracket"]),
                ("script", vec!["tag"]),
                (">", vec!["punctuation.bracket"]),
            ],
            vec![
                ("</", vec!["punctuation.bracket"]),
                ("body", vec!["tag"]),
                (">", vec!["punctuation.bracket"]),
            ],
        ]
    );
}
// HTML rendering: spans that cross line boundaries are closed and reopened
// so that every output line is self-contained.
#[test]
fn test_highlighting_multiline_nodes_to_html() {
    let source = [
        "const SOMETHING = `",
        "  one ${",
        "    two()",
        "  } three",
        "`",
        "",
    ]
    .join("\n");
    assert_eq!(
        &to_html(&source, &JS_HIGHLIGHT).unwrap(),
        &[
            "<span class=keyword>const</span> <span class=constant>SOMETHING</span> <span class=operator>=</span> <span class=string>`</span>\n".to_string(),
            "<span class=string>  one <span class=embedded><span class=punctuation.special>${</span></span></span>\n".to_string(),
            "<span class=string><span class=embedded>    <span class=function>two</span><span class=punctuation.bracket>(</span><span class=punctuation.bracket>)</span></span></span>\n".to_string(),
            "<span class=string><span class=embedded>  <span class=punctuation.special>}</span></span> three</span>\n".to_string(),
            "<span class=string>`</span>\n".to_string(),
        ]
    );
}
// Locals tracking: a local definition of `module` shadows the builtin, and
// `b` stays highlighted as the parameter defined above.
#[test]
fn test_highlighting_with_local_variable_tracking() {
    let source = [
        "module.exports = function a(b) {",
        "  const module = c;",
        "  console.log(module, b);",
        "}",
    ]
    .join("\n");
    assert_eq!(
        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
        &[
            vec![
                ("module", vec!["variable.builtin"]),
                (".", vec!["punctuation.delimiter"]),
                ("exports", vec!["function"]),
                (" ", vec![]),
                ("=", vec!["operator"]),
                (" ", vec![]),
                ("function", vec!["keyword"]),
                (" ", vec![]),
                ("a", vec!["function"]),
                ("(", vec!["punctuation.bracket"]),
                ("b", vec!["variable"]),
                (")", vec!["punctuation.bracket"]),
                (" ", vec![]),
                ("{", vec!["punctuation.bracket"])
            ],
            vec![
                ("  ", vec![]),
                ("const", vec!["keyword"]),
                (" ", vec![]),
                ("module", vec!["variable"]),
                (" ", vec![]),
                ("=", vec!["operator"]),
                (" ", vec![]),
                ("c", vec!["variable"]),
                (";", vec!["punctuation.delimiter"])
            ],
            vec![
                ("  ", vec![]),
                ("console", vec!["variable.builtin"]),
                (".", vec!["punctuation.delimiter"]),
                ("log", vec!["function"]),
                ("(", vec!["punctuation.bracket"]),
                // Not a builtin, because `module` was defined as a variable above.
                ("module", vec!["variable"]),
                (",", vec!["punctuation.delimiter"]),
                (" ", vec![]),
                // A parameter, because `b` was defined as a parameter above.
                ("b", vec!["variable"]),
                (")", vec!["punctuation.bracket"]),
                (";", vec!["punctuation.delimiter"]),
            ],
            vec![("}", vec!["punctuation.bracket"])]
        ],
    );
}
// Empty lines in the source must appear as bare "\n" lines in the HTML
// output, with no spurious spans.
#[test]
fn test_highlighting_empty_lines() {
    let source = [
        "class A {",
        "",
        "  b(c) {",
        "",
        "    d(e)",
        "",
        "  }",
        "",
        "}",
    ]
    .join("\n");
    assert_eq!(
        &to_html(&source, &JS_HIGHLIGHT).unwrap(),
        &[
            "<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
            "\n".to_string(),
            "  <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
            "\n".to_string(),
            "    <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
            "\n".to_string(),
            "  <span class=punctuation.bracket>}</span>\n".to_string(),
            "\n".to_string(),
            "<span class=punctuation.bracket>}</span>\n".to_string(),
        ]
    );
}
// Carriage returns get their own (empty) highlight span so renderers can
// make them visible; "\r\n" still counts as one line break.
#[test]
fn test_highlighting_carriage_returns() {
    let source = "a = \"a\rb\"\r\nb\r";
    assert_eq!(
        &to_html(source, &JS_HIGHLIGHT).unwrap(),
        &[
            "<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span><span class=variable>b</span>&quot;</span>\n",
            "<span class=variable>b</span><span class=carriage-return></span>\n",
        ],
    );
}
// Two levels of injection in one document: EJS hosts both HTML and JS.
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
    let source = ["<div><% foo() %></div><script> bar() </script>"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
        &[[
            ("<", vec!["punctuation.bracket"]),
            ("div", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            ("<%", vec!["keyword"]),
            (" ", vec![]),
            ("foo", vec!["function"]),
            ("(", vec!["punctuation.bracket"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("%>", vec!["keyword"]),
            ("</", vec!["punctuation.bracket"]),
            ("div", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            ("<", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("bar", vec!["function"]),
            ("(", vec!["punctuation.bracket"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("</", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ]],
    );
}
#[test]
fn test_highlighting_javascript_with_jsdoc() {
    // Regression test: the middle comment has no highlights. This should not prevent
    // later injections from highlighting properly.
    let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
        &[[
            ("a", vec!["variable"]),
            (" ", vec![]),
            ("/* ", vec!["comment"]),
            ("@see", vec!["comment", "keyword"]),
            (" a */", vec!["comment"]),
            (" ", vec![]),
            ("b", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("/* nothing */", vec!["comment"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("/* ", vec!["comment"]),
            ("@see", vec!["comment", "keyword"]),
            (" b */", vec!["comment"])
        ]],
    );
}
// Rust highlighting where a macro's content children are included in the
// highlighted output.
#[test]
fn test_highlighting_with_content_children_included() {
    let source = ["assert!(", "    a.b.c() < D::e::<F>()", ");"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
        &[
            vec![
                ("assert", vec!["function"]),
                ("!", vec!["function"]),
                ("(", vec!["punctuation.bracket"]),
            ],
            vec![
                ("    a", vec![]),
                (".", vec!["punctuation.delimiter"]),
                ("b", vec!["property"]),
                (".", vec!["punctuation.delimiter"]),
                ("c", vec!["function"]),
                ("(", vec!["punctuation.bracket"]),
                (")", vec!["punctuation.bracket"]),
                (" < ", vec![]),
                ("D", vec!["type"]),
                ("::", vec!["punctuation.delimiter"]),
                ("e", vec!["function"]),
                ("::", vec!["punctuation.delimiter"]),
                ("<", vec!["punctuation.bracket"]),
                ("F", vec!["type"]),
                (">", vec!["punctuation.bracket"]),
                ("(", vec!["punctuation.bracket"]),
                (")", vec!["punctuation.bracket"]),
            ],
            vec![
                (")", vec!["punctuation.bracket"]),
                (";", vec!["punctuation.delimiter"]),
            ]
        ],
    );
}
// Cancellation: setting the flag from the injection callback must surface
// as `Error::Cancelled` while iterating events, not as a panic.
#[test]
fn test_highlighting_cancellation() {
    // An HTML document with a large injected JavaScript document:
    let mut source = "<script>\n".to_string();
    for _ in 0..500 {
        source += "function a() { console.log('hi'); }\n";
    }
    source += "</script>\n";
    // Cancel the highlighting before parsing the injected document.
    let cancellation_flag = AtomicUsize::new(0);
    let injection_callback = |name: &str| {
        cancellation_flag.store(1, Ordering::SeqCst);
        test_language_for_injection_string(name)
    };
    // The initial `highlight` call, which eagerly parses the outer document, should not fail.
    let mut highlighter = Highlighter::new();
    let events = highlighter
        .highlight(
            &HTML_HIGHLIGHT,
            source.as_bytes(),
            Some(&cancellation_flag),
            injection_callback,
        )
        .unwrap();
    // Iterating the scopes should not panic. It should return an error once the
    // cancellation is detected.
    for event in events {
        if let Err(e) = event {
            assert_eq!(e, Error::Cancelled);
            return;
        }
    }
    panic!("Expected an error while iterating highlighter");
}
// Exercises the C API end to end: register two languages, highlight an HTML
// document with injected JS, and reassemble lines from the raw buffer.
#[test]
fn test_highlighting_via_c_api() {
    // NUL-terminated attribute strings; the name is the part after "class=".
    let highlights = [
        "class=tag\0",
        "class=function\0",
        "class=string\0",
        "class=keyword\0",
    ];
    let highlight_names = highlights
        .iter()
        .map(|h| h["class=".len()..].as_ptr().cast::<c_char>())
        .collect::<Vec<_>>();
    let highlight_attrs = highlights
        .iter()
        .map(|h| h.as_bytes().as_ptr().cast::<c_char>())
        .collect::<Vec<_>>();
    let highlighter = unsafe {
        c::ts_highlighter_new(
            std::ptr::addr_of!(highlight_names[0]),
            std::ptr::addr_of!(highlight_attrs[0]),
            highlights.len() as u32,
        )
    };
    let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
    // Register JavaScript with its highlight/injection/locals queries.
    let js_scope = c_string("source.js");
    let js_injection_regex = c_string("^javascript");
    let language = get_language("javascript");
    let lang_name = c_string("javascript");
    let queries = get_language_queries_path("javascript");
    let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
    let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
    let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
    unsafe {
        c::ts_highlighter_add_language(
            highlighter,
            lang_name.as_ptr(),
            js_scope.as_ptr(),
            js_injection_regex.as_ptr(),
            language,
            highlights_query.as_ptr().cast::<c_char>(),
            injections_query.as_ptr().cast::<c_char>(),
            locals_query.as_ptr().cast::<c_char>(),
            highlights_query.len() as u32,
            injections_query.len() as u32,
            locals_query.len() as u32,
        );
    }
    // Register HTML; it has no locals query (null pointer, zero length).
    let html_scope = c_string("text.html.basic");
    let html_injection_regex = c_string("^html");
    let language = get_language("html");
    let lang_name = c_string("html");
    let queries = get_language_queries_path("html");
    let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
    let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
    unsafe {
        c::ts_highlighter_add_language(
            highlighter,
            lang_name.as_ptr(),
            html_scope.as_ptr(),
            html_injection_regex.as_ptr(),
            language,
            highlights_query.as_ptr().cast::<c_char>(),
            injections_query.as_ptr().cast::<c_char>(),
            ptr::null(),
            highlights_query.len() as u32,
            injections_query.len() as u32,
            0,
        );
    }
    let buffer = c::ts_highlight_buffer_new();
    unsafe {
        c::ts_highlighter_highlight(
            highlighter,
            html_scope.as_ptr(),
            source_code.as_ptr(),
            source_code.as_bytes().len() as u32,
            buffer,
            ptr::null_mut(),
        );
    }
    // Rebuild per-line strings from the flat output buffer and line offsets.
    let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) };
    let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) };
    let output_len = unsafe { c::ts_highlight_buffer_len(buffer) };
    let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) };
    let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
    let output_line_offsets =
        unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) };
    let mut lines = Vec::new();
    for i in 0..(output_line_count as usize) {
        let line_start = output_line_offsets[i] as usize;
        let line_end = output_line_offsets
            .get(i + 1)
            .map_or(output_bytes.len(), |x| *x as usize);
        lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap());
    }
    assert_eq!(
        lines,
        vec![
            "&lt;<span class=tag>script</span>&gt;\n",
            "<span class=keyword>const</span> a = <span class=function>b</span>(<span class=string>&#39;c&#39;</span>);\n",
            "c.<span class=function>d</span>();\n",
            "&lt;/<span class=tag>script</span>&gt;\n",
        ]
    );
    // Release the C-side objects to keep the allocation tracker happy.
    unsafe {
        c::ts_highlighter_delete(highlighter);
        c::ts_highlight_buffer_delete(buffer);
    }
}
// With an ad-hoc query where several patterns can hit the same node, every
// applicable capture must still be applied (e.g. parameters beat plain
// variables).
#[test]
fn test_highlighting_with_all_captures_applied() {
    let source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
    let language = get_language("rust");
    let highlights_query = indoc::indoc! {"
        [
            \"fn\"
            \"let\"
        ] @keyword
        (identifier) @variable
        (function_item name: (identifier) @function)
        (parameter pattern: (identifier) @variable.parameter)
        (primitive_type) @type.builtin
        \"=\" @operator
        [ \"->\" \":\" \";\" ] @punctuation.delimiter
        [ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
    "};
    let mut rust_highlight_reverse =
        HighlightConfiguration::new(language, "rust", highlights_query, "", "").unwrap();
    rust_highlight_reverse.configure(&HIGHLIGHT_NAMES);
    assert_eq!(
        &to_token_vector(source, &rust_highlight_reverse).unwrap(),
        &[[
            ("fn", vec!["keyword"]),
            (" ", vec![]),
            ("main", vec!["function"]),
            ("(", vec!["punctuation.bracket"]),
            ("a", vec!["variable.parameter"]),
            (":", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("u32", vec!["type.builtin"]),
            (", ", vec![]),
            ("b", vec!["variable.parameter"]),
            (":", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("u32", vec!["type.builtin"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("->", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("{", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("let", vec!["keyword"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("a", vec!["variable"]),
            (" + ", vec![]),
            ("b", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("}", vec!["punctuation.bracket"])
        ]],
    );
}
// LossyUtf8 splits a byte slice into alternating valid-UTF-8 chunks and
// U+FFFD replacement characters (one per invalid byte).
#[test]
fn test_decode_utf8_lossy() {
    use tree_sitter::LossyUtf8;
    let parts = LossyUtf8::new(b"hi").collect::<Vec<_>>();
    assert_eq!(parts, vec!["hi"]);
    let parts = LossyUtf8::new(b"hi\xc0\xc1bye").collect::<Vec<_>>();
    assert_eq!(parts, vec!["hi", "\u{fffd}", "\u{fffd}", "bye"]);
    let parts = LossyUtf8::new(b"\xc0\xc1bye").collect::<Vec<_>>();
    assert_eq!(parts, vec!["\u{fffd}", "\u{fffd}", "bye"]);
    let parts = LossyUtf8::new(b"hello\xc0\xc1").collect::<Vec<_>>();
    assert_eq!(parts, vec!["hello", "\u{fffd}", "\u{fffd}"]);
}
/// Converts a string slice into an owned `CString` for passing across FFI.
///
/// Panics if `s` contains an interior NUL byte, which test inputs never do.
fn c_string(s: &str) -> CString {
    // `CString::new` accepts `&str` directly; the previous
    // `s.as_bytes().to_vec()` intermediate copy was redundant.
    CString::new(s).unwrap()
}
// Injection callback shared by the highlight tests: maps an injected
// language name to its pre-built highlight configuration, or `None` for
// unknown names.
fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> {
    if string == "javascript" {
        Some(&JS_HIGHLIGHT)
    } else if string == "html" {
        Some(&HTML_HIGHLIGHT)
    } else if string == "rust" {
        Some(&RUST_HIGHLIGHT)
    } else if string == "jsdoc" {
        Some(&JSDOC_HIGHLIGHT)
    } else {
        None
    }
}
// Renders `src` with the given highlight configuration through the HTML
// renderer (using the shared injection callback) and returns one HTML
// string per source line.
fn to_html<'a>(
src: &'a str,
language_config: &'a HighlightConfiguration,
) -> Result<Vec<String>, Error> {
let src = src.as_bytes();
let mut renderer = HtmlRenderer::new();
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(
language_config,
src,
None,
&test_language_for_injection_string,
)?;
// Wire the carriage-return highlight to the "carriage-return" entry of
// HIGHLIGHT_NAMES, if that name is configured.
renderer.set_carriage_return_highlight(
HIGHLIGHT_NAMES
.iter()
.position(|s| s == "carriage-return")
.map(Highlight),
);
// Attribute text for each highlight comes from the parallel HTML_ATTRS
// table, indexed by highlight id.
renderer
.render(events, src, &|highlight, output| {
output.extend(HTML_ATTRS[highlight.0].as_bytes());
})
.unwrap();
Ok(renderer
.lines()
.map(std::string::ToString::to_string)
.collect())
}
#[allow(clippy::type_complexity)]
// Runs the highlighter over `src` and flattens the event stream into, per
// line, a list of (text, active highlight names) pairs. Start/end events
// maintain a stack of active highlight names; source events are split on
// newlines (trailing `\r` trimmed) and empty segments are dropped.
fn to_token_vector<'a>(
src: &'a str,
language_config: &'a HighlightConfiguration,
) -> Result<Vec<Vec<(&'a str, Vec<&'static str>)>>, Error> {
let src = src.as_bytes();
let mut highlighter = Highlighter::new();
let mut lines = Vec::new();
let mut highlights = Vec::new();
let mut line = Vec::new();
let events = highlighter.highlight(
language_config,
src,
None,
&test_language_for_injection_string,
)?;
for event in events {
match event? {
HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHT_NAMES[s.0].as_str()),
HighlightEvent::HighlightEnd => {
highlights.pop();
}
HighlightEvent::Source { start, end } => {
let s = str::from_utf8(&src[start..end]).unwrap();
// A source chunk may span several lines; every `\n` inside
// it finishes the current output line.
for (i, l) in s.split('\n').enumerate() {
let l = l.trim_end_matches('\r');
if i > 0 {
lines.push(std::mem::take(&mut line));
}
if !l.is_empty() {
line.push((l, highlights.clone()));
}
}
}
}
}
// Flush a trailing line that was not terminated by a newline.
if !line.is_empty() {
lines.push(line);
}
Ok(lines)
}

View file

@ -0,0 +1,199 @@
use tree_sitter::{self, Parser};
use super::helpers::fixtures::get_language;
#[test]
// Exercises the lookahead-iterator API: for the parse state reached after
// the `struct` keyword, the iterator must list exactly the symbols that are
// valid next tokens, and both `reset_state` and `reset` must rewind it.
fn test_lookahead_iterator() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(&language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
// `next_parse_state` must agree with the explicit state-transition API.
assert_eq!(
next_state,
language.next_state(cursor.node().parse_state(), cursor.node().grammar_id())
);
assert!((next_state as usize) < language.parse_state_count());
assert!(cursor.goto_next_sibling()); // type_identifier
assert_eq!(next_state, cursor.node().parse_state());
assert_eq!(cursor.node().grammar_name(), "identifier");
assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id());
let expected_symbols = ["//", "/*", "identifier", "line_comment", "block_comment"];
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
assert_eq!(*lookahead.language(), language);
assert!(lookahead.iter_names().eq(expected_symbols));
// Both reset flavors must restart iteration from the beginning.
lookahead.reset_state(next_state);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset(&language, next_state);
assert!(lookahead
.map(|s| language.node_kind_for_id(s).unwrap())
.eq(expected_symbols));
}
#[test]
// Primarily a compile-time check: advancing a lookahead iterator (or its
// names iterator) requires a `mut` binding. Also drives both iterators one
// step at runtime to make sure that doesn't panic.
fn test_lookahead_iterator_modifiable_only_by_mut() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(&language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
let _ = lookahead.next();
let mut names = lookahead.iter_names();
let _ = names.next();
}
#[test]
// Walks every node kind in the Rust grammar and spot-checks the symbol
// metadata predicates: supertype flags for hidden rules, named-ness of
// item kinds, and visibility of anonymous punctuation tokens.
fn test_symbol_metadata_checks() {
let language = get_language("rust");
for i in 0..language.node_kind_count() {
let sym = i as u16;
let name = language.node_kind_for_id(sym).unwrap();
match name {
"_type"
| "_expression"
| "_pattern"
| "_literal"
| "_literal_pattern"
| "_declaration_statement" => assert!(language.node_kind_is_supertype(sym)),
// Hidden rules that are not supertypes must not be reported as such.
"_raw_string_literal_start"
| "_raw_string_literal_end"
| "_line_doc_comment"
| "_error_sentinel" => assert!(!language.node_kind_is_supertype(sym)),
"enum_item" | "struct_item" | "type_item" => {
assert!(language.node_kind_is_named(sym));
}
"=>" | "[" | "]" | "(" | ")" | "{" | "}" => {
assert!(language.node_kind_is_visible(sym));
}
_ => {}
}
}
}
#[test]
// Checks the supertype introspection API against the Rust grammar: the
// exact set of supertype symbols, and the sorted, deduplicated subtype
// list for each supertype.
fn test_supertypes() {
let language = get_language("rust");
let supertypes = language.supertypes();
// Supertype metadata is only available for ABI 15+ grammars.
if language.abi_version() < 15 {
return;
}
assert_eq!(supertypes.len(), 5);
assert_eq!(
supertypes
.iter()
.filter_map(|&s| language.node_kind_for_id(s))
.map(|s| s.to_string())
.collect::<Vec<String>>(),
vec![
"_expression",
"_literal",
"_literal_pattern",
"_pattern",
"_type"
]
);
for &supertype in supertypes {
let mut subtypes = language
.subtypes_for_supertype(supertype)
.iter()
.filter_map(|symbol| language.node_kind_for_id(*symbol))
.collect::<Vec<&str>>();
// Sort + dedup so the comparison below is order- and
// duplicate-insensitive.
subtypes.sort_unstable();
subtypes.dedup();
match language.node_kind_for_id(supertype) {
Some("_literal") => {
assert_eq!(
subtypes,
&[
"boolean_literal",
"char_literal",
"float_literal",
"integer_literal",
"raw_string_literal",
"string_literal"
]
);
}
Some("_pattern") => {
assert_eq!(
subtypes,
&[
"_",
"_literal_pattern",
"captured_pattern",
"const_block",
"generic_pattern",
"identifier",
"macro_invocation",
"mut_pattern",
"or_pattern",
"range_pattern",
"ref_pattern",
"reference_pattern",
"remaining_field_pattern",
"scoped_identifier",
"slice_pattern",
"struct_pattern",
"tuple_pattern",
"tuple_struct_pattern",
]
);
}
Some("_type") => {
assert_eq!(
subtypes,
&[
"abstract_type",
"array_type",
"bounded_type",
"dynamic_type",
"function_type",
"generic_type",
"macro_invocation",
"metavariable",
"never_type",
"pointer_type",
"primitive_type",
"reference_type",
"removed_trait_bound",
"scoped_type_identifier",
"tuple_type",
"type_identifier",
"unit_type"
]
);
}
// The other two supertypes are covered by the count and name
// assertions above.
_ => {}
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,104 @@
// For some reason, `Command::spawn` doesn't work in CI environments for many exotic arches.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
use tree_sitter_generate::load_grammar_file;
use super::generate_parser;
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};
// The `sanitizing` cfg is required to avoid running these tests under specific sanitizers,
// because they don't work well with subprocesses _(it's an assumption)_.
//
// Below are two alternative examples of how to disable tests for some arches
// if a way with excluding the whole mod from compilation wouldn't work well.
//
// XXX: Also, it may make sense to keep such tests ignored by default
// to avoid surprises, and to enable them on CI by passing an extra option explicitly:
//
// > cargo test -- --include-ignored
//
// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
//
#[test]
// Re-spawns this test binary as a child process (selected via an env var)
// that runs `hang_test`, which parses input expected to make the parser
// hang rather than segfault. The parent sleeps briefly, asserts the child
// is still alive (i.e. hanging, not crashed or finished), then kills it.
fn test_grammar_that_should_hang_and_not_segfault() {
let parent_sleep_millis = 1000;
let test_name = "test_grammar_that_should_hang_and_not_segfault";
let test_var = "CARGO_HANG_TEST";
eprintln!(" {test_name}");
let tests_exec_path = std::env::args()
.next()
.expect("Failed to get tests executable path");
match std::env::var(test_var) {
// Child branch: the env var marks this process as the hanging child.
Ok(v) if v == test_name => {
eprintln!(" child process id {}", std::process::id());
hang_test();
}
// Parent branch: spawn ourselves with the env var set.
Err(VarError::NotPresent) => {
eprintln!(" parent process id {}", std::process::id());
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
// Forward --nocapture so child output is visible when requested;
// otherwise silence the child entirely.
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
// If the child exited within the grace period, it did not
// hang — that is the failure this test exists to catch.
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child didn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child didn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test's process id: {}, error: {e}",
child.id()
);
}
}
Err(e) => panic!("{e}"),
}
}
Err(e) => panic!("Env var error: {e}"),
// `Ok` with any other value cannot occur: the var is only ever set
// to `test_name` above.
_ => unreachable!(),
}
}
// Child-process half of the hang test: generates a parser from the
// `get_col_should_hang_not_crash` fixture grammar and parses input that is
// expected to hang the parser indefinitely (the parent kills this process).
fn hang_test() {
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap();
let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
// This parse call is expected to never return.
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,15 @@
use tree_sitter::Parser;
use super::helpers::{allocations, fixtures::get_language};
#[test]
// Regression test: this adversarial C++ snippet previously exhibited
// pathological behavior. Parsing must complete, succeed, and leak no
// allocations (checked by the `allocations::record` wrapper).
fn test_pathological_example_1() {
    let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
    allocations::record(|| {
        let mut parser = Parser::new();
        parser.set_language(&get_language("cpp")).unwrap();
        parser.parse(source, None).unwrap();
    });
}

View file

@ -0,0 +1,18 @@
# Internal proc-macro helper crate for the tree-sitter test suite
# (provides the `#[retry]` and `#[test_with_seed]` attributes).
[package]
name = "tree-sitter-tests-proc-macro"
version = "0.0.0"
edition.workspace = true
rust-version.workspace = true
# Test-only helper; never published to crates.io.
publish = false
[lints]
workspace = true
[lib]
# Required so the crate can export attribute macros.
proc-macro = true
[dependencies]
proc-macro2 = "1.0.93"
quote = "1.0.38"
rand = "0.8.5"
syn = { version = "2.0.96", features = ["full"] }

View file

@ -0,0 +1,135 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::{
parse::{Parse, ParseStream},
parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token,
};
/// Attribute macro that re-runs a failing test up to `N` extra times.
///
/// Usage: `#[retry(2)] fn flaky() { … }`. The generated wrapper calls the
/// original function (nested inside it), returns on the first success, and
/// propagates the final attempt's panic otherwise.
#[proc_macro_attribute]
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
    let count = parse_macro_input!(args as LitInt);
    let input = parse_macro_input!(input as ItemFn);
    let attrs = &input.attrs;
    let name = &input.sig.ident;
    TokenStream::from(quote! {
        // Attributes are whitespace-separated; a `,` repetition separator
        // (`#(#attrs),*`) would generate invalid code when the annotated
        // function carries more than one attribute.
        #(#attrs)*
        fn #name() {
            #input
            for i in 0..=#count {
                let result = std::panic::catch_unwind(|| {
                    #name();
                });
                if result.is_ok() {
                    return;
                }
                if i == #count {
                    std::panic::resume_unwind(result.unwrap_err());
                }
            }
        }
    })
}
/// Attribute macro for randomized tests: runs the annotated function with a
/// seed, retrying on failure with a fresh seed.
///
/// Parameters (comma-separated `name = value` pairs):
/// - `seed` (required): expression producing the initial seed.
/// - `retry` (optional, default `0`): number of extra attempts.
/// - `seed_fn` (optional): function called to produce a new seed per retry.
///
/// The annotated function must take the seed as its only argument.
#[proc_macro_attribute]
pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
    struct Args {
        retry: LitInt,
        seed: Expr,
        seed_fn: Option<Ident>,
    }
    impl Parse for Args {
        fn parse(input: ParseStream) -> syn::Result<Self> {
            let mut retry = None;
            let mut seed = None;
            let mut seed_fn = None;
            while !input.is_empty() {
                let name = input.parse::<Ident>()?;
                match name.to_string().as_str() {
                    "retry" => {
                        input.parse::<Token![=]>()?;
                        retry.replace(input.parse()?);
                    }
                    "seed" => {
                        input.parse::<Token![=]>()?;
                        seed.replace(input.parse()?);
                    }
                    "seed_fn" => {
                        input.parse::<Token![=]>()?;
                        seed_fn.replace(input.parse()?);
                    }
                    x => {
                        return Err(Error::new(
                            name.span(),
                            format!("Unsupported parameter `{x}`"),
                        ))
                    }
                }
                if !input.is_empty() {
                    input.parse::<Token![,]>()?;
                }
            }
            // `retry` is optional and defaults to zero extra attempts, so
            // the `expect` below can never fire for it.
            if retry.is_none() {
                retry.replace(LitInt::new("0", Span::mixed_site()));
            }
            Ok(Self {
                retry: retry.expect("`retry` parameter is required"),
                seed: seed.expect("`seed` parameter is required"),
                seed_fn,
            })
        }
    }
    let Args {
        retry,
        seed,
        seed_fn,
    } = parse_macro_input!(args as Args);
    // Iterating over the `Option` lets `quote!` emit the reseeding call
    // zero or one times.
    let seed_fn = seed_fn.iter();
    let func = parse_macro_input!(input as ItemFn);
    let attrs = &func.attrs;
    let name = &func.sig.ident;
    TokenStream::from(quote! {
        #[test]
        // Attributes are whitespace-separated; a `,` repetition separator
        // (`#(#attrs),*`) would generate invalid code when the annotated
        // function carries more than one attribute.
        #(#attrs)*
        fn #name() {
            #func
            let mut seed = #seed;
            for i in 0..=#retry {
                let result = std::panic::catch_unwind(|| {
                    #name(seed);
                });
                if result.is_ok() {
                    return;
                }
                if i == #retry {
                    std::panic::resume_unwind(result.unwrap_err());
                }
                #(
                    seed = #seed_fn();
                )*
                if i < #retry {
                    println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed);
                }
            }
        }
    })
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,448 @@
use std::{
ffi::{CStr, CString},
fs, ptr, slice, str,
};
use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};
use super::helpers::{
allocations,
fixtures::{get_language, get_language_queries_path},
};
/// Tags query for Python used throughout these tests: captures function and
/// class definitions (attaching adjacent docstrings, stripped of quotes via
/// `#strip!`) plus plain and attribute call sites.
const PYTHON_TAG_QUERY: &str = r#"
(
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(function_definition
name: (identifier) @name) @definition.function
(
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(class_definition
name: (identifier) @name) @definition.class
(call
function: (identifier) @name) @reference.call
(call
function: (attribute
attribute: (identifier) @name)) @reference.call
"#;
/// Tags query for JavaScript: captures class, method, and function
/// definitions with their adjacent doc comments (`#select-adjacent!` drops
/// non-adjacent comments; `#strip!` removes comment punctuation), plus
/// call references.
const JS_TAG_QUERY: &str = r#"
(
(comment)* @doc .
(class_declaration
name: (identifier) @name) @definition.class
(#select-adjacent! @doc @definition.class)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(method_definition
name: (property_identifier) @name) @definition.method
(#select-adjacent! @doc @definition.method)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(function_declaration
name: (identifier) @name) @definition.function
(#select-adjacent! @doc @definition.function)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(call_expression
function: (identifier) @name) @reference.call
"#;
/// Tags query for Ruby: method definitions and call references. Bare
/// identifiers count as calls only when the locals query has not resolved
/// them to a local variable (`#is-not? local`); setter targets are ignored.
const RUBY_TAG_QUERY: &str = r"
(method
name: (_) @name) @definition.method
(call
method: (identifier) @name) @reference.call
(setter (identifier) @ignore)
((identifier) @name @reference.call
(#is-not? local))
";
#[test]
// End-to-end tag generation for Python: class, method, and call tags are
// produced in source order, line ranges cover the definition line, and
// adjacent docstrings are attached with quotes stripped.
fn test_tags_python() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
// NOTE(review): the stray `}` near the end of this fixture is not valid
// Python — presumably intentional, to exercise error tolerance; confirm.
let source = br#"
class Customer:
"""
Data about a customer
"""
def age(self):
'''
Get the customer's age
'''
compute_age(self.id)
}
"#;
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", "class"),
("age", "function"),
("compute_age", "call"),
]
);
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
}
#[test]
// JavaScript tag generation: doc comments directly adjacent to a
// definition are attached and joined with newlines, non-adjacent comments
// (like the leading "// hi") are dropped, and spans are row/column points.
fn test_tags_javascript() {
let language = get_language("javascript");
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
let source = br"
// hi
// Data about a customer.
// bla bla bla
class Customer {
/*
* Get the customer's age
*/
getAge() {
}
}
// ok
class Agent {
}
";
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source, &t.name_range),
t.span.clone(),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
]
);
assert_eq!(
tags[0].docs.as_ref().unwrap(),
"Data about a customer.\nbla bla bla"
);
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
// "// ok" is not adjacent to `class Agent`, so Agent has no docs.
assert_eq!(tags[2].docs, None);
}
#[test]
// Tags report both byte-based spans and UTF-16 column ranges; the
// multi-byte emoji prefix makes the two diverge, which this test pins.
fn test_tags_columns_measured_in_utf16_code_units() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
let tag = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.next()
.unwrap()
.unwrap();
assert_eq!(substr(source, &tag.name_range), "hello_α");
// `span` columns are in bytes; `utf16_column_range` is in UTF-16 code
// units, hence the smaller numbers.
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
assert_eq!(tag.utf16_column_range, 9..18);
}
#[test]
// Ruby tagging with the fixture locals query: identifiers that resolve to
// local variables or block parameters are excluded from `reference.call`
// tags, as spelled out by the comments inside the fixture source.
fn test_tags_ruby() {
let language = get_language("ruby");
let locals_query =
fs::read_to_string(get_language_queries_path("ruby").join("locals.scm")).unwrap();
let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap();
let source = strip_whitespace(
8,
"
b = 1
def foo=()
c = 1
# a is a method because it is not in scope
# b is a method because `b` doesn't capture variables from its containing scope
bar a, b, c
[1, 2, 3].each do |a|
# a is a parameter
# b is a method
# c is a variable, because the block captures variables from its containing scope.
baz a, b, c
end
end",
);
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source.as_bytes(), &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id),
(t.span.start.row, t.span.start.column),
))
.collect::<Vec<_>>(),
&[
("foo=", "method", (2, 4)),
("bar", "call", (7, 4)),
("a", "call", (7, 8)),
("b", "call", (7, 11)),
("each", "call", (9, 14)),
("baz", "call", (13, 8)),
("b", "call", (13, 15),),
]
);
}
#[test]
// Setting the cancellation flag while the tag iterator is being consumed
// must make it yield `Error::Cancelled` instead of running to completion;
// the allocation recorder additionally checks nothing leaks on that path.
fn test_tags_cancellation() {
use std::sync::atomic::{AtomicUsize, Ordering};
allocations::record(|| {
// Large javascript document
let source = (0..500)
.map(|_| "/* hi */ class A { /* ok */ b() {} }\n")
.collect::<String>();
let cancellation_flag = AtomicUsize::new(0);
let language = get_language("javascript");
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
.unwrap();
for (i, tag) in tags.0.enumerate() {
// Flip the flag partway through iteration; a later item must
// then surface the cancellation error.
if i == 150 {
cancellation_flag.store(1, Ordering::SeqCst);
}
if let Err(e) = tag {
assert_eq!(e, Error::Cancelled);
return;
}
}
panic!("Expected to halt tagging with an error");
});
}
#[test]
// A capture name that the tagger does not recognize must be rejected at
// configuration time with `Error::InvalidCapture` naming the capture.
fn test_invalid_capture() {
    let error = TagsConfiguration::new(get_language("python"), "(identifier) @method", "")
        .expect_err("expected InvalidCapture error");
    assert_eq!(error, Error::InvalidCapture("method".to_string()));
}
#[test]
// A syntax error after a valid definition still yields tags for the valid
// prefix, and the boolean returned alongside the iterator reports that an
// error node was encountered.
fn test_tags_with_parse_error() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = br"
class Fine: pass
class Bad
";
let (tags, failed) = tag_context
.generate_tags(&tags_config, source, None)
.unwrap();
let newtags = tags.collect::<Result<Vec<_>, _>>().unwrap();
assert!(failed, "syntax error should have been detected");
// Only the well-formed `Fine` class produces a tag.
assert_eq!(
newtags
.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[("Fine", "class"),]
);
}
#[test]
// Drives the tags C API end to end — registering a language for a scope,
// tagging a buffer, and reading tags/docs/syntax kinds back out of the
// buffer — inside the allocation recorder to catch leaks in the FFI layer.
fn test_tags_via_c_api() {
allocations::record(|| {
let tagger = c::ts_tagger_new();
let buffer = c::ts_tags_buffer_new();
let scope_name = "source.js";
let language = get_language("javascript");
let source_code = strip_whitespace(
12,
"
var a = 1;
// one
// two
// three
function b() {
}
// four
// five
class C extends D {
}
b(a);",
);
let c_scope_name = CString::new(scope_name).unwrap();
let result = unsafe {
c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
)
};
assert_eq!(result, c::TSTagsError::Ok);
let result = unsafe {
c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
)
};
assert_eq!(result, c::TSTagsError::Ok);
// The raw-parts reconstructions below rely on the lengths reported by
// the corresponding buffer-API length functions.
let tags = unsafe {
slice::from_raw_parts(
c::ts_tags_buffer_tags(buffer),
c::ts_tags_buffer_tags_len(buffer) as usize,
)
};
let docs = str::from_utf8(unsafe {
slice::from_raw_parts(
c::ts_tags_buffer_docs(buffer).cast::<u8>(),
c::ts_tags_buffer_docs_len(buffer) as usize,
)
})
.unwrap();
let syntax_types = unsafe {
let mut len = 0;
let ptr = c::ts_tagger_syntax_kinds_for_scope_name(
tagger,
c_scope_name.as_ptr(),
&raw mut len,
);
slice::from_raw_parts(ptr, len as usize)
.iter()
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
.collect::<Vec<_>>()
};
// Tags carry byte offsets into the source and into the shared docs
// buffer; resolve both and compare against the expected triples.
assert_eq!(
tags.iter()
.map(|tag| (
syntax_types[tag.syntax_type_id as usize],
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
))
.collect::<Vec<_>>(),
&[
("function", "b", "function b() {", "one\ntwo\nthree"),
("class", "C", "class C extends D {", "four\nfive"),
("call", "b", "b(a);", "")
]
);
unsafe {
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
}
});
}
/// Borrows the UTF-8 text of `source` covered by `range`.
/// Panics if the bytes in that range are not valid UTF-8.
fn substr<'a>(source: &'a [u8], range: &std::ops::Range<usize>) -> &'a str {
    let bytes = &source[range.start..range.end];
    std::str::from_utf8(bytes).unwrap()
}
/// Removes the first line of `s` and blindly strips up to `indent` leading
/// characters from each remaining line (lines shorter than `indent` become
/// empty), rejoining the result with `\n`.
fn strip_whitespace(indent: usize, s: &str) -> String {
    let mut result = String::new();
    for (i, line) in s.lines().enumerate().skip(1) {
        if i > 1 {
            result.push('\n');
        }
        let cut = indent.min(line.len());
        result.push_str(&line[cut..]);
    }
    result
}

View file

@ -0,0 +1,70 @@
use tree_sitter::Parser;
use tree_sitter_highlight::{Highlight, Highlighter};
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
use crate::{
query_testing::{parse_position_comments, Assertion, Utf8Point},
test_highlight::get_highlight_positions,
};
#[test]
// Parses `^`/`!` position-comment assertions from annotated JavaScript and
// checks both the parsed assertions and the actual highlight positions.
// Highlight indices refer to the configured name list: 0 = function,
// 1 = variable, 2 = keyword.
fn test_highlight_test_with_basic_test() {
let language = get_language("javascript");
let config = get_highlight_config(
"javascript",
Some("injections.scm"),
&[
"function".to_string(),
"variable".to_string(),
"keyword".to_string(),
],
);
let source = [
"// hi",
"var abc = function(d) {",
" // ^ function",
" // ^^^ keyword",
" return d + e;",
" // ^ variable",
" // ^ !variable",
"};",
"var y̆y̆y̆y̆ = function() {}",
" // ^ function",
" // ^ keyword",
]
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
assert_eq!(
assertions,
&[
Assertion::new(1, 5, 1, false, String::from("function")),
Assertion::new(1, 11, 3, false, String::from("keyword")),
Assertion::new(4, 9, 1, false, String::from("variable")),
Assertion::new(4, 11, 1, true, String::from("variable")),
Assertion::new(8, 5, 1, false, String::from("function")),
Assertion::new(8, 11, 1, false, String::from("keyword")),
]
);
let mut highlighter = Highlighter::new();
let highlight_positions =
get_highlight_positions(test_loader(), &mut highlighter, &config, source.as_bytes())
.unwrap();
// Positions are UTF-8 points; the multi-byte `y̆` identifiers shift the
// columns on row 8.
assert_eq!(
highlight_positions,
&[
(Utf8Point::new(1, 0), Utf8Point::new(1, 3), Highlight(2)), // "var"
(Utf8Point::new(1, 4), Utf8Point::new(1, 7), Highlight(0)), // "abc"
(Utf8Point::new(1, 10), Utf8Point::new(1, 18), Highlight(2)), // "function"
(Utf8Point::new(1, 19), Utf8Point::new(1, 20), Highlight(1)), // "d"
(Utf8Point::new(4, 2), Utf8Point::new(4, 8), Highlight(2)), // "return"
(Utf8Point::new(4, 9), Utf8Point::new(4, 10), Highlight(1)), // "d"
(Utf8Point::new(4, 13), Utf8Point::new(4, 14), Highlight(1)), // "e"
(Utf8Point::new(8, 0), Utf8Point::new(8, 3), Highlight(2)), // "var"
(Utf8Point::new(8, 4), Utf8Point::new(8, 8), Highlight(0)), // "y̆y̆y̆y̆"
(Utf8Point::new(8, 11), Utf8Point::new(8, 19), Highlight(2)), // "function"
]
);
}

View file

@ -0,0 +1,62 @@
use tree_sitter::Parser;
use tree_sitter_tags::TagsContext;
use super::helpers::fixtures::{get_language, get_tags_config};
use crate::{
query_testing::{parse_position_comments, Assertion, Utf8Point},
test_tags::get_tag_positions,
};
#[test]
// Same assertion-comment mechanism as the highlight test, but for tags:
// `<-` and `^` comments in the Python fixture pin expected tag kinds
// (and `!` negates) at source positions.
fn test_tags_test_with_basic_test() {
let language = get_language("python");
let config = get_tags_config("python");
let source = [
"# hi",
"def abc(d):",
" # <- definition.function",
" e = fgh(d)",
" # ^ reference.call",
" return d(e)",
" # ^ reference.call",
" # ^ !variable.parameter",
"",
]
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
assert_eq!(
assertions,
&[
Assertion::new(1, 4, 1, false, String::from("definition.function")),
Assertion::new(3, 9, 1, false, String::from("reference.call")),
Assertion::new(5, 11, 1, false, String::from("reference.call")),
Assertion::new(5, 13, 1, true, String::from("variable.parameter")),
]
);
let mut tags_context = TagsContext::new();
let tag_positions = get_tag_positions(&mut tags_context, &config, source.as_bytes()).unwrap();
assert_eq!(
tag_positions,
&[
(
Utf8Point::new(1, 4),
Utf8Point::new(1, 7),
"definition.function".to_string()
),
(
Utf8Point::new(3, 8),
Utf8Point::new(3, 11),
"reference.call".to_string()
),
(
Utf8Point::new(5, 11),
Utf8Point::new(5, 12),
"reference.call".to_string()
),
]
);
}

View file

@ -0,0 +1,174 @@
use std::{iter, sync::Arc};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
use crate::tests::helpers::fixtures::get_language;
/// Parses `text` as C source and returns the resulting tree together with
/// the language, for use by the text-provider query helpers below.
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
    let language = get_language("c");
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();
    let tree = parser.parse(text, None).unwrap();
    (tree, language)
}
// Parses C source delivered incrementally by `callback` (which returns the
// chunk for a given byte offset / point). Asserts the fixture's first node
// is a comment, guarding against a callback that returns wrong chunks.
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
let tree = parser.parse_with_options(callback, None, None).unwrap();
// eprintln!("{}", tree.clone().root_node().to_sexp());
assert_eq!("comment", tree.root_node().child(0).unwrap().kind());
(tree, language)
}
// Runs the shared `// comment` equality query against `tree`, pulling node
// text through the given `TextProvider`, and checks the single expected
// capture — this is what actually exercises each provider implementation.
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: &Language) {
let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
let mut cursor = QueryCursor::new();
let mut captures = cursor.captures(&query, tree.root_node(), text);
let (match_, idx) = captures.next().unwrap();
let capture = match_.captures[*idx];
assert_eq!(capture.index as usize, *idx);
assert_eq!("comment", capture.node.kind());
}
// Convenience wrapper: parse `parser_text` as C, then run the standard
// comment query against it via the given text provider.
fn check_parsing<I: AsRef<[u8]>>(
    parser_text: impl AsRef<[u8]>,
    text_provider: impl TextProvider<I>,
) {
    let (parsed_tree, lang) = parse_text(parser_text);
    tree_query(&parsed_tree, text_provider, &lang);
}
// Like `check_parsing`, but the source is supplied through a chunked parse
// callback instead of a single buffer.
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
parser_callback: &mut F,
text_provider: impl TextProvider<I>,
) where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let (tree, language) = parse_text_with(parser_callback);
tree_query(&tree, text_provider, &language);
}
#[test]
// `&str` and `&[u8]` both work as parser input and as `TextProvider`.
fn test_text_provider_for_str_slice() {
let text: &str = "// comment";
check_parsing(text, text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
}
#[test]
// Owned `String` input, both moved and borrowed via `AsRef<[u8]>`.
fn test_text_provider_for_string() {
let text: String = "// comment".to_owned();
check_parsing(text.clone(), text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
}
#[test]
// `Box<str>` input through its various borrow conversions.
fn test_text_provider_for_box_of_str_slice() {
let text = "// comment".to_owned().into_boxed_str();
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
check_parsing(text.as_ref(), text.as_ref().as_bytes());
check_parsing(text.as_ref(), text.as_bytes());
}
#[test]
// `Box<[u8]>` input via both `as_ref` and deref coercion.
fn test_text_provider_for_box_of_bytes_slice() {
let text = "// comment".to_owned().into_boxed_str().into_boxed_bytes();
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.as_ref(), &*text);
check_parsing(&*text, &*text);
}
#[test]
// `Vec<u8>` input, dereferenced to a byte slice.
fn test_text_provider_for_vec_of_bytes() {
let text = "// comment".to_owned().into_bytes();
check_parsing(&*text, &*text);
}
#[test]
// `Arc<[u8]>` input: by deref, by `as_ref`, and by cloned handle.
fn test_text_provider_for_arc_of_bytes_slice() {
let text: Arc<[u8]> = Arc::from("// comment".to_owned().into_bytes());
check_parsing(&*text, &*text);
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.clone(), text.as_ref());
}
#[test]
// A closure yielding `&str` works as a per-node text provider, both with a
// whole-buffer parse and with a chunked parse callback (which must return
// an empty chunk once the offset passes the end of the text).
fn test_text_provider_callback_with_str_slice() {
let text: &str = "// comment";
check_parsing(text, |_node: Node<'_>| iter::once(text));
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| iter::once(text),
);
}
#[test]
// The text-provider closure may yield an owned `String` per node.
fn test_text_provider_callback_with_owned_string_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: String = text.to_owned();
iter::once(slice)
},
);
}
#[test]
// The text-provider closure may yield an owned `Vec<u8>` per node.
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice = text.to_owned().into_bytes();
iter::once(slice)
},
);
}
#[test]
// The text-provider closure may yield an owned `Arc<[u8]>` per node.
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
iter::once(slice)
},
);
}

View file

@ -0,0 +1,797 @@
use std::str;
use tree_sitter::{InputEdit, Parser, Point, Range, Tree};
use super::helpers::fixtures::get_language;
use crate::{
fuzz::edits::Edit,
parse::perform_edit,
tests::{helpers::fixtures::get_test_fixture_language, invert_edit},
};
#[test]
fn test_tree_edit() {
    // Exercises `Tree::edit` position arithmetic: each case below clones the
    // same parsed tree, applies one `InputEdit`, and checks how node byte
    // ranges and `has_changes` flags are adjusted — no reparse is performed.
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    // NOTE(review): the byte-range assertions below (abc at 2..5, !== at
    // 7..10, def at 12..15) require double spaces between the tokens; the
    // literal is written accordingly (single spaces would not satisfy them).
    let tree = parser.parse("  abc  !==  def", None).unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))"
    );

    // edit entirely within the tree's padding:
    // resize the padding of the tree and its leftmost descendants.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 1,
            new_end_byte: 2,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 1),
            new_end_position: Point::new(0, 2),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 3);
        assert_eq!(expr.end_byte(), 16);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 3);
        assert_eq!(child1.end_byte(), 6);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 8);
        assert_eq!(child2.end_byte(), 11);
    }

    // edit starting in the tree's padding but extending into its content:
    // shrink the content to compensate for the expanded padding.
    // NOTE(review): `old_end_byte` is 4 but `old_end_position` is column 5 —
    // these should normally agree for single-line ASCII input; confirm
    // against the upstream test.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 4,
            new_end_byte: 5,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 5),
            new_end_position: Point::new(0, 5),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 5);
        assert_eq!(expr.end_byte(), 16);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 5);
        assert_eq!(child1.end_byte(), 6);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 8);
        assert_eq!(child2.end_byte(), 11);
    }

    // insertion at the edge of a tree's padding:
    // expand the tree's padding.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 2,
            new_end_byte: 4,
            start_position: Point::new(0, 2),
            old_end_position: Point::new(0, 2),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..17);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..7);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 9..12);
    }

    // replacement starting at the edge of the tree's padding:
    // resize the content and not the padding.
    // NOTE(review): this edit and all of its assertions are byte-identical
    // to the "insertion at the edge" case above (old_end_byte equals
    // start_byte, i.e. a pure insertion, not a replacement). As written this
    // case adds no coverage — confirm the intended replacement edit wasn't
    // lost.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 2,
            new_end_byte: 4,
            start_position: Point::new(0, 2),
            old_end_position: Point::new(0, 2),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..17);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..7);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 9..12);
    }

    // deletion that spans more than one child node:
    // shrink subsequent child nodes.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 11,
            new_end_byte: 4,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 11),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..8);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..4);
        assert!(child2.has_changes());
        assert_eq!(child2.byte_range(), 4..4);
        assert!(child3.has_changes());
        assert_eq!(child3.byte_range(), 5..8);
    }

    // insertion at the end of the tree:
    // extend the tree's content.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 15,
            old_end_byte: 15,
            new_end_byte: 16,
            start_position: Point::new(0, 15),
            old_end_position: Point::new(0, 15),
            new_end_position: Point::new(0, 16),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 2..16);
        assert!(!child1.has_changes());
        assert_eq!(child1.byte_range(), 2..5);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 7..10);
        assert!(child3.has_changes());
        assert_eq!(child3.byte_range(), 12..16);
    }

    // replacement that starts within a token and extends beyond the end of the tree:
    // resize the token and empty out any subsequent child nodes.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 3,
            old_end_byte: 90,
            new_end_byte: 4,
            start_position: Point::new(0, 3),
            old_end_position: Point::new(0, 90),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert_eq!(expr.byte_range(), 2..4);
        assert!(expr.has_changes());
        assert_eq!(child1.byte_range(), 2..4);
        assert!(child1.has_changes());
        assert_eq!(child2.byte_range(), 4..4);
        assert!(child2.has_changes());
        assert_eq!(child3.byte_range(), 4..4);
        assert!(child3.has_changes());
    }

    // replacement that starts in whitespace and extends beyond the end of the tree:
    // shift the token's start position and empty out its content.
    {
        let mut tree = tree;
        tree.edit(&InputEdit {
            start_byte: 6,
            old_end_byte: 90,
            new_end_byte: 8,
            start_position: Point::new(0, 6),
            old_end_position: Point::new(0, 90),
            new_end_position: Point::new(0, 8),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert_eq!(expr.byte_range(), 2..8);
        assert!(expr.has_changes());
        assert_eq!(child1.byte_range(), 2..5);
        assert!(!child1.has_changes());
        assert_eq!(child2.byte_range(), 8..8);
        assert!(child2.has_changes());
        assert_eq!(child3.byte_range(), 8..8);
        assert!(child3.has_changes());
    }
}
#[test]
fn test_tree_edit_with_included_ranges() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("html")).unwrap();

    // An HTML document interleaved with ERB-style directives; only the plain
    // HTML spans are handed to the parser as included ranges.
    let source = "<div><% if a %><span>a</span><% else %><span>b</span><% end %></div>";

    let included = [0..5, 15..29, 39..53, 62..68]
        .into_iter()
        .map(|span| Range {
            start_byte: span.start,
            end_byte: span.end,
            start_point: Point::new(0, span.start),
            end_point: Point::new(0, span.end),
        })
        .collect::<Vec<_>>();
    parser.set_included_ranges(&included).unwrap();

    let mut tree = parser.parse(source, None).unwrap();

    // Delete the bytes 29..53 and verify how the tree clips and shifts its
    // stored included ranges in response.
    tree.edit(&InputEdit {
        start_byte: 29,
        old_end_byte: 53,
        new_end_byte: 29,
        start_position: Point::new(0, 29),
        old_end_position: Point::new(0, 53),
        new_end_position: Point::new(0, 29),
    });

    let expected = [(0, 5), (15, 29), (29, 29), (38, 44)].map(|(start, end)| Range {
        start_byte: start,
        end_byte: end,
        start_point: Point::new(0, start),
        end_point: Point::new(0, end),
    });
    assert_eq!(tree.included_ranges(), &expected);
}
#[test]
fn test_tree_cursor() {
    // Walks forwards and backwards through a small Rust struct, checking the
    // cursor's node kinds, named-ness, and start positions, then verifies
    // that `reset_to` copies another cursor's position.
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();

    // NOTE(review): the indentation inside this literal is reconstructed
    // from the `Point` assertions below (`{` at column 29, fields at
    // column 20, `}` at column 16) — confirm against the upstream test.
    let tree = parser
        .parse(
            "
                struct Stuff {
                    a: A,
                    b: Option<B>,
                }
            ",
            None,
        )
        .unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "source_file");

    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "struct_item");

    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "struct");
    assert!(!cursor.node().is_named());

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "type_identifier");
    assert!(cursor.node().is_named());

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "field_declaration_list");
    assert!(cursor.node().is_named());

    // Jump to the last child, then walk backwards through all siblings.
    assert!(cursor.goto_last_child());
    assert_eq!(cursor.node().kind(), "}");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), ",");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), "field_declaration");
    assert!(cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), ",");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), "field_declaration");
    assert!(cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), "{");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 });

    // A second cursor reset to the first one lands on the same node and can
    // navigate upwards from there.
    let mut copy = tree.walk();
    copy.reset_to(&cursor);

    assert_eq!(copy.node().kind(), "{");
    assert!(!copy.node().is_named());

    assert!(copy.goto_parent());
    assert_eq!(copy.node().kind(), "field_declaration_list");
    assert!(copy.node().is_named());

    assert!(copy.goto_parent());
    assert_eq!(copy.node().kind(), "struct_item");
}
#[test]
fn test_tree_cursor_previous_sibling_with_aliases() {
    let mut parser = Parser::new();
    parser
        .set_language(&get_test_fixture_language("aliases_in_root"))
        .unwrap();

    // Two comments followed by content that the grammar aliases to `bar`.
    let text = "# comment\n# \nfoo foo";
    let tree = parser.parse(text, None).unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "document");
    cursor.goto_first_child();
    assert_eq!(cursor.node().kind(), "comment");

    // Walk forward to the aliased node, back to the first comment, and
    // forward again — the sibling links must be symmetric in both
    // directions, even across the aliased root child.
    for kind in ["comment", "bar"] {
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), kind);
    }
    for kind in ["comment", "comment"] {
        assert!(cursor.goto_previous_sibling());
        assert_eq!(cursor.node().kind(), kind);
    }
    for kind in ["comment", "bar"] {
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), kind);
    }
}
#[test]
fn test_tree_cursor_previous_sibling() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();
    let text = "
    // Hi there
    // This is fun!
    // Another one!
    ";
    let tree = parser.parse(text, None).unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "source_file");

    // Start at the last comment and walk backwards through all three,
    // checking the node kind and the comment text at each step.
    let expected_texts = ["// Another one!", "// This is fun!", "// Hi there"];
    assert!(cursor.goto_last_child());
    for (i, expected) in expected_texts.iter().enumerate() {
        if i > 0 {
            assert!(cursor.goto_previous_sibling());
        }
        assert_eq!(cursor.node().kind(), "line_comment");
        assert_eq!(cursor.node().utf8_text(text.as_bytes()).unwrap(), *expected);
    }

    // There is nothing before the first comment.
    assert!(!cursor.goto_previous_sibling());
}
#[test]
fn test_tree_cursor_fields() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser
        .parse("function /*1*/ bar /*2*/ () {}", None)
        .unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "program");
    cursor.goto_first_child();
    assert_eq!(cursor.node().kind(), "function_declaration");
    assert_eq!(cursor.field_name(), None);

    // The comments interleaved with the declaration's children carry no
    // field name, while the identifier and parameter list do.
    let expected = [
        ("function", None),
        ("comment", None),
        ("identifier", Some("name")),
        ("comment", None),
        ("formal_parameters", Some("parameters")),
    ];
    for (i, (kind, field)) in expected.into_iter().enumerate() {
        if i == 0 {
            cursor.goto_first_child();
        } else {
            cursor.goto_next_sibling();
        }
        assert_eq!(cursor.node().kind(), kind);
        assert_eq!(cursor.field_name(), field);
    }
}
#[test]
fn test_tree_cursor_child_for_point() {
    // Exercises `goto_first_child_for_point`: it returns the index of the
    // child containing (or after) the point, or `None` when the point is
    // past the node's children, and moves the cursor only on `Some`.
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    // NOTE(review): the indentation of this literal is reconstructed from
    // the `Point` assertions below (`[` at column 4, `one` at column 8,
    // etc.) — confirm against the upstream test. The leading newline is
    // stripped by the `[1..]` slice.
    let source = &"
    [
        one,
        {
            two: tree
        },
        four, five, six
    ];"[1..];
    let tree = parser.parse(source, None).unwrap();
    let mut c = tree.walk();
    assert_eq!(c.node().kind(), "program");

    // Points past the end of the program: cursor does not move.
    assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 7)), None);
    assert_eq!(c.node().kind(), "program");

    // descend to expression statement
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(0));
    assert_eq!(c.node().kind(), "expression_statement");

    // step into ';' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 6)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        (";", Point::new(6, 5))
    );
    assert!(c.goto_parent());

    // descend into array
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 4)), Some(0));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("array", Point::new(0, 4))
    );

    // step into '[' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(0, 4)), Some(0));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("[", Point::new(0, 4))
    );
    assert!(c.goto_parent());

    // step into identifier 'one' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 0)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(1, 8))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 10)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(1, 8))
    );
    assert!(c.goto_parent());

    // step into first ',' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 11)), Some(2));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        (",", Point::new(1, 11))
    );
    assert!(c.goto_parent());

    // step into identifier 'four' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(5, 8))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(5, 8))
    );
    assert!(c.goto_parent());

    // step into ']' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("]", Point::new(6, 4))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("]", Point::new(6, 4))
    );
    assert!(c.goto_parent());

    // descend into object
    assert_eq!(c.goto_first_child_for_point(Point::new(2, 0)), Some(3));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("object", Point::new(2, 8))
    );
}
#[test]
fn test_tree_node_equality() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();
    let tree = parser.parse("struct A {}", None).unwrap();

    // Two handles to the same underlying node compare equal; distinct nodes
    // from the same tree do not.
    let root_a = tree.root_node();
    let root_b = tree.root_node();
    assert_eq!(root_a, root_b);

    let child_a = root_a.child(0).unwrap();
    let child_b = root_b.child(0).unwrap();
    assert_eq!(child_a, child_b);
    assert_ne!(child_a, root_b);
}
#[test]
fn test_get_changed_ranges() {
    // Verifies `Tree::changed_ranges` across four categories of edit:
    // token-level changes, whitespace-only changes, inserting/removing
    // elements, and wrapping an existing node in a larger expression.
    // Each section applies an edit, asserts the changed ranges, then applies
    // the inverse edit (the tree is mutated in place by
    // `get_changed_ranges`, so order matters).
    let source_code = b"{a: null};\n".to_vec();

    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse(&source_code, None).unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        "(program (expression_statement (object (pair key: (property_identifier) value: (null)))))"
    );

    // Updating one token
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Replace `null` with `nothing` - that token has changed syntax
        let edit = Edit {
            position: index_of(&source_code, "ull"),
            deleted_length: 3,
            inserted_text: b"othing".to_vec(),
        };
        let inverse_edit = invert_edit(&source_code, &edit);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit);
        assert_eq!(ranges, vec![range_of(&source_code, "nothing")]);

        // Replace `nothing` with `null` - that token has changed syntax
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit);
        assert_eq!(ranges, vec![range_of(&source_code, "null")]);
    }

    // Changing only leading whitespace
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Insert leading newline - no changed ranges
        let edit = Edit {
            position: 0,
            deleted_length: 0,
            inserted_text: b"\n".to_vec(),
        };
        let inverse_edit = invert_edit(&source_code, &edit);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit);
        assert_eq!(ranges, vec![]);

        // Remove leading newline - no changed ranges
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit);
        assert_eq!(ranges, vec![]);
    }

    // Inserting elements
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Insert a key-value pair before the `}` - those tokens are changed
        let edit1 = Edit {
            position: index_of(&source_code, "}"),
            deleted_length: 0,
            inserted_text: b", b: false".to_vec(),
        };
        let inverse_edit1 = invert_edit(&source_code, &edit1);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
        assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]);

        // Insert a second pair in front of the one just added.
        let edit2 = Edit {
            position: index_of(&source_code, ", b"),
            deleted_length: 0,
            inserted_text: b", c: 1".to_vec(),
        };
        let inverse_edit2 = invert_edit(&source_code, &edit2);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit2);
        assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]);

        // Remove the middle pair
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit2);
        assert_eq!(ranges, vec![]);

        // Remove the second pair
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
        assert_eq!(ranges, vec![]);
    }

    // Wrapping elements in larger expressions
    {
        let mut tree = tree;
        let mut source_code = source_code.clone();

        // Replace `null` with the binary expression `b === null`
        let edit1 = Edit {
            position: index_of(&source_code, "null"),
            deleted_length: 0,
            inserted_text: b"b === ".to_vec(),
        };
        let inverse_edit1 = invert_edit(&source_code, &edit1);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
        assert_eq!(ranges, vec![range_of(&source_code, "b === null")]);

        // Undo
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
        assert_eq!(ranges, vec![range_of(&source_code, "null")]);
    }
}
#[test]
fn test_consistency_with_mid_codepoint_edit() {
    // Regression test: the PHP source below contains a heredoc whose
    // delimiter is made of multi-byte UTF-8 characters. An edit is applied
    // at byte 17, which (per the test name) falls inside one of those
    // multi-byte codepoints; after editing, reparsing, undoing, and
    // reparsing again, the tree must match the original parse.
    let mut parser = Parser::new();
    parser.set_language(&get_language("php/php")).unwrap();
    let mut source_code =
        b"\n<?php\n\n<<<'\xE5\xAD\x97\xE6\xBC\xA2'\n T\n\xE5\xAD\x97\xE6\xBC\xA2;".to_vec();
    let mut tree = parser.parse(&source_code, None).unwrap();

    // Insert a single '.' byte (46) at position 17.
    let edit = Edit {
        position: 17,
        deleted_length: 0,
        inserted_text: vec![46],
    };
    perform_edit(&mut tree, &mut source_code, &edit).unwrap();
    let mut tree2 = parser.parse(&source_code, Some(&tree)).unwrap();

    // Undo the edit and reparse incrementally; the result must agree with
    // the original tree.
    let inverted = invert_edit(&source_code, &edit);
    perform_edit(&mut tree2, &mut source_code, &inverted).unwrap();
    let tree3 = parser.parse(&source_code, Some(&tree2)).unwrap();
    assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp());
}
#[test]
fn test_tree_cursor_on_aliased_root_with_extra_child() {
    // The block comment inside the scoped path is an "extra" node; the
    // cursor must still reach it as a sibling of the aliased type
    // identifier inside the generic type.
    let source = r"
    fn main() {
        C/* hi */::<D>::E;
    }
    ";
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();
    let tree = parser.parse(source, None).unwrap();

    // Navigate: function_item -> block -> expression_statement ->
    // scoped_identifier -> generic_type.
    let function = tree.root_node().child(0).unwrap();
    let block = function.child(3).unwrap();
    let expression_statement = block.child(1).unwrap();
    let scoped_identifier = expression_statement.child(0).unwrap();
    let generic_type = scoped_identifier.child(0).unwrap();
    assert_eq!(generic_type.kind(), "generic_type");

    let mut cursor = generic_type.walk();
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "type_identifier");
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "block_comment");
}
/// Returns the byte offset of the first occurrence of `substring` in `text`.
///
/// Panics if `text` is not valid UTF-8 or does not contain `substring`.
fn index_of(text: &[u8], substring: &str) -> usize {
    let haystack = std::str::from_utf8(text).unwrap();
    haystack.find(substring).unwrap()
}
/// Builds the single-row `Range` covering the first occurrence of
/// `substring` within `text`.
fn range_of(text: &[u8], substring: &str) -> Range {
    let start = index_of(text, substring);
    let end = start + substring.len();
    Range {
        start_byte: start,
        end_byte: end,
        start_point: Point::new(0, start),
        end_point: Point::new(0, end),
    }
}
/// Applies `edit` to `source_code` and `tree`, reparses incrementally, and
/// returns the ranges that changed between the old and new trees.
///
/// On return, `tree` has been replaced by the newly parsed tree so the
/// caller can chain further edits.
fn get_changed_ranges(
    parser: &mut Parser,
    tree: &mut Tree,
    source_code: &mut Vec<u8>,
    edit: &Edit,
) -> Vec<Range> {
    perform_edit(tree, source_code, edit).unwrap();
    let reparsed = parser.parse(source_code, Some(tree)).unwrap();
    let changed = reparsed
        .changed_ranges(&reparsed)
        .collect::<Vec<_>>();
    let changed = tree.changed_ranges(&reparsed).collect::<Vec<_>>();
    *tree = reparsed;
    changed
}

View file

@ -0,0 +1,273 @@
use std::{fs, sync::LazyLock};
use streaming_iterator::StreamingIterator;
use tree_sitter::{
wasmtime::Engine, Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore,
};
use crate::tests::helpers::{allocations, fixtures::WASM_DIR};
/// A single wasmtime `Engine` shared by every test in this module, created
/// lazily on first use.
static ENGINE: LazyLock<Engine> = LazyLock::new(Engine::default);
#[test]
fn test_wasm_stdlib_symbols() {
    let symbols = tree_sitter::wasm_stdlib_symbols().collect::<Vec<_>>();

    // The exported symbol list must already be in sorted order.
    let sorted = {
        let mut sorted = symbols.clone();
        sorted.sort_unstable();
        sorted
    };
    assert_eq!(symbols, sorted, "symbols aren't sorted");

    // A handful of libc allocation/memory routines must be present.
    for required in ["malloc", "free", "memset", "memcpy"] {
        assert!(symbols.contains(&required));
    }
}
#[test]
fn test_load_wasm_ruby_language() {
    allocations::record(|| {
        // Load the Ruby grammar from its wasm binary, then parse with it.
        let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let language = store.load_language("ruby", &wasm).unwrap();

        let mut parser = Parser::new();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser.parse("class A; end", None).unwrap();
        assert_eq!(
            tree.root_node().to_sexp(),
            "(program (class name: (constant)))"
        );
    });
}
#[test]
fn test_load_wasm_html_language() {
    allocations::record(|| {
        // Load the HTML grammar from its wasm binary, then parse with it.
        let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let language = store.load_language("html", &wasm).unwrap();

        let mut parser = Parser::new();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser
            .parse("<div><span></span><p></p></div>", None)
            .unwrap();
        assert_eq!(
            tree.root_node().to_sexp(),
            "(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))"
        );
    });
}
#[test]
fn test_load_wasm_rust_language() {
    allocations::record(|| {
        // Load the Rust grammar from its wasm binary, then parse with it.
        let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let language = store.load_language("rust", &wasm).unwrap();

        let mut parser = Parser::new();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
    });
}
#[test]
fn test_load_wasm_javascript_language() {
    allocations::record(|| {
        // Load the JavaScript grammar from its wasm binary, then parse with it.
        let wasm = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap();
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let language = store.load_language("javascript", &wasm).unwrap();

        let mut parser = Parser::new();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser.parse("const a = b\nconst c = d", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))");
    });
}
#[test]
fn test_load_multiple_wasm_languages() {
    // Loads four wasm grammars into one store, then parses with each of
    // them — first with the parser that shares the loading store, then with
    // a parser whose fresh store acquires the languages lazily between
    // parses. Each parser runs two rounds to exercise language switching.
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();

        let wasm_cpp = fs::read(WASM_DIR.join("tree-sitter-cpp.wasm")).unwrap();
        let wasm_rs = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let wasm_rb = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
        let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();

        let language_rust = store.load_language("rust", &wasm_rs).unwrap();
        let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap();
        let language_ruby = store.load_language("ruby", &wasm_rb).unwrap();
        let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
        parser.set_wasm_store(store).unwrap();

        let mut parser2 = Parser::new();
        parser2
            .set_wasm_store(WasmStore::new(&ENGINE).unwrap())
            .unwrap();
        let mut query_cursor = QueryCursor::new();

        // First, parse with the store that originally loaded the languages.
        // Then parse with a new parser and wasm store, so that the languages
        // are added one-by-one, in between parses.
        for mut parser in [parser, parser2] {
            for _ in 0..2 {
                let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap();
                let query_typescript =
                    Query::new(&language_typescript, "(class_declaration) @foo").unwrap();

                parser.set_language(&language_cpp).unwrap();
                let tree = parser.parse("A<B> c = d();", None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))"
                );

                parser.set_language(&language_rust).unwrap();
                let source = "const A: B = c();";
                let tree = parser.parse(source, None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))"
                );
                assert_eq!(
                    query_cursor
                        .matches(&query_rust, tree.root_node(), source.as_bytes())
                        .count(),
                    1
                );

                parser.set_language(&language_ruby).unwrap();
                let tree = parser.parse("class A; end", None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(program (class name: (constant)))"
                );

                parser.set_language(&language_typescript).unwrap();
                let tree = parser.parse("class A {}", None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(program (class_declaration name: (type_identifier) body: (class_body)))"
                );
                // NOTE(review): `source` here is still the Rust snippet, not
                // the TypeScript one — the count is unaffected because no
                // text predicates are evaluated, but confirm this is
                // intentional.
                assert_eq!(
                    query_cursor
                        .matches(&query_typescript, tree.root_node(), source.as_bytes())
                        .count(),
                    1
                );
            }
        }
    });
}
#[test]
fn test_load_and_reload_wasm_language() {
    // Checks that a `WasmStore`'s language count tracks the lifetime of the
    // `Language` handles it hands out: dropping a handle releases the
    // store's instance, and the same wasm can be loaded again afterwards.
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let wasm_rust = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();
        let language_rust = store.load_language("rust", &wasm_rust).unwrap();
        let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
        assert_eq!(store.language_count(), 2);

        // When a language is dropped, stores can release their instances of that language.
        drop(language_rust);
        assert_eq!(store.language_count(), 1);

        // Reloading the same wasm restores the count.
        let language_rust = store.load_language("rust", &wasm_rust).unwrap();
        assert_eq!(store.language_count(), 2);

        // Dropping all handles empties the store.
        drop(language_rust);
        drop(language_typescript);
        assert_eq!(store.language_count(), 0);
    });
}
#[test]
fn test_reset_wasm_store() {
    allocations::record(|| {
        // Load the Rust grammar through one store.
        let mut language_store = WasmStore::new(&ENGINE).unwrap();
        let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let language = language_store.load_language("rust", &wasm).unwrap();

        let expected_sexp = "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))";

        // Parse once with the parser's initial wasm store.
        let mut parser = Parser::new();
        parser
            .set_wasm_store(WasmStore::new(&ENGINE).unwrap())
            .unwrap();
        parser.set_language(&language).unwrap();
        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected_sexp);

        // Swap in a brand-new wasm store; the previously assigned language
        // must keep working without being set again.
        parser
            .set_wasm_store(WasmStore::new(&ENGINE).unwrap())
            .unwrap();
        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected_sexp);
    });
}
#[test]
fn test_load_wasm_errors() {
    // Exercises the three `WasmErrorKind`s: a truncated module fails to
    // parse, a wrong language name fails to instantiate, and a corrupted
    // module body fails to compile.
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();

        // Dropping the first byte breaks the wasm container format.
        let bad_wasm = &wasm[1..];
        assert_eq!(
            store.load_language("rust", bad_wasm).unwrap_err(),
            WasmError {
                kind: WasmErrorKind::Parse,
                message: "failed to parse dylink section of wasm module".into(),
            }
        );

        // A valid module that doesn't export the requested language symbol.
        assert_eq!(
            store.load_language("not_rust", &wasm).unwrap_err(),
            WasmError {
                kind: WasmErrorKind::Instantiate,
                message: "module did not contain language function: tree_sitter_not_rust".into(),
            }
        );

        // Zeroing a stretch of the module body corrupts its code section.
        let mut bad_wasm = wasm.clone();
        bad_wasm[300..500].iter_mut().for_each(|b| *b = 0);
        assert_eq!(
            store.load_language("rust", &bad_wasm).unwrap_err().kind,
            WasmErrorKind::Compile,
        );
    });
}
#[test]
fn test_wasm_oom() {
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();
        let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let language = store.load_language("html", &wasm).unwrap();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        // A multi-megabyte tag name is expected to exhaust the wasm
        // instance's memory; the parse must fail (`None`) rather than abort
        // the process.
        let tag_name = "a-b".repeat(2 * 1024 * 1024);
        let code = format!("<{tag_name}>hello world</{tag_name}>");
        assert!(parser.parse(&code, None).is_none());

        // After the failed parse, the parser must still work on a
        // reasonably sized document.
        let tag_name = "a-b".repeat(20);
        let code = format!("<{tag_name}>hello world</{tag_name}>");
        parser.set_language(&language).unwrap();
        let tree = parser.parse(&code, None).unwrap();
        assert_eq!(
            tree.root_node().to_sexp(),
            "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))"
        );
    });
}

View file

@ -0,0 +1,25 @@
#![doc = include_str!("../README.md")]

// Public subcommand/support modules of the tree-sitter CLI library.
pub mod fuzz;
pub mod highlight;
pub mod init;
pub mod input;
pub mod logger;
pub mod parse;
pub mod playground;
pub mod query;
pub mod query_testing;
pub mod tags;
pub mod test;
pub mod test_highlight;
pub mod test_tags;
pub mod util;
pub mod version;
pub mod wasm;

#[cfg(test)]
mod tests;

// To run compile fail tests
// NOTE: `cfg(test)` and `cfg(doctest)` are not enabled in the same
// compilation pass, so this second `mod tests` declaration does not
// conflict with the one above.
#[cfg(doctest)]
mod tests;

130
crates/cli/src/util.rs Normal file
View file

@ -0,0 +1,130 @@
use std::{
path::{Path, PathBuf},
process::{Child, ChildStdin, Command, Stdio},
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
};
use anyhow::{anyhow, Context, Result};
use indoc::indoc;
use tree_sitter::{Parser, Tree};
use tree_sitter_config::Config;
use tree_sitter_loader::Config as LoaderConfig;
/// Preamble written to the output file before `dot`'s SVG output is
/// appended, so the generated file renders as a full-width HTML page.
const HTML_HEADER: &[u8] = b"
<!DOCTYPE html>
<style>
svg { width: 100%; }
</style>
";
/// Builds the multi-line error message shown when no grammar matches
/// `path`, listing the parser directories from the user's loader
/// configuration and pointing at the config file (or how to create one).
#[must_use]
pub fn lang_not_found_for_path(path: &Path, loader_config: &LoaderConfig) -> String {
    let path = path.display();
    format!(
        indoc! {"
            No language found for path `{}`
            If a language should be associated with this file extension, please ensure the path to `{}` is inside one of the following directories as specified by your 'config.json':\n\n{}\n
            If the directory that contains the relevant grammar for `{}` is not listed above, please add the directory to the list of directories in your config file, {}
        "},
        path,
        path,
        loader_config
            .parser_directories
            .iter()
            .enumerate()
            .map(|(i, d)| format!("  {}. {}", i + 1, d.display()))
            .collect::<Vec<_>>()
            .join("  \n"),
        path,
        if let Ok(Some(config_path)) = Config::find_config_file() {
            format!("located at {}", config_path.display())
        } else {
            String::from("which you need to create by running `tree-sitter init-config`")
        }
    )
}
/// Installs a Ctrl-C handler and returns a shared counter that the handler
/// sets to 1 when the signal arrives; callers poll it to cancel work.
#[must_use]
pub fn cancel_on_signal() -> Arc<AtomicUsize> {
    let flag = Arc::new(AtomicUsize::new(0));
    let handler_flag = flag.clone();
    ctrlc::set_handler(move || handler_flag.store(1, Ordering::Relaxed))
        .expect("Error setting Ctrl-C handler");
    flag
}
/// Handle for an in-progress `dot`-rendered log file; dropping it closes
/// the `dot` process's stdin, waits for it to finish, and optionally opens
/// the result in a browser.
pub struct LogSession {
    // Path of the HTML file receiving the rendered SVG output.
    path: PathBuf,
    // The spawned `dot` process; taken out (and waited on) in `Drop`.
    dot_process: Option<Child>,
    // `dot`'s stdin, to which DOT-format graphs are written; dropped first
    // in `Drop` to signal end of input.
    dot_process_stdin: Option<ChildStdin>,
    // Whether to open the rendered file in a browser once `dot` succeeds.
    open_log: bool,
}
/// Renders `tree` as an SVG graph (via `dot`) into the HTML file at `path`.
///
/// NOTE(review): the `quiet` flag is forwarded as `LogSession::new`'s
/// `open_log` argument — confirm the polarity is intended (`quiet = true`
/// would open the rendered file in a browser when the session drops).
pub fn print_tree_graph(tree: &Tree, path: &str, quiet: bool) -> Result<()> {
    let session = LogSession::new(path, quiet)?;
    tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap());
    Ok(())
}
/// Starts streaming the parser's debug graphs (rendered by `dot`) into the
/// HTML file at `path`; the returned session keeps the `dot` process alive
/// until the caller drops it.
pub fn log_graphs(parser: &mut Parser, path: &str, open_log: bool) -> Result<LogSession> {
    let session = LogSession::new(path, open_log)?;
    let dot_stdin = session.dot_process_stdin.as_ref().unwrap();
    parser.print_dot_graphs(dot_stdin);
    Ok(session)
}
impl LogSession {
    /// Creates the output file at `path`, writes the HTML preamble, and
    /// spawns a `dot -Tsvg` process whose stdout is the file, so SVG output
    /// is appended after the preamble.
    ///
    /// Fails if the file cannot be created or the `dot` executable (from
    /// graphviz) cannot be spawned.
    fn new(path: &str, open_log: bool) -> Result<Self> {
        use std::io::Write;

        let mut dot_file = std::fs::File::create(path)?;
        dot_file.write_all(HTML_HEADER)?;
        let mut dot_process = Command::new("dot")
            .arg("-Tsvg")
            .stdin(Stdio::piped())
            .stdout(dot_file)
            .spawn()
            .with_context(|| {
                "Failed to run the `dot` command. Check that graphviz is installed."
            })?;
        let dot_stdin = dot_process
            .stdin
            .take()
            .ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
        Ok(Self {
            path: PathBuf::from(path),
            dot_process: Some(dot_process),
            dot_process_stdin: Some(dot_stdin),
            open_log,
        })
    }
}
impl Drop for LogSession {
    fn drop(&mut self) {
        use std::fs;

        // Closing `dot`'s stdin signals end-of-input so the process can
        // finish rendering; then wait for it to exit.
        drop(self.dot_process_stdin.take().unwrap());
        let output = self.dot_process.take().unwrap().wait_with_output().unwrap();
        if output.status.success() {
            // Only open the result in a browser if something beyond the
            // static HTML header was actually written.
            if self.open_log && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64 {
                webbrowser::open(&self.path.to_string_lossy()).unwrap();
            }
        } else {
            eprintln!(
                "Dot failed: {} {}",
                String::from_utf8_lossy(&output.stdout),
                String::from_utf8_lossy(&output.stderr)
            );
        }
    }
}

264
crates/cli/src/version.rs Normal file
View file

@ -0,0 +1,264 @@
use std::{fs, path::PathBuf, process::Command};
use anyhow::{anyhow, Context, Result};
use regex::Regex;
use tree_sitter_loader::TreeSitterJSON;
/// Rewrites the version number declared in a grammar repository's manifest
/// files (tree-sitter.json, Cargo.toml, package.json, Makefile,
/// CMakeLists.txt, pyproject.toml).
pub struct Version {
    /// The new version string to write into each manifest.
    pub version: String,
    /// Root directory of the grammar repository whose manifests are updated.
    pub current_dir: PathBuf,
}
impl Version {
    /// Creates an updater that will rewrite the manifests under
    /// `current_dir` to declare `version`.
    #[must_use]
    pub const fn new(version: String, current_dir: PathBuf) -> Self {
        Self {
            version,
            current_dir,
        }
    }

    /// Updates the version number in every recognized manifest file that
    /// exists in the grammar repository.
    ///
    /// # Errors
    ///
    /// Returns an error if tree-sitter.json cannot be read or parsed, or if
    /// rewriting any manifest (or regenerating a lockfile) fails.
    pub fn run(self) -> Result<()> {
        let tree_sitter_json = self.current_dir.join("tree-sitter.json");
        let tree_sitter_json =
            serde_json::from_str::<TreeSitterJSON>(&fs::read_to_string(tree_sitter_json)?)?;

        let is_multigrammar = tree_sitter_json.grammars.len() > 1;

        self.update_treesitter_json().with_context(|| {
            format!(
                "Failed to update tree-sitter.json at {}",
                self.current_dir.display()
            )
        })?;
        self.update_cargo_toml().with_context(|| {
            format!(
                "Failed to update Cargo.toml at {}",
                self.current_dir.display()
            )
        })?;
        self.update_package_json().with_context(|| {
            format!(
                "Failed to update package.json at {}",
                self.current_dir.display()
            )
        })?;
        self.update_makefile(is_multigrammar).with_context(|| {
            format!(
                "Failed to update Makefile at {}",
                self.current_dir.display()
            )
        })?;
        self.update_cmakelists_txt().with_context(|| {
            format!(
                "Failed to update CMakeLists.txt at {}",
                self.current_dir.display()
            )
        })?;
        self.update_pyproject_toml().with_context(|| {
            format!(
                "Failed to update pyproject.toml at {}",
                self.current_dir.display()
            )
        })?;

        Ok(())
    }

    /// Replaces the string value of each `"version":` field in `contents`
    /// with `self.version`, line by line.
    ///
    /// A textual rewrite is used instead of a JSON round-trip so that key
    /// order and formatting are left untouched. Shared by the
    /// tree-sitter.json and package.json updates, which previously
    /// duplicated this logic verbatim.
    fn replace_json_version_fields(&self, contents: &str) -> String {
        contents
            .lines()
            .map(|line| {
                if line.contains("\"version\":") {
                    // Locate the opening and closing quotes of the value
                    // following the `"version":` key.
                    let prefix_index = line.find("\"version\":").unwrap() + "\"version\":".len();
                    let start_quote = line[prefix_index..].find('"').unwrap() + prefix_index + 1;
                    let end_quote = line[start_quote + 1..].find('"').unwrap() + start_quote + 1;
                    format!(
                        "{}{}{}",
                        &line[..start_quote],
                        self.version,
                        &line[end_quote..]
                    )
                } else {
                    line.to_string()
                }
            })
            .collect::<Vec<_>>()
            .join("\n")
            + "\n"
    }

    /// Replaces every line of `contents` that starts with `prefix` with
    /// `replacement`, keeping all other lines intact. Shared by the
    /// Cargo.toml, Makefile, and pyproject.toml updates.
    fn replace_prefixed_lines(contents: &str, prefix: &str, replacement: &str) -> String {
        contents
            .lines()
            .map(|line| {
                if line.starts_with(prefix) {
                    replacement.to_string()
                } else {
                    line.to_string()
                }
            })
            .collect::<Vec<_>>()
            .join("\n")
            + "\n"
    }

    /// Rewrites the `"version"` field of tree-sitter.json.
    fn update_treesitter_json(&self) -> Result<()> {
        let path = self.current_dir.join("tree-sitter.json");
        let contents = fs::read_to_string(&path)?;
        fs::write(path, self.replace_json_version_fields(&contents))?;
        Ok(())
    }

    /// Rewrites the `version` field of Cargo.toml (if present) and
    /// regenerates Cargo.lock when one exists and `cargo` is runnable.
    fn update_cargo_toml(&self) -> Result<()> {
        if !self.current_dir.join("Cargo.toml").exists() {
            return Ok(());
        }

        let cargo_toml = fs::read_to_string(self.current_dir.join("Cargo.toml"))?;
        let cargo_toml = Self::replace_prefixed_lines(
            &cargo_toml,
            "version =",
            &format!("version = \"{}\"", self.version),
        );

        fs::write(self.current_dir.join("Cargo.toml"), cargo_toml)?;

        if self.current_dir.join("Cargo.lock").exists() {
            let Ok(cmd) = Command::new("cargo")
                .arg("generate-lockfile")
                .arg("--offline")
                .current_dir(&self.current_dir)
                .output()
            else {
                return Ok(()); // cargo is not `executable`, ignore
            };

            if !cmd.status.success() {
                let stderr = String::from_utf8_lossy(&cmd.stderr);
                return Err(anyhow!(
                    "Failed to run `cargo generate-lockfile`:\n{stderr}"
                ));
            }
        }

        Ok(())
    }

    /// Rewrites the `"version"` field of package.json (if present) and
    /// refreshes package-lock.json when one exists and `npm` is runnable.
    fn update_package_json(&self) -> Result<()> {
        if !self.current_dir.join("package.json").exists() {
            return Ok(());
        }

        let package_json = fs::read_to_string(self.current_dir.join("package.json"))?;
        fs::write(
            self.current_dir.join("package.json"),
            self.replace_json_version_fields(&package_json),
        )?;

        if self.current_dir.join("package-lock.json").exists() {
            let Ok(cmd) = Command::new("npm")
                .arg("install")
                .arg("--package-lock-only")
                .current_dir(&self.current_dir)
                .output()
            else {
                return Ok(()); // npm is not `executable`, ignore
            };

            if !cmd.status.success() {
                let stderr = String::from_utf8_lossy(&cmd.stderr);
                return Err(anyhow!("Failed to run `npm install`:\n{stderr}"));
            }
        }

        Ok(())
    }

    /// Rewrites the `VERSION` variable in the Makefile, if present.
    fn update_makefile(&self, is_multigrammar: bool) -> Result<()> {
        let makefile = if is_multigrammar {
            // NOTE(review): this branch checks for `common/common.mak` but
            // then reads and writes `Makefile` — confirm whether
            // `common.mak` was meant to be updated instead.
            if !self.current_dir.join("common").join("common.mak").exists() {
                return Ok(());
            }
            fs::read_to_string(self.current_dir.join("Makefile"))?
        } else {
            if !self.current_dir.join("Makefile").exists() {
                return Ok(());
            }
            fs::read_to_string(self.current_dir.join("Makefile"))?
        };

        let makefile = Self::replace_prefixed_lines(
            &makefile,
            "VERSION",
            &format!("VERSION := {}", self.version),
        );

        fs::write(self.current_dir.join("Makefile"), makefile)?;

        Ok(())
    }

    /// Rewrites the `VERSION "x.y.z"` argument in CMakeLists.txt, if present.
    fn update_cmakelists_txt(&self) -> Result<()> {
        if !self.current_dir.join("CMakeLists.txt").exists() {
            return Ok(());
        }

        let cmake = fs::read_to_string(self.current_dir.join("CMakeLists.txt"))?;
        // `Regex::replace` only replaces the first occurrence.
        let re = Regex::new(r#"(\s*VERSION\s+)"[0-9]+\.[0-9]+\.[0-9]+""#)?;
        let cmake = re.replace(&cmake, format!(r#"$1"{}""#, self.version));

        fs::write(self.current_dir.join("CMakeLists.txt"), cmake.as_bytes())?;

        Ok(())
    }

    /// Rewrites the `version` field of pyproject.toml, if present.
    fn update_pyproject_toml(&self) -> Result<()> {
        if !self.current_dir.join("pyproject.toml").exists() {
            return Ok(());
        }

        let pyproject_toml = fs::read_to_string(self.current_dir.join("pyproject.toml"))?;
        let pyproject_toml = Self::replace_prefixed_lines(
            &pyproject_toml,
            "version =",
            &format!("version = \"{}\"", self.version),
        );

        fs::write(self.current_dir.join("pyproject.toml"), pyproject_toml)?;

        Ok(())
    }
}

117
crates/cli/src/wasm.rs Normal file
View file

@ -0,0 +1,117 @@
use std::{
fs,
path::{Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use tree_sitter::wasm_stdlib_symbols;
use tree_sitter_generate::parse_grammar::GrammarJSON;
use tree_sitter_loader::Loader;
use wasmparser::Parser;
/// Reads the compiled `tree-sitter-<name>.wasm` module from `language_dir`,
/// returning the grammar name together with the module bytes.
///
/// # Errors
///
/// Returns an error if the grammar name cannot be determined or the wasm
/// file cannot be read (e.g. `tree-sitter build --wasm` was never run).
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
    // Propagate the error instead of panicking: the previous
    // `.with_context(..).unwrap()` defeated this function's `Result`
    // return type.
    let grammar_name =
        get_grammar_name(language_dir).with_context(|| "Failed to get wasm filename")?;
    let wasm_filename = format!("tree-sitter-{grammar_name}.wasm");
    let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| {
        format!("Failed to read {wasm_filename}. Run `tree-sitter build --wasm` first.")
    })?;
    Ok((grammar_name, contents))
}
/// Returns the grammar's name as declared in `<language_dir>/src/grammar.json`.
///
/// # Errors
///
/// Returns an error if grammar.json cannot be read or parsed.
pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
    let grammar_json_path = language_dir.join("src").join("grammar.json");
    let contents = fs::read_to_string(&grammar_json_path).with_context(|| {
        format!(
            "Failed to read grammar file {}",
            grammar_json_path.display()
        )
    })?;
    let grammar: GrammarJSON = serde_json::from_str(&contents).with_context(|| {
        format!(
            "Failed to parse grammar file {}",
            grammar_json_path.display()
        )
    })?;
    Ok(grammar.name)
}
/// Compiles the grammar in `language_dir` to a wasm module, then verifies
/// that the module only imports symbols that are available to wasm parsers.
///
/// The output is written to `output_file` if given, otherwise to
/// `tree-sitter-<name>.wasm` inside `output_dir`.
///
/// # Errors
///
/// Returns an error if the grammar name cannot be determined, compilation
/// fails, the produced module cannot be read or parsed, or the external
/// scanner imports symbols unavailable to wasm parsers.
pub fn compile_language_to_wasm(
    loader: &Loader,
    root_dir: Option<&Path>,
    language_dir: &Path,
    output_dir: &Path,
    output_file: Option<PathBuf>,
) -> Result<()> {
    let grammar_name = get_grammar_name(language_dir)?;
    let output_filename =
        output_file.unwrap_or_else(|| output_dir.join(format!("tree-sitter-{grammar_name}.wasm")));
    let src_path = language_dir.join("src");
    let scanner_path = loader.get_scanner_path(&src_path);
    loader.compile_parser_to_wasm(
        &grammar_name,
        root_dir,
        &src_path,
        // Pass only the scanner's file name, resolved relative to `src`.
        // (`and_then(|p| Some(..?))` simplified to the equivalent `map`.)
        scanner_path
            .as_ref()
            .and_then(|p| p.file_name().map(Path::new)),
        &output_filename,
    )?;

    // Exit with an error if the external scanner uses symbols from the
    // C or C++ standard libraries that aren't available to wasm parsers.
    let stdlib_symbols = wasm_stdlib_symbols().collect::<Vec<_>>();
    // Symbols supplied by the wasm dynamic-linking convention.
    // (A duplicate "__table_base" entry was removed; membership checks are
    // unaffected.)
    let dylink_symbols = [
        "__indirect_function_table",
        "__memory_base",
        "__stack_pointer",
        "__table_base",
        "memory",
    ];
    // Symbols provided by the tree-sitter wasm runtime itself.
    let builtin_symbols = [
        "__assert_fail",
        "__cxa_atexit",
        "abort",
        "emscripten_notify_memory_growth",
        "tree_sitter_debug_message",
        "proc_exit",
    ];

    let mut missing_symbols = Vec::new();
    let wasm_bytes = fs::read(&output_filename)?;
    let parser = Parser::new(0);
    for payload in parser.parse_all(&wasm_bytes) {
        if let wasmparser::Payload::ImportSection(imports) = payload? {
            for import in imports {
                let import = import?.name;
                if !builtin_symbols.contains(&import)
                    && !stdlib_symbols.contains(&import)
                    && !dylink_symbols.contains(&import)
                {
                    missing_symbols.push(import);
                }
            }
        }
    }

    if !missing_symbols.is_empty() {
        Err(anyhow!(
            concat!(
                "This external scanner uses a symbol that isn't available to wasm parsers.\n",
                "\n",
                "Missing symbols:\n",
                " {}\n",
                "\n",
                "Available symbols:\n",
                " {}",
            ),
            missing_symbols.join("\n "),
            stdlib_symbols.join("\n ")
        ))?;
    }

    Ok(())
}