Merge branch 'master' into wasm-language

Max Brunsfeld 2023-10-27 11:57:04 +01:00
commit f4e2f68f14
161 changed files with 10293 additions and 4253 deletions


@ -57,6 +57,7 @@ struct ParseTableBuilder<'a> {
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
actual_conflicts: HashSet<Vec<Symbol>>,
parse_table: ParseTable,
}
@ -132,6 +133,20 @@ impl<'a> ParseTableBuilder<'a> {
)?;
}
if !self.actual_conflicts.is_empty() {
println!("Warning: unnecessary conflicts");
for conflict in &self.actual_conflicts {
println!(
" {}",
conflict
.iter()
.map(|symbol| format!("`{}`", self.symbol_name(symbol)))
.collect::<Vec<_>>()
.join(", ")
);
}
}
Ok((self.parse_table, self.parse_state_info_by_id))
}
@ -582,6 +597,7 @@ impl<'a> ParseTableBuilder<'a> {
.expected_conflicts
.contains(&actual_conflict)
{
self.actual_conflicts.remove(&actual_conflict);
return Ok(());
}
@ -964,6 +980,7 @@ pub(crate) fn build_parse_table<'a>(
inlines: &'a InlinedProductionMap,
variable_info: &'a Vec<VariableInfo>,
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect();
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
populate_following_tokens(
@ -979,6 +996,7 @@ pub(crate) fn build_parse_table<'a>(
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
actual_conflicts,
state_ids_by_item_set: IndexMap::default(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),
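
For context, the `actual_conflicts` changes above seed a set with every conflict declared in the grammar, remove each one that is actually encountered while building parse states, and warn about whatever is left. A minimal standalone sketch of that bookkeeping, with `usize` standing in for `Symbol` (hypothetical, not code from this commit):

use std::collections::HashSet;

// Seed the set with every declared conflict, drop each one that is
// actually used, and report the leftovers as unnecessary.
fn report_unused_conflicts(expected: &[Vec<usize>], used: &[Vec<usize>]) {
    let mut actual: HashSet<Vec<usize>> = expected.iter().cloned().collect();
    for conflict in used {
        actual.remove(conflict);
    }
    for conflict in &actual {
        println!("Warning: unnecessary conflict: {conflict:?}");
    }
}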


@ -390,12 +390,12 @@ mod tests {
Variable {
name: "token_0".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[a-f]1|0x\\d"),
rule: Rule::pattern("[a-f]1|0x\\d", ""),
},
Variable {
name: "token_1".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("d*ef"),
rule: Rule::pattern("d*ef", ""),
},
],
})
@ -426,7 +426,7 @@ mod tests {
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "instanceof".to_string(),
@ -471,7 +471,7 @@ mod tests {
#[test]
fn test_token_conflicts_with_separators() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
@ -498,7 +498,7 @@ mod tests {
#[test]
fn test_token_conflicts_with_open_ended_tokens() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
@ -508,7 +508,7 @@ mod tests {
Variable {
name: "anything".to_string(),
kind: VariableType::Named,
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")),
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")),
},
],
})


@ -181,7 +181,11 @@ function normalize(value) {
value
};
case RegExp:
return {
return value.flags ? {
type: 'PATTERN',
value: value.source,
flags: value.flags
} : {
type: 'PATTERN',
value: value.source
};
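
With this change, a rule written as `/foo/i` in grammar.js serializes to `{"type": "PATTERN", "value": "foo", "flags": "i"}`, while a flagless `/foo/` keeps the previous two-field shape, so existing grammar.json files round-trip unchanged.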


@ -63,7 +63,7 @@
},
"supertypes": {
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"type": "array",
"items": {
"description": "the name of a rule in `rules` or `extras`",


@ -21,10 +21,10 @@ use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::{env, fs};
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
@ -44,25 +44,27 @@ pub fn generate_parser_in_directory(
abi_version: usize,
generate_bindings: bool,
report_symbol_name: Option<&str>,
js_runtime: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
// Read the grammar.json.
let grammar_json = match grammar_path {
Some(path) => load_grammar_file(path.as_ref(), js_runtime)?,
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
load_grammar_file(&grammar_js_path, js_runtime)?
}
};
// Ensure that the output directories exist.
fs::create_dir_all(&src_path)?;
fs::create_dir_all(&header_path)?;
// Read the grammar.json.
let grammar_json;
match grammar_path {
Some(path) => {
grammar_json = load_grammar_file(path.as_ref())?;
}
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
grammar_json = load_grammar_file(&grammar_js_path)?;
fs::write(&src_path.join("grammar.json"), &grammar_json)?;
}
if grammar_path.is_none() {
fs::write(&src_path.join("grammar.json"), &grammar_json)
.with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?;
}
// Parse and preprocess the grammar.
@ -155,10 +157,18 @@ fn generate_parser_for_grammar_with_opts(
})
}
pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
if grammar_path.is_dir() {
return Err(anyhow!(
"Path to a grammar file with `.js` or `.json` extension is required"
));
}
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path)?),
Some("json") => Ok(fs::read_to_string(grammar_path)?),
Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)
.with_context(|| "Failed to load grammar.js")?),
Some("json") => {
Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
}
_ => Err(anyhow!(
"Unknown grammar file extension: {:?}",
grammar_path
@ -166,21 +176,24 @@ pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
}
}
fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
let mut node_process = Command::new("node")
let js_runtime = js_runtime.unwrap_or("node");
let mut node_process = Command::new(js_runtime)
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to run `node`");
.with_context(|| format!("Failed to run `{js_runtime}`"))?;
let mut node_stdin = node_process
.stdin
.take()
.expect("Failed to open stdin for node");
.with_context(|| "Failed to open stdin for node")?;
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
.expect("Could not parse this package's version as semver.");
.with_context(|| "Could not parse this package's version as semver.")?;
write!(
node_stdin,
"global.TREE_SITTER_CLI_VERSION_MAJOR = {};
@ -188,22 +201,22 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
global.TREE_SITTER_CLI_VERSION_PATCH = {};",
cli_version.major, cli_version.minor, cli_version.patch,
)
.expect("Failed to write tree-sitter version to node's stdin");
.with_context(|| "Failed to write tree-sitter version to node's stdin")?;
let javascript_code = include_bytes!("./dsl.js");
node_stdin
.write(javascript_code)
.expect("Failed to write grammar dsl to node's stdin");
.with_context(|| "Failed to write grammar dsl to node's stdin")?;
drop(node_stdin);
let output = node_process
.wait_with_output()
.expect("Failed to read output from node");
.with_context(|| "Failed to read output from node")?;
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => return Err(anyhow!("Node process exited with status {}", code)),
}
let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node");
let mut result =
String::from_utf8(output.stdout).with_context(|| "Got invalid UTF8 from node")?;
result.push('\n');
Ok(result)
}
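
A sketch of how the new `js_runtime` parameter is meant to be called from this module. This is a hypothetical call site: whatever runtime is named must, like `node`, evaluate a JavaScript program supplied on stdin (on some systems the binary is installed as `nodejs`):

use std::path::Path;
use anyhow::Result;

fn load_example(js_runtime_flag: Option<&str>) -> Result<String> {
    // `None` keeps the default `node`; `Some("nodejs")` names an alternative.
    load_grammar_file(Path::new("grammar.js"), js_runtime_flag)
}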


@ -1172,12 +1172,12 @@ mod tests {
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "foo_identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[\\w-]+"),
rule: Rule::pattern("[\\w-]+", ""),
},
],
..Default::default()
@ -1275,8 +1275,8 @@ mod tests {
name: "script".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("a".to_string(), Rule::pattern("hi")),
Rule::field("b".to_string(), Rule::pattern("bye")),
Rule::field("a".to_string(), Rule::pattern("hi", "")),
Rule::field("b".to_string(), Rule::pattern("bye", "")),
]),
}],
..Default::default()


@ -19,6 +19,7 @@ enum RuleJSON {
},
PATTERN {
value: String,
flags: Option<String>,
},
SYMBOL {
name: String,
@ -143,7 +144,21 @@ fn parse_rule(json: RuleJSON) -> Rule {
} => Rule::alias(parse_rule(*content), value, named),
RuleJSON::BLANK => Rule::Blank,
RuleJSON::STRING { value } => Rule::String(value),
RuleJSON::PATTERN { value } => Rule::Pattern(value),
RuleJSON::PATTERN { value, flags } => Rule::Pattern(
value,
flags.map_or(String::new(), |f| {
f.chars()
.filter(|c| {
if *c == 'i' {
    true
} else {
    // the unicode flag is silently ignored
    if *c != 'u' {
        eprintln!("Warning: unsupported flag {c}");
    }
    false
}
})
.collect()
}),
),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
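
A quick illustration of the flag filtering above, copied into a standalone helper (hypothetical, for demonstration only):

fn filter_flags(flags: &str) -> String {
    flags
        .chars()
        .filter(|c| {
            if *c == 'i' {
                true
            } else {
                if *c != 'u' {
                    eprintln!("Warning: unsupported flag {c}");
                }
                false
            }
        })
        .collect()
}

fn main() {
    assert_eq!(filter_flags("i"), "i"); // supported, kept
    assert_eq!(filter_flags("iu"), "i"); // `u` dropped silently
    assert_eq!(filter_flags("g"), ""); // warns, then drops
}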


@ -139,10 +139,10 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<Lexi
impl NfaBuilder {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
match rule {
Rule::Pattern(s) => {
Rule::Pattern(s, f) => {
let s = preprocess_regex(s);
let ast = parse::Parser::new().parse(&s)?;
self.expand_regex(&ast, next_state_id)
self.expand_regex(&ast, next_state_id, f.contains('i'))
}
Rule::String(s) => {
for c in s.chars().rev() {
@ -210,12 +210,42 @@ impl NfaBuilder {
}
}
fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result<bool> {
fn expand_regex(
&mut self,
ast: &Ast,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
fn inverse_char(c: char) -> char {
match c {
'a'..='z' => (c as u8 - b'a' + b'A') as char,
'A'..='Z' => (c as u8 - b'A' + b'a') as char,
c => c,
}
}
fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet {
for char in chars.clone().chars() {
let inverted = inverse_char(char);
if char != inverted {
chars = chars.add_char(inverted);
}
}
chars
}
match ast {
Ast::Empty(_) => Ok(false),
Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")),
Ast::Literal(literal) => {
self.push_advance(CharacterSet::from_char(literal.c), next_state_id);
let mut char_set = CharacterSet::from_char(literal.c);
if case_insensitive {
let inverted = inverse_char(literal.c);
if literal.c != inverted {
char_set = char_set.add_char(inverted);
}
}
self.push_advance(char_set, next_state_id);
Ok(true)
}
Ast::Dot(_) => {
@ -229,6 +259,9 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
@ -237,6 +270,9 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
@ -245,48 +281,56 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
},
Ast::Repetition(repetition) => match repetition.op.kind {
RepetitionKind::ZeroOrOne => {
self.expand_zero_or_one(&repetition.ast, next_state_id)
self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::OneOrMore => {
self.expand_one_or_more(&repetition.ast, next_state_id)
self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::ZeroOrMore => {
self.expand_zero_or_more(&repetition.ast, next_state_id)
self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
self.expand_count(&repetition.ast, count, next_state_id)
self.expand_count(&repetition.ast, count, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
if self.expand_zero_or_more(&repetition.ast, next_state_id)? {
self.expand_count(&repetition.ast, min, next_state_id)
if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? {
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)
} else {
Ok(false)
}
}
RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
let mut result = self.expand_count(&repetition.ast, min, next_state_id)?;
let mut result =
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?;
for _ in min..max {
if result {
next_state_id = self.nfa.last_state_id();
}
if self.expand_zero_or_one(&repetition.ast, next_state_id)? {
if self.expand_zero_or_one(
&repetition.ast,
next_state_id,
case_insensitive,
)? {
result = true;
}
}
Ok(result)
}
},
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id),
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive),
Ast::Alternation(alternation) => {
let mut alternative_state_ids = Vec::new();
for ast in alternation.asts.iter() {
if self.expand_regex(&ast, next_state_id)? {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
@ -304,7 +348,7 @@ impl NfaBuilder {
Ast::Concat(concat) => {
let mut result = false;
for ast in concat.asts.iter().rev() {
if self.expand_regex(&ast, next_state_id)? {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -335,13 +379,18 @@ impl NfaBuilder {
}
}
fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
fn expand_one_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_regex(&ast, split_state_id)? {
if self.expand_regex(&ast, split_state_id, case_insensitive)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
@ -351,8 +400,13 @@ impl NfaBuilder {
}
}
fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_regex(ast, next_state_id)? {
fn expand_zero_or_one(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -360,8 +414,13 @@ impl NfaBuilder {
}
}
fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id)? {
fn expand_zero_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -369,10 +428,16 @@ impl NfaBuilder {
}
}
fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result<bool> {
fn expand_count(
&mut self,
ast: &Ast,
count: u32,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
let mut result = false;
for _ in 0..count {
if self.expand_regex(ast, next_state_id)? {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -475,7 +540,9 @@ impl NfaBuilder {
.add_char(' ')
.add_char('\t')
.add_char('\r')
.add_char('\n'),
.add_char('\n')
.add_char('\x0B')
.add_char('\x0C'),
ClassPerlKind::Word => CharacterSet::empty()
.add_char('_')
.add_range('A', 'Z')
@ -563,7 +630,7 @@ mod tests {
let table = [
// regex with sequences and alternatives
Row {
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")],
separators: vec![],
examples: vec![
("ade1", Some((0, "ade"))),
@ -574,13 +641,13 @@ mod tests {
},
// regex with repeats
Row {
rules: vec![Rule::pattern("a*")],
rules: vec![Rule::pattern("a*", "")],
separators: vec![],
examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
},
// regex with repeats in sequences
Row {
rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")],
separators: vec![],
examples: vec![
("af1", Some((0, "af"))),
@ -591,13 +658,13 @@ mod tests {
},
// regex with character ranges
Row {
rules: vec![Rule::pattern("[a-fA-F0-9]+")],
rules: vec![Rule::pattern("[a-fA-F0-9]+", "")],
separators: vec![],
examples: vec![("A1ff0.", Some((0, "A1ff0")))],
},
// regex with perl character classes
Row {
rules: vec![Rule::pattern("\\w\\d\\s")],
rules: vec![Rule::pattern("\\w\\d\\s", "")],
separators: vec![],
examples: vec![("_0 ", Some((0, "_0 ")))],
},
@ -611,7 +678,7 @@ mod tests {
Row {
rules: vec![Rule::repeat(Rule::seq(vec![
Rule::string("{"),
Rule::pattern("[a-f]+"),
Rule::pattern("[a-f]+", ""),
Rule::string("}"),
]))],
separators: vec![],
@ -624,9 +691,9 @@ mod tests {
// longest match rule
Row {
rules: vec![
Rule::pattern("a|bc"),
Rule::pattern("aa"),
Rule::pattern("bcd"),
Rule::pattern("a|bc", ""),
Rule::pattern("aa", ""),
Rule::pattern("bcd", ""),
],
separators: vec![],
examples: vec![
@ -640,7 +707,7 @@ mod tests {
},
// regex with an alternative including the empty string
Row {
rules: vec![Rule::pattern("a(b|)+c")],
rules: vec![Rule::pattern("a(b|)+c", "")],
separators: vec![],
examples: vec![
("ac.", Some((0, "ac"))),
@ -650,8 +717,8 @@ mod tests {
},
// separators
Row {
rules: vec![Rule::pattern("[a-f]+")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
rules: vec![Rule::pattern("[a-f]+", "")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
(" a", Some((0, "a"))),
(" \nb", Some((0, "b"))),
@ -662,11 +729,11 @@ mod tests {
// shorter tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(2), Rule::pattern("abc")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")),
Rule::pattern("[a-e]+"),
Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")),
Rule::pattern("[a-e]+", ""),
],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
("abceef", Some((0, "abc"))),
("abdeef", Some((1, "abde"))),
@ -676,13 +743,13 @@ mod tests {
// immediate tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")),
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")),
Rule::immediate_token(Rule::prec(
Precedence::Integer(2),
Rule::pattern("[^ab]+"),
Rule::pattern("[^ab]+", ""),
)),
],
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
examples: vec![("cccb", Some((1, "ccc")))],
},
Row {
@ -704,7 +771,7 @@ mod tests {
// nested choices within sequences
Row {
rules: vec![Rule::seq(vec![
Rule::pattern("[0-9]+"),
Rule::pattern("[0-9]+", ""),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::seq(vec![
@ -713,7 +780,7 @@ mod tests {
Rule::Blank,
Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
]),
Rule::pattern("[0-9]+"),
Rule::pattern("[0-9]+", ""),
])]),
]),
])],
@ -730,7 +797,7 @@ mod tests {
},
// nested groups
Row {
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])],
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#, "")])],
separators: vec![],
examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
},
@ -738,11 +805,11 @@ mod tests {
Row {
rules: vec![
// Escaped forward slash (used in JS because '/' is the regex delimiter)
Rule::pattern(r#"\/"#),
Rule::pattern(r#"\/"#, ""),
// Escaped quotes
Rule::pattern(r#"\"\'"#),
Rule::pattern(r#"\"\'"#, ""),
// Quote preceded by a literal backslash
Rule::pattern(r#"[\\']+"#),
Rule::pattern(r#"[\\']+"#, ""),
],
separators: vec![],
examples: vec![
@ -754,8 +821,8 @@ mod tests {
// unicode property escapes
Row {
rules: vec![
Rule::pattern(r#"\p{L}+\P{L}+"#),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#),
Rule::pattern(r#"\p{L}+\P{L}+"#, ""),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#, ""),
],
separators: vec![],
examples: vec![
@ -765,17 +832,17 @@ mod tests {
},
// unicode property escapes in bracketed sets
Row {
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)],
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#, "")],
separators: vec![],
examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))],
},
// unicode character escapes
Row {
rules: vec![
Rule::pattern(r#"\u{00dc}"#),
Rule::pattern(r#"\U{000000dd}"#),
Rule::pattern(r#"\u00de"#),
Rule::pattern(r#"\U000000df"#),
Rule::pattern(r#"\u{00dc}"#, ""),
Rule::pattern(r#"\U{000000dd}"#, ""),
Rule::pattern(r#"\u00de"#, ""),
Rule::pattern(r#"\U000000df"#, ""),
],
separators: vec![],
examples: vec![
@ -789,13 +856,13 @@ mod tests {
Row {
rules: vec![
// Un-escaped curly braces
Rule::pattern(r#"u{[0-9a-fA-F]+}"#),
Rule::pattern(r#"u{[0-9a-fA-F]+}"#, ""),
// Already-escaped curly braces
Rule::pattern(r#"\{[ab]{3}\}"#),
Rule::pattern(r#"\{[ab]{3}\}"#, ""),
// Unicode codepoints
Rule::pattern(r#"\u{1000A}"#),
Rule::pattern(r#"\u{1000A}"#, ""),
// Unicode codepoints (lowercase)
Rule::pattern(r#"\u{1000b}"#),
Rule::pattern(r#"\u{1000b}"#, ""),
],
separators: vec![],
examples: vec![
@ -807,7 +874,7 @@ mod tests {
},
// Emojis
Row {
rules: vec![Rule::pattern(r"\p{Emoji}+")],
rules: vec![Rule::pattern(r"\p{Emoji}+", "")],
separators: vec![],
examples: vec![
("🐎", Some((0, "🐎"))),
@ -820,7 +887,7 @@ mod tests {
},
// Intersection
Row {
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+")],
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")],
separators: vec![],
examples: vec![
("456", Some((0, "456"))),
@ -833,7 +900,7 @@ mod tests {
},
// Difference
Row {
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+")],
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
@ -846,7 +913,7 @@ mod tests {
},
// Symmetric difference
Row {
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+")],
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
@ -867,7 +934,7 @@ mod tests {
// [6-7]: y y
// [3-9]--[5-7]: y y y y y
// final regex: y y y y y y
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+")],
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")],
separators: vec![],
examples: vec![
("01", Some((0, "01"))),


@ -31,7 +31,7 @@ pub(super) fn extract_default_aliases(
for variable in syntax_grammar.variables.iter() {
for production in variable.productions.iter() {
for step in production.steps.iter() {
let mut status = match step.symbol.kind {
let status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
@ -63,7 +63,7 @@ pub(super) fn extract_default_aliases(
}
for symbol in syntax_grammar.extra_symbols.iter() {
let mut status = match symbol.kind {
let status = match symbol.kind {
SymbolType::External => &mut external_status_list[symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
SymbolType::Terminal => &mut terminal_status_list[symbol.index],


@ -49,7 +49,7 @@ pub(super) fn extract_tokens(
}) = variable.rule
{
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
let mut lexical_variable = &mut lexical_variables[index];
let lexical_variable = &mut lexical_variables[index];
lexical_variable.kind = variable.kind;
lexical_variable.name = variable.name;
symbol_replacer.replacements.insert(i, index);
@ -209,7 +209,7 @@ impl TokenExtractor {
} else {
Rule::Metadata {
params: params.clone(),
rule: Box::new(self.extract_tokens_in_rule((&rule).clone())),
rule: Box::new(self.extract_tokens_in_rule(&rule)),
}
}
}
@ -320,7 +320,7 @@ mod test {
"rule_0",
Rule::repeat(Rule::seq(vec![
Rule::string("a"),
Rule::pattern("b"),
Rule::pattern("b", ""),
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
@ -331,8 +331,8 @@ mod test {
]),
])),
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_2", Rule::pattern("b")),
Variable::named("rule_1", Rule::pattern("e", "")),
Variable::named("rule_2", Rule::pattern("b", "")),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
@ -378,12 +378,12 @@ mod test {
lexical_grammar.variables,
vec![
Variable::anonymous("a", Rule::string("a")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")),
Variable::auxiliary(
"rule_0_token2",
Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_1", Rule::pattern("e", "")),
]
);
}
@ -411,7 +411,7 @@ mod test {
fn test_extracting_extra_symbols() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*")),
Variable::named("comment", Rule::pattern("//.*", "")),
]);
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];


@ -203,6 +203,12 @@ pub(super) fn process_inlines(
lexical_grammar.variables[symbol.index].name,
))
}
SymbolType::NonTerminal if symbol.index == 0 => {
return Err(anyhow!(
"Rule `{}` cannot be inlined because it is the first rule",
grammar.variables[symbol.index].name,
))
}
_ => {}
}
}


@ -129,6 +129,7 @@ impl Generator {
}
self.add_lex_modes_list();
self.add_parse_table();
if !self.syntax_grammar.external_tokens.is_empty() {
self.add_external_token_enum();
@ -136,7 +137,6 @@ impl Generator {
self.add_external_scanner_states_list();
}
self.add_parse_table();
self.add_parser_export();
self.buffer
@ -152,49 +152,51 @@ impl Generator {
self.symbol_ids[&Symbol::end()].clone(),
);
self.symbol_map = self
.parse_table
.symbols
.iter()
.map(|symbol| {
let mut mapping = symbol;
self.symbol_map = HashMap::new();
// There can be multiple symbols in the grammar that have the same name and kind,
// due to simple aliases. When that happens, ensure that they map to the same
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
for symbol in self.parse_table.symbols.iter() {
let mut mapping = symbol;
// There can be multiple symbols in the grammar that have the same name and kind,
// due to simple aliases. When that happens, ensure that they map to the same
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
}
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
mapping = other_symbol;
break;
}
}
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
let other_metadata = self.metadata_for_symbol(*other_symbol);
if other_metadata == metadata {
if let Some(mapped) = self.symbol_map.get(other_symbol) {
if mapped == symbol {
break;
}
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
mapping = other_symbol;
break;
}
}
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
let other_metadata = self.metadata_for_symbol(*other_symbol);
if other_metadata == metadata {
mapping = other_symbol;
break;
}
mapping = other_symbol;
break;
}
}
}
(*symbol, *mapping)
})
.collect();
self.symbol_map.insert(*symbol, *mapping);
}
for production_info in &self.parse_table.production_infos {
// Build a list of all field names
@ -254,7 +256,7 @@ impl Generator {
}
fn add_includes(&mut self) {
add_line!(self, "#include <tree_sitter/parser.h>");
add_line!(self, "#include \"tree_sitter/parser.h\"");
add_line!(self, "");
}
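
Two small but deliberate changes in this file: the quoted include makes a generated parser resolve `tree_sitter/parser.h` against its own source tree first (the copy that `generate` writes into `src/tree_sitter/`) rather than requiring a system-wide header, and the renames below give the previously anonymous enums proper names (`ts_symbol_identifiers`, `ts_field_identifiers`, `ts_external_scanner_symbol_identifiers`), presumably so external tooling and debuggers have a type name to refer to.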
@ -336,7 +338,7 @@ impl Generator {
}
fn add_symbol_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_symbol_identifiers {{");
indent!(self);
self.symbol_order.insert(Symbol::end(), 0);
let mut i = 1;
@ -408,7 +410,7 @@ impl Generator {
}
fn add_field_name_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_field_identifiers {{");
indent!(self);
for (i, field_name) in self.field_names.iter().enumerate() {
add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
@ -764,7 +766,6 @@ impl Generator {
indent!(self);
add_line!(self, "START_LEXER();");
add_line!(self, "eof = lexer->eof(lexer);");
add_line!(self, "switch (state) {{");
indent!(self);
@ -879,14 +880,23 @@ impl Generator {
add!(self, " ||{}", line_break);
}
if range.end == range.start {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
} else if range.end as u32 == range.start as u32 + 1 {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
add!(self, " ||{}lookahead == ", line_break);
self.add_character(range.end);
} else {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "(");
self.add_character(range.start);
add!(self, " <= lookahead && lookahead <= ");
@ -1016,7 +1026,7 @@ impl Generator {
}
fn add_external_token_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_external_scanner_symbol_identifiers {{");
indent!(self);
for i in 0..self.syntax_grammar.external_tokens.len() {
add_line!(
@ -1525,54 +1535,93 @@ impl Generator {
fn sanitize_identifier(&self, name: &str) -> String {
let mut result = String::with_capacity(name.len());
for c in name.chars() {
if ('a' <= c && c <= 'z')
|| ('A' <= c && c <= 'Z')
|| ('0' <= c && c <= '9')
|| c == '_'
{
if c.is_ascii_alphanumeric() || c == '_' {
result.push(c);
} else {
let replacement = match c {
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
_ => continue,
};
if !result.is_empty() && !result.ends_with("_") {
result.push('_');
'special_chars: {
let replacement = match c {
' ' if name.len() == 1 => "SPACE",
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
'\0' => "NULL",
'\u{0001}' => "SOH",
'\u{0002}' => "STX",
'\u{0003}' => "ETX",
'\u{0004}' => "EOT",
'\u{0005}' => "ENQ",
'\u{0006}' => "ACK",
'\u{0007}' => "BEL",
'\u{0008}' => "BS",
'\u{000b}' => "VTAB",
'\u{000c}' => "FF",
'\u{000e}' => "SO",
'\u{000f}' => "SI",
'\u{0010}' => "DLE",
'\u{0011}' => "DC1",
'\u{0012}' => "DC2",
'\u{0013}' => "DC3",
'\u{0014}' => "DC4",
'\u{0015}' => "NAK",
'\u{0016}' => "SYN",
'\u{0017}' => "ETB",
'\u{0018}' => "CAN",
'\u{0019}' => "EM",
'\u{001a}' => "SUB",
'\u{001b}' => "ESC",
'\u{001c}' => "FS",
'\u{001d}' => "GS",
'\u{001e}' => "RS",
'\u{001f}' => "US",
'\u{007F}' => "DEL",
'\u{FEFF}' => "BOM",
'\u{0080}'..='\u{FFFF}' => {
result.push_str(&format!("u{:04x}", c as u32));
break 'special_chars;
}
'\u{10000}'..='\u{10FFFF}' => {
result.push_str(&format!("U{:08x}", c as u32));
break 'special_chars;
}
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
' ' => break 'special_chars,
};
if !result.is_empty() && !result.ends_with("_") {
result.push('_');
}
result += replacement;
}
result += replacement;
}
}
result
@ -1585,10 +1634,19 @@ impl Generator {
'\"' => result += "\\\"",
'?' => result += "\\?",
'\\' => result += "\\\\",
'\u{0007}' => result += "\\a",
'\u{0008}' => result += "\\b",
'\u{000b}' => result += "\\v",
'\u{000c}' => result += "\\f",
'\n' => result += "\\n",
'\r' => result += "\\r",
'\t' => result += "\\t",
'\0' => result += "\\0",
'\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32),
'\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32),
'\u{10000}'..='\u{10FFFF}' => {
result.push_str(&format!("\\U{:08x}", c as u32));
}
_ => result.push(c),
}
}
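
With these two changes, every character an identifier or string literal can contain now has a printable spelling: in symbol names, `\u{0007}` becomes `BEL`, a BMP character such as `é` becomes `u00e9`, and an astral character such as `𝒳` (U+1D4B3) becomes `U0001d4b3`; in generated C string literals the same inputs render as `\a`, `\u00e9`, and `\U0001d4b3`.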


@ -56,7 +56,7 @@ pub(crate) struct Symbol {
pub(crate) enum Rule {
Blank,
String(String),
Pattern(String),
Pattern(String, String),
NamedSymbol(String),
Symbol(Symbol),
Choice(Vec<Rule>),
@ -187,8 +187,8 @@ impl Rule {
Rule::String(value.to_string())
}
pub fn pattern(value: &'static str) -> Self {
Rule::Pattern(value.to_string())
pub fn pattern(value: &'static str, flags: &'static str) -> Self {
Rule::Pattern(value.to_string(), flags.to_string())
}
}
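
Downstream of this signature change, every pattern carries its flags explicitly. A minimal sketch of the new call shape — an in-module, hypothetical test, assuming the `PartialEq`/`Debug` derives that the crate's other tests already rely on:

#[cfg(test)]
mod pattern_flags_sketch {
    use super::*;

    #[test]
    fn pattern_carries_flags() {
        // "" means no flags; "i" requests case-insensitive matching.
        assert_eq!(
            Rule::pattern("select", "i"),
            Rule::Pattern("select".to_string(), "i".to_string())
        );
    }
}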


@ -1,4 +1,3 @@
use super::util;
use ansi_term::Color;
use anyhow::Result;
use lazy_static::lazy_static;
@ -281,7 +280,7 @@ fn style_to_css(style: ansi_term::Style) -> String {
fn write_color(buffer: &mut String, color: Color) {
if let Color::RGB(r, g, b) = &color {
write!(buffer, "color: #{:x?}{:x?}{:x?}", r, g, b).unwrap()
write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap()
} else {
write!(
buffer,
@ -349,7 +348,7 @@ pub fn ansi(
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut style_stack = vec![theme.default_style().ansi];
@ -385,17 +384,17 @@ pub fn html(
config: &HighlightConfiguration,
quiet: bool,
print_time: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<()> {
use std::io::Write;
let stdout = io::stdout();
let mut stdout = stdout.lock();
let time = Instant::now();
let cancellation_flag = util::cancel_on_stdin();
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| {
loader.highlight_config_for_injection_string(string)
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut renderer = HtmlRenderer::new();
@ -448,7 +447,7 @@ mod tests {
env::set_var("COLORTERM", "");
parse_style(&mut style, Value::String(DARK_CYAN.to_string()));
assert_eq!(style.ansi.foreground, Some(Color::Fixed(36)));
assert_eq!(style.css, Some("style=\'color: #0af87\'".to_string()));
assert_eq!(style.css, Some("style=\'color: #00af87\'".to_string()));
// junglegreen is not an ANSI color and is preserved when the terminal supports it
env::set_var("COLORTERM", "truecolor");


@ -1,3 +1,5 @@
#![doc = include_str!("../README.md")]
pub mod generate;
pub mod highlight;
pub mod logger;
@ -14,3 +16,7 @@ pub mod wasm;
#[cfg(test)]
mod tests;
// To run compile fail tests
#[cfg(doctest)]
mod tests;


@ -1,12 +1,14 @@
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, Context, Error, Result};
use clap::{App, AppSettings, Arg, SubCommand};
use glob::glob;
use std::path::Path;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::{env, fs, u64};
use tree_sitter::{Parser, WasmStore};
use tree_sitter::{ffi, Parser, Point, WasmStore};
use tree_sitter_cli::{
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags,
util, wasm,
generate, highlight, logger,
parse::{self, ParseFileOptions, ParseOutput},
playground, query, tags, test, test_highlight, test_tags, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_highlight::Highlighter;
@ -82,6 +84,9 @@ fn run() -> Result<()> {
let wasm_arg = Arg::with_name("wasm")
.long("wasm")
.help("compile parsers to wasm instead of native dynamic libraries");
let apply_all_captures_arg = Arg::with_name("apply-all-captures")
.help("Apply all captures to highlights")
.long("apply-all-captures");
let matches = App::new("tree-sitter")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
@ -113,13 +118,33 @@ fn run() -> Result<()> {
)),
)
.arg(Arg::with_name("no-bindings").long("no-bindings"))
.arg(
Arg::with_name("build")
.long("build")
.short("b")
.help("Compile all defined languages in the current dir"),
)
.arg(&debug_build_arg)
.arg(
Arg::with_name("libdir")
.long("libdir")
.takes_value(true)
.value_name("path"),
)
.arg(
Arg::with_name("report-states-for-rule")
.long("report-states-for-rule")
.value_name("rule-name")
.takes_value(true),
)
.arg(Arg::with_name("no-minimize").long("no-minimize")),
.arg(
Arg::with_name("js-runtime")
.long("js-runtime")
.takes_value(true)
.value_name("executable")
.env("TREE_SITTER_JS_RUNTIME")
.help("Use a JavaScript runtime other than node"),
),
)
.subcommand(
SubCommand::with_name("parse")
@ -132,7 +157,8 @@ fn run() -> Result<()> {
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg)
.arg(Arg::with_name("debug-xml").long("xml").short("x"))
.arg(Arg::with_name("output-dot").long("dot"))
.arg(Arg::with_name("output-xml").long("xml").short("x"))
.arg(
Arg::with_name("stat")
.help("Show parsing statistic")
@ -155,6 +181,12 @@ fn run() -> Result<()> {
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(
Arg::with_name("encoding")
.help("The encoding of the input files")
.long("encoding")
.takes_value(true),
),
)
.subcommand(
@ -167,6 +199,8 @@ fn run() -> Result<()> {
.index(1)
.required(true),
)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg.clone().index(2))
.arg(
@ -175,6 +209,12 @@ fn run() -> Result<()> {
.long("byte-range")
.takes_value(true),
)
.arg(
Arg::with_name("row-range")
.help("The range of rows in which the query will be executed")
.long("row-range")
.takes_value(true),
)
.arg(&scope_arg)
.arg(Arg::with_name("captures").long("captures").short("c"))
.arg(Arg::with_name("test").long("test")),
@ -208,7 +248,8 @@ fn run() -> Result<()> {
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg),
.arg(&wasm_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("highlight")
@ -219,11 +260,31 @@ fn run() -> Result<()> {
.long("html")
.short("H"),
)
.arg(
Arg::with_name("check")
.help("Check that highlighting captures conform strictly to standards")
.long("check"),
)
.arg(
Arg::with_name("captures-path")
.help("Path to a file with captures")
.long("captures-path")
.takes_value(true),
)
.arg(
Arg::with_name("query-paths")
.help("Paths to files with queries")
.long("query-paths")
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
.arg(&paths_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -279,6 +340,10 @@ fn run() -> Result<()> {
("generate", Some(matches)) => {
let grammar_path = matches.value_of("grammar-path");
let debug_build = matches.is_present("debug-build");
let build = matches.is_present("build");
let libdir = matches.value_of("libdir");
let js_runtime = matches.value_of("js-runtime");
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
if matches.is_present("report-states") {
Some("")
@ -289,16 +354,18 @@ fn run() -> Result<()> {
if matches.is_present("log") {
logger::init();
}
let abi_version =
matches
.value_of("abi-version")
.map_or(DEFAULT_GENERATE_ABI_VERSION, |version| {
if version == "latest" {
tree_sitter::LANGUAGE_VERSION
} else {
version.parse().expect("invalid abi version flag")
}
});
let abi_version = matches.value_of("abi-version").map_or(
Ok::<_, Error>(DEFAULT_GENERATE_ABI_VERSION),
|version| {
Ok(if version == "latest" {
tree_sitter::LANGUAGE_VERSION
} else {
version
.parse()
.with_context(|| "invalid abi version flag")?
})
},
)?;
let generate_bindings = !matches.is_present("no-bindings");
generate::generate_parser_in_directory(
&current_dir,
@ -306,7 +373,15 @@ fn run() -> Result<()> {
abi_version,
generate_bindings,
report_symbol_name,
js_runtime,
)?;
if build {
if let Some(path) = libdir {
loader = loader::Loader::with_parser_lib_path(PathBuf::from(path));
}
loader.use_debug_build(debug_build);
loader.languages_at_path(&current_dir)?;
}
}
("test", Some(matches)) => {
@ -317,6 +392,12 @@ fn run() -> Result<()> {
let filter = matches.value_of("filter");
let wasm = matches.is_present("wasm");
let mut parser = Parser::new();
let apply_all_captures = matches.is_present("apply-all-captures");
if debug {
// For augmenting debug logging in external scanners
env::set_var("TREE_SITTER_DEBUG", "1");
}
loader.use_debug_build(debug_build);
@ -364,7 +445,12 @@ fn run() -> Result<()> {
if let Some(store) = store.take() {
highlighter.parser().set_wasm_store(store).unwrap();
}
test_highlight::test_highlights(&loader, &mut highlighter, &test_highlight_dir)?;
test_highlight::test_highlights(
&loader,
&mut highlighter,
&test_highlight_dir,
apply_all_captures,
)?;
store = highlighter.parser().take_wasm_store();
}
@ -382,14 +468,33 @@ fn run() -> Result<()> {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_build = matches.is_present("debug-build");
let debug_xml = matches.is_present("debug-xml");
let quiet = matches.is_present("quiet");
let output = if matches.is_present("output-dot") {
ParseOutput::Dot
} else if matches.is_present("output-xml") {
ParseOutput::Xml
} else if matches.is_present("quiet") {
ParseOutput::Quiet
} else {
ParseOutput::Normal
};
let encoding =
matches
.values_of("encoding")
.map_or(Ok(None), |mut e| match e.next() {
Some("utf16") => Ok(Some(ffi::TSInputEncodingUTF16)),
Some("utf8") => Ok(Some(ffi::TSInputEncodingUTF8)),
Some(_) => Err(anyhow!("Invalid encoding. Expected one of: utf8, utf16")),
None => Ok(None),
})?;
let time = matches.is_present("time");
let wasm = matches.is_present("wasm");
let edits = matches
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let mut parser = Parser::new();
if debug {
@ -430,19 +535,21 @@ fn run() -> Result<()> {
.set_language(language)
.context("incompatible language")?;
let this_file_errored = parse::parse_file_at_path(
&mut parser,
let opts = ParseFileOptions {
language,
path,
&edits,
edits: &edits,
max_path_length,
quiet,
time,
output,
print_time: time,
timeout,
debug,
debug_graph,
debug_xml,
Some(&cancellation_flag),
)?;
cancellation_flag: Some(&cancellation_flag),
encoding,
};
let this_file_errored = parse::parse_file_at_path(&mut parser, opts)?;
if should_track_stats {
stats.total_parses += 1;
@ -465,6 +572,8 @@ fn run() -> Result<()> {
("query", Some(matches)) => {
let ordered_captures = matches.values_of("captures").is_some();
let quiet = matches.values_of("quiet").is_some();
let time = matches.values_of("time").is_some();
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let loader_config = config.get()?;
loader.find_all_languages(&loader_config)?;
@ -474,9 +583,17 @@ fn run() -> Result<()> {
matches.value_of("scope"),
)?;
let query_path = Path::new(matches.value_of("query-path").unwrap());
let range = matches.value_of("byte-range").map(|br| {
let r: Vec<&str> = br.split(":").collect();
r[0].parse().unwrap()..r[1].parse().unwrap()
let byte_range = matches.value_of("byte-range").and_then(|arg| {
let mut parts = arg.split(":");
let start = parts.next()?.parse().ok()?;
let end = parts.next().unwrap().parse().ok()?;
Some(start..end)
});
let point_range = matches.value_of("row-range").and_then(|arg| {
let mut parts = arg.split(":");
let start = parts.next()?.parse().ok()?;
let end = parts.next().unwrap().parse().ok()?;
Some(Point::new(start, 0)..Point::new(end, 0))
});
let should_test = matches.is_present("test");
query::query_files_at_paths(
@ -484,8 +601,11 @@ fn run() -> Result<()> {
paths,
query_path,
ordered_captures,
range,
byte_range,
point_range,
should_test,
quiet,
time,
)?;
}
@ -511,13 +631,15 @@ fn run() -> Result<()> {
let time = matches.is_present("time");
let quiet = matches.is_present("quiet");
let html_mode = quiet || matches.is_present("html");
let should_check = matches.is_present("check");
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let apply_all_captures = matches.is_present("apply-all-captures");
if html_mode && !quiet {
println!("{}", highlight::HTML_HEADER);
}
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let mut lang = None;
if let Some(scope) = matches.value_of("scope") {
@ -527,6 +649,15 @@ fn run() -> Result<()> {
}
}
let query_paths = matches.values_of("query-paths").map_or(None, |e| {
Some(
e.collect::<Vec<_>>()
.into_iter()
.map(|s| s.to_string())
.collect::<Vec<_>>(),
)
});
for path in paths {
let path = Path::new(&path);
let (language, language_config) = match lang {
@ -540,7 +671,45 @@ fn run() -> Result<()> {
},
};
if let Some(highlight_config) = language_config.highlight_config(language)? {
if let Some(highlight_config) = language_config.highlight_config(
language,
apply_all_captures,
query_paths.as_deref(),
)? {
if should_check {
let names = if let Some(path) = matches.value_of("captures-path") {
let path = Path::new(path);
let file = fs::read_to_string(path)?;
let capture_names = file
.lines()
.filter_map(|line| {
if line.trim().is_empty() || line.trim().starts_with(';') {
return None;
}
line.split(';').next().map(|s| s.trim().trim_matches('"'))
})
.collect::<HashSet<_>>();
highlight_config.nonconformant_capture_names(&capture_names)
} else {
highlight_config.nonconformant_capture_names(&HashSet::new())
};
if names.is_empty() {
eprintln!("All highlight captures conform to standards.");
} else {
eprintln!(
"Non-standard highlight {} detected:",
if names.len() > 1 {
"captures"
} else {
"capture"
}
);
for name in names {
eprintln!("* {}", name);
}
}
}
let source = fs::read(path)?;
if html_mode {
highlight::html(
@ -550,6 +719,7 @@ fn run() -> Result<()> {
highlight_config,
quiet,
time,
Some(&cancellation_flag),
)?;
} else {
highlight::ansi(
@ -582,7 +752,7 @@ fn run() -> Result<()> {
("playground", Some(matches)) => {
let open_in_browser = !matches.is_present("quiet");
playground::serve(&current_dir, open_in_browser);
playground::serve(&current_dir, open_in_browser)?;
}
("dump-languages", Some(_)) => {


@ -5,7 +5,7 @@ use std::path::Path;
use std::sync::atomic::AtomicUsize;
use std::time::Instant;
use std::{fmt, fs, usize};
use tree_sitter::{InputEdit, LogType, Parser, Point, Tree};
use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree};
#[derive(Debug)]
pub struct Edit {
@ -30,36 +30,47 @@ impl fmt::Display for Stats {
}
}
pub fn parse_file_at_path(
parser: &mut Parser,
path: &Path,
edits: &Vec<&str>,
max_path_length: usize,
quiet: bool,
print_time: bool,
timeout: u64,
debug: bool,
debug_graph: bool,
debug_xml: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<bool> {
#[derive(Copy, Clone)]
pub enum ParseOutput {
Normal,
Quiet,
Xml,
Dot,
}
pub struct ParseFileOptions<'a> {
pub language: Language,
pub path: &'a Path,
pub edits: &'a [&'a str],
pub max_path_length: usize,
pub output: ParseOutput,
pub print_time: bool,
pub timeout: u64,
pub debug: bool,
pub debug_graph: bool,
pub cancellation_flag: Option<&'a AtomicUsize>,
pub encoding: Option<u32>,
}
pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result<bool> {
let mut _log_session = None;
let mut source_code =
fs::read(path).with_context(|| format!("Error reading source file {:?}", path))?;
parser.set_language(opts.language)?;
let mut source_code = fs::read(opts.path)
.with_context(|| format!("Error reading source file {:?}", opts.path))?;
// If the `--cancel` flag was passed, then cancel the parse
// when the user types a newline.
unsafe { parser.set_cancellation_flag(cancellation_flag) };
unsafe { parser.set_cancellation_flag(opts.cancellation_flag) };
// Set a timeout based on the `--time` flag.
parser.set_timeout_micros(timeout);
parser.set_timeout_micros(opts.timeout);
// Render an HTML graph if `--debug-graph` was passed
if debug_graph {
if opts.debug_graph {
_log_session = Some(util::log_graphs(parser, "log.html")?);
}
// Log to stderr if `--debug` was passed
else if debug {
else if opts.debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
io::stderr().write(b" ").unwrap();
@ -69,22 +80,44 @@ pub fn parse_file_at_path(
}
let time = Instant::now();
let tree = parser.parse(&source_code, None);
#[inline(always)]
fn is_utf16_bom(bom_bytes: &[u8]) -> bool {
bom_bytes == [0xFF, 0xFE] || bom_bytes == [0xFE, 0xFF]
}
let tree = match opts.encoding {
Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => {
let source_code_utf16 = source_code
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
parser.parse_utf16(&source_code_utf16, None)
}
None if source_code.len() >= 2 && is_utf16_bom(&source_code[0..2]) => {
let source_code_utf16 = source_code
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
parser.parse_utf16(&source_code_utf16, None)
}
_ => parser.parse(&source_code, None),
};
let stdout = io::stdout();
let mut stdout = stdout.lock();
if let Some(mut tree) = tree {
if debug_graph && !edits.is_empty() {
if opts.debug_graph && !opts.edits.is_empty() {
println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
}
for (i, edit) in edits.iter().enumerate() {
for (i, edit) in opts.edits.iter().enumerate() {
let edit = parse_edit_flag(&source_code, edit)?;
perform_edit(&mut tree, &mut source_code, &edit);
perform_edit(&mut tree, &mut source_code, &edit)?;
tree = parser.parse(&source_code, Some(&tree)).unwrap();
if debug_graph {
if opts.debug_graph {
println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code));
}
}
@ -93,7 +126,7 @@ pub fn parse_file_at_path(
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
let mut cursor = tree.walk();
if !quiet {
if matches!(opts.output, ParseOutput::Normal) {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
@ -149,7 +182,7 @@ pub fn parse_file_at_path(
println!("");
}
if debug_xml {
if matches!(opts.output, ParseOutput::Xml) {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
@ -204,6 +237,10 @@ pub fn parse_file_at_path(
println!("");
}
if matches!(opts.output, ParseOutput::Dot) {
util::print_tree_graph(&tree, "log.html").unwrap();
}
let mut first_error = None;
loop {
let node = cursor.node();
@ -221,13 +258,13 @@ pub fn parse_file_at_path(
}
}
if first_error.is_some() || print_time {
if first_error.is_some() || opts.print_time {
write!(
&mut stdout,
"{:width$}\t{} ms",
path.to_str().unwrap(),
opts.path.to_str().unwrap(),
duration_ms,
width = max_path_length
width = opts.max_path_length
)?;
if let Some(node) = first_error {
let start = node.start_position();
@ -256,29 +293,29 @@ pub fn parse_file_at_path(
}
return Ok(first_error.is_some());
} else if print_time {
} else if opts.print_time {
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
writeln!(
&mut stdout,
"{:width$}\t{} ms (timed out)",
path.to_str().unwrap(),
opts.path.to_str().unwrap(),
duration_ms,
width = max_path_length
width = opts.max_path_length
)?;
}
Ok(false)
}
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit {
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
let start_byte = edit.position;
let old_end_byte = edit.position + edit.deleted_length;
let new_end_byte = edit.position + edit.inserted_text.len();
let start_position = position_for_offset(input, start_byte);
let old_end_position = position_for_offset(input, old_end_byte);
let start_position = position_for_offset(input, start_byte)?;
let old_end_position = position_for_offset(input, old_end_byte)?;
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
let new_end_position = position_for_offset(input, new_end_byte);
let new_end_position = position_for_offset(input, new_end_byte)?;
let edit = InputEdit {
start_byte,
old_end_byte,
@ -288,7 +325,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputE
new_end_position,
};
tree.edit(&edit);
edit
Ok(edit)
}
fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
@ -317,7 +354,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
let row = usize::from_str_radix(row, 10).map_err(|_| error())?;
let column = parts.next().ok_or_else(error)?;
let column = usize::from_str_radix(column, 10).map_err(|_| error())?;
offset_for_position(source_code, Point { row, column })
offset_for_position(source_code, Point { row, column })?
} else {
usize::from_str_radix(position, 10).map_err(|_| error())?
};
@ -332,31 +369,48 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
})
}
fn offset_for_position(input: &Vec<u8>, position: Point) -> usize {
let mut current_position = Point { row: 0, column: 0 };
for (i, c) in input.iter().enumerate() {
if *c as char == '\n' {
current_position.row += 1;
current_position.column = 0;
} else {
current_position.column += 1;
}
if current_position > position {
return i;
pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
let mut row = 0;
let mut offset = 0;
let mut iter = memchr::memchr_iter(b'\n', input);
loop {
if let Some(pos) = iter.next() {
if row < position.row {
row += 1;
offset = pos;
continue;
}
}
offset += 1;
break;
}
return input.len();
if position.row - row > 0 {
return Err(anyhow!("Failed to address a row: {}", position.row));
}
if let Some(pos) = iter.next() {
if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
return Err(anyhow!("Failed to address a column: {}", position.column));
};
} else if input.len() - offset < position.column {
return Err(anyhow!("Failed to address a column over the end"));
}
Ok(offset + position.column)
}
fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point {
let mut result = Point { row: 0, column: 0 };
for c in &input[0..offset] {
if *c as char == '\n' {
result.row += 1;
result.column = 0;
} else {
result.column += 1;
}
pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
if offset > input.len() {
return Err(anyhow!("Failed to address an offset: {offset}"));
}
result
let mut result = Point { row: 0, column: 0 };
let mut last = 0;
for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
result.row += 1;
last = pos;
}
result.column = if result.row > 0 {
offset - last - 1
} else {
offset
};
Ok(result)
}
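Both helpers now return `Result`, so out-of-range rows, columns, and offsets surface as errors instead of being silently clamped. A minimal sketch of the invariant they are meant to uphold (an illustrative test assuming the two functions above are in scope; not part of the commit):

```rust
#[cfg(test)]
mod position_roundtrip {
    use super::*;

    #[test]
    fn offset_and_point_are_inverse() -> Result<()> {
        let input = b"fn main() {\n    let x = 1;\n}\n";
        for offset in 0..=input.len() {
            // Every valid byte offset should map to a Point and back losslessly.
            let point = position_for_offset(input, offset)?;
            assert_eq!(offset_for_position(input, point)?, offset);
        }
        Ok(())
    }
}
```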
View file
@ -3,8 +3,8 @@
<title>tree-sitter THE_LANGUAGE_NAME</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.45.0/codemirror.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.18.0/clusterize.min.css">
<link rel="icon" type="image/png" href="http://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="http://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png" sizes="16x16" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png" sizes="16x16" />
</head>
<body>
View file
@ -1,4 +1,5 @@
use super::wasm;
use anyhow::{anyhow, Context, Result};
use std::{
borrow::Cow,
env, fs,
@ -7,12 +8,11 @@ use std::{
str::{self, FromStr as _},
};
use tiny_http::{Header, Response, Server};
use webbrowser;
macro_rules! optional_resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
} else {
@ -21,7 +21,7 @@ macro_rules! optional_resource {
}
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
} else {
@ -35,7 +35,7 @@ optional_resource!(get_playground_js, "docs/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap())
} else {
@ -43,23 +43,10 @@ fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
}
}
pub fn serve(grammar_path: &Path, open_in_browser: bool) {
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| v.parse::<u16>().expect("Invalid port specification"))
.unwrap_or_else(
|_| get_available_port().expect(
"Couldn't find an available port, try providing a port number via the TREE_SITTER_PLAYGROUND_PORT \
environment variable"
)
);
let addr = format!(
"{}:{}",
env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()),
port
);
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
let server = get_server()?;
let (grammar_name, language_wasm) = wasm::load_language_wasm_file(&grammar_path).unwrap();
let server = Server::http(&addr).expect("Failed to start web server");
let url = format!("http://{}", addr);
let url = format!("http://{}", server.server_addr());
println!("Started playground on: {}", url);
if open_in_browser {
if let Err(_) = webbrowser::open(&url) {
@ -68,13 +55,13 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
}
let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir))
let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_ref()))
.unwrap()
.replace("THE_LANGUAGE_NAME", &grammar_name)
.into_bytes();
let playground_js = get_playground_js(&tree_sitter_dir);
let lib_js = get_lib_js(&tree_sitter_dir);
let lib_wasm = get_lib_wasm(&tree_sitter_dir);
let playground_js = get_playground_js(tree_sitter_dir.as_ref());
let lib_js = get_lib_js(tree_sitter_dir.as_ref());
let lib_wasm = get_lib_wasm(tree_sitter_dir.as_ref());
let html_header = Header::from_str("Content-Type: text/html").unwrap();
let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
@ -107,8 +94,12 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
}
_ => response(b"Not found", &html_header).with_status_code(404),
};
request.respond(res).expect("Failed to write HTTP response");
request
.respond(res)
.with_context(|| "Failed to write HTTP response")?;
}
Ok(())
}
fn redirect<'a>(url: &'a str) -> Response<&'a [u8]> {
@ -123,10 +114,30 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> {
.with_header(header.clone())
}
fn get_available_port() -> Option<u16> {
(8000..12000).find(port_is_available)
fn get_server() -> Result<Server> {
let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned());
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| {
v.parse::<u16>()
.with_context(|| "Invalid port specification")
})
.ok();
let listener = match port {
Some(port) => {
bind_to(&*addr, port?).with_context(|| "Failed to bind to the specified port")?
}
None => get_listener_on_available_port(&*addr)
.with_context(|| "Failed to find a free port to bind to it")?,
};
let server =
Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?;
Ok(server)
}
fn port_is_available(port: &u16) -> bool {
TcpListener::bind(("127.0.0.1", *port)).is_ok()
fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> {
(8000..12000).find_map(|port| bind_to(addr, port))
}
fn bind_to(addr: &str, port: u16) -> Option<TcpListener> {
TcpListener::bind(format!("{addr}:{port}")).ok()
}
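With listener creation factored out, the playground's bind address and port are driven entirely by `TREE_SITTER_PLAYGROUND_ADDR` and `TREE_SITTER_PLAYGROUND_PORT`, and the 8000..12000 scan only runs when no explicit port is given. A usage sketch (the address value is illustrative):

```rust
// Pin the playground to all interfaces, but let it pick a free port.
std::env::set_var("TREE_SITTER_PLAYGROUND_ADDR", "0.0.0.0");
std::env::remove_var("TREE_SITTER_PLAYGROUND_PORT");
let server = get_server()?;
// `serve` prints this same URL before entering its request loop.
println!("http://{}", server.server_addr());
```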
View file
@ -5,16 +5,20 @@ use std::{
io::{self, Write},
ops::Range,
path::Path,
time::Instant,
};
use tree_sitter::{Language, Parser, Query, QueryCursor};
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
pub fn query_files_at_paths(
language: Language,
paths: Vec<String>,
query_path: &Path,
ordered_captures: bool,
range: Option<Range<usize>>,
byte_range: Option<Range<usize>>,
point_range: Option<Range<Point>>,
should_test: bool,
quiet: bool,
print_time: bool,
) -> Result<()> {
let stdout = io::stdout();
let mut stdout = stdout.lock();
@ -24,9 +28,12 @@ pub fn query_files_at_paths(
let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?;
let mut query_cursor = QueryCursor::new();
if let Some(range) = range {
if let Some(range) = byte_range {
query_cursor.set_byte_range(range);
}
if let Some(range) = point_range {
query_cursor.set_point_range(range);
}
let mut parser = Parser::new();
parser.set_language(language)?;
@ -40,22 +47,25 @@ pub fn query_files_at_paths(
fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?;
let tree = parser.parse(&source_code, None).unwrap();
let start = Instant::now();
if ordered_captures {
for (mat, capture_index) in
query_cursor.captures(&query, tree.root_node(), source_code.as_slice())
{
let capture = mat.captures[capture_index];
let capture_name = &query.capture_names()[capture.index as usize];
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
if !quiet {
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
start: capture.node.start_position(),
@ -64,27 +74,31 @@ pub fn query_files_at_paths(
}
} else {
for m in query_cursor.matches(&query, tree.root_node(), source_code.as_slice()) {
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
if !quiet {
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
}
for capture in m.captures {
let start = capture.node.start_position();
let end = capture.node.end_position();
let capture_name = &query.capture_names()[capture.index as usize];
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
)?;
if !quiet {
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
)?;
}
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
@ -103,6 +117,9 @@ pub fn query_files_at_paths(
if should_test {
query_testing::assert_expected_captures(results, path, &mut parser, language)?
}
if print_time {
writeln!(&mut stdout, "{:?}", start.elapsed())?;
}
}
Ok(())
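For reference, a call exercising the new parameters might look like the following sketch (the file paths are hypothetical): restrict matching to the first ten rows, suppress per-capture output, and report only the elapsed time.

```rust
query_files_at_paths(
    language,
    vec!["examples/demo.rs".to_string()],      // hypothetical source file
    Path::new("queries/highlights.scm"),       // hypothetical query file
    false,                                     // ordered_captures
    None,                                      // byte_range
    Some(Point::new(0, 0)..Point::new(10, 0)), // point_range: rows 0..10
    false,                                     // should_test
    true,                                      // quiet
    true,                                      // print_time
)?;
```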
View file
@ -18,9 +18,20 @@ pub struct CaptureInfo {
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
pub position: Point,
pub negative: bool,
pub expected_capture_name: String,
}
impl Assertion {
pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self {
Self {
position: Point::new(row, col),
negative,
expected_capture_name,
}
}
}
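The constructor keeps test setup terse; a negative assertion differs from a positive one only in the flag:

```rust
// Position (3, 8) must be captured as `function`...
let positive = Assertion::new(3, 8, false, "function".to_string());
// ...while position (5, 2) must NOT be captured as `variable`.
let negative = Assertion::new(5, 2, true, "variable".to_string());
assert!(!positive.negative && negative.negative);
```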
/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return a vector of `Assertion`s, each carrying a
/// position, a negation flag, and the expected capture name.
@ -54,6 +65,7 @@ pub fn parse_position_comments(
// to its own column.
let mut has_left_caret = false;
let mut has_arrow = false;
let mut negative = false;
let mut arrow_end = 0;
for (i, c) in text.char_indices() {
arrow_end = i + 1;
@ -69,6 +81,19 @@ pub fn parse_position_comments(
has_left_caret = c == '<';
}
// Find any `!` negation marker after the arrow but before the capture name.
if has_arrow {
for (i, c) in text[arrow_end..].char_indices() {
if c == '!' {
negative = true;
arrow_end += i + 1;
break;
} else if !c.is_whitespace() {
break;
}
}
}
// If the comment node contains an arrow and a highlight name, record the
// highlight name and the position.
if let (true, Some(mat)) =
@ -76,7 +101,8 @@ pub fn parse_position_comments(
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position: position,
position,
negative,
expected_capture_name: mat.as_str().to_string(),
});
}
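In an assertion fixture this surfaces as a `!` between the arrow and the capture name. An illustrative Rust fixture, assuming the existing `<-` position convention:

```rust
let x = 1;
// <- keyword
// <- !variable
```

Both comments address the same position (the `let` token the arrows point at): the first requires the `keyword` capture, the second forbids `variable`.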
View file
@ -23,7 +23,7 @@ pub fn generate_tags(
}
let mut context = TagsContext::new();
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let stdout = io::stdout();
let mut stdout = stdout.lock();
View file
@ -16,11 +16,11 @@ use walkdir::WalkDir;
lazy_static! {
static ref HEADER_REGEX: ByteRegex =
ByteRegexBuilder::new(r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
ByteRegexBuilder::new(r"^(?P<equals>(?:=+){3,})(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^(?P<hyphens>(?:-+){3,})(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
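Capturing the actual delimiter runs means `--update` can rewrite a test file without normalizing every delimiter to 80 characters. A minimal entry in the format these patterns accept (illustrative):

```text
===============
The first test
===============

a b c

---

(a (b c))
```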
@ -40,6 +40,8 @@ pub enum TestEntry {
name: String,
input: Vec<u8>,
output: String,
header_delim_len: usize,
divider_delim_len: usize,
has_fields: bool,
},
}
@ -177,13 +179,15 @@ fn run_tests(
mut indent_level: i32,
failures: &mut Vec<(String, String, String)>,
update: bool,
corrected_entries: &mut Vec<(String, String, String)>,
corrected_entries: &mut Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
match test_entry {
TestEntry::Example {
name,
input,
output,
header_delim_len,
divider_delim_len,
has_fields,
} => {
if let Some(filter) = filter {
@ -191,7 +195,13 @@ fn run_tests(
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&output);
corrected_entries.push((name, input, output));
corrected_entries.push((
name,
input,
output,
header_delim_len,
divider_delim_len,
));
}
return Ok(());
}
@ -201,21 +211,31 @@ fn run_tests(
if !has_fields {
actual = strip_sexp_fields(actual);
}
for _ in 0..indent_level {
print!(" ");
}
print!("{}", " ".repeat(indent_level as usize));
if actual == output {
println!("{}", Colour::Green.paint(&name));
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&output);
corrected_entries.push((name, input, output));
corrected_entries.push((
name,
input,
output,
header_delim_len,
divider_delim_len,
));
}
} else {
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&actual);
corrected_entries.push((name.clone(), input, output));
corrected_entries.push((
name.clone(),
input,
output,
header_delim_len,
divider_delim_len,
));
println!("{}", Colour::Blue.paint(&name));
} else {
println!("{}", Colour::Red.paint(&name));
@ -229,9 +249,7 @@ fn run_tests(
file_path,
} => {
if indent_level > 0 {
for _ in 0..indent_level {
print!(" ");
}
print!("{}", " ".repeat(indent_level as usize));
println!("{}:", name);
}
@ -312,27 +330,32 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
formatted
}
fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> {
fn write_tests(
file_path: &Path,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
let mut buffer = fs::File::create(file_path)?;
write_tests_to_buffer(&mut buffer, corrected_entries)
}
fn write_tests_to_buffer(
buffer: &mut impl Write,
corrected_entries: &Vec<(String, String, String)>,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
for (i, (name, input, output)) in corrected_entries.iter().enumerate() {
for (i, (name, input, output, header_delim_len, divider_delim_len)) in
corrected_entries.iter().enumerate()
{
if i > 0 {
write!(buffer, "\n")?;
}
write!(
buffer,
"{}\n{}\n{}\n{}\n{}\n\n{}\n",
"=".repeat(80),
"=".repeat(*header_delim_len),
name,
"=".repeat(80),
"=".repeat(*header_delim_len),
input,
"-".repeat(80),
"-".repeat(*divider_delim_len),
output.trim()
)?;
}
@ -351,9 +374,18 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
let entry = entry?;
let hidden = entry.file_name().to_str().unwrap_or("").starts_with(".");
if !hidden {
children.push(parse_tests(&entry.path())?);
children.push(entry.path());
}
}
children.sort_by(|a, b| {
a.file_name()
.unwrap_or_default()
.cmp(&b.file_name().unwrap_or_default())
});
let children = children
.iter()
.map(|path| parse_tests(path))
.collect::<io::Result<Vec<TestEntry>>>()?;
Ok(TestEntry::Group {
name,
children,
@ -387,6 +419,7 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
// Ignore any matches whose suffix does not match the first header
// suffix in the file.
let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| {
let header_delim_len = c.name("equals").map(|n| n.as_bytes().len()).unwrap_or(80);
let suffix1 = c
.name("suffix1")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
@ -398,13 +431,17 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let test_name = c
.name("test_name")
.map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string());
Some((header_range, test_name))
Some((header_delim_len, header_range, test_name))
} else {
None
}
});
for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) {
let mut prev_header_len = 80;
for (header_delim_len, header_range, test_name) in
header_matches.chain(Some((80, bytes.len()..bytes.len(), None)))
{
// Find the longest line of dashes following each test description. That line
// separates the input from the expected output. Ignore any matches whose suffix
// does not match the first suffix in the file.
@ -412,19 +449,25 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let divider_range = DIVIDER_REGEX
.captures_iter(&bytes[prev_header_end..header_range.start])
.filter_map(|m| {
let divider_delim_len =
m.name("hyphens").map(|m| m.as_bytes().len()).unwrap_or(80);
let suffix = m
.name("suffix")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix == first_suffix {
let range = m.get(0).unwrap().range();
Some((prev_header_end + range.start)..(prev_header_end + range.end))
Some((
divider_delim_len,
(prev_header_end + range.start)..(prev_header_end + range.end),
))
} else {
None
}
})
.max_by_key(|range| range.len());
.max_by_key(|(_, range)| range.len());
if let Some(divider_range) = divider_range {
if let Some((divider_delim_len, divider_range)) = divider_range {
if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) {
let mut input = bytes[prev_header_end..divider_range.start].to_vec();
@ -449,12 +492,15 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
name: prev_name,
input,
output,
header_delim_len: prev_header_len,
divider_delim_len,
has_fields,
});
}
}
}
prev_name = test_name.unwrap_or(String::new());
prev_header_len = header_delim_len;
prev_header_end = header_range.end;
}
TestEntry::Group {
@ -505,12 +551,16 @@ d
name: "The first test".to_string(),
input: "\na b c\n".as_bytes().to_vec(),
output: "(a (b c))".to_string(),
header_delim_len: 15,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "The second test".to_string(),
input: "d".as_bytes().to_vec(),
output: "(d)".to_string(),
header_delim_len: 16,
divider_delim_len: 3,
has_fields: false,
},
],
@ -559,12 +609,16 @@ abc
name: "Code with dashes".to_string(),
input: "abc\n---\ndefg\n----\nhijkl".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 7,
has_fields: false,
},
TestEntry::Example {
name: "Code ending with dashes".to_string(),
input: "abc\n-----------".as_bytes().to_vec(),
output: "(c (d))".to_string(),
header_delim_len: 25,
divider_delim_len: 19,
has_fields: false,
},
],
@ -608,11 +662,15 @@ abc
"title 1".to_string(),
"input 1".to_string(),
"output 1".to_string(),
80,
80,
),
(
"title 2".to_string(),
"input 2".to_string(),
"output 2".to_string(),
80,
80,
),
];
write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap();
@ -689,18 +747,24 @@ code
name: "sexp with comment".to_string(),
input: "code".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "sexp with comment between".to_string(),
input: "code".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "sexp with ';'".to_string(),
input: "code".as_bytes().to_vec(),
output: "(MISSING \";\")".to_string(),
header_delim_len: 25,
divider_delim_len: 3,
has_fields: false,
}
],
@ -773,18 +837,24 @@ NOT A TEST HEADER
name: "First test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "Second test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "Test name with = symbol".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 25,
divider_delim_len: 3,
has_fields: false,
}
],
@ -828,12 +898,16 @@ code with ----
name: "name\nwith\nnewlines".to_string(),
input: b"a".to_vec(),
output: "(b)".to_string(),
header_delim_len: 15,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "name with === signs".to_string(),
input: b"code with ----".to_vec(),
output: "(d)".to_string(),
header_delim_len: 20,
divider_delim_len: 3,
has_fields: false,
}
]
View file
@ -42,41 +42,74 @@ pub fn test_highlights(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
) -> Result<()> {
println!("syntax highlighting:");
test_highlights_indented(loader, highlighter, directory, apply_all_captures, 2)
}
fn test_highlights_indented(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
indent_level: usize,
) -> Result<()> {
let mut failed = false;
println!("syntax highlighting:");
for highlight_test_file in fs::read_dir(directory)? {
let highlight_test_file = highlight_test_file?;
let test_file_path = highlight_test_file.path();
let test_file_name = highlight_test_file.file_name();
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
" ✓ {} ({} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
println!(
" ✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(" {}", e);
print!(
"{indent:indent_level$}",
indent = "",
indent_level = indent_level * 2
);
if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
println!("{}:", test_file_name.into_string().unwrap());
if let Err(_) = test_highlights_indented(
loader,
highlighter,
&test_file_path,
apply_all_captures,
indent_level + 1,
) {
failed = true;
}
} else {
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language, apply_all_captures, None)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
"✓ {} ({} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
println!(
"✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(
"{indent:indent_level$} {e}",
indent = "",
indent_level = indent_level * 2
);
failed = true;
}
}
}
}
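Highlight fixtures can now be grouped into nested directories, with each level printed at one extra indent. An illustrative layout (file names are hypothetical):

```text
test/highlight/
  basics.rs        -- tested directly against its language's queries
  expressions/     -- non-empty directory: printed as a group header
    calls.rs
    literals.rs
```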
@ -94,9 +127,10 @@ pub fn iterate_assertions(
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
let mut actual_highlights = Vec::new();
for Assertion {
position,
negative,
expected_capture_name: expected_highlight,
} in assertions
{
@ -120,12 +154,13 @@ pub fn iterate_assertions(
break 'highlight_loop;
}
// If the highlight matches the assertion, this test passes. Otherwise,
// If the highlight matches the assertion, or if the highlight doesn't
// match the assertion but it's negative, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if *highlight_name == *expected_highlight {
if (*highlight_name == *expected_highlight) == !negative {
passed = true;
break 'highlight_loop;
} else {
@ -165,68 +200,7 @@ pub fn test_highlight(
let assertions =
parse_position_comments(highlighter.parser(), highlight_config.language, source)?;
iterate_assertions(&assertions, &highlights, &highlight_names)?;
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
for Assertion {
position,
expected_capture_name: expected_highlight,
} in &assertions
{
let mut passed = false;
actual_highlights.clear();
'highlight_loop: loop {
// The assertions are ordered by position, so skip past all of the highlights that
// end at or before this assertion's position.
if let Some(highlight) = highlights.get(i) {
if highlight.1 <= *position {
i += 1;
continue;
}
// Iterate through all of the highlights that start at or before this assertion's,
// position, looking for one that matches the assertion.
let mut j = i;
while let (false, Some(highlight)) = (passed, highlights.get(j)) {
if highlight.0 > *position {
break 'highlight_loop;
}
// If the highlight matches the assertion, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if *highlight_name == *expected_highlight {
passed = true;
break 'highlight_loop;
} else {
actual_highlights.push(highlight_name);
}
j += 1;
}
} else {
break;
}
}
if !passed {
return Err(Failure {
row: position.row,
column: position.column,
expected_highlight: expected_highlight.clone(),
actual_highlights: actual_highlights.into_iter().cloned().collect(),
}
.into());
}
}
Ok(assertions.len())
iterate_assertions(&assertions, &highlights, &highlight_names)
}
pub fn get_highlight_positions(
@ -244,7 +218,7 @@ pub fn get_highlight_positions(
let source = String::from_utf8_lossy(source);
let mut char_indices = source.char_indices();
for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, highlight_config.apply_all_captures)
})? {
match event? {
HighlightEvent::HighlightStart(h) => highlight_stack.push(h),
View file
@ -95,6 +95,7 @@ pub fn test_tag(
let mut actual_tags = Vec::<&String>::new();
for Assertion {
position,
negative,
expected_capture_name: expected_tag,
} in &assertions
{
@ -116,7 +117,7 @@ pub fn test_tag(
}
let tag_name = &tag.2;
if *tag_name == *expected_tag {
if (*tag_name == *expected_tag) == !negative {
passed = true;
break 'tag_loop;
} else {
@ -124,6 +125,9 @@ pub fn test_tag(
}
j += 1;
if tag == tags.last().unwrap() {
break 'tag_loop;
}
}
} else {
break;
View file
@ -0,0 +1,279 @@
use super::helpers::fixtures::get_language;
use std::future::Future;
use std::pin::{pin, Pin};
use std::ptr;
use std::task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker};
use tree_sitter::Parser;
#[test]
fn test_node_in_fut() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let fut_val_fn = || async {
// eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind());
yield_now().await;
root.child(0).unwrap().kind()
};
yield_now().await;
let fut_ref_fn = || async {
// eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind());
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val_fn().await;
let f2 = fut_ref_fn().await;
assert_eq!(f1, f2);
let fut_val = async {
// eprintln!("fut_val: {}", root.child(0).unwrap().kind());
yield_now().await;
root.child(0).unwrap().kind()
};
let fut_ref = async {
// eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind());
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val.await;
let f2 = fut_ref.await;
assert_eq!(f1, f2);
f1
})
.join();
// eprintln!("pended: {pended:?}");
assert_eq!(ret, "comment");
assert_eq!(pended, 5);
}
#[test]
fn test_node_and_cursor_ref_in_fut() {
let (_, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = async {
yield_now().await;
root.to_sexp();
};
yield_now().await;
let fut_ref = async {
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val.await;
fut_ref.await;
cursor_ref.goto_first_child();
})
.join();
assert_eq!(pended, 3);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
let (_, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || async {
yield_now().await;
root.to_sexp();
};
yield_now().await;
let fut_ref = || async move {
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val().await;
fut_val().await;
fut_ref().await;
})
.join();
assert_eq!(pended, 4);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root.to_sexp();
cursor_ref.goto_first_child();
}
};
yield_now().await;
let fut_ref = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
}
};
let (_, p1) = tokio_like_spawn(fut_val()).await.unwrap();
let (_, p2) = tokio_like_spawn(fut_ref()).await.unwrap();
cursor_ref.goto_first_child();
fut_val().await;
fut_val().await;
fut_ref().await;
cursor_ref.goto_first_child();
p1 + p2
})
.join();
assert_eq!(pended, 4);
assert_eq!(ret, 2);
}
fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
// No runtime, just noop waker
let waker = noop_waker();
let mut cx = task::Context::from_waker(&waker);
let mut pending = 0;
let mut future = pin!(future);
let ret = loop {
match future.as_mut().poll(&mut cx) {
Poll::Pending => pending += 1,
Poll::Ready(r) => {
// eprintln!("ready, pended: {pending}");
break r;
}
}
};
JoinHandle::new((ret, pending))
}
async fn yield_now() {
struct SimpleYieldNow {
yielded: bool,
}
impl Future for SimpleYieldNow {
type Output = ();
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
cx.waker().clone().wake();
if self.yielded {
return Poll::Ready(());
}
self.yielded = true;
Poll::Pending
}
}
SimpleYieldNow { yielded: false }.await
}
pub fn noop_waker() -> Waker {
const VTABLE: RawWakerVTable = RawWakerVTable::new(
// Cloning just returns a new no-op raw waker
|_| RAW,
// `wake` does nothing
|_| {},
// `wake_by_ref` does nothing
|_| {},
// Dropping does nothing as we don't allocate anything
|_| {},
);
const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE);
unsafe { Waker::from_raw(RAW) }
}
struct JoinHandle<T> {
data: Option<T>,
}
impl<T> JoinHandle<T> {
fn new(data: T) -> Self {
Self { data: Some(data) }
}
fn join(&mut self) -> T {
self.data.take().unwrap()
}
}
impl<T: Unpin> Future for JoinHandle<T> {
type Output = std::result::Result<T, ()>;
fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
let data = self.get_mut().data.take().unwrap();
Poll::Ready(Ok(data))
}
}
View file
@ -1,7 +1,8 @@
use super::helpers::{
allocations,
edits::{get_random_edit, invert_edit},
fixtures::{fixtures_dir, get_language, get_test_language},
fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
new_seed,
random::Rand,
scope_sequence::ScopeSequence,
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED,
@ -13,70 +14,81 @@ use crate::{
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
util,
};
use std::fs;
use std::{collections::HashMap, env, fs};
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use tree_sitter_proc_macro::test_with_seed;
#[test]
fn test_bash_corpus() {
test_language_corpus("bash");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash(seed: usize) {
test_language_corpus(
"bash",
seed,
Some(&[
// Fragile tests where edit customization changes
// lead to significant parse tree structure changes.
"bash - corpus - commands - Nested Heredocs",
"bash - corpus - commands - Quoted Heredocs",
"bash - corpus - commands - Heredocs with weird characters",
]),
);
}
#[test]
fn test_c_corpus() {
test_language_corpus("c");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_c(seed: usize) {
test_language_corpus("c", seed, None);
}
#[test]
fn test_cpp_corpus() {
test_language_corpus("cpp");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_cpp(seed: usize) {
test_language_corpus("cpp", seed, None);
}
#[test]
fn test_embedded_template_corpus() {
test_language_corpus("embedded-template");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_embedded_template(seed: usize) {
test_language_corpus("embedded-template", seed, None);
}
#[test]
fn test_go_corpus() {
test_language_corpus("go");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_go(seed: usize) {
test_language_corpus("go", seed, None);
}
#[test]
fn test_html_corpus() {
test_language_corpus("html");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_html(seed: usize) {
test_language_corpus("html", seed, None);
}
#[test]
fn test_javascript_corpus() {
test_language_corpus("javascript");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_javascript(seed: usize) {
test_language_corpus("javascript", seed, None);
}
#[test]
fn test_json_corpus() {
test_language_corpus("json");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_json(seed: usize) {
test_language_corpus("json", seed, None);
}
#[test]
fn test_php_corpus() {
test_language_corpus("php");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php(seed: usize) {
test_language_corpus("php", seed, None);
}
#[test]
fn test_python_corpus() {
test_language_corpus("python");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_python(seed: usize) {
test_language_corpus("python", seed, None);
}
#[test]
fn test_ruby_corpus() {
test_language_corpus("ruby");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_ruby(seed: usize) {
test_language_corpus("ruby", seed, None);
}
#[test]
fn test_rust_corpus() {
test_language_corpus("rust");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_rust(seed: usize) {
test_language_corpus("rust", seed, None);
}
fn test_language_corpus(language_name: &str) {
fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<&[&str]>) {
let grammars_dir = fixtures_dir().join("grammars");
let error_corpus_dir = fixtures_dir().join("error_corpus");
let template_corpus_dir = fixtures_dir().join("template_corpus");
@ -98,10 +110,30 @@ fn test_language_corpus(language_name: &str) {
t
}));
let mut skipped = skipped.map(|x| HashMap::<&str, usize>::from_iter(x.iter().map(|x| (*x, 0))));
let language = get_language(language_name);
let mut failure_count = 0;
for test in tests {
println!(" {} example - {}", language_name, test.name);
let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
if log_seed {
println!(" start seed: {}", start_seed);
}
println!();
for (test_index, test) in tests.iter().enumerate() {
let test_name = format!("{language_name} - {}", test.name);
if let Some(skipped) = skipped.as_mut() {
if let Some(counter) = skipped.get_mut(test_name.as_str()) {
println!(" {test_index}. {test_name} - SKIPPED");
*counter += 1;
continue;
}
}
println!(" {test_index}. {test_name}");
let passed = allocations::record(|| {
let mut log_session = None;
@ -116,10 +148,7 @@ fn test_language_corpus(language_name: &str) {
}
if actual_output != test.output {
println!(
"Incorrect initial parse for {} - {}",
language_name, test.name,
);
println!("Incorrect initial parse for {test_name}");
print_diff_key();
print_diff(&actual_output, &test.output);
println!("");
@ -140,7 +169,7 @@ fn test_language_corpus(language_name: &str) {
drop(parser);
for trial in 0..*ITERATION_COUNT {
let seed = *START_SEED + trial;
let seed = start_seed + trial;
let passed = allocations::record(|| {
let mut rand = Rand::new(seed);
let mut log_session = None;
@ -158,10 +187,21 @@ fn test_language_corpus(language_name: &str) {
for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit);
perform_edit(&mut tree, &mut input, &edit).unwrap();
}
// println!(" seed: {}", seed);
if log_seed {
println!(" {test_index}.{trial:<2} seed: {}", seed);
}
if dump_edits {
fs::write(
SCRATCH_BASE_DIR
.join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
&input,
)
.unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -173,16 +213,13 @@ fn test_language_corpus(language_name: &str) {
// Check that the new tree is consistent.
check_consistent_sizes(&tree2, &input);
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
println!(
"\nUnexpected scope change in seed {}\n{}\n\n",
seed, message
);
println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
return false;
}
// Undo all of the edits and re-parse again.
while let Some(edit) = undo_stack.pop() {
perform_edit(&mut tree2, &mut input, &edit);
perform_edit(&mut tree2, &mut input, &edit).unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -198,10 +235,7 @@ fn test_language_corpus(language_name: &str) {
}
if actual_output != test.output {
println!(
"Incorrect parse for {} - {} - seed {}",
language_name, test.name, seed
);
println!("Incorrect parse for {test_name} - seed {seed}");
print_diff_key();
print_diff(&actual_output, &test.output);
println!("");
@ -211,7 +245,7 @@ fn test_language_corpus(language_name: &str) {
// Check that the edited tree is consistent.
check_consistent_sizes(&tree3, &input);
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
eprintln!("Unexpected scope change in seed {}\n{}\n\n", seed, message);
println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
return false;
}
@ -228,6 +262,18 @@ fn test_language_corpus(language_name: &str) {
if failure_count > 0 {
panic!("{} {} corpus tests failed", failure_count, language_name);
}
if let Some(skipped) = skipped.as_mut() {
skipped.retain(|_, v| *v == 0);
if !skipped.is_empty() {
println!("Unmatched skip definitions:");
for k in skipped.keys() {
println!(" {k}");
}
panic!("Unmatched skip definitions need to be removed");
}
}
}
#[test]
@ -255,7 +301,7 @@ fn test_feature_corpus_files() {
grammar_path = test_path.join("grammar.json");
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = generate::load_grammar_file(&grammar_path).unwrap();
let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
if error_message_path.exists() {
@ -424,7 +470,12 @@ fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&s
let mut ranges = Vec::new();
let mut ix = 0;
while ix < input.len() {
let Some(mut start_ix) = input[ix..].windows(2).position(|win| win == start.as_bytes()) else { break };
let Some(mut start_ix) = input[ix..]
.windows(2)
.position(|win| win == start.as_bytes())
else {
break;
};
start_ix += ix + start.len();
let end_ix = input[start_ix..]
.windows(2)
@ -492,6 +543,7 @@ fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
input,
output,
has_fields,
..
} => {
if !prefix.is_empty() {
name.insert_str(0, " - ");
View file
@ -0,0 +1,42 @@
// Tests in this module need to be executed with the UBSAN library enabled:
// ```
// UBSAN_OPTIONS="halt_on_error=1" \
// CFLAGS="-fsanitize=undefined" \
// RUSTFLAGS="-lubsan" \
// cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1
// ```
use super::helpers::query_helpers::assert_query_matches;
use crate::tests::helpers::fixtures::get_language;
use indoc::indoc;
use tree_sitter::Query;
#[test]
fn issue_2162_out_of_bound() {
let language = get_language("java");
assert!(Query::new(language, "(package_declaration _ (_) @name _)").is_ok());
}
#[test]
fn issue_2107_first_child_group_anchor_had_no_effect() {
let language = get_language("c");
let source_code = indoc! {r#"
void fun(int a, char b, int c) { };
"#};
let query = indoc! {r#"
(parameter_list
.
(
(parameter_declaration) @constant
(#match? @constant "^int")
)
)
"#};
let query = Query::new(language, query).unwrap();
assert_query_matches(
language,
&query,
source_code,
&[(0, vec![("constant", "int a")])],
);
}
View file
@ -2,7 +2,7 @@ use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicU64, Ordering::SeqCst},
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
@ -25,8 +25,8 @@ unsafe impl Sync for Allocation {}
#[derive(Default)]
struct AllocationRecorder {
enabled: AtomicBool,
allocation_count: AtomicU64,
outstanding_allocations: Mutex<HashMap<Allocation, u64>>,
allocation_count: AtomicUsize,
outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}
thread_local! {
@ -83,6 +83,9 @@ fn record_alloc(ptr: *mut c_void) {
}
fn record_dealloc(ptr: *mut c_void) {
if ptr.is_null() {
panic!("Zero pointer deallocation!");
}
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
recorder
@ -107,9 +110,13 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void
}
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
record_dealloc(ptr);
let result = realloc(ptr, size);
record_alloc(result);
if ptr.is_null() {
record_alloc(result);
} else if ptr != result {
record_dealloc(ptr);
record_alloc(result);
}
result
}
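The realloc hook now distinguishes three cases instead of unconditionally pairing a dealloc with an alloc. The accounting rule it implements, as a sketch:

```rust
// For `realloc(ptr, size) -> result`:
//   ptr.is_null()  => malloc-like: record one new allocation (result)
//   ptr != result  => the block moved: ptr is freed, result is allocated
//   ptr == result  => resized in place: outstanding allocations unchanged
```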
View file
@ -1,11 +1,46 @@
lazy_static! {
static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
static ref SCRATCH_DIR: PathBuf = {
pub static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
pub static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
pub static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
pub static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
pub static ref SCRATCH_BASE_DIR: PathBuf = {
let result = ROOT_DIR.join("target").join("scratch");
fs::create_dir_all(&result).unwrap();
result
};
pub static ref SCRATCH_DIR: PathBuf = {
// https://doc.rust-lang.org/reference/conditional-compilation.html
let vendor = if cfg!(target_vendor = "apple") {
"apple"
} else if cfg!(target_vendor = "fortanix") {
"fortanix"
} else if cfg!(target_vendor = "pc") {
"pc"
} else {
"unknown"
};
let env = if cfg!(target_env = "gnu") {
"gnu"
} else if cfg!(target_env = "msvc") {
"msvc"
} else if cfg!(target_env = "musl") {
"musl"
} else if cfg!(target_env = "sgx") {
"sgx"
} else {
"unknown"
};
let endian = if cfg!(target_endian = "little") {
"little"
} else if cfg!(target_endian = "big") {
"big"
} else {
"unknown"
};
let machine = format!("{}-{}-{}-{}-{}", std::env::consts::ARCH, std::env::consts::OS, vendor, env, endian);
let result = SCRATCH_BASE_DIR.join(machine);
fs::create_dir_all(&result).unwrap();
result
};
}
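Keying the scratch directory by target keeps artifacts from different toolchains apart; on a typical x86_64 Linux host the machine string resolves to something like (illustrative):

```text
target/scratch/x86_64-linux-unknown-gnu-little
```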
View file
@ -1,6 +1,6 @@
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use std::{env, fs};
use tree_sitter::Language;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::Loader;
@ -9,7 +9,13 @@ use tree_sitter_tags::TagsConfiguration;
include!("./dirs.rs");
lazy_static! {
static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.join("lib"));
static ref TEST_LOADER: Loader = {
let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
loader.use_debug_build(true);
}
loader
};
}
pub fn test_loader<'a>() -> &'a Loader {
@ -46,9 +52,11 @@ pub fn get_highlight_config(
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new());
let mut result = HighlightConfiguration::new(
language,
language_name,
&highlights_query,
&injections_query,
&locals_query,
false,
)
.unwrap();
result.configure(&highlight_names);
@ -63,11 +71,7 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}
pub fn get_test_language(
name: &str,
parser_code: &str,
scanner_src_path: Option<&Path>,
) -> Language {
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let src_dir = SCRATCH_DIR.join("src").join(name);
fs::create_dir_all(&src_dir).unwrap();
@ -76,11 +80,16 @@ pub fn get_test_language(
fs::write(&parser_path, parser_code).unwrap();
}
if let Some(scanner_src_path) = scanner_src_path {
let scanner_code = fs::read_to_string(&scanner_src_path).unwrap();
let scanner_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_path).map_or(false, |content| content == scanner_code) {
fs::write(&scanner_path, scanner_code).unwrap();
if let Some(path) = path {
let scanner_path = path.join("scanner.c");
if scanner_path.exists() {
let scanner_code = fs::read_to_string(&scanner_path).unwrap();
let scanner_copy_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_copy_path)
.map_or(false, |content| content == scanner_code)
{
fs::write(&scanner_copy_path, scanner_code).unwrap();
}
}
}
View file
@ -6,7 +6,8 @@ pub(super) mod random;
pub(super) mod scope_sequence;
use lazy_static::lazy_static;
use std::{env, time, usize};
use rand::Rng;
use std::env;
lazy_static! {
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
@ -16,11 +17,7 @@ lazy_static! {
}
lazy_static! {
pub static ref START_SEED: usize =
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| time::SystemTime::now()
.duration_since(time::UNIX_EPOCH)
.unwrap()
.as_secs() as usize,);
pub static ref START_SEED: usize = new_seed();
pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
}
@ -28,3 +25,10 @@ lazy_static! {
fn int_env_var(name: &'static str) -> Option<usize> {
env::var(name).ok().and_then(|e| e.parse().ok())
}
pub(crate) fn new_seed() -> usize {
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
let mut rng = rand::thread_rng();
rng.gen::<usize>()
})
}
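Because each trial can log its seed when `TREE_SITTER_LOG_SEED` is set, a failing corpus run can be replayed deterministically by pinning the same seed (the value below is illustrative):

```sh
TREE_SITTER_LOG_SEED=1 TREE_SITTER_SEED=1699999999 cargo test test_corpus_for_rust
```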
View file
@ -1,6 +1,8 @@
use rand::prelude::Rng;
use std::{cmp::Ordering, fmt::Write, ops::Range};
use tree_sitter::{Node, Point, Tree, TreeCursor};
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};
#[derive(Debug)]
pub struct Pattern {
@ -304,3 +306,56 @@ fn compare_depth_first(a: Node, b: Node) -> Ordering {
let b = b.byte_range();
a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end))
}
pub fn assert_query_matches(
language: Language,
query: &Query,
source: &str,
expected: &[(usize, Vec<(&str, &str)>)],
) {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
pretty_assertions::assert_eq!(collect_matches(matches, &query, source), expected);
pretty_assertions::assert_eq!(cursor.did_exceed_match_limit(), false);
}
pub fn collect_matches<'a>(
matches: impl Iterator<Item = QueryMatch<'a, 'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
matches
.map(|m| {
(
m.pattern_index,
format_captures(m.captures.iter().cloned(), query, source),
)
})
.collect()
}
pub fn collect_captures<'a>(
captures: impl Iterator<Item = (QueryMatch<'a, 'a>, usize)>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
format_captures(captures.map(|(m, i)| m.captures[i]), query, source)
}
fn format_captures<'a>(
captures: impl Iterator<Item = QueryCapture<'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
captures
.map(|capture| {
(
query.capture_names()[capture.index as usize],
capture.node.utf8_text(source.as_bytes()).unwrap(),
)
})
.collect()
}
View file
@ -24,6 +24,7 @@ lazy_static! {
get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES);
static ref HIGHLIGHT_NAMES: Vec<String> = [
"attribute",
"boolean",
"carriage-return",
"comment",
"constant",
@ -61,7 +62,7 @@ lazy_static! {
fn test_highlighting_javascript() {
let source = "const a = function(b) { return b + c; }";
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&to_token_vector(source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec!["keyword"]),
(" ", vec![]),
@ -71,14 +72,14 @@ fn test_highlighting_javascript() {
(" ", vec![]),
("function", vec!["keyword"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("return", vec!["keyword"]),
(" ", vec![]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(" ", vec![]),
("+", vec!["operator"]),
(" ", vec![]),
@ -92,7 +93,7 @@ fn test_highlighting_javascript() {
#[test]
fn test_highlighting_injected_html_in_javascript() {
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
let source = ["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
@ -156,7 +157,7 @@ fn test_highlighting_injected_javascript_in_html_mini() {
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = vec![
let source = [
"<body>",
" <script>",
" const x = new Thing();",
@ -211,7 +212,7 @@ fn test_highlighting_injected_javascript_in_html() {
#[test]
fn test_highlighting_multiline_nodes_to_html() {
let source = vec![
let source = [
"const SOMETHING = `",
" one ${",
" two()",
@ -235,7 +236,7 @@ fn test_highlighting_multiline_nodes_to_html() {
#[test]
fn test_highlighting_with_local_variable_tracking() {
let source = vec![
let source = [
"module.exports = function a(b) {",
" const module = c;",
" console.log(module, b);",
@ -257,7 +258,7 @@ fn test_highlighting_with_local_variable_tracking() {
(" ", vec![]),
("a", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"])
@ -284,7 +285,7 @@ fn test_highlighting_with_local_variable_tracking() {
(",", vec!["punctuation.delimiter"]),
(" ", vec![]),
// A parameter, because `b` was defined as a parameter above.
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
@ -295,7 +296,7 @@ fn test_highlighting_with_local_variable_tracking() {
#[test]
fn test_highlighting_empty_lines() {
let source = vec![
let source = [
"class A {",
"",
" b(c) {",
@ -313,7 +314,7 @@ fn test_highlighting_empty_lines() {
&[
"<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable.parameter>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
"\n".to_string(),
@ -329,7 +330,7 @@ fn test_highlighting_carriage_returns() {
let source = "a = \"a\rb\"\r\nb\r";
assert_eq!(
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&to_html(source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span>b&quot;</span>\n",
"<span class=variable>b</span>\n",
@ -339,7 +340,7 @@ fn test_highlighting_carriage_returns() {
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
let source = vec!["<div><% foo() %></div><script> bar() </script>"].join("\n");
let source = ["<div><% foo() %></div><script> bar() </script>"].join("\n");
assert_eq!(
&to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
@ -376,7 +377,7 @@ fn test_highlighting_ejs_with_html_and_javascript() {
fn test_highlighting_javascript_with_jsdoc() {
// Regression test: the middle comment has no highlights. This should not prevent
// later injections from highlighting properly.
let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
@ -404,7 +405,7 @@ fn test_highlighting_javascript_with_jsdoc() {
#[test]
fn test_highlighting_with_content_children_included() {
let source = vec!["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
let source = ["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
assert_eq!(
&to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
@ -482,7 +483,7 @@ fn test_highlighting_cancellation() {
#[test]
fn test_highlighting_via_c_api() {
let highlights = vec![
let highlights = [
"class=tag\0",
"class=function\0",
"class=string\0",
@ -496,68 +497,82 @@ fn test_highlighting_via_c_api() {
.iter()
.map(|h| h.as_bytes().as_ptr() as *const c_char)
.collect::<Vec<_>>();
let highlighter = c::ts_highlighter_new(
&highlight_names[0] as *const *const c_char,
&highlight_attrs[0] as *const *const c_char,
highlights.len() as u32,
);
let highlighter = unsafe {
c::ts_highlighter_new(
&highlight_names[0] as *const *const c_char,
&highlight_attrs[0] as *const *const c_char,
highlights.len() as u32,
)
};
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
let js_scope = c_string("source.js");
let js_injection_regex = c_string("^javascript");
let language = get_language("javascript");
let lang_name = c_string("javascript");
let queries = get_language_queries_path("javascript");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
locals_query.as_ptr() as *const c_char,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
);
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
locals_query.as_ptr() as *const c_char,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
false,
);
}
let html_scope = c_string("text.html.basic");
let html_injection_regex = c_string("^html");
let language = get_language("html");
let lang_name = c_string("html");
let queries = get_language_queries_path("html");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
);
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
false,
);
}
let buffer = c::ts_highlight_buffer_new();
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
unsafe {
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
}
let output_bytes = c::ts_highlight_buffer_content(buffer);
let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer);
let output_len = c::ts_highlight_buffer_len(buffer);
let output_line_count = c::ts_highlight_buffer_line_count(buffer);
let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) };
let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) };
let output_len = unsafe { c::ts_highlight_buffer_len(buffer) };
let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) };
let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
let output_line_offsets =
@ -583,8 +598,69 @@ fn test_highlighting_via_c_api() {
]
);
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
unsafe {
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
}
}
#[test]
fn test_highlighting_with_all_captures_applied() {
let source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
let language = get_language("rust");
let highlights_query = indoc::indoc! {"
[
\"fn\"
\"let\"
] @keyword
(identifier) @variable
(function_item name: (identifier) @function)
(parameter pattern: (identifier) @variable.parameter)
(primitive_type) @type.builtin
\"=\" @operator
[ \"->\" \":\" \";\" ] @punctuation.delimiter
[ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
"};
let mut rust_highlight_reverse =
HighlightConfiguration::new(language, "rust", highlights_query, "", "", true).unwrap();
rust_highlight_reverse.configure(&HIGHLIGHT_NAMES);
assert_eq!(
&to_token_vector(source, &rust_highlight_reverse).unwrap(),
&[[
("fn", vec!["keyword"]),
(" ", vec![]),
("main", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("a", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(", ", vec![]),
("b", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("->", vec!["punctuation.delimiter"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("let", vec!["keyword"]),
(" ", vec![]),
("c", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("a", vec!["variable"]),
(" + ", vec![]),
("b", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("}", vec!["punctuation.bracket"])
]],
);
}
#[test]
@ -667,20 +743,20 @@ fn to_token_vector<'a>(
}
HighlightEvent::Source { start, end } => {
let s = str::from_utf8(&src[start..end]).unwrap();
for (i, l) in s.split("\n").enumerate() {
for (i, l) in s.split('\n').enumerate() {
let l = l.trim_end_matches('\r');
if i > 0 {
lines.push(line);
line = Vec::new();
}
if l.len() > 0 {
if !l.is_empty() {
line.push((l, highlights.clone()));
}
}
}
}
}
if line.len() > 0 {
if !line.is_empty() {
lines.push(line);
}
Ok(lines)

View file

@ -0,0 +1,95 @@
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
#[test]
fn test_lookahead_iterator() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
assert_eq!(
next_state,
language.next_state(cursor.node().parse_state(), cursor.node().grammar_id())
);
assert!((next_state as usize) < language.parse_state_count());
assert!(cursor.goto_next_sibling()); // type_identifier
assert_eq!(next_state, cursor.node().parse_state());
assert_eq!(cursor.node().grammar_name(), "identifier");
assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id());
let expected_symbols = ["identifier", "block_comment", "line_comment"];
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
assert_eq!(lookahead.language(), language);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset_state(next_state);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset(language, next_state);
assert!(lookahead
.map(|s| language.node_kind_for_id(s).unwrap())
.eq(expected_symbols));
}
#[test]
fn test_lookahead_iterator_modifiable_only_by_mut() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
let _ = lookahead.next();
let mut names = lookahead.iter_names();
let _ = names.next();
}
/// It isn't possible to use a lookahead iterator through a shared reference:
/// error[E0596]: cannot borrow `lookahead` as mutable, as it is not declared as mutable
/// ```compile_fail
/// use tree_sitter::{Parser, Language};
/// let mut parser = Parser::new();
/// let language = unsafe { Language::from_raw(std::ptr::null()) };
/// let tree = parser.parse("", None).unwrap();
/// let mut cursor = tree.walk();
/// let next_state = cursor.node().next_parse_state();
/// let lookahead = language.lookahead_iterator(next_state).unwrap();
/// let _ = lookahead.next();
/// ```
/// It isn't possible to use a lookahead names iterator through a shared reference:
/// error[E0596]: cannot borrow `names` as mutable, as it is not declared as mutable
/// ```compile_fail
/// use tree_sitter::{Parser, Language};
/// let mut parser = Parser::new();
/// let language = unsafe { Language::from_raw(std::ptr::null()) };
/// let tree = parser.parse("", None).unwrap();
/// let mut cursor = tree.walk();
/// let next_state = cursor.node().next_parse_state();
/// if let Some(mut lookahead) = language.lookahead_iterator(next_state) {
/// let _ = lookahead.next();
/// let names = lookahead.iter_names();
/// let _ = names.next();
/// }
/// ```
fn _dummy() {}
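
For contrast, a compiling counterpart binds the iterator mutably; a minimal sketch (the `print_lookaheads` helper is hypothetical and assumes a `Language` loaded elsewhere):

use tree_sitter::Language;

// Hypothetical helper: print every terminal symbol that is valid in `state`.
// `next` takes `&mut self`, so the iterator must be bound with `mut`.
fn print_lookaheads(language: &Language, state: u16) {
    if let Some(mut lookahead) = language.lookahead_iterator(state) {
        while let Some(symbol) = lookahead.next() {
            if let Some(name) = language.node_kind_for_id(symbol) {
                println!("{name}");
            }
        }
    }
}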

View file

@ -1,11 +1,16 @@
mod async_context_test;
mod corpus_test;
mod github_issue_test;
mod helpers;
mod highlight_test;
mod language_test;
mod node_test;
mod parser_hang_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;

View file

@ -252,12 +252,14 @@ fn test_node_parent_of_child_by_field_name() {
fn test_node_field_name_for_child() {
let mut parser = Parser::new();
parser.set_language(get_language("c")).unwrap();
let tree = parser.parse("x + y;", None).unwrap();
let tree = parser.parse("int w = x + y;", None).unwrap();
let translation_unit_node = tree.root_node();
let binary_expression_node = translation_unit_node
.named_child(0)
let declaration_node = translation_unit_node.named_child(0).unwrap();
let binary_expression_node = declaration_node
.child_by_field_name("declarator")
.unwrap()
.named_child(0)
.child_by_field_name("value")
.unwrap();
assert_eq!(binary_expression_node.field_name_for_child(0), Some("left"));
@ -385,10 +387,52 @@ fn test_node_named_child_with_aliases_and_extras() {
assert_eq!(root.named_child(4).unwrap().kind(), "C");
}
#[test]
fn test_node_descendant_count() {
let tree = parse_json_example();
let value_node = tree.root_node();
let all_nodes = get_all_nodes(&tree);
assert_eq!(value_node.descendant_count(), all_nodes.len());
let mut cursor = value_node.walk();
for (i, node) in all_nodes.iter().enumerate() {
cursor.goto_descendant(i);
assert_eq!(cursor.node(), *node, "index {i}");
}
for (i, node) in all_nodes.iter().enumerate().rev() {
cursor.goto_descendant(i);
assert_eq!(cursor.node(), *node, "rev index {i}");
}
}
#[test]
fn test_descendant_count_single_node_tree() {
let mut parser = Parser::new();
parser
.set_language(get_language("embedded-template"))
.unwrap();
let tree = parser.parse("hello", None).unwrap();
let nodes = get_all_nodes(&tree);
assert_eq!(nodes.len(), 2);
assert_eq!(tree.root_node().descendant_count(), 2);
let mut cursor = tree.root_node().walk();
cursor.goto_descendant(0);
assert_eq!(cursor.depth(), 0);
assert_eq!(cursor.node(), nodes[0]);
cursor.goto_descendant(1);
assert_eq!(cursor.depth(), 1);
assert_eq!(cursor.node(), nodes[1]);
}
#[test]
fn test_node_descendant_for_range() {
let tree = parse_json_example();
let array_node = tree.root_node().child(0).unwrap();
let array_node = tree.root_node();
// Leaf node exactly matches the given bounds - byte query
let colon_index = JSON_EXAMPLE.find(":").unwrap();
@ -508,7 +552,7 @@ fn test_node_edit() {
let edit = get_random_edit(&mut rand, &mut code);
let mut tree2 = tree.clone();
let edit = perform_edit(&mut tree2, &mut code, &edit);
let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap();
for node in nodes_before.iter_mut() {
node.edit(&edit);
}
@ -841,15 +885,17 @@ fn get_all_nodes(tree: &Tree) -> Vec<Node> {
let mut visited_children = false;
let mut cursor = tree.walk();
loop {
result.push(cursor.node());
if !visited_children && cursor.goto_first_child() {
continue;
} else if cursor.goto_next_sibling() {
visited_children = false;
} else if cursor.goto_parent() {
visited_children = true;
if !visited_children {
result.push(cursor.node());
if !cursor.goto_first_child() {
visited_children = true;
}
} else {
break;
if cursor.goto_next_sibling() {
visited_children = false;
} else if !cursor.goto_parent() {
break;
}
}
}
return result;

View file

@ -0,0 +1,104 @@
// For some reason `Command::spawn` doesn't work in the CI env for many exotic arches.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
tests::helpers::fixtures::{fixtures_dir, get_test_language},
};
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
// The `sanitizing` cfg is required to avoid running tests under a specific sanitizer,
// because sanitizers don't work well with subprocesses _(it's an assumption)_.
//
// Below are two alternative examples of how to disable tests for some arches
// in case excluding the whole mod from compilation doesn't work well.
//
// XXX: It may also make sense to keep such tests ignored by default
// to avoid surprises, and enable them on CI by passing an extra option explicitly:
//
// > cargo test -- --include-ignored
//
// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
//
#[test]
fn test_grammar_that_should_hang_and_not_segfault() {
let parent_sleep_millis = 1000;
let test_name = "test_grammar_that_should_hang_and_not_segfault";
let test_var = "CARGO_HANG_TEST";
eprintln!(" {test_name}");
let tests_exec_path = std::env::args()
.nth(0)
.expect("Failed get get tests executable path");
match std::env::var(test_var) {
Ok(v) if v == test_name => {
eprintln!(" child process id {}", std::process::id());
hang_test();
}
Err(VarError::NotPresent) => {
eprintln!(" parent process id {}", std::process::id());
if true {
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child wasn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child wasn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test sub process id: {}, error: {e}",
child.id()
);
}
}
Err(e) => panic!("{e}"),
}
}
}
Err(e) => panic!("Env var error: {e}"),
_ => unreachable!(),
}
fn hang_test() {
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) =
generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let language =
get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}
}
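
The env-var re-exec pattern above generalizes to any "must hang, must not crash" check; a minimal sketch of the supervision skeleton (the helper name and the one-second budget are assumptions, not the crate's API):

use std::process::{Command, Stdio};
use std::time::Duration;

// Hypothetical skeleton: the test binary re-runs itself with a marker env
// var; the child executes the risky body (expected to hang), while the
// parent sleeps and then asserts the child has *not* exited.
fn expect_child_to_hang(test_name: &str, var: &str, risky_body: impl FnOnce()) {
    if std::env::var(var).as_deref() == Ok(test_name) {
        risky_body(); // child branch: this call should never return
        return;
    }
    let exe = std::env::current_exe().expect("failed to get the test executable path");
    let mut child = Command::new(exe)
        .arg(test_name)
        .env(var, test_name)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .spawn()
        .expect("failed to spawn the child process");
    std::thread::sleep(Duration::from_millis(1000));
    // An exit here means the child did not hang, which is the failure case.
    assert!(child.try_wait().expect("try_wait failed").is_none());
    child.kill().ok();
}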

View file

@ -15,6 +15,7 @@ use std::{
thread, time,
};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
use tree_sitter_proc_macro::retry;
#[test]
fn test_parsing_simple_string() {
@ -149,7 +150,7 @@ fn test_parsing_with_custom_utf8_input() {
)
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert!(!root.has_error());
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
@ -188,7 +189,7 @@ fn test_parsing_with_custom_utf16_input() {
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert!(!root.has_error());
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
@ -277,7 +278,10 @@ fn test_parsing_invalid_chars_at_eof() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
let tree = parser.parse(b"\xdf", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))");
assert_eq!(
tree.root_node().to_sexp(),
"(document (ERROR (UNEXPECTED INVALID)))"
);
}
#[test]
@ -340,7 +344,8 @@ fn test_parsing_after_editing_beginning_of_code() {
deleted_length: 0,
inserted_text: b" || 5".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -387,7 +392,8 @@ fn test_parsing_after_editing_end_of_code() {
deleted_length: 0,
inserted_text: b".d".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -466,7 +472,8 @@ h + i
deleted_length: 0,
inserted_text: b"1234".to_vec(),
},
);
)
.unwrap();
assert_eq!(
code,
@ -511,7 +518,7 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let tree = parser.parse(&source, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string)))))"
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string (string_start) (string_content) (string_end))))))"
);
// Delete a suffix of the source code, starting in the middle of the string
@ -530,12 +537,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let undo = invert_edit(&source, &edit);
let mut tree2 = tree.clone();
perform_edit(&mut tree2, &mut source, &edit);
perform_edit(&mut tree2, &mut source, &edit).unwrap();
tree2 = parser.parse(&source, Some(&tree2)).unwrap();
assert!(tree2.root_node().has_error());
let mut tree3 = tree2.clone();
perform_edit(&mut tree3, &mut source, &undo);
perform_edit(&mut tree3, &mut source, &undo).unwrap();
tree3 = parser.parse(&source, Some(&tree3)).unwrap();
assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),);
}
@ -644,6 +651,7 @@ fn test_parsing_cancelled_by_another_thread() {
// Timeouts
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
@ -662,8 +670,12 @@ fn test_parsing_with_a_timeout() {
None,
);
assert!(tree.is_none());
#[cfg(not(target_arch = "sparc64"))]
assert!(start_time.elapsed().as_micros() < 2000);
#[cfg(target_arch = "sparc64")]
assert!(start_time.elapsed().as_micros() < 8000);
// Continue parsing, but pause after 1 ms of processing.
parser.set_timeout_micros(5000);
let start_time = time::Instant::now();
@ -701,6 +713,7 @@ fn test_parsing_with_a_timeout() {
}
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout_and_a_reset() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
@ -756,6 +769,7 @@ fn test_parsing_with_a_timeout_and_a_reset() {
}
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout_and_implicit_reset() {
allocations::record(|| {
let mut parser = Parser::new();
@ -789,6 +803,7 @@ fn test_parsing_with_a_timeout_and_implicit_reset() {
}
#[test]
#[retry(10)]
fn test_parsing_with_timeout_and_no_completion() {
allocations::record(|| {
let mut parser = Parser::new();
@ -828,7 +843,7 @@ fn test_parsing_with_one_included_range() {
concat!(
"(program (expression_statement (call_expression ",
"function: (member_expression object: (identifier) property: (property_identifier)) ",
"arguments: (arguments (string)))))",
"arguments: (arguments (string (string_fragment))))))",
)
);
assert_eq!(
@ -1177,7 +1192,7 @@ fn test_parsing_with_a_newly_included_range() {
.set_included_ranges(&[simple_range(range1_start, range1_end)])
.unwrap();
let tree = parser
.parse_with(&mut chunked_input(&source_code, 3), None)
.parse_with(&mut chunked_input(source_code, 3), None)
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -1196,7 +1211,7 @@ fn test_parsing_with_a_newly_included_range() {
])
.unwrap();
let tree2 = parser
.parse_with(&mut chunked_input(&source_code, 3), Some(&tree))
.parse_with(&mut chunked_input(source_code, 3), Some(&tree))
.unwrap();
assert_eq!(
tree2.root_node().to_sexp(),
@ -1220,7 +1235,7 @@ fn test_parsing_with_a_newly_included_range() {
simple_range(range3_start, range3_end),
])
.unwrap();
let tree3 = parser.parse(&source_code, Some(&tree)).unwrap();
let tree3 = parser.parse(source_code, Some(&tree)).unwrap();
assert_eq!(
tree3.root_node().to_sexp(),
concat!(
@ -1297,6 +1312,85 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
assert_eq!(root.child(3).unwrap().start_byte(), 4);
}
#[test]
fn test_grammars_that_can_hang_on_eof() {
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_single_null_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
let mut parser = Parser::new();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_null_char_with_next_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x01]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_null_char_with_range_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x7F]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
}
fn simple_range(start: usize, end: usize) -> Range {
Range {
start_byte: start,

View file

@ -0,0 +1,15 @@
[package]
name = "tree-sitter-tests-proc-macro"
version = "0.0.0"
edition = "2021"
publish = false
rust-version.workspace = true
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0.63"
quote = "1"
rand = "0.8.5"
syn = { version = "1", features = ["full"] }

View file

@ -0,0 +1,137 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::{
parse::{Parse, ParseStream},
parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token,
};
#[proc_macro_attribute]
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
let count = parse_macro_input!(args as LitInt);
let input = parse_macro_input!(input as ItemFn);
let attrs = input.attrs.clone();
let name = input.sig.ident.clone();
TokenStream::from(quote! {
#(#attrs),*
fn #name() {
#input
for i in 0..=#count {
let result = std::panic::catch_unwind(|| {
#name();
});
if result.is_ok() {
return;
}
if i == #count {
std::panic::resume_unwind(result.unwrap_err());
}
}
}
})
}
#[proc_macro_attribute]
pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
struct Args {
retry: LitInt,
seed: Expr,
seed_fn: Option<Ident>,
}
impl Parse for Args {
fn parse(input: ParseStream) -> syn::Result<Self> {
let mut retry = None;
let mut seed = None;
let mut seed_fn = None;
while !input.is_empty() {
let name = input.parse::<Ident>()?;
match name.to_string().as_str() {
"retry" => {
input.parse::<Token![=]>()?;
retry.replace(input.parse()?);
}
"seed" => {
input.parse::<Token![=]>()?;
seed.replace(input.parse()?);
}
"seed_fn" => {
input.parse::<Token![=]>()?;
seed_fn.replace(input.parse()?);
}
x => {
return Err(Error::new(
name.span(),
format!("Unsupported parameter `{x}`"),
))
}
}
if !input.is_empty() {
input.parse::<Token![,]>()?;
}
}
if retry.is_none() {
retry.replace(LitInt::new("0", Span::mixed_site()));
}
Ok(Args {
retry: retry.expect("`retry` parameter is required"),
seed: seed.expect("`seed` parameter is required"),
seed_fn,
})
}
}
let Args {
retry,
seed,
seed_fn,
} = parse_macro_input!(args as Args);
let seed_fn = seed_fn.iter();
let func = parse_macro_input!(input as ItemFn);
let attrs = func.attrs.clone();
let name = func.sig.ident.clone();
// dbg!(quote::ToTokens::into_token_stream(&func));
TokenStream::from(quote! {
#[test]
#(#attrs),*
fn #name() {
#func
let mut seed = #seed;
for i in 0..=#retry {
let result = std::panic::catch_unwind(|| {
#name(seed);
});
if result.is_ok() {
return;
}
if i == #retry {
std::panic::resume_unwind(result.unwrap_err());
}
#(
seed = #seed_fn();
)*
if i < #retry {
println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed);
}
}
}
})
}
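
For reference, a hedged sketch of how these attributes are meant to be applied (`new_seed` and the test bodies are assumptions for illustration):

// `#[retry(2)]` re-runs a flaky test up to two extra times before
// propagating the panic; it sits below `#[test]`, as in the parser tests.
#[test]
#[retry(2)]
fn flaky_test() { /* ... */ }

// `#[test_with_seed]` emits its own `#[test]`, threads `seed` into the
// body, and regenerates it via `seed_fn` between retries.
#[test_with_seed(retry = 3, seed = 42, seed_fn = new_seed)]
fn randomized_test(seed: usize) { /* ... */ }

fn new_seed() -> usize {
    rand::random()
}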

File diff suppressed because it is too large

View file

@ -9,7 +9,7 @@ use std::{
use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
const PYTHON_TAG_QUERY: &str = r#"
(
(function_definition
name: (identifier) @name
@ -39,7 +39,7 @@ const PYTHON_TAG_QUERY: &'static str = r#"
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
const JS_TAG_QUERY: &str = r#"
(
(comment)* @doc .
(class_declaration
@ -68,7 +68,7 @@ const JS_TAG_QUERY: &'static str = r#"
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &'static str = r#"
const RUBY_TAG_QUERY: &str = r#"
(method
name: (_) @name) @definition.method
@ -359,25 +359,29 @@ fn test_tags_via_c_api() {
);
let c_scope_name = CString::new(scope_name).unwrap();
let result = c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
);
let result = unsafe {
c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
)
};
assert_eq!(result, c::TSTagsError::Ok);
let result = c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
);
let result = unsafe {
c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
)
};
assert_eq!(result, c::TSTagsError::Ok);
let tags = unsafe {
slice::from_raw_parts(
@ -419,8 +423,10 @@ fn test_tags_via_c_api() {
]
);
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
unsafe {
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
}
});
}

View file

@ -12,7 +12,7 @@ fn test_highlight_test_with_basic_test() {
Some("injections.scm"),
&[
"function".to_string(),
"variable.parameter".to_string(),
"variable".to_string(),
"keyword".to_string(),
],
);
@ -22,7 +22,8 @@ fn test_highlight_test_with_basic_test() {
" // ^ function",
" // ^ keyword",
" return d + e;",
" // ^ variable.parameter",
" // ^ variable",
" // ^ !variable",
"};",
]
.join("\n");
@ -32,18 +33,10 @@ fn test_highlight_test_with_basic_test() {
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(1, 5),
expected_capture_name: "function".to_string()
},
Assertion {
position: Point::new(1, 11),
expected_capture_name: "keyword".to_string()
},
Assertion {
position: Point::new(4, 9),
expected_capture_name: "variable.parameter".to_string()
},
Assertion::new(1, 5, false, String::from("function")),
Assertion::new(1, 11, false, String::from("keyword")),
Assertion::new(4, 9, false, String::from("variable")),
Assertion::new(4, 11, true, String::from("variable")),
]
);
@ -60,6 +53,7 @@ fn test_highlight_test_with_basic_test() {
(Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
(Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return"
(Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
(Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e"
]
);
}

View file

@ -16,6 +16,7 @@ fn test_tags_test_with_basic_test() {
" # ^ reference.call",
" return d(e)",
" # ^ reference.call",
" # ^ !variable.parameter",
"",
]
.join("\n");
@ -26,18 +27,10 @@ fn test_tags_test_with_basic_test() {
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(1, 4),
expected_capture_name: "definition.function".to_string(),
},
Assertion {
position: Point::new(3, 9),
expected_capture_name: "reference.call".to_string(),
},
Assertion {
position: Point::new(5, 11),
expected_capture_name: "reference.call".to_string(),
},
Assertion::new(1, 4, false, String::from("definition.function")),
Assertion::new(3, 9, false, String::from("reference.call")),
Assertion::new(5, 11, false, String::from("reference.call")),
Assertion::new(5, 13, true, String::from("variable.parameter")),
]
);

View file

@ -0,0 +1,173 @@
use std::{iter, sync::Arc};
use crate::tests::helpers::fixtures::get_language;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
(parser.parse(text, None).unwrap(), language)
}
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse_with(callback, None).unwrap();
// eprintln!("{}", tree.clone().root_node().to_sexp());
assert_eq!("comment", tree.clone().root_node().child(0).unwrap().kind());
(tree, language)
}
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: Language) {
let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
let mut cursor = QueryCursor::new();
let mut captures = cursor.captures(&query, tree.root_node(), text);
let (match_, idx) = captures.next().unwrap();
let capture = match_.captures[idx];
assert_eq!(capture.index as usize, idx);
assert_eq!("comment", capture.node.kind());
}
fn check_parsing<I: AsRef<[u8]>>(
parser_text: impl AsRef<[u8]>,
text_provider: impl TextProvider<I>,
) {
let (tree, language) = parse_text(parser_text);
tree_query(&tree, text_provider, language);
}
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
parser_callback: &mut F,
text_provider: impl TextProvider<I>,
) where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let (tree, language) = parse_text_with(parser_callback);
tree_query(&tree, text_provider, language);
}
#[test]
fn test_text_provider_for_str_slice() {
let text: &str = "// comment";
check_parsing(text, text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
}
#[test]
fn test_text_provider_for_string() {
let text: String = "// comment".to_owned();
check_parsing(text.clone(), text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_str_slice() {
let text: Box<str> = "// comment".to_owned().into_boxed_str();
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
check_parsing(text.as_ref(), text.as_ref().as_bytes());
check_parsing(text.as_ref(), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_bytes_slice() {
let text: Box<[u8]> = "// comment".to_owned().into_boxed_str().into_boxed_bytes();
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.as_ref(), &*text);
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_vec_of_bytes() {
let text: Vec<u8> = "// comment".to_owned().into_bytes();
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_arc_of_bytes_slice() {
let text: Vec<u8> = "// comment".to_owned().into_bytes();
let text: Arc<[u8]> = Arc::from(text);
check_parsing(&*text, &*text);
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.clone(), text.as_ref());
}
#[test]
fn test_text_provider_callback_with_str_slice() {
let text: &str = "// comment";
check_parsing(text, |_node: Node<'_>| iter::once(text));
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| iter::once(text),
);
}
#[test]
fn test_text_provider_callback_with_owned_string_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: String = text.to_owned();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Vec<u8> = text.to_owned().into_bytes();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
iter::once(slice)
},
);
}

View file

@ -306,7 +306,7 @@ fn test_tree_cursor() {
.parse(
"
struct Stuff {
a: A;
a: A,
b: Option<B>,
}
",
@ -331,6 +331,88 @@ fn test_tree_cursor() {
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "field_declaration_list");
assert_eq!(cursor.node().is_named(), true);
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "}");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert_eq!(cursor.node().is_named(), true);
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert_eq!(cursor.node().is_named(), true);
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "{");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 });
let mut copy = tree.walk();
copy.reset_to(cursor);
assert_eq!(copy.node().kind(), "{");
assert_eq!(copy.node().is_named(), false);
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "field_declaration_list");
assert_eq!(copy.node().is_named(), true);
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "struct_item");
}
#[test]
fn test_tree_cursor_previous_sibling() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
let text = "
// Hi there
// This is fun!
// Another one!
";
let tree = parser.parse(text, None).unwrap();
let mut cursor = tree.walk();
assert_eq!(cursor.node().kind(), "source_file");
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Another one!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// This is fun!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Hi there"
);
assert!(!cursor.goto_previous_sibling());
}
#[test]
@ -620,7 +702,7 @@ fn get_changed_ranges(
source_code: &mut Vec<u8>,
edit: Edit,
) -> Vec<Range> {
perform_edit(tree, source_code, &edit);
perform_edit(tree, source_code, &edit).unwrap();
let new_tree = parser.parse(&source_code, Some(tree)).unwrap();
let result = tree.changed_ranges(&new_tree).collect();
*tree = new_tree;

View file

@ -1,9 +1,7 @@
use anyhow::Result;
use std::io;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use tree_sitter::Parser;
use tree_sitter::{Parser, Tree};
#[cfg(unix)]
use anyhow::{anyhow, Context};
@ -13,55 +11,86 @@ use std::path::PathBuf;
use std::process::{Child, ChildStdin, Command, Stdio};
#[cfg(unix)]
const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n";
const HTML_HEADER: &[u8] = b"
<!DOCTYPE html>
pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
<style>
svg { width: 100%; }
</style>
";
pub fn cancel_on_signal() -> Arc<AtomicUsize> {
let result = Arc::new(AtomicUsize::new(0));
if atty::is(atty::Stream::Stdin) {
thread::spawn({
let flag = result.clone();
move || {
let mut line = String::new();
io::stdin().read_line(&mut line).unwrap();
flag.store(1, Ordering::Relaxed);
}
});
}
ctrlc::set_handler({
let flag = result.clone();
move || {
flag.store(1, Ordering::Relaxed);
}
})
.expect("Error setting Ctrl-C handler");
result
}
#[cfg(windows)]
pub struct LogSession();
pub struct LogSession;
#[cfg(unix)]
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
pub struct LogSession {
path: PathBuf,
dot_process: Option<Child>,
dot_process_stdin: Option<ChildStdin>,
}
#[cfg(windows)]
pub fn print_tree_graph(_tree: &Tree, _path: &str) -> Result<()> {
Ok(())
}
#[cfg(windows)]
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
Ok(LogSession())
Ok(LogSession)
}
#[cfg(unix)]
pub fn print_tree_graph(tree: &Tree, path: &str) -> Result<()> {
let session = LogSession::new(path)?;
tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap());
Ok(())
}
#[cfg(unix)]
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
use std::io::Write;
let session = LogSession::new(path)?;
parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap());
Ok(session)
}
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.with_context(|| "Failed to run the `dot` command. Check that graphviz is installed.")?;
let dot_stdin = dot_process
.stdin
.take()
.ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
parser.print_dot_graphs(&dot_stdin);
Ok(LogSession(
PathBuf::from(path),
Some(dot_process),
Some(dot_stdin),
))
#[cfg(unix)]
impl LogSession {
fn new(path: &str) -> Result<Self> {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.with_context(|| {
"Failed to run the `dot` command. Check that graphviz is installed."
})?;
let dot_stdin = dot_process
.stdin
.take()
.ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
Ok(Self {
path: PathBuf::from(path),
dot_process: Some(dot_process),
dot_process_stdin: Some(dot_stdin),
})
}
}
#[cfg(unix)]
@ -69,13 +98,13 @@ impl Drop for LogSession {
fn drop(&mut self) {
use std::fs;
drop(self.2.take().unwrap());
let output = self.1.take().unwrap().wait_with_output().unwrap();
drop(self.dot_process_stdin.take().unwrap());
let output = self.dot_process.take().unwrap().wait_with_output().unwrap();
if output.status.success() {
if cfg!(target_os = "macos")
&& fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64
&& fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64
{
Command::new("open").arg(&self.0).output().unwrap();
Command::new("open").arg(&self.path).output().unwrap();
}
} else {
eprintln!(

View file

@ -1,5 +1,6 @@
use super::generate::parse_grammar::GrammarJSON;
use anyhow::{anyhow, Context, Result};
use path_slash::PathExt as _;
use std::{
ffi::{OsStr, OsString},
fs,
@ -60,7 +61,7 @@ pub fn compile_language_to_wasm(
volume_string = OsString::from(parent);
volume_string.push(":/src:Z");
command.arg("--workdir");
command.arg(&Path::new("/src").join(filename));
command.arg(Path::new("/src").join(filename).to_slash_lossy().as_ref());
} else {
volume_string = OsString::from(language_dir);
volume_string.push(":/src:Z");
@ -84,6 +85,11 @@ pub fn compile_language_to_wasm(
// Run `emcc` in a container using the `emscripten-slim` image
command.args(&[EMSCRIPTEN_TAG, "emcc"]);
} else {
if force_docker {
return Err(anyhow!(
"You must have docker on your PATH to run this command with --docker"
));
}
return Err(anyhow!(
"You must have either emcc or docker on your PATH to run this command"
));
@ -116,14 +122,18 @@ pub fn compile_language_to_wasm(
let scanner_cpp_path = src.join("scanner.cpp");
if language_dir.join(&scanner_cc_path).exists() {
command.arg("-xc++").arg(&scanner_cc_path);
command
.arg("-xc++")
.arg(scanner_cc_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_cpp_path).exists() {
command.arg("-xc++").arg(&scanner_cpp_path);
command
.arg("-xc++")
.arg(scanner_cpp_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_c_path).exists() {
command.arg(&scanner_c_path);
command.arg(scanner_c_path.to_slash_lossy().as_ref());
}
command.arg(&parser_c_path);
command.arg(parser_c_path.to_slash_lossy().as_ref());
let output = command
.output()