feat: move generate logic to its own crate

Amaan Qureshi 2024-09-27 16:28:50 -04:00
parent 90efa34608
commit 31f24395b4
47 changed files with 103 additions and 57 deletions


@@ -1,430 +0,0 @@
use std::{
collections::{hash_map::Entry, HashMap, VecDeque},
mem,
};
use log::info;
use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap};
use crate::generate::{
dedup::split_state_id_groups,
grammars::{LexicalGrammar, SyntaxGrammar},
nfa::{CharacterSet, NfaCursor},
prepare_grammar::symbol_is_used,
rules::{Symbol, TokenSet},
tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable},
};
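/// Character sets that span more ranges than this are considered "large".
/// `build_lex_table` collects each distinct large set once (see below),
/// allowing later stages to share a single definition of the set rather than
/// repeating its ranges at every use.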
pub const LARGE_CHARACTER_RANGE_COUNT: usize = 8;
pub struct LexTables {
pub main_lex_table: LexTable,
pub keyword_lex_table: LexTable,
pub large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
}
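/// Build the main lex table for the grammar: group parse states whose
/// lookahead token sets can safely be merged, build one lex entry state per
/// group, then minimize and sort the resulting table. Also builds the keyword
/// lex table (when the grammar has a word token) and collects the large
/// character sets used by the lexical grammar's tokens.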
pub fn build_lex_table(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
keywords: &TokenSet,
coincident_token_index: &CoincidentTokenIndex,
token_conflict_map: &TokenConflictMap,
) -> LexTables {
let keyword_lex_table = if syntax_grammar.word_token.is_some() {
let mut builder = LexTableBuilder::new(lexical_grammar);
builder.add_state_for_tokens(keywords);
builder.table
} else {
LexTable::default()
};
let mut parse_state_ids_by_token_set = Vec::<(TokenSet, Vec<ParseStateId>)>::new();
for (i, state) in parse_table.states.iter().enumerate() {
let tokens = state
.terminal_entries
.keys()
.filter_map(|token| {
if token.is_terminal() {
if keywords.contains(token) {
syntax_grammar.word_token
} else {
Some(*token)
}
} else if token.is_eof() {
Some(*token)
} else {
None
}
})
.collect();
let mut did_merge = false;
for entry in &mut parse_state_ids_by_token_set {
if merge_token_set(
&mut entry.0,
&tokens,
lexical_grammar,
token_conflict_map,
coincident_token_index,
) {
did_merge = true;
entry.1.push(i);
break;
}
}
if !did_merge {
parse_state_ids_by_token_set.push((tokens, vec![i]));
}
}
let mut builder = LexTableBuilder::new(lexical_grammar);
for (tokens, parse_state_ids) in parse_state_ids_by_token_set {
let lex_state_id = builder.add_state_for_tokens(&tokens);
for id in parse_state_ids {
parse_table.states[id].lex_state_id = lex_state_id;
}
}
let mut main_lex_table = mem::take(&mut builder.table);
minimize_lex_table(&mut main_lex_table, parse_table);
sort_states(&mut main_lex_table, parse_table);
let mut large_character_sets = Vec::new();
for (variable_ix, _variable) in lexical_grammar.variables.iter().enumerate() {
let symbol = Symbol::terminal(variable_ix);
if !symbol_is_used(&syntax_grammar.variables, symbol) {
continue;
}
builder.reset();
builder.add_state_for_tokens(&TokenSet::from_iter([symbol]));
for state in &builder.table.states {
let mut characters = CharacterSet::empty();
for (chars, action) in &state.advance_actions {
if action.in_main_token {
characters = characters.add(chars);
continue;
}
if chars.range_count() > LARGE_CHARACTER_RANGE_COUNT
&& !large_character_sets.iter().any(|(_, set)| set == chars)
{
large_character_sets.push((None, chars.clone()));
}
}
if characters.range_count() > LARGE_CHARACTER_RANGE_COUNT
&& !large_character_sets
.iter()
.any(|(_, set)| *set == characters)
{
large_character_sets.push((Some(symbol), characters));
}
}
}
LexTables {
main_lex_table,
keyword_lex_table,
large_character_sets,
}
}
struct QueueEntry {
state_id: usize,
nfa_states: Vec<u32>,
eof_valid: bool,
}
struct LexTableBuilder<'a> {
lexical_grammar: &'a LexicalGrammar,
cursor: NfaCursor<'a>,
table: LexTable,
state_queue: VecDeque<QueueEntry>,
state_ids_by_nfa_state_set: HashMap<(Vec<u32>, bool), usize>,
}
impl<'a> LexTableBuilder<'a> {
fn new(lexical_grammar: &'a LexicalGrammar) -> Self {
Self {
lexical_grammar,
cursor: NfaCursor::new(&lexical_grammar.nfa, vec![]),
table: LexTable::default(),
state_queue: VecDeque::new(),
state_ids_by_nfa_state_set: HashMap::new(),
}
}
fn reset(&mut self) {
self.table = LexTable::default();
self.state_queue.clear();
self.state_ids_by_nfa_state_set.clear();
}
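/// Add an entry-point state from which all of the given tokens can be
/// matched, then drain the queue, populating every state that was newly
/// created along the way. Returns the entry-point state's id, which may refer
/// to an existing state if an equivalent one was already built.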
fn add_state_for_tokens(&mut self, tokens: &TokenSet) -> usize {
let mut eof_valid = false;
let nfa_states = tokens
.iter()
.filter_map(|token| {
if token.is_terminal() {
Some(self.lexical_grammar.variables[token.index].start_state)
} else {
eof_valid = true;
None
}
})
.collect();
let (state_id, is_new) = self.add_state(nfa_states, eof_valid);
if is_new {
info!(
"entry point state: {}, tokens: {:?}",
state_id,
tokens
.iter()
.map(|t| &self.lexical_grammar.variables[t.index].name)
.collect::<Vec<_>>()
);
}
while let Some(QueueEntry {
state_id,
nfa_states,
eof_valid,
}) = self.state_queue.pop_front()
{
self.populate_state(state_id, nfa_states, eof_valid);
}
state_id
}
fn add_state(&mut self, nfa_states: Vec<u32>, eof_valid: bool) -> (usize, bool) {
self.cursor.reset(nfa_states);
match self
.state_ids_by_nfa_state_set
.entry((self.cursor.state_ids.clone(), eof_valid))
{
Entry::Occupied(o) => (*o.get(), false),
Entry::Vacant(v) => {
let state_id = self.table.states.len();
self.table.states.push(LexState::default());
self.state_queue.push_back(QueueEntry {
state_id,
nfa_states: v.key().0.clone(),
eof_valid,
});
v.insert(state_id);
(state_id, true)
}
}
}
fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>, eof_valid: bool) {
self.cursor.force_reset(nfa_states);
// The EOF state is represented as an empty list of NFA states.
let mut completion = None;
for (id, prec) in self.cursor.completions() {
if let Some((prev_id, prev_precedence)) = completion {
if TokenConflictMap::prefer_token(
self.lexical_grammar,
(prev_precedence, prev_id),
(prec, id),
) {
continue;
}
}
completion = Some((id, prec));
}
let transitions = self.cursor.transitions();
let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep);
// If EOF is a valid lookahead token, add a transition predicated on the null
// character that leads to the empty set of NFA states.
if eof_valid {
let (next_state_id, _) = self.add_state(Vec::new(), false);
self.table.states[state_id].eof_action = Some(AdvanceAction {
state: next_state_id,
in_main_token: true,
});
}
for transition in transitions {
if let Some((completed_id, completed_precedence)) = completion {
if !TokenConflictMap::prefer_transition(
self.lexical_grammar,
&transition,
completed_id,
completed_precedence,
has_sep,
) {
continue;
}
}
let (next_state_id, _) =
self.add_state(transition.states, eof_valid && transition.is_separator);
self.table.states[state_id].advance_actions.push((
transition.characters,
AdvanceAction {
state: next_state_id,
in_main_token: !transition.is_separator,
},
));
}
if let Some((complete_id, _)) = completion {
self.table.states[state_id].accept_action = Some(Symbol::terminal(complete_id));
} else if self.cursor.state_ids.is_empty() {
self.table.states[state_id].accept_action = Some(Symbol::end());
}
}
}
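/// Try to merge the tokens of `other` into `tokens`. The merge is rejected
/// (returning `false` and leaving `tokens` unchanged) if some token that is
/// present in only one of the two sets conflicts with, or matches a prefix
/// of, a token in the other set, or if the two tokens overlap without ever
/// appearing together in the same parse state.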
fn merge_token_set(
tokens: &mut TokenSet,
other: &TokenSet,
lexical_grammar: &LexicalGrammar,
token_conflict_map: &TokenConflictMap,
coincident_token_index: &CoincidentTokenIndex,
) -> bool {
for i in 0..lexical_grammar.variables.len() {
let symbol = Symbol::terminal(i);
let set_without_terminal = match (tokens.contains_terminal(i), other.contains_terminal(i)) {
(true, false) => other,
(false, true) => tokens,
_ => continue,
};
for existing_token in set_without_terminal.terminals() {
if token_conflict_map.does_conflict(i, existing_token.index)
|| token_conflict_map.does_match_prefix(i, existing_token.index)
{
return false;
}
if !coincident_token_index.contains(symbol, existing_token)
&& (token_conflict_map.does_overlap(existing_token.index, i)
|| token_conflict_map.does_overlap(i, existing_token.index))
{
return false;
}
}
}
tokens.insert_all(other);
true
}
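/// Merge equivalent lex states, in the style of DFA minimization: states
/// start out grouped by a signature (accept action, EOF behavior, and advance
/// characters), and groups are then repeatedly split until no two states in
/// the same group advance to states in different groups.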
fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
// Initially group the states by their accept action and their
// valid lookahead characters.
let mut state_ids_by_signature = HashMap::new();
for (i, state) in table.states.iter().enumerate() {
let signature = (
i == 0,
state.accept_action,
state.eof_action.is_some(),
state
.advance_actions
.iter()
.map(|(characters, action)| (characters.clone(), action.in_main_token))
.collect::<Vec<_>>(),
);
state_ids_by_signature
.entry(signature)
.or_insert(Vec::new())
.push(i);
}
let mut state_ids_by_group_id = state_ids_by_signature
.into_iter()
.map(|e| e.1)
.collect::<Vec<_>>();
state_ids_by_group_id.sort();
let error_group_index = state_ids_by_group_id
.iter()
.position(|g| g.contains(&0))
.unwrap();
state_ids_by_group_id.swap(error_group_index, 0);
let mut group_ids_by_state_id = vec![0; table.states.len()];
for (group_id, state_ids) in state_ids_by_group_id.iter().enumerate() {
for state_id in state_ids {
group_ids_by_state_id[*state_id] = group_id;
}
}
while split_state_id_groups(
&table.states,
&mut state_ids_by_group_id,
&mut group_ids_by_state_id,
1,
lex_states_differ,
) {
continue;
}
let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
for state_ids in &state_ids_by_group_id {
let mut new_state = LexState::default();
mem::swap(&mut new_state, &mut table.states[state_ids[0]]);
for (_, advance_action) in &mut new_state.advance_actions {
advance_action.state = group_ids_by_state_id[advance_action.state];
}
if let Some(eof_action) = &mut new_state.eof_action {
eof_action.state = group_ids_by_state_id[eof_action.state];
}
new_states.push(new_state);
}
for state in &mut parse_table.states {
state.lex_state_id = group_ids_by_state_id[state.lex_state_id];
}
table.states = new_states;
}
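/// Two states in the same group differ only if some pair of corresponding
/// advance actions leads to states that currently belong to different groups;
/// the characters and accept actions were already matched by the initial
/// signature grouping above.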
fn lex_states_differ(left: &LexState, right: &LexState, group_ids_by_state_id: &[usize]) -> bool {
left.advance_actions
.iter()
.zip(right.advance_actions.iter())
.any(|(left, right)| {
group_ids_by_state_id[left.1.state] != group_ids_by_state_id[right.1.state]
})
}
fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
// Get a mapping of old state index -> new_state_index
let mut old_ids_by_new_id = (0..table.states.len()).collect::<Vec<_>>();
old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);
// Get the inverse mapping
let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
for (id, old_id) in old_ids_by_new_id.iter().enumerate() {
new_ids_by_old_id[*old_id] = id;
}
// Reorder the parse states and update their references to reflect
// the new ordering.
table.states = old_ids_by_new_id
.iter()
.map(|old_id| {
let mut state = LexState::default();
mem::swap(&mut state, &mut table.states[*old_id]);
for (_, advance_action) in &mut state.advance_actions {
advance_action.state = new_ids_by_old_id[advance_action.state];
}
if let Some(eof_action) = &mut state.eof_action {
eof_action.state = new_ids_by_old_id[eof_action.state];
}
state
})
.collect();
// Update the parse table's lex state references
for state in &mut parse_table.states {
state.lex_state_id = new_ids_by_old_id[state.lex_state_id];
}
}

File diff suppressed because it is too large


@@ -1,79 +0,0 @@
use std::fmt;
use crate::generate::{
grammars::LexicalGrammar,
rules::Symbol,
tables::{ParseStateId, ParseTable},
};
pub struct CoincidentTokenIndex<'a> {
entries: Vec<Vec<ParseStateId>>,
grammar: &'a LexicalGrammar,
n: usize,
}
impl<'a> CoincidentTokenIndex<'a> {
pub fn new(table: &ParseTable, lexical_grammar: &'a LexicalGrammar) -> Self {
let n = lexical_grammar.variables.len();
let mut result = Self {
n,
grammar: lexical_grammar,
entries: vec![Vec::new(); n * n],
};
for (i, state) in table.states.iter().enumerate() {
for symbol in state.terminal_entries.keys() {
if symbol.is_terminal() {
for other_symbol in state.terminal_entries.keys() {
if other_symbol.is_terminal() {
let index = result.index(symbol.index, other_symbol.index);
if result.entries[index].last().copied() != Some(i) {
result.entries[index].push(i);
}
}
}
}
}
}
result
}
pub fn states_with(&self, a: Symbol, b: Symbol) -> &[ParseStateId] {
&self.entries[self.index(a.index, b.index)]
}
pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
!self.entries[self.index(a.index, b.index)].is_empty()
}
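/// Map an unordered pair of token indices onto a position in the flat,
/// symmetric `entries` matrix. The smaller index is always used as the row,
/// so e.g. with `n == 4`, `index(3, 1) == index(1, 3) == 1 * 4 + 3`.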
#[must_use]
const fn index(&self, a: usize, b: usize) -> usize {
if a < b {
a * self.n + b
} else {
b * self.n + a
}
}
}
impl<'a> fmt::Debug for CoincidentTokenIndex<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "CoincidentTokenIndex {{")?;
writeln!(f, " entries: {{")?;
for i in 0..self.n {
writeln!(f, " {}: {{", self.grammar.variables[i].name)?;
for j in 0..self.n {
writeln!(
f,
" {}: {:?},",
self.grammar.variables[j].name,
self.entries[self.index(i, j)].len()
)?;
}
writeln!(f, " }},")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}


@@ -1,416 +0,0 @@
use std::{
cmp::Ordering,
fmt,
hash::{Hash, Hasher},
};
use lazy_static::lazy_static;
use crate::generate::{
grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
};
lazy_static! {
static ref START_PRODUCTION: Production = Production {
dynamic_precedence: 0,
steps: vec![ProductionStep {
symbol: Symbol {
index: 0,
kind: SymbolType::NonTerminal,
},
precedence: Precedence::None,
associativity: None,
alias: None,
field_name: None,
}],
};
}
/// A [`ParseItem`] represents an in-progress match of a single production in a grammar.
#[derive(Clone, Copy, Debug)]
pub struct ParseItem<'a> {
/// The index of the parent rule within the grammar.
pub variable_index: u32,
/// The number of symbols that have already been matched.
pub step_index: u32,
/// The production being matched.
pub production: &'a Production,
/// A boolean indicating whether any of the already-matched children were
/// hidden nodes and had fields. Ordinarily, a parse item's behavior is not
/// affected by the symbols of its preceding children; it only needs to
/// keep track of their fields and aliases.
///
/// Take for example these two items:
/// X -> a b • c
/// X -> a g • c
///
/// They can be considered equivalent, for the purposes of parse table
/// generation, because they entail the same actions. But if this flag is
/// true, then the item's set of inherited fields may depend on the specific
/// symbols of its preceding children.
pub has_preceding_inherited_fields: bool,
}
/// A [`ParseItemSet`] represents a set of in-progress matches of productions in a
/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that
/// are allowed to *follow* the in-progress rule. This object corresponds directly
/// to a state in the final parse table.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct ParseItemSet<'a> {
pub entries: Vec<(ParseItem<'a>, TokenSet)>,
}
/// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead
/// information. Parse states with the same core are candidates for merging.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseItemSetCore<'a> {
pub entries: Vec<ParseItem<'a>>,
}
pub struct ParseItemDisplay<'a>(
pub &'a ParseItem<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
pub struct TokenSetDisplay<'a>(
pub &'a TokenSet,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
pub struct ParseItemSetDisplay<'a>(
pub &'a ParseItemSet<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
impl<'a> ParseItem<'a> {
pub fn start() -> Self {
ParseItem {
variable_index: u32::MAX,
production: &START_PRODUCTION,
step_index: 0,
has_preceding_inherited_fields: false,
}
}
pub fn step(&self) -> Option<&'a ProductionStep> {
self.production.steps.get(self.step_index as usize)
}
pub fn symbol(&self) -> Option<Symbol> {
self.step().map(|step| step.symbol)
}
pub fn associativity(&self) -> Option<Associativity> {
self.prev_step().and_then(|step| step.associativity)
}
pub fn precedence(&self) -> &Precedence {
self.prev_step()
.map_or(&Precedence::None, |step| &step.precedence)
}
pub fn prev_step(&self) -> Option<&'a ProductionStep> {
if self.step_index > 0 {
Some(&self.production.steps[self.step_index as usize - 1])
} else {
None
}
}
#[must_use]
pub fn is_done(&self) -> bool {
self.step_index as usize == self.production.steps.len()
}
#[must_use]
pub const fn is_augmented(&self) -> bool {
self.variable_index == u32::MAX
}
/// Create an item like this one, but advanced by one step.
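/// E.g. the successor of the item `X -> a • b` is `X -> a b •`.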
#[must_use]
pub const fn successor(&self) -> Self {
ParseItem {
variable_index: self.variable_index,
production: self.production,
step_index: self.step_index + 1,
has_preceding_inherited_fields: self.has_preceding_inherited_fields,
}
}
/// Create an item identical to this one, but with a different production.
/// This is used when dynamically "inlining" certain symbols in a production.
pub const fn substitute_production(&self, production: &'a Production) -> Self {
let mut result = *self;
result.production = production;
result
}
}
impl<'a> ParseItemSet<'a> {
pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, TokenSet)>) -> Self {
let mut result = Self::default();
for (item, lookaheads) in elements {
result.insert(item, &lookaheads);
}
result
}
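/// Insert an item into the set, keeping `entries` sorted by item so that
/// membership can be checked with a binary search. If the item is already
/// present, its existing lookahead set is extended with `lookaheads` instead.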
pub fn insert(&mut self, item: ParseItem<'a>, lookaheads: &TokenSet) -> &mut TokenSet {
match self.entries.binary_search_by(|(i, _)| i.cmp(&item)) {
Err(i) => {
self.entries.insert(i, (item, lookaheads.clone()));
&mut self.entries[i].1
}
Ok(i) => {
self.entries[i].1.insert_all(lookaheads);
&mut self.entries[i].1
}
}
}
pub fn core(&self) -> ParseItemSetCore<'a> {
ParseItemSetCore {
entries: self.entries.iter().map(|e| e.0).collect(),
}
}
}
impl<'a> fmt::Display for ParseItemDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
if self.0.is_augmented() {
write!(f, "START →")?;
} else {
write!(
f,
"{} →",
&self.1.variables[self.0.variable_index as usize].name
)?;
}
for (i, step) in self.0.production.steps.iter().enumerate() {
if i == self.0.step_index as usize {
write!(f, "")?;
if let Some(associativity) = step.associativity {
if step.precedence.is_none() {
write!(f, " ({associativity:?})")?;
} else {
write!(f, " ({} {associativity:?})", step.precedence)?;
}
} else if !step.precedence.is_none() {
write!(f, " ({})", step.precedence)?;
}
}
write!(f, " ")?;
if step.symbol.is_terminal() {
if let Some(variable) = self.2.variables.get(step.symbol.index) {
write!(f, "{}", &variable.name)?;
} else {
write!(f, "terminal-{}", step.symbol.index)?;
}
} else if step.symbol.is_external() {
write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
} else {
write!(f, "{}", &self.1.variables[step.symbol.index].name)?;
}
if let Some(alias) = &step.alias {
write!(f, "@{}", alias.value)?;
}
}
if self.0.is_done() {
write!(f, "")?;
if let Some(step) = self.0.production.steps.last() {
if let Some(associativity) = step.associativity {
if step.precedence.is_none() {
write!(f, " ({associativity:?})")?;
} else {
write!(f, " ({} {associativity:?})", step.precedence)?;
}
} else if !step.precedence.is_none() {
write!(f, " ({})", step.precedence)?;
}
}
}
Ok(())
}
}
impl<'a> fmt::Display for TokenSetDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "[")?;
for (i, symbol) in self.0.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
if symbol.is_terminal() {
if let Some(variable) = self.2.variables.get(symbol.index) {
write!(f, "{}", &variable.name)?;
} else {
write!(f, "terminal-{}", symbol.index)?;
}
} else if symbol.is_external() {
write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
} else {
write!(f, "{}", &self.1.variables[symbol.index].name)?;
}
}
write!(f, "]")?;
Ok(())
}
}
impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
for (item, lookaheads) in &self.0.entries {
writeln!(
f,
"{}\t{}",
ParseItemDisplay(item, self.1, self.2),
TokenSetDisplay(lookaheads, self.1, self.2)
)?;
}
Ok(())
}
}
impl<'a> Hash for ParseItem<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_u32(self.variable_index);
hasher.write_u32(self.step_index);
hasher.write_i32(self.production.dynamic_precedence);
hasher.write_usize(self.production.steps.len());
hasher.write_i32(i32::from(self.has_preceding_inherited_fields));
self.precedence().hash(hasher);
self.associativity().hash(hasher);
// The already-matched children don't play any role in the parse state for
// this item, unless any of the following are true:
// * the children have fields
// * the children have aliases
// * the children are hidden and have fields
// See the docs for `has_preceding_inherited_fields`.
for step in &self.production.steps[0..self.step_index as usize] {
step.alias.hash(hasher);
step.field_name.hash(hasher);
if self.has_preceding_inherited_fields {
step.symbol.hash(hasher);
}
}
for step in &self.production.steps[self.step_index as usize..] {
step.hash(hasher);
}
}
}
impl<'a> PartialEq for ParseItem<'a> {
fn eq(&self, other: &Self) -> bool {
if self.variable_index != other.variable_index
|| self.step_index != other.step_index
|| self.production.dynamic_precedence != other.production.dynamic_precedence
|| self.production.steps.len() != other.production.steps.len()
|| self.precedence() != other.precedence()
|| self.associativity() != other.associativity()
|| self.has_preceding_inherited_fields != other.has_preceding_inherited_fields
{
return false;
}
for (i, step) in self.production.steps.iter().enumerate() {
// See the previous comment (in the `Hash::hash` impl) regarding comparisons
// of parse items' already-completed steps.
if i < self.step_index as usize {
if step.alias != other.production.steps[i].alias {
return false;
}
if step.field_name != other.production.steps[i].field_name {
return false;
}
if self.has_preceding_inherited_fields
&& step.symbol != other.production.steps[i].symbol
{
return false;
}
} else if *step != other.production.steps[i] {
return false;
}
}
true
}
}
impl<'a> Ord for ParseItem<'a> {
fn cmp(&self, other: &Self) -> Ordering {
self.step_index
.cmp(&other.step_index)
.then_with(|| self.variable_index.cmp(&other.variable_index))
.then_with(|| {
self.production
.dynamic_precedence
.cmp(&other.production.dynamic_precedence)
})
.then_with(|| {
self.production
.steps
.len()
.cmp(&other.production.steps.len())
})
.then_with(|| self.precedence().cmp(other.precedence()))
.then_with(|| self.associativity().cmp(&other.associativity()))
.then_with(|| {
for (i, step) in self.production.steps.iter().enumerate() {
// See the previous comment (in the `Hash::hash` impl) regarding comparisons
// of parse items' already-completed steps.
let o = if i < self.step_index as usize {
step.alias
.cmp(&other.production.steps[i].alias)
.then_with(|| {
step.field_name.cmp(&other.production.steps[i].field_name)
})
} else {
step.cmp(&other.production.steps[i])
};
if o != Ordering::Equal {
return o;
}
}
Ordering::Equal
})
}
}
impl<'a> PartialOrd for ParseItem<'a> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<'a> Eq for ParseItem<'a> {}
impl<'a> Hash for ParseItemSet<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_usize(self.entries.len());
for (item, lookaheads) in &self.entries {
item.hash(hasher);
lookaheads.hash(hasher);
}
}
}
impl<'a> Hash for ParseItemSetCore<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_usize(self.entries.len());
for item in &self.entries {
item.hash(hasher);
}
}
}


@@ -1,345 +0,0 @@
use std::{
collections::{HashMap, HashSet},
fmt,
};
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
use crate::generate::{
grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
rules::{Symbol, SymbolType, TokenSet},
};
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition<'a> {
item: ParseItem<'a>,
info: FollowSetInfo,
}
#[derive(Clone, Debug, PartialEq, Eq)]
struct FollowSetInfo {
lookaheads: TokenSet,
propagates_lookaheads: bool,
}
pub struct ParseItemSetBuilder<'a> {
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
first_sets: HashMap<Symbol, TokenSet>,
last_sets: HashMap<Symbol, TokenSet>,
inlines: &'a InlinedProductionMap,
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
}
fn find_or_push<T: Eq>(vector: &mut Vec<T>, value: T) {
if !vector.contains(&value) {
vector.push(value);
}
}
impl<'a> ParseItemSetBuilder<'a> {
pub fn new(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
) -> Self {
let mut result = Self {
syntax_grammar,
lexical_grammar,
first_sets: HashMap::new(),
last_sets: HashMap::new(),
inlines,
transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
};
// For each grammar symbol, populate the FIRST and LAST sets: the set of
// terminals that appear at the beginning and end of that symbol's productions,
// respectively.
//
// For a terminal symbol, the FIRST and LAST sets just consist of the
// terminal itself.
for i in 0..lexical_grammar.variables.len() {
let symbol = Symbol::terminal(i);
let mut set = TokenSet::new();
set.insert(symbol);
result.first_sets.insert(symbol, set.clone());
result.last_sets.insert(symbol, set);
}
for i in 0..syntax_grammar.external_tokens.len() {
let symbol = Symbol::external(i);
let mut set = TokenSet::new();
set.insert(symbol);
result.first_sets.insert(symbol, set.clone());
result.last_sets.insert(symbol, set);
}
// The FIRST set of a non-terminal `i` is the union of the following sets:
// * the set of all terminals that appear at the beginnings of i's productions
// * the FIRST sets of all the non-terminals that appear at the beginnings of i's
// productions
//
// Rather than computing these sets using recursion, we use an explicit stack
// called `symbols_to_process`.
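//
// For example, given the productions `expr -> number` and
// `expr -> expr '+' expr`, the stack starts with `expr`, marks it as
// processed, pushes the first symbols `number` and `expr`, skips the
// already-processed `expr`, and ends with FIRST(expr) = { number }.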
let mut symbols_to_process = Vec::new();
let mut processed_non_terminals = HashSet::new();
for i in 0..syntax_grammar.variables.len() {
let symbol = Symbol::non_terminal(i);
let first_set = result
.first_sets
.entry(symbol)
.or_insert_with(TokenSet::new);
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push(symbol);
while let Some(current_symbol) = symbols_to_process.pop() {
if current_symbol.is_terminal() || current_symbol.is_external() {
first_set.insert(current_symbol);
} else if processed_non_terminals.insert(current_symbol) {
for production in &syntax_grammar.variables[current_symbol.index].productions {
if let Some(step) = production.steps.first() {
symbols_to_process.push(step.symbol);
}
}
}
}
// The LAST set is defined in a similar way to the FIRST set.
let last_set = result.last_sets.entry(symbol).or_insert_with(TokenSet::new);
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push(symbol);
while let Some(current_symbol) = symbols_to_process.pop() {
if current_symbol.is_terminal() || current_symbol.is_external() {
last_set.insert(current_symbol);
} else if processed_non_terminals.insert(current_symbol) {
for production in &syntax_grammar.variables[current_symbol.index].productions {
if let Some(step) = production.steps.last() {
symbols_to_process.push(step.symbol);
}
}
}
}
}
// To compute an item set's transitive closure, we find each item in the set
// whose next symbol is a non-terminal, and we add new items to the set for
// each of that symbol's productions. These productions might themselves begin
// with non-terminals, so the process continues recursively. In this process,
// the total set of entries that get added depends only on two things:
// * the set of non-terminal symbols that occur at each item's current position
// * the set of terminals that occur after each of these non-terminal symbols
//
// So we can avoid a lot of duplicated recursive work by precomputing, for each
// non-terminal symbol `i`, a final list of *additions* that must be made to an
// item set when `i` occurs as the next symbol in one of its core items. The
// structure of an *addition* is as follows:
// * `item` - the new item that must be added as part of the expansion of `i`
// * `lookaheads` - lookahead tokens that can always come after that item in the expansion
// of `i`
// * `propagates_lookaheads` - a boolean indicating whether or not `item` can occur at the
// *end* of the expansion of `i`, so that i's own current lookahead tokens can occur
// after `item`.
//
// Again, rather than computing these additions recursively, we use an explicit
// stack called `entries_to_process`.
for i in 0..syntax_grammar.variables.len() {
let empty_lookaheads = TokenSet::new();
let mut entries_to_process = vec![(i, &empty_lookaheads, true)];
// First, build up a map whose keys are all of the non-terminals that can
// appear at the beginning of non-terminal `i`, and whose values store
// information about the tokens that can follow each non-terminal.
let mut follow_set_info_by_non_terminal = HashMap::new();
while let Some(entry) = entries_to_process.pop() {
let (variable_index, lookaheads, propagates_lookaheads) = entry;
let existing_info = follow_set_info_by_non_terminal
.entry(variable_index)
.or_insert_with(|| FollowSetInfo {
lookaheads: TokenSet::new(),
propagates_lookaheads: false,
});
let did_add_follow_set_info;
if propagates_lookaheads {
did_add_follow_set_info = !existing_info.propagates_lookaheads;
existing_info.propagates_lookaheads = true;
} else {
did_add_follow_set_info = existing_info.lookaheads.insert_all(lookaheads);
}
if did_add_follow_set_info {
for production in &syntax_grammar.variables[variable_index].productions {
if let Some(symbol) = production.first_symbol() {
if symbol.is_non_terminal() {
if production.steps.len() == 1 {
entries_to_process.push((
symbol.index,
lookaheads,
propagates_lookaheads,
));
} else {
entries_to_process.push((
symbol.index,
&result.first_sets[&production.steps[1].symbol],
false,
));
}
}
}
}
}
}
// Store all of those non-terminals' productions, along with their associated
// lookahead info, as *additions* associated with non-terminal `i`.
let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
let variable = &syntax_grammar.variables[variable_index];
let non_terminal = Symbol::non_terminal(variable_index);
let variable_index = variable_index as u32;
if syntax_grammar.variables_to_inline.contains(&non_terminal) {
continue;
}
for production in &variable.productions {
let item = ParseItem {
variable_index,
production,
step_index: 0,
has_preceding_inherited_fields: false,
};
if let Some(inlined_productions) =
inlines.inlined_productions(item.production, item.step_index)
{
for production in inlined_productions {
find_or_push(
additions_for_non_terminal,
TransitiveClosureAddition {
item: item.substitute_production(production),
info: follow_set_info.clone(),
},
);
}
} else {
find_or_push(
additions_for_non_terminal,
TransitiveClosureAddition {
item,
info: follow_set_info.clone(),
},
);
}
}
}
}
result
}
pub fn transitive_closure(&self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
let mut result = ParseItemSet::default();
for (item, lookaheads) in &item_set.entries {
if let Some(productions) = self
.inlines
.inlined_productions(item.production, item.step_index)
{
for production in productions {
self.add_item(
&mut result,
item.substitute_production(production),
lookaheads,
);
}
} else {
self.add_item(&mut result, *item, lookaheads);
}
}
result
}
pub fn first_set(&self, symbol: &Symbol) -> &TokenSet {
&self.first_sets[symbol]
}
pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
&self.last_sets[symbol]
}
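/// Add a single item (with its lookaheads) to the set. If the item's next
/// symbol is a non-terminal, also apply the precomputed additions for that
/// non-terminal, extending the lookaheads of any addition that can end the
/// non-terminal's expansion with the tokens that can follow the non-terminal
/// itself.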
fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) {
if let Some(step) = item.step() {
if step.symbol.is_non_terminal() {
let next_step = item.successor().step();
// Determine which tokens can follow this non-terminal.
let following_tokens = next_step.map_or(lookaheads, |next_step| {
self.first_sets.get(&next_step.symbol).unwrap()
});
// Use the pre-computed *additions* to expand the non-terminal.
for addition in &self.transitive_closure_additions[step.symbol.index] {
let lookaheads = set.insert(addition.item, &addition.info.lookaheads);
if addition.info.propagates_lookaheads {
lookaheads.insert_all(following_tokens);
}
}
}
}
set.insert(item, lookaheads);
}
}
impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "ParseItemSetBuilder {{")?;
writeln!(f, " first_sets: {{")?;
for (symbol, first_set) in &self.first_sets {
let name = match symbol.kind {
SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
};
writeln!(
f,
" first({name:?}): {}",
TokenSetDisplay(first_set, self.syntax_grammar, self.lexical_grammar)
)?;
}
writeln!(f, " }}")?;
writeln!(f, " last_sets: {{")?;
for (symbol, last_set) in &self.last_sets {
let name = match symbol.kind {
SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
};
writeln!(
f,
" last({name:?}): {}",
TokenSetDisplay(last_set, self.syntax_grammar, self.lexical_grammar)
)?;
}
writeln!(f, " }}")?;
writeln!(f, " additions: {{")?;
for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
writeln!(f, " {}: {{", variable.name)?;
for addition in &self.transitive_closure_additions[i] {
writeln!(
f,
" {}",
ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar)
)?;
}
writeln!(f, " }},")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}


@@ -1,493 +0,0 @@
use std::{
collections::{HashMap, HashSet},
mem,
};
use log::info;
use super::token_conflicts::TokenConflictMap;
use crate::generate::{
dedup::split_state_id_groups,
grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
rules::{AliasMap, Symbol, TokenSet},
tables::{GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry},
};
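/// Shrink the parse table by merging compatible states, short-circuiting
/// unit reductions, removing states that are no longer referenced, and
/// finally reordering the remaining states by descending size.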
pub fn minimize_parse_table(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
token_conflict_map: &TokenConflictMap,
keywords: &TokenSet,
) {
let mut minimizer = Minimizer {
parse_table,
syntax_grammar,
lexical_grammar,
token_conflict_map,
keywords,
simple_aliases,
};
minimizer.merge_compatible_states();
minimizer.remove_unit_reductions();
minimizer.remove_unused_states();
minimizer.reorder_states_by_descending_size();
}
struct Minimizer<'a> {
parse_table: &'a mut ParseTable,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
token_conflict_map: &'a TokenConflictMap<'a>,
keywords: &'a TokenSet,
simple_aliases: &'a AliasMap,
}
impl<'a> Minimizer<'a> {
fn remove_unit_reductions(&mut self) {
let mut aliased_symbols = HashSet::new();
for variable in &self.syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if step.alias.is_some() {
aliased_symbols.insert(step.symbol);
}
}
}
}
let mut unit_reduction_symbols_by_state = HashMap::new();
for (i, state) in self.parse_table.states.iter().enumerate() {
let mut only_unit_reductions = true;
let mut unit_reduction_symbol = None;
for (_, entry) in &state.terminal_entries {
for action in &entry.actions {
match action {
ParseAction::ShiftExtra => continue,
ParseAction::Reduce {
child_count: 1,
production_id: 0,
symbol,
..
} => {
if !self.simple_aliases.contains_key(symbol)
&& !self.syntax_grammar.supertype_symbols.contains(symbol)
&& !aliased_symbols.contains(symbol)
&& self.syntax_grammar.variables[symbol.index].kind
!= VariableType::Named
&& (unit_reduction_symbol.is_none()
|| unit_reduction_symbol == Some(symbol))
{
unit_reduction_symbol = Some(symbol);
continue;
}
}
_ => {}
}
only_unit_reductions = false;
break;
}
if !only_unit_reductions {
break;
}
}
if let Some(symbol) = unit_reduction_symbol {
if only_unit_reductions {
unit_reduction_symbols_by_state.insert(i, *symbol);
}
}
}
for state in &mut self.parse_table.states {
let mut done = false;
while !done {
done = true;
state.update_referenced_states(|other_state_id, state| {
unit_reduction_symbols_by_state.get(&other_state_id).map_or(
other_state_id,
|symbol| {
done = false;
match state.nonterminal_entries.get(symbol) {
Some(GotoAction::Goto(state_id)) => *state_id,
_ => other_state_id,
}
},
)
});
}
}
}
fn merge_compatible_states(&mut self) {
let core_count = 1 + self
.parse_table
.states
.iter()
.map(|state| state.core_id)
.max()
.unwrap();
// Initially group the states by their parse item set core.
let mut group_ids_by_state_id = Vec::with_capacity(self.parse_table.states.len());
let mut state_ids_by_group_id = vec![Vec::<ParseStateId>::new(); core_count];
for (i, state) in self.parse_table.states.iter().enumerate() {
state_ids_by_group_id[state.core_id].push(i);
group_ids_by_state_id.push(state.core_id);
}
split_state_id_groups(
&self.parse_table.states,
&mut state_ids_by_group_id,
&mut group_ids_by_state_id,
0,
|left, right, groups| self.states_conflict(left, right, groups),
);
while split_state_id_groups(
&self.parse_table.states,
&mut state_ids_by_group_id,
&mut group_ids_by_state_id,
0,
|left, right, groups| self.state_successors_differ(left, right, groups),
) {
continue;
}
let error_group_index = state_ids_by_group_id
.iter()
.position(|g| g.contains(&0))
.unwrap();
let start_group_index = state_ids_by_group_id
.iter()
.position(|g| g.contains(&1))
.unwrap();
state_ids_by_group_id.swap(error_group_index, 0);
state_ids_by_group_id.swap(start_group_index, 1);
// Create a list of new parse states: one state for each group of old states.
let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
for state_ids in &state_ids_by_group_id {
// Initialize the new state based on the first old state in the group.
let mut parse_state = ParseState::default();
mem::swap(&mut parse_state, &mut self.parse_table.states[state_ids[0]]);
// Extend the new state with all of the actions from the other old states
// in the group.
for state_id in &state_ids[1..] {
let mut other_parse_state = ParseState::default();
mem::swap(
&mut other_parse_state,
&mut self.parse_table.states[*state_id],
);
parse_state
.terminal_entries
.extend(other_parse_state.terminal_entries);
parse_state
.nonterminal_entries
.extend(other_parse_state.nonterminal_entries);
}
// Update the new state's outgoing references using the new grouping.
parse_state.update_referenced_states(|state_id, _| group_ids_by_state_id[state_id]);
new_states.push(parse_state);
}
self.parse_table.states = new_states;
}
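/// Two states conflict if any token that is valid in both has differing
/// actions, or if a token that is valid in only one of them would conflict
/// lexically with the other state's valid tokens.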
fn states_conflict(
&self,
left_state: &ParseState,
right_state: &ParseState,
group_ids_by_state_id: &[ParseStateId],
) -> bool {
for (token, left_entry) in &left_state.terminal_entries {
if let Some(right_entry) = right_state.terminal_entries.get(token) {
if self.entries_conflict(
left_state.id,
right_state.id,
token,
left_entry,
right_entry,
group_ids_by_state_id,
) {
return true;
}
} else if self.token_conflicts(
left_state.id,
right_state.id,
right_state.terminal_entries.keys(),
*token,
) {
return true;
}
}
for token in right_state.terminal_entries.keys() {
if !left_state.terminal_entries.contains_key(token)
&& self.token_conflicts(
left_state.id,
right_state.id,
left_state.terminal_entries.keys(),
*token,
)
{
return true;
}
}
false
}
fn state_successors_differ(
&self,
state1: &ParseState,
state2: &ParseState,
group_ids_by_state_id: &[ParseStateId],
) -> bool {
for (token, entry1) in &state1.terminal_entries {
if let ParseAction::Shift { state: s1, .. } = entry1.actions.last().unwrap() {
if let Some(entry2) = state2.terminal_entries.get(token) {
if let ParseAction::Shift { state: s2, .. } = entry2.actions.last().unwrap() {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {s1} {s2}",
state1.id,
state2.id,
self.symbol_name(token),
);
return true;
}
}
}
}
}
for (symbol, s1) in &state1.nonterminal_entries {
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
match (s1, s2) {
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {s1} {s2}",
state1.id,
state2.id,
self.symbol_name(symbol),
);
return true;
}
}
_ => return true,
}
}
}
false
}
fn entries_conflict(
&self,
state_id1: ParseStateId,
state_id2: ParseStateId,
token: &Symbol,
entry1: &ParseTableEntry,
entry2: &ParseTableEntry,
group_ids_by_state_id: &[ParseStateId],
) -> bool {
// To be compatible, entries need to have the same actions.
let actions1 = &entry1.actions;
let actions2 = &entry2.actions;
if actions1.len() != actions2.len() {
info!(
"split states {state_id1} {state_id2} - differing action counts for token {}",
self.symbol_name(token)
);
return true;
}
for (i, action1) in actions1.iter().enumerate() {
let action2 = &actions2[i];
// Two shift actions are equivalent if their destinations are in the same group.
if let (
ParseAction::Shift {
state: s1,
is_repetition: is_repetition1,
},
ParseAction::Shift {
state: s2,
is_repetition: is_repetition2,
},
) = (action1, action2)
{
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 == group2 && is_repetition1 == is_repetition2 {
continue;
}
info!(
"split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}",
self.symbol_name(token),
);
return true;
} else if action1 != action2 {
info!(
"split states {state_id1} {state_id2} - unequal actions for {}",
self.symbol_name(token),
);
return true;
}
}
false
}
fn token_conflicts<'b>(
&self,
left_id: ParseStateId,
right_id: ParseStateId,
existing_tokens: impl Iterator<Item = &'b Symbol>,
new_token: Symbol,
) -> bool {
if new_token == Symbol::end_of_nonterminal_extra() {
info!("split states {left_id} {right_id} - end of non-terminal extra",);
return true;
}
// Do not add external tokens; they could conflict lexically with any of the state's
// existing lookahead tokens.
if new_token.is_external() {
info!(
"split states {left_id} {right_id} - external token {}",
self.symbol_name(&new_token),
);
return true;
}
// Do not add tokens which are both internal and external. Their validity could
// influence the behavior of the external scanner.
if self
.syntax_grammar
.external_tokens
.iter()
.any(|external| external.corresponding_internal_token == Some(new_token))
{
info!(
"split states {left_id} {right_id} - internal/external token {}",
self.symbol_name(&new_token),
);
return true;
}
// Do not add a token if it conflicts with an existing token.
for token in existing_tokens {
if token.is_terminal()
&& !(self.syntax_grammar.word_token == Some(*token)
&& self.keywords.contains(&new_token))
&& !(self.syntax_grammar.word_token == Some(new_token)
&& self.keywords.contains(token))
&& (self
.token_conflict_map
.does_conflict(new_token.index, token.index)
|| self
.token_conflict_map
.does_match_same_string(new_token.index, token.index))
{
info!(
"split states {left_id} {right_id} - token {} conflicts with {}",
self.symbol_name(&new_token),
self.symbol_name(token),
);
return true;
}
}
false
}
fn symbol_name(&self, symbol: &Symbol) -> &String {
if symbol.is_non_terminal() {
&self.syntax_grammar.variables[symbol.index].name
} else if symbol.is_external() {
&self.syntax_grammar.external_tokens[symbol.index].name
} else {
&self.lexical_grammar.variables[symbol.index].name
}
}
fn remove_unused_states(&mut self) {
let mut state_usage_map = vec![false; self.parse_table.states.len()];
state_usage_map[0] = true;
state_usage_map[1] = true;
for state in &self.parse_table.states {
for referenced_state in state.referenced_states() {
state_usage_map[referenced_state] = true;
}
}
let mut removed_predecessor_count = 0;
let mut state_replacement_map = vec![0; self.parse_table.states.len()];
for state_id in 0..self.parse_table.states.len() {
state_replacement_map[state_id] = state_id - removed_predecessor_count;
if !state_usage_map[state_id] {
removed_predecessor_count += 1;
}
}
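// For example, with usage flags [true, false, true], the replacement
// map becomes [0, 1, 1]: old state 2 slides down into slot 1, the index
// vacated by the removed state.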
let mut state_id = 0;
let mut original_state_id = 0;
while state_id < self.parse_table.states.len() {
if state_usage_map[original_state_id] {
self.parse_table.states[state_id].update_referenced_states(|other_state_id, _| {
state_replacement_map[other_state_id]
});
state_id += 1;
} else {
self.parse_table.states.remove(state_id);
}
original_state_id += 1;
}
}
fn reorder_states_by_descending_size(&mut self) {
// Get a mapping of old state index -> new_state_index
let mut old_ids_by_new_id = (0..self.parse_table.states.len()).collect::<Vec<_>>();
old_ids_by_new_id.sort_unstable_by_key(|i| {
// Don't change states 0 (the error state) or 1 (the start state).
if *i <= 1 {
return *i as i64 - 1_000_000;
}
// Reorder all the other states by descending symbol count.
let state = &self.parse_table.states[*i];
-((state.terminal_entries.len() + state.nonterminal_entries.len()) as i64)
});
// Get the inverse mapping
let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
for (id, old_id) in old_ids_by_new_id.iter().enumerate() {
new_ids_by_old_id[*old_id] = id;
}
// Reorder the parse states and update their references to reflect
// the new ordering.
self.parse_table.states = old_ids_by_new_id
.iter()
.map(|old_id| {
let mut state = ParseState::default();
mem::swap(&mut state, &mut self.parse_table.states[*old_id]);
state.update_referenced_states(|id, _| new_ids_by_old_id[id]);
state
})
.collect();
}
}


@@ -1,494 +0,0 @@
mod build_lex_table;
mod build_parse_table;
mod coincident_tokens;
mod item;
mod item_set_builder;
mod minimize_parse_table;
mod token_conflicts;
use std::collections::{BTreeSet, HashMap};
use anyhow::Result;
pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT;
use log::info;
use self::{
build_lex_table::build_lex_table,
build_parse_table::{build_parse_table, ParseStateInfo},
coincident_tokens::CoincidentTokenIndex,
minimize_parse_table::minimize_parse_table,
token_conflicts::TokenConflictMap,
};
use crate::generate::{
grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
nfa::{CharacterSet, NfaCursor},
node_types::VariableInfo,
rules::{AliasMap, Symbol, SymbolType, TokenSet},
tables::{LexTable, ParseAction, ParseTable, ParseTableEntry},
};
pub struct Tables {
pub parse_table: ParseTable,
pub main_lex_table: LexTable,
pub keyword_lex_table: LexTable,
pub word_token: Option<Symbol>,
pub large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
}
pub fn build_tables(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
variable_info: &[VariableInfo],
inlines: &InlinedProductionMap,
report_symbol_name: Option<&str>,
) -> Result<Tables> {
let (mut parse_table, following_tokens, parse_state_info) =
build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?;
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
let keywords = identify_keywords(
lexical_grammar,
&parse_table,
syntax_grammar.word_token,
&token_conflict_map,
&coincident_token_index,
);
populate_error_state(
&mut parse_table,
syntax_grammar,
lexical_grammar,
&coincident_token_index,
&token_conflict_map,
&keywords,
);
populate_used_symbols(&mut parse_table, syntax_grammar, lexical_grammar);
minimize_parse_table(
&mut parse_table,
syntax_grammar,
lexical_grammar,
simple_aliases,
&token_conflict_map,
&keywords,
);
let lex_tables = build_lex_table(
&mut parse_table,
syntax_grammar,
lexical_grammar,
&keywords,
&coincident_token_index,
&token_conflict_map,
);
populate_external_lex_states(&mut parse_table, syntax_grammar);
mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);
if let Some(report_symbol_name) = report_symbol_name {
report_state_info(
syntax_grammar,
lexical_grammar,
&parse_table,
&parse_state_info,
report_symbol_name,
);
}
Ok(Tables {
parse_table,
main_lex_table: lex_tables.main_lex_table,
keyword_lex_table: lex_tables.keyword_lex_table,
large_character_sets: lex_tables.large_character_sets,
word_token: syntax_grammar.word_token,
})
}
fn populate_error_state(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
coincident_token_index: &CoincidentTokenIndex,
token_conflict_map: &TokenConflictMap,
keywords: &TokenSet,
) {
let state = &mut parse_table.states[0];
let n = lexical_grammar.variables.len();
// First identify the *conflict-free tokens*: tokens that do not overlap with
// any other token in any way, besides matching exactly the same string.
let conflict_free_tokens = (0..n)
.filter_map(|i| {
let conflicts_with_other_tokens = (0..n).any(|j| {
j != i
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
&& token_conflict_map.does_match_shorter_or_longer(i, j)
});
if conflicts_with_other_tokens {
None
} else {
info!(
"error recovery - token {} has no conflicts",
lexical_grammar.variables[i].name
);
Some(Symbol::terminal(i))
}
})
.collect::<TokenSet>();
let recover_entry = ParseTableEntry {
reusable: false,
actions: vec![ParseAction::Recover],
};
// Exclude from the error-recovery state any token that conflicts with one of
// the *conflict-free tokens* identified above.
for i in 0..n {
let symbol = Symbol::terminal(i);
if !conflict_free_tokens.contains(&symbol)
&& !keywords.contains(&symbol)
&& syntax_grammar.word_token != Some(symbol)
{
if let Some(t) = conflict_free_tokens.iter().find(|t| {
!coincident_token_index.contains(symbol, *t)
&& token_conflict_map.does_conflict(symbol.index, t.index)
}) {
info!(
"error recovery - exclude token {} because of conflict with {}",
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
);
continue;
}
}
info!(
"error recovery - include token {}",
lexical_grammar.variables[i].name
);
state
.terminal_entries
.entry(symbol)
.or_insert_with(|| recover_entry.clone());
}
for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() {
if external_token.corresponding_internal_token.is_none() {
state
.terminal_entries
.entry(Symbol::external(i))
.or_insert_with(|| recover_entry.clone());
}
}
state.terminal_entries.insert(Symbol::end(), recover_entry);
}
fn populate_used_symbols(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) {
let mut terminal_usages = vec![false; lexical_grammar.variables.len()];
let mut non_terminal_usages = vec![false; syntax_grammar.variables.len()];
let mut external_usages = vec![false; syntax_grammar.external_tokens.len()];
for state in &parse_table.states {
for symbol in state.terminal_entries.keys() {
match symbol.kind {
SymbolType::Terminal => terminal_usages[symbol.index] = true,
SymbolType::External => external_usages[symbol.index] = true,
_ => {}
}
}
for symbol in state.nonterminal_entries.keys() {
non_terminal_usages[symbol.index] = true;
}
}
parse_table.symbols.push(Symbol::end());
for (i, value) in terminal_usages.into_iter().enumerate() {
if value {
// Assign the grammar's word token a low numerical index. This ensures that
// it can be stored in a subtree with no heap allocations, even for grammars with
// very large numbers of tokens. This is an optimization, but it's also important to
// ensure that a subtree's symbol can be successfully reassigned to the word token
// without having to move the subtree to the heap.
// See https://github.com/tree-sitter/tree-sitter/issues/258
if syntax_grammar.word_token.map_or(false, |t| t.index == i) {
parse_table.symbols.insert(1, Symbol::terminal(i));
} else {
parse_table.symbols.push(Symbol::terminal(i));
}
}
}
for (i, value) in external_usages.into_iter().enumerate() {
if value {
parse_table.symbols.push(Symbol::external(i));
}
}
for (i, value) in non_terminal_usages.into_iter().enumerate() {
if value {
parse_table.symbols.push(Symbol::non_terminal(i));
}
}
}
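/// Assign each parse state an external lex state id based on the set of
/// external tokens that are valid in that state, where internal tokens with
/// external counterparts also count. State 0 is reserved for the empty set.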
fn populate_external_lex_states(parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar) {
let mut external_tokens_by_corresponding_internal_token = HashMap::new();
for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() {
if let Some(symbol) = external_token.corresponding_internal_token {
external_tokens_by_corresponding_internal_token.insert(symbol.index, i);
}
}
// Ensure that external lex state 0 represents the absence of any
// external tokens.
parse_table.external_lex_states.push(TokenSet::new());
for i in 0..parse_table.states.len() {
let mut external_tokens = TokenSet::new();
for token in parse_table.states[i].terminal_entries.keys() {
if token.is_external() {
external_tokens.insert(*token);
} else if token.is_terminal() {
if let Some(index) =
external_tokens_by_corresponding_internal_token.get(&token.index)
{
external_tokens.insert(Symbol::external(*index));
}
}
}
parse_table.states[i].external_lex_state_id = parse_table
.external_lex_states
.iter()
.position(|tokens| *tokens == external_tokens)
.unwrap_or_else(|| {
parse_table.external_lex_states.push(external_tokens);
parse_table.external_lex_states.len() - 1
});
}
}
fn identify_keywords(
lexical_grammar: &LexicalGrammar,
parse_table: &ParseTable,
word_token: Option<Symbol>,
token_conflict_map: &TokenConflictMap,
coincident_token_index: &CoincidentTokenIndex,
) -> TokenSet {
if word_token.is_none() {
return TokenSet::new();
}
let word_token = word_token.unwrap();
let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());
// First find all of the candidate keyword tokens: tokens that start with
// letters or underscore and can match the same string as a word token.
let keyword_candidates = lexical_grammar
.variables
.iter()
.enumerate()
.filter_map(|(i, variable)| {
cursor.reset(vec![variable.start_state]);
if all_chars_are_alphabetical(&cursor)
&& token_conflict_map.does_match_same_string(i, word_token.index)
&& !token_conflict_map.does_match_different_string(i, word_token.index)
{
info!(
"Keywords - add candidate {}",
lexical_grammar.variables[i].name
);
Some(Symbol::terminal(i))
} else {
None
}
})
.collect::<TokenSet>();
// Exclude keyword candidates that shadow another keyword candidate.
let keywords = keyword_candidates
.iter()
.filter(|token| {
for other_token in keyword_candidates.iter() {
if other_token != *token
&& token_conflict_map.does_match_same_string(other_token.index, token.index)
{
info!(
"Keywords - exclude {} because it matches the same string as {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_token.index].name
);
return false;
}
}
true
})
.collect::<TokenSet>();
// Exclude keyword candidates for which substituting the keyword capture
// token would introduce new lexical conflicts with other tokens.
let keywords = keywords
.iter()
.filter(|token| {
for other_index in 0..lexical_grammar.variables.len() {
if keyword_candidates.contains(&Symbol::terminal(other_index)) {
continue;
}
// If the word token was already valid in every state containing
// this keyword candidate, then substituting the word token won't
// introduce any new lexical conflicts.
if coincident_token_index
.states_with(*token, Symbol::terminal(other_index))
.iter()
.all(|state_id| {
parse_table.states[*state_id]
.terminal_entries
.contains_key(&word_token)
})
{
continue;
}
if !token_conflict_map.has_same_conflict_status(
token.index,
word_token.index,
other_index,
) {
info!(
"Keywords - exclude {} because of conflict with {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_index].name
);
return false;
}
}
info!(
"Keywords - include {}",
lexical_grammar.variables[token.index].name,
);
true
})
.collect();
keywords
}
fn mark_fragile_tokens(
parse_table: &mut ParseTable,
lexical_grammar: &LexicalGrammar,
token_conflict_map: &TokenConflictMap,
) {
let n = lexical_grammar.variables.len();
let mut valid_tokens_mask = Vec::with_capacity(n);
for state in &mut parse_table.states {
valid_tokens_mask.clear();
valid_tokens_mask.resize(n, false);
for token in state.terminal_entries.keys() {
if token.is_terminal() {
valid_tokens_mask[token.index] = true;
}
}
for (token, entry) in &mut state.terminal_entries {
if token.is_terminal() {
for (i, is_valid) in valid_tokens_mask.iter().enumerate() {
if *is_valid && token_conflict_map.does_overlap(i, token.index) {
entry.reusable = false;
break;
}
}
}
}
}
}
fn report_state_info<'a>(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
parse_table: &ParseTable,
parse_state_info: &[ParseStateInfo<'a>],
report_symbol_name: &'a str,
) {
let mut all_state_indices = BTreeSet::new();
let mut symbols_with_state_indices = (0..syntax_grammar.variables.len())
.map(|i| (Symbol::non_terminal(i), BTreeSet::new()))
.collect::<Vec<_>>();
for (i, state) in parse_table.states.iter().enumerate() {
all_state_indices.insert(i);
let item_set = &parse_state_info[state.id];
for (item, _) in &item_set.1.entries {
if !item.is_augmented() {
symbols_with_state_indices[item.variable_index as usize]
.1
.insert(i);
}
}
}
symbols_with_state_indices.sort_unstable_by_key(|(_, states)| -(states.len() as i32));
let max_symbol_name_length = syntax_grammar
.variables
.iter()
.map(|v| v.name.len())
.max()
.unwrap();
for (symbol, states) in &symbols_with_state_indices {
eprintln!(
"{:width$}\t{}",
syntax_grammar.variables[symbol.index].name,
states.len(),
width = max_symbol_name_length
);
}
eprintln!();
let state_indices = if report_symbol_name == "*" {
Some(&all_state_indices)
} else {
symbols_with_state_indices
.iter()
.find_map(|(symbol, state_indices)| {
if syntax_grammar.variables[symbol.index].name == report_symbol_name {
Some(state_indices)
} else {
None
}
})
};
if let Some(state_indices) = state_indices {
let mut state_indices = state_indices.iter().copied().collect::<Vec<_>>();
state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i));
for state_index in state_indices {
let id = parse_table.states[state_index].id;
let (preceding_symbols, item_set) = &parse_state_info[id];
eprintln!("state index: {state_index}");
eprintln!("state id: {id}");
eprint!("symbol sequence:");
for symbol in preceding_symbols {
let name = if symbol.is_terminal() {
&lexical_grammar.variables[symbol.index].name
} else if symbol.is_external() {
&syntax_grammar.external_tokens[symbol.index].name
} else {
&syntax_grammar.variables[symbol.index].name
};
eprint!(" {name}");
}
eprintln!(
"\nitems:\n{}",
self::item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar,),
);
}
}
}
fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
    cursor.transition_chars().all(|(chars, is_sep)| {
        is_sep || chars.chars().all(|c| c.is_alphabetic() || c == '_')
    })
}

@@ -1,529 +0,0 @@
use std::{cmp::Ordering, collections::HashSet, fmt};
use crate::generate::{
build_tables::item::TokenSetDisplay,
grammars::{LexicalGrammar, SyntaxGrammar},
nfa::{CharacterSet, NfaCursor, NfaTransition},
rules::TokenSet,
};
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct TokenConflictStatus {
matches_prefix: bool,
does_match_continuation: bool,
does_match_valid_continuation: bool,
does_match_separators: bool,
matches_same_string: bool,
matches_different_string: bool,
}
pub struct TokenConflictMap<'a> {
n: usize,
status_matrix: Vec<TokenConflictStatus>,
following_tokens: Vec<TokenSet>,
starting_chars_by_index: Vec<CharacterSet>,
following_chars_by_index: Vec<CharacterSet>,
grammar: &'a LexicalGrammar,
}
impl<'a> TokenConflictMap<'a> {
    /// Create a token conflict map based on a lexical grammar, which describes the
    /// structure of each token, and a `following_tokens` map, which indicates which
    /// tokens may appear immediately after each other token.
///
/// This analyzes the possible kinds of overlap between each pair of tokens and stores
/// them in a matrix.
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
let starting_chars = get_starting_chars(&mut cursor, grammar);
let following_chars = get_following_chars(&starting_chars, &following_tokens);
let n = grammar.variables.len();
let mut status_matrix = vec![TokenConflictStatus::default(); n * n];
for i in 0..grammar.variables.len() {
for j in 0..i {
let status = compute_conflict_status(&mut cursor, grammar, &following_chars, i, j);
status_matrix[matrix_index(n, i, j)] = status.0;
status_matrix[matrix_index(n, j, i)] = status.1;
}
}
TokenConflictMap {
n,
status_matrix,
following_tokens,
starting_chars_by_index: starting_chars,
following_chars_by_index: following_chars,
grammar,
}
}
    /// Do tokens `a` and `b` have the same conflict status with respect to
    /// the token `other`?
pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool {
let left = &self.status_matrix[matrix_index(self.n, a, other)];
let right = &self.status_matrix[matrix_index(self.n, b, other)];
left == right
}
/// Does token `i` match any strings that token `j` does *not* match?
pub fn does_match_different_string(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_different_string
}
/// Does token `i` match any strings that token `j` also matches, where
/// token `i` is preferred over token `j`?
pub fn does_match_same_string(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
}
pub fn does_conflict(&self, i: usize, j: usize) -> bool {
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
entry.does_match_valid_continuation
|| entry.does_match_separators
|| entry.matches_same_string
}
/// Does token `i` match any strings that are *prefixes* of strings matched by `j`?
pub fn does_match_prefix(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_prefix
}
pub fn does_match_shorter_or_longer(&self, i: usize, j: usize) -> bool {
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
let reverse_entry = &self.status_matrix[matrix_index(self.n, j, i)];
(entry.does_match_valid_continuation || entry.does_match_separators)
&& !reverse_entry.does_match_separators
}
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
let status = &self.status_matrix[matrix_index(self.n, i, j)];
status.does_match_separators
|| status.matches_prefix
|| status.matches_same_string
|| status.does_match_continuation
}
pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool {
match left.0.cmp(&right.0) {
Ordering::Less => false,
Ordering::Greater => true,
Ordering::Equal => match grammar.variables[left.1]
.implicit_precedence
.cmp(&grammar.variables[right.1].implicit_precedence)
{
Ordering::Less => false,
Ordering::Greater => true,
Ordering::Equal => left.1 < right.1,
},
}
}
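    // E.g. if two tokens complete with the same explicit precedence, the tie
    // falls to implicit precedence (string literals outrank patterns), and a
    // remaining tie goes to whichever token is defined earlier in the grammar
    // (the lower index).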
pub fn prefer_transition(
grammar: &LexicalGrammar,
t: &NfaTransition,
completed_id: usize,
completed_precedence: i32,
has_separator_transitions: bool,
) -> bool {
if t.precedence < completed_precedence {
return false;
}
if t.precedence == completed_precedence {
if t.is_separator {
return false;
}
if has_separator_transitions
&& !grammar
.variable_indices_for_nfa_states(&t.states)
.any(|i| i == completed_id)
{
return false;
}
}
true
}
}
impl<'a> fmt::Debug for TokenConflictMap<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "TokenConflictMap {{")?;
let syntax_grammar = SyntaxGrammar::default();
writeln!(f, " following_tokens: {{")?;
for (i, following_tokens) in self.following_tokens.iter().enumerate() {
writeln!(
f,
" follow({:?}): {},",
self.grammar.variables[i].name,
TokenSetDisplay(following_tokens, &syntax_grammar, self.grammar)
)?;
}
writeln!(f, " }},")?;
writeln!(f, " starting_characters: {{")?;
for i in 0..self.n {
writeln!(
f,
" {:?}: {:?},",
self.grammar.variables[i].name, self.starting_chars_by_index[i]
)?;
}
writeln!(f, " }},")?;
writeln!(f, " following_characters: {{")?;
for i in 0..self.n {
writeln!(
f,
" {:?}: {:?},",
self.grammar.variables[i].name, self.following_chars_by_index[i]
)?;
}
writeln!(f, " }},")?;
writeln!(f, " status_matrix: {{")?;
for i in 0..self.n {
writeln!(f, " {:?}: {{", self.grammar.variables[i].name)?;
for j in 0..self.n {
writeln!(
f,
" {:?}: {:?},",
self.grammar.variables[j].name,
self.status_matrix[matrix_index(self.n, i, j)]
)?;
}
writeln!(f, " }},")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}
const fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
variable_count * i + j
}
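// The matrix is stored row-major: with 3 variables, the status of token 2
// relative to token 1 lives at cell 3 * 2 + 1 == 7, while the reverse
// direction (1, 2) lives at cell 3 * 1 + 2 == 5.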
fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<CharacterSet> {
let mut result = Vec::with_capacity(grammar.variables.len());
for variable in &grammar.variables {
cursor.reset(vec![variable.start_state]);
let mut all_chars = CharacterSet::empty();
for (chars, _) in cursor.transition_chars() {
all_chars = all_chars.add(chars);
}
result.push(all_chars);
}
result
}
fn get_following_chars(
starting_chars: &[CharacterSet],
following_tokens: &[TokenSet],
) -> Vec<CharacterSet> {
following_tokens
.iter()
.map(|following_tokens| {
let mut chars = CharacterSet::empty();
for token in following_tokens.iter() {
if token.is_terminal() {
chars = chars.add(&starting_chars[token.index]);
}
}
chars
})
.collect()
}
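// E.g. if `identifier` may follow `in`, then the following-characters set for
// `in` includes every starting character of `identifier`. These sets are what
// distinguish a *valid* continuation conflict (the longer match can be followed
// by a plausible next token) from a purely theoretical one.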
fn compute_conflict_status(
cursor: &mut NfaCursor,
grammar: &LexicalGrammar,
following_chars: &[CharacterSet],
i: usize,
j: usize,
) -> (TokenConflictStatus, TokenConflictStatus) {
let mut visited_state_sets = HashSet::new();
let mut state_set_queue = vec![vec![
grammar.variables[i].start_state,
grammar.variables[j].start_state,
]];
let mut result = (
TokenConflictStatus::default(),
TokenConflictStatus::default(),
);
while let Some(state_set) = state_set_queue.pop() {
let mut live_variable_indices = grammar.variable_indices_for_nfa_states(&state_set);
// If only one of the two tokens could possibly match from this state, then
// there is no reason to analyze any of its successors. Just record the fact
// that the token matches a string that the other token does not match.
let first_live_variable_index = live_variable_indices.next().unwrap();
if live_variable_indices.count() == 0 {
if first_live_variable_index == i {
result.0.matches_different_string = true;
} else {
result.1.matches_different_string = true;
}
continue;
}
// Don't pursue states where there's no potential for conflict.
cursor.reset(state_set);
let within_separator = cursor.transition_chars().any(|(_, sep)| sep);
// Examine each possible completed token in this state.
let mut completion = None;
for (id, precedence) in cursor.completions() {
if within_separator {
if id == i {
result.0.does_match_separators = true;
} else {
result.1.does_match_separators = true;
}
}
// If the other token has already completed, then this is
// a same-string conflict.
if let Some((prev_id, prev_precedence)) = completion {
if id == prev_id {
continue;
}
// Determine which of the two tokens is preferred.
                    let preferred_id = if TokenConflictMap::prefer_token(
                        grammar,
                        (prev_precedence, prev_id),
                        (precedence, id),
                    ) {
                        prev_id
                    } else {
                        completion = Some((id, precedence));
                        id
                    };
if preferred_id == i {
result.0.matches_same_string = true;
} else {
result.1.matches_same_string = true;
}
} else {
completion = Some((id, precedence));
}
}
// Examine each possible transition from this state to detect substring conflicts.
for transition in cursor.transitions() {
let mut can_advance = true;
// If there is already a completed token in this state, then determine
// if the next state can also match the completed token. If so, then
// this is *not* a conflict.
if let Some((completed_id, completed_precedence)) = completion {
let mut advanced_id = None;
let mut successor_contains_completed_id = false;
for variable_id in grammar.variable_indices_for_nfa_states(&transition.states) {
if variable_id == completed_id {
successor_contains_completed_id = true;
break;
}
advanced_id = Some(variable_id);
}
// Determine which action is preferred: matching the already complete
// token, or continuing on to try and match the other longer token.
if let (Some(advanced_id), false) = (advanced_id, successor_contains_completed_id) {
if TokenConflictMap::prefer_transition(
grammar,
&transition,
completed_id,
completed_precedence,
within_separator,
) {
                        if advanced_id == i {
                            result.0.does_match_continuation = true;
                            if transition.characters.does_intersect(&following_chars[j]) {
                                result.0.does_match_valid_continuation = true;
                            }
                        } else {
                            result.1.does_match_continuation = true;
                            if transition.characters.does_intersect(&following_chars[i]) {
                                result.1.does_match_valid_continuation = true;
                            }
                        }
                    } else {
                        // Advancing is not preferred: the completed token wins, so
                        // the other token only matches a *prefix*. Don't explore
                        // the successors of this transition.
                        can_advance = false;
                        if completed_id == i {
                            result.0.matches_prefix = true;
                        } else {
                            result.1.matches_prefix = true;
                        }
                    }
}
}
if can_advance && visited_state_sets.insert(transition.states.clone()) {
state_set_queue.push(transition.states);
}
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
grammars::{Variable, VariableType},
prepare_grammar::{expand_tokens, ExtractedLexicalGrammar},
rules::{Precedence, Rule, Symbol},
};
#[test]
fn test_starting_characters() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: Vec::new(),
variables: vec![
Variable {
name: "token_0".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[a-f]1|0x\\d", ""),
},
Variable {
name: "token_1".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("d*ef", ""),
},
],
})
.unwrap();
let token_map = TokenConflictMap::new(&grammar, Vec::new());
assert_eq!(
token_map.starting_chars_by_index[0],
CharacterSet::empty().add_range('a', 'f').add_char('0')
);
assert_eq!(
token_map.starting_chars_by_index[1],
CharacterSet::empty().add_range('d', 'e')
);
}
#[test]
fn test_token_conflicts() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: Vec::new(),
variables: vec![
Variable {
name: "in".to_string(),
kind: VariableType::Named,
rule: Rule::string("in"),
},
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "instanceof".to_string(),
kind: VariableType::Named,
rule: Rule::string("instanceof"),
},
],
})
.unwrap();
let var = |name| index_of_var(&grammar, name);
let token_map = TokenConflictMap::new(
&grammar,
vec![
std::iter::once(&Symbol::terminal(var("identifier")))
.copied()
.collect(),
std::iter::once(&Symbol::terminal(var("in")))
.copied()
.collect(),
std::iter::once(&Symbol::terminal(var("identifier")))
.copied()
.collect(),
],
);
// Given the string "in", the `in` token is preferred over the `identifier` token
assert!(token_map.does_match_same_string(var("in"), var("identifier")));
assert!(!token_map.does_match_same_string(var("identifier"), var("in")));
// Depending on what character follows, the string "in" may be treated as part of an
// `identifier` token.
assert!(token_map.does_conflict(var("identifier"), var("in")));
// Depending on what character follows, the string "instanceof" may be treated as part of
// an `identifier` token.
assert!(token_map.does_conflict(var("identifier"), var("instanceof")));
assert!(token_map.does_conflict(var("instanceof"), var("in")));
}
#[test]
fn test_token_conflicts_with_separators() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
kind: VariableType::Named,
rule: Rule::string("x"),
},
Variable {
name: "newline".to_string(),
kind: VariableType::Named,
rule: Rule::string("\n"),
},
],
})
.unwrap();
let var = |name| index_of_var(&grammar, name);
let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
assert!(token_map.does_conflict(var("newline"), var("x")));
assert!(!token_map.does_conflict(var("x"), var("newline")));
}
#[test]
fn test_token_conflicts_with_open_ended_tokens() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
kind: VariableType::Named,
rule: Rule::string("x"),
},
Variable {
name: "anything".to_string(),
kind: VariableType::Named,
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")),
},
],
})
.unwrap();
let var = |name| index_of_var(&grammar, name);
let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
assert!(token_map.does_match_shorter_or_longer(var("anything"), var("x")));
assert!(!token_map.does_match_shorter_or_longer(var("x"), var("anything")));
}
fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize {
grammar
.variables
.iter()
.position(|v| v.name == name)
.unwrap()
}
}

@@ -1,63 +0,0 @@
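// Partition refinement, as used for DFA-style state minimization: starting
// from an initial grouping of states, repeatedly split any group whose members
// the predicate `f` deems incompatible, until no group changes. Returns true
// if any group was split.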
pub fn split_state_id_groups<S>(
states: &[S],
state_ids_by_group_id: &mut Vec<Vec<usize>>,
group_ids_by_state_id: &mut [usize],
start_group_id: usize,
mut f: impl FnMut(&S, &S, &[usize]) -> bool,
) -> bool {
let mut result = false;
let mut group_id = start_group_id;
while group_id < state_ids_by_group_id.len() {
let state_ids = &state_ids_by_group_id[group_id];
let mut split_state_ids = Vec::new();
let mut i = 0;
while i < state_ids.len() {
let left_state_id = state_ids[i];
if split_state_ids.contains(&left_state_id) {
i += 1;
continue;
}
let left_state = &states[left_state_id];
// Identify all of the other states in the group that are incompatible with
// this state.
let mut j = i + 1;
while j < state_ids.len() {
let right_state_id = state_ids[j];
if split_state_ids.contains(&right_state_id) {
j += 1;
continue;
}
let right_state = &states[right_state_id];
if f(left_state, right_state, group_ids_by_state_id) {
split_state_ids.push(right_state_id);
}
j += 1;
}
i += 1;
}
// If any states were removed from the group, add them all as a new group.
if !split_state_ids.is_empty() {
result = true;
state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(i));
let new_group_id = state_ids_by_group_id.len();
for id in &split_state_ids {
group_ids_by_state_id[*id] = new_group_id;
}
state_ids_by_group_id.push(split_state_ids);
}
group_id += 1;
}
result
}

@@ -1,496 +0,0 @@
function alias(rule, value) {
const result = {
type: "ALIAS",
content: normalize(rule),
named: false,
value: null
};
switch (value.constructor) {
case String:
result.named = false;
result.value = value;
return result;
case ReferenceError:
result.named = true;
result.value = value.symbol.name;
return result;
case Object:
if (typeof value.type === 'string' && value.type === 'SYMBOL') {
result.named = true;
result.value = value.name;
return result;
}
}
throw new Error(`Invalid alias value ${value}`);
}
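// Example: alias($.foo, 'bar') takes the String branch and yields an anonymous
// alias with value "bar", while alias($.foo, $.bar) takes the SYMBOL branch
// and yields a named alias with value "bar".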
function blank() {
return {
type: "BLANK"
};
}
function field(name, rule) {
return {
type: "FIELD",
name,
content: normalize(rule)
}
}
function choice(...elements) {
return {
type: "CHOICE",
members: elements.map(normalize)
};
}
function optional(value) {
checkArguments(arguments, arguments.length, optional, 'optional');
return choice(value, blank());
}
function prec(number, rule) {
checkPrecedence(number);
checkArguments(
arguments,
arguments.length - 1,
prec,
'prec',
' and a precedence argument'
);
return {
type: "PREC",
value: number,
content: normalize(rule)
};
}
prec.left = function(number, rule) {
if (rule == null) {
rule = number;
number = 0;
}
checkPrecedence(number);
checkArguments(
arguments,
arguments.length - 1,
prec.left,
'prec.left',
' and an optional precedence argument'
);
return {
type: "PREC_LEFT",
value: number,
content: normalize(rule)
};
}
prec.right = function(number, rule) {
if (rule == null) {
rule = number;
number = 0;
}
checkPrecedence(number);
checkArguments(
arguments,
arguments.length - 1,
prec.right,
'prec.right',
' and an optional precedence argument'
);
return {
type: "PREC_RIGHT",
value: number,
content: normalize(rule)
};
}
prec.dynamic = function(number, rule) {
checkPrecedence(number);
checkArguments(
arguments,
arguments.length - 1,
prec.dynamic,
'prec.dynamic',
' and a precedence argument'
);
return {
type: "PREC_DYNAMIC",
value: number,
content: normalize(rule)
};
}
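// Example: prec.left(2, seq($.expr, '+', $.expr)) yields a PREC_LEFT node with
// value 2. Called with a single argument, prec.left(rule) shifts the rule over
// and defaults the precedence value to 0; prec and prec.dynamic always require
// an explicit value.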
function repeat(rule) {
checkArguments(arguments, arguments.length, repeat, 'repeat');
return {
type: "REPEAT",
content: normalize(rule)
};
}
function repeat1(rule) {
checkArguments(arguments, arguments.length, repeat1, 'repeat1');
return {
type: "REPEAT1",
content: normalize(rule)
};
}
function seq(...elements) {
return {
type: "SEQ",
members: elements.map(normalize)
};
}
function sym(name) {
return {
type: "SYMBOL",
name
};
}
function token(value) {
checkArguments(arguments, arguments.length, token, 'token', '', 'literal');
return {
type: "TOKEN",
content: normalize(value)
};
}
token.immediate = function(value) {
checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal');
return {
type: "IMMEDIATE_TOKEN",
content: normalize(value)
};
}
function normalize(value) {
if (typeof value == "undefined")
throw new Error("Undefined symbol");
switch (value.constructor) {
case String:
return {
type: 'STRING',
value
};
case RegExp:
return value.flags ? {
type: 'PATTERN',
value: value.source,
flags: value.flags
} : {
type: 'PATTERN',
value: value.source
};
case ReferenceError:
throw value
default:
if (typeof value.type === 'string') {
return value;
} else {
throw new TypeError(`Invalid rule: ${value}`);
}
}
}
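// Example: normalize('if') yields { type: 'STRING', value: 'if' }, while
// normalize(/[0-9]+/i) yields { type: 'PATTERN', value: '[0-9]+', flags: 'i' }
// (the flags key is omitted for regexes without flags).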
function RuleBuilder(ruleMap) {
return new Proxy({}, {
get(_, propertyName) {
const symbol = sym(propertyName);
if (!ruleMap || Object.prototype.hasOwnProperty.call(ruleMap, propertyName)) {
return symbol;
} else {
const error = new ReferenceError(`Undefined symbol '${propertyName}'`);
error.symbol = symbol;
return error;
}
}
})
}
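// Inside a rule definition, `$.statement` resolves through this Proxy: it
// returns a SYMBOL node when `statement` is a known rule, and a ReferenceError
// carrying the symbol otherwise, which is how misspelled rule references are
// reported with a useful name.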
function grammar(baseGrammar, options) {
let inherits = undefined;
if (!options) {
options = baseGrammar;
baseGrammar = {
name: null,
rules: {},
extras: [normalize(/\s/)],
conflicts: [],
externals: [],
inline: [],
supertypes: [],
precedences: [],
};
} else {
baseGrammar = baseGrammar.grammar;
inherits = baseGrammar.name;
}
let externals = baseGrammar.externals;
if (options.externals) {
if (typeof options.externals !== "function") {
throw new Error("Grammar's 'externals' property must be a function.");
}
const externalsRuleBuilder = RuleBuilder(null)
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
if (!Array.isArray(externalRules)) {
throw new Error("Grammar's 'externals' property must return an array of rules.");
}
externals = externalRules.map(normalize);
}
const ruleMap = {};
for (const key of Object.keys(options.rules)) {
ruleMap[key] = true;
}
for (const key of Object.keys(baseGrammar.rules)) {
ruleMap[key] = true;
}
for (const external of externals) {
if (typeof external.name === 'string') {
ruleMap[external.name] = true;
}
}
const ruleBuilder = RuleBuilder(ruleMap);
const name = options.name;
if (typeof name !== "string") {
throw new Error("Grammar's 'name' property must be a string.");
}
if (!/^[a-zA-Z_]\w*$/.test(name)) {
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
}
if (inherits && typeof inherits !== "string") {
throw new Error("Base grammar's 'name' property must be a string.");
}
  if (inherits && !/^[a-zA-Z_]\w*$/.test(inherits)) {
throw new Error("Base grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
}
const rules = Object.assign({}, baseGrammar.rules);
if (options.rules) {
if (typeof options.rules !== "object") {
throw new Error("Grammar's 'rules' property must be an object.");
}
for (const ruleName of Object.keys(options.rules)) {
const ruleFn = options.rules[ruleName];
if (typeof ruleFn !== "function") {
throw new Error(`Grammar rules must all be functions. '${ruleName}' rule is not.`);
}
const rule = ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]);
if (rule === undefined) {
throw new Error(`Rule '${ruleName}' returned undefined.`);
}
rules[ruleName] = normalize(rule);
}
}
let extras = baseGrammar.extras.slice();
if (options.extras) {
if (typeof options.extras !== "function") {
throw new Error("Grammar's 'extras' property must be a function.");
}
extras = options.extras
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
if (!Array.isArray(extras)) {
throw new Error("Grammar's 'extras' function must return an array.")
}
extras = extras.map(normalize);
}
let word = baseGrammar.word;
if (options.word) {
word = options.word.call(ruleBuilder, ruleBuilder).name;
if (typeof word != 'string') {
throw new Error("Grammar's 'word' property must be a named rule.");
}
if (word === 'ReferenceError') {
throw new Error("Grammar's 'word' property must be a valid rule name.");
}
}
let conflicts = baseGrammar.conflicts;
if (options.conflicts) {
if (typeof options.conflicts !== "function") {
throw new Error("Grammar's 'conflicts' property must be a function.");
}
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
if (!Array.isArray(conflictRules)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
conflicts = conflictRules.map(conflictSet => {
if (!Array.isArray(conflictSet)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
return conflictSet.map(symbol => normalize(symbol).name);
});
}
let inline = baseGrammar.inline;
if (options.inline) {
if (typeof options.inline !== "function") {
throw new Error("Grammar's 'inline' property must be a function.");
}
const baseInlineRules = baseGrammar.inline.map(sym);
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
if (!Array.isArray(inlineRules)) {
throw new Error("Grammar's inline must be an array of rules.");
}
inline = inlineRules.filter((symbol, index, self) => {
if (self.findIndex(s => s.name === symbol.name) !== index) {
console.log(`Warning: duplicate inline rule '${symbol.name}'`);
return false;
}
if (symbol.name === 'ReferenceError') {
console.log(`Warning: inline rule '${symbol.symbol.name}' is not defined.`);
return false;
}
return true;
}).map(symbol => symbol.name);
}
let supertypes = baseGrammar.supertypes;
if (options.supertypes) {
if (typeof options.supertypes !== "function") {
throw new Error("Grammar's 'supertypes' property must be a function.");
}
const baseSupertypeRules = baseGrammar.supertypes.map(sym);
const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules);
if (!Array.isArray(supertypeRules)) {
throw new Error("Grammar's supertypes must be an array of rules.");
}
supertypes = supertypeRules.map(symbol => {
if (symbol.name === 'ReferenceError') {
throw new Error(`Supertype rule \`${symbol.symbol.name}\` is not defined.`);
}
return symbol.name;
});
}
let precedences = baseGrammar.precedences;
if (options.precedences) {
if (typeof options.precedences !== "function") {
throw new Error("Grammar's 'precedences' property must be a function");
}
precedences = options.precedences.call(ruleBuilder, ruleBuilder, baseGrammar.precedences);
if (!Array.isArray(precedences)) {
throw new Error("Grammar's precedences must be an array of arrays of rules.");
}
precedences = precedences.map(list => {
if (!Array.isArray(list)) {
throw new Error("Grammar's precedences must be an array of arrays of rules.");
}
return list.map(normalize);
});
}
if (Object.keys(rules).length === 0) {
throw new Error("Grammar must have at least one rule.");
}
return {
grammar: {
name,
inherits,
word,
rules,
extras,
conflicts,
precedences,
externals,
inline,
supertypes,
},
};
}
function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') {
// Allow for .map() usage where additional arguments are index and the entire array.
const isMapCall = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]);
if (isMapCall) {
ruleCount = typeof args[2] === 'number' ? 1 : args[2].length;
}
if (ruleCount > 1 && !isMapCall) {
const error = new Error([
`The \`${callerName}\` function only takes one ${argType} argument${suffix}.`,
`You passed in multiple ${argType}s. Did you mean to call \`seq\`?\n`
].join('\n'));
Error.captureStackTrace(error, caller);
throw error
}
}
function checkPrecedence(value) {
if (value == null) {
throw new Error('Missing precedence value');
}
}
function getEnv(name) {
if (globalThis.process) return process.env[name]; // Node/Bun
if (globalThis.Deno) return Deno.env.get(name); // Deno
throw Error("Unsupported JS runtime");
}
globalThis.alias = alias;
globalThis.blank = blank;
globalThis.choice = choice;
globalThis.optional = optional;
globalThis.prec = prec;
globalThis.repeat = repeat;
globalThis.repeat1 = repeat1;
globalThis.seq = seq;
globalThis.sym = sym;
globalThis.token = token;
globalThis.grammar = grammar;
globalThis.field = field;
const result = await import(getEnv("TREE_SITTER_GRAMMAR_PATH"));
const output = JSON.stringify(result.default?.grammar ?? result.grammar);
if (globalThis.process) { // Node/Bun
process.stdout.write(output);
} else if (globalThis.Deno) { // Deno
Deno.stdout.writeSync(new TextEncoder().encode(output));
} else {
throw Error("Unsupported JS runtime");
}

@@ -1,300 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "tree-sitter grammar specification",
"type": "object",
"required": ["name", "rules"],
"additionalProperties": false,
"properties": {
"name": {
"description": "the name of the grammar",
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
"inherits": {
"description": "the name of the parent grammar",
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
"rules": {
"type": "object",
"patternProperties": {
"^[a-zA-Z_]\\w*$": {
"$ref": "#/definitions/rule"
}
},
"additionalProperties": false
},
"extras": {
"type": "array",
"uniqueItems": true,
"items": {
"$ref": "#/definitions/rule"
}
},
"precedences": {
"type": "array",
"uniqueItems": true,
"items": {
"type": "array",
"uniqueItems": true,
"items": {
"oneOf": [
{ "type": "string" },
{ "$ref": "#/definitions/symbol-rule" }
]
}
}
},
"externals": {
"type": "array",
"uniqueItems": true,
"items": {
"$ref": "#/definitions/rule"
}
},
"inline": {
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*$"
}
},
"conflicts": {
"type": "array",
"uniqueItems": true,
"items": {
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*$"
}
}
},
"word": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
"supertypes": {
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"type": "array",
"uniqueItems": true,
"items": {
"description": "the name of a rule in `rules` or `extras`",
"type": "string"
}
}
},
"definitions": {
"blank-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^BLANK$"
}
},
"required": ["type"]
},
"string-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^STRING$"
},
"value": {
"type": "string"
}
},
"required": ["type", "value"]
},
"pattern-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^PATTERN$"
},
"value": { "type": "string" },
"flags": { "type": "string" }
},
"required": ["type", "value"]
},
"symbol-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^SYMBOL$"
},
"name": { "type": "string" }
},
"required": ["type", "name"]
},
"seq-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^SEQ$"
},
"members": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
}
},
"required": ["type", "members"]
},
"choice-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^CHOICE$"
},
"members": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
}
},
"required": ["type", "members"]
},
"alias-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^ALIAS$"
},
"value": {
"type": "string"
},
"named": {
"type": "boolean"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "named", "content", "value"]
},
"repeat-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^REPEAT$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"repeat1-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^REPEAT1$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"token-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^(TOKEN|IMMEDIATE_TOKEN)$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"field-rule": {
"properties": {
"name": { "type": "string" },
"type": {
"type": "string",
"pattern": "^FIELD$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["name", "type", "content"]
},
"prec-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^(PREC|PREC_LEFT|PREC_RIGHT|PREC_DYNAMIC)$"
},
"value": {
"oneof": [
{ "type": "integer" },
{ "type": "string" }
]
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"rule": {
"oneOf": [
{ "$ref": "#/definitions/alias-rule" },
{ "$ref": "#/definitions/blank-rule" },
{ "$ref": "#/definitions/string-rule" },
{ "$ref": "#/definitions/pattern-rule" },
{ "$ref": "#/definitions/symbol-rule" },
{ "$ref": "#/definitions/seq-rule" },
{ "$ref": "#/definitions/choice-rule" },
{ "$ref": "#/definitions/repeat1-rule" },
{ "$ref": "#/definitions/repeat-rule" },
{ "$ref": "#/definitions/token-rule" },
{ "$ref": "#/definitions/field-rule" },
{ "$ref": "#/definitions/prec-rule" }
]
}
}
}

@@ -1 +0,0 @@

@@ -1,261 +0,0 @@
use std::{collections::HashMap, fmt};
use super::{
nfa::Nfa,
rules::{Alias, Associativity, Precedence, Rule, Symbol},
};
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum VariableType {
Hidden,
Auxiliary,
Anonymous,
Named,
}
// Input grammar
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Variable {
pub name: String,
pub kind: VariableType,
pub rule: Rule,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrecedenceEntry {
Name(String),
Symbol(String),
}
#[derive(Debug, Default, PartialEq, Eq)]
pub struct InputGrammar {
pub name: String,
pub variables: Vec<Variable>,
pub extra_symbols: Vec<Rule>,
pub expected_conflicts: Vec<Vec<String>>,
pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>,
pub supertype_symbols: Vec<String>,
pub word_token: Option<String>,
}
// Extracted lexical grammar
#[derive(Debug, PartialEq, Eq)]
pub struct LexicalVariable {
pub name: String,
pub kind: VariableType,
pub implicit_precedence: i32,
pub start_state: u32,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub struct LexicalGrammar {
pub nfa: Nfa,
pub variables: Vec<LexicalVariable>,
}
// Extracted syntax grammar
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ProductionStep {
pub symbol: Symbol,
pub precedence: Precedence,
pub associativity: Option<Associativity>,
pub alias: Option<Alias>,
pub field_name: Option<String>,
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Production {
pub steps: Vec<ProductionStep>,
pub dynamic_precedence: i32,
}
#[derive(Default)]
pub struct InlinedProductionMap {
pub productions: Vec<Production>,
pub production_map: HashMap<(*const Production, u32), Vec<usize>>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SyntaxVariable {
pub name: String,
pub kind: VariableType,
pub productions: Vec<Production>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ExternalToken {
pub name: String,
pub kind: VariableType,
pub corresponding_internal_token: Option<Symbol>,
}
#[derive(Debug, Default)]
pub struct SyntaxGrammar {
pub variables: Vec<SyntaxVariable>,
pub extra_symbols: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>,
pub external_tokens: Vec<ExternalToken>,
pub supertype_symbols: Vec<Symbol>,
pub variables_to_inline: Vec<Symbol>,
pub word_token: Option<Symbol>,
pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
}
#[cfg(test)]
impl ProductionStep {
#[must_use]
pub const fn new(symbol: Symbol) -> Self {
Self {
symbol,
precedence: Precedence::None,
associativity: None,
alias: None,
field_name: None,
}
}
pub fn with_prec(self, precedence: Precedence, associativity: Option<Associativity>) -> Self {
Self {
symbol: self.symbol,
precedence,
associativity,
alias: self.alias,
field_name: self.field_name,
}
}
pub fn with_alias(self, value: &str, is_named: bool) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
associativity: self.associativity,
alias: Some(Alias {
value: value.to_string(),
is_named,
}),
field_name: self.field_name,
}
}
pub fn with_field_name(self, name: &str) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
associativity: self.associativity,
alias: self.alias,
field_name: Some(name.to_string()),
}
}
}
impl Production {
pub fn first_symbol(&self) -> Option<Symbol> {
self.steps.first().map(|s| s.symbol)
}
}
#[cfg(test)]
impl Variable {
pub fn named(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Named,
rule,
}
}
pub fn auxiliary(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Auxiliary,
rule,
}
}
pub fn hidden(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Hidden,
rule,
}
}
pub fn anonymous(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Anonymous,
rule,
}
}
}
impl VariableType {
pub fn is_visible(self) -> bool {
self == Self::Named || self == Self::Anonymous
}
}
impl LexicalGrammar {
pub fn variable_indices_for_nfa_states<'a>(
&'a self,
state_ids: &'a [u32],
) -> impl Iterator<Item = usize> + 'a {
let mut prev = None;
state_ids.iter().filter_map(move |state_id| {
let variable_id = self.variable_index_for_nfa_state(*state_id);
if prev == Some(variable_id) {
None
} else {
prev = Some(variable_id);
prev
}
})
}
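    // Each variable's NFA states occupy a contiguous range of ids ending at
    // its `start_state` (states are appended to the NFA as each variable is
    // built), so the owning variable is the first one whose `start_state` is
    // greater than or equal to the queried state id.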
pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
self.variables
.iter()
.position(|v| v.start_state >= state_id)
.unwrap()
}
}
impl SyntaxVariable {
pub fn is_auxiliary(&self) -> bool {
self.kind == VariableType::Auxiliary
}
pub fn is_hidden(&self) -> bool {
self.kind == VariableType::Hidden || self.kind == VariableType::Auxiliary
}
}
impl InlinedProductionMap {
pub fn inlined_productions<'a>(
&'a self,
production: &Production,
step_index: u32,
) -> Option<impl Iterator<Item = &'a Production> + 'a> {
self.production_map
.get(&(production as *const Production, step_index))
.map(|production_indices| {
production_indices
.iter()
.copied()
.map(move |index| &self.productions[index])
})
}
}
impl fmt::Display for PrecedenceEntry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Name(n) => write!(f, "'{n}'"),
Self::Symbol(s) => write!(f, "$.{s}"),
}
}
}

@@ -1,252 +0,0 @@
use std::{
env, fs,
io::Write,
path::{Path, PathBuf},
process::{Command, Stdio},
};
use anyhow::{anyhow, Context, Result};
use build_tables::build_tables;
use grammars::InputGrammar;
use lazy_static::lazy_static;
use parse_grammar::parse_grammar;
use prepare_grammar::prepare_grammar;
use regex::{Regex, RegexBuilder};
use render::render_c_code;
use semver::Version;
mod build_tables;
mod dedup;
mod grammar_files;
mod grammars;
mod nfa;
mod node_types;
pub mod parse_grammar;
mod prepare_grammar;
mod render;
mod rules;
mod tables;
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
.multi_line(true)
.build()
.unwrap();
}
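// Grammar JSON may contain `//` line comments; this regex matches them in
// multi-line mode so they can be replaced with blank lines before parsing.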
struct GeneratedParser {
c_code: String,
node_types_json: String,
}
pub const ALLOC_HEADER: &str = include_str!("../templates/alloc.h");
pub const ARRAY_HEADER: &str = include_str!("../templates/array.h");
pub fn generate_parser_in_directory(
repo_path: &Path,
grammar_path: Option<&str>,
abi_version: usize,
report_symbol_name: Option<&str>,
js_runtime: Option<&str>,
) -> Result<()> {
let mut repo_path = repo_path.to_owned();
let mut grammar_path = grammar_path;
// Populate a new empty grammar directory.
if let Some(path) = grammar_path {
let path = PathBuf::from(path);
        if !path
            .try_exists()
            .with_context(|| format!("Failed to check whether the path {path:?} exists"))?
{
fs::create_dir_all(&path)?;
grammar_path = None;
repo_path = path;
}
}
let grammar_path = grammar_path
.map(PathBuf::from)
.unwrap_or_else(|| repo_path.join("grammar.js"));
// Read the grammar file.
let grammar_json = load_grammar_file(&grammar_path, js_runtime)?;
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
// Ensure that the output directories exist.
fs::create_dir_all(&src_path)?;
fs::create_dir_all(&header_path)?;
if grammar_path.file_name().unwrap() != "grammar.json" {
fs::write(src_path.join("grammar.json"), &grammar_json)
.with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?;
}
// Parse and preprocess the grammar.
let input_grammar = parse_grammar(&grammar_json)?;
// Generate the parser and related files.
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?;
write_file(&header_path.join("array.h"), ARRAY_HEADER)?;
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
Ok(())
}
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
let input_grammar = parse_grammar(&grammar_json)?;
let parser =
generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?;
Ok((input_grammar.name, parser.c_code))
}
fn generate_parser_for_grammar_with_opts(
input_grammar: &InputGrammar,
abi_version: usize,
report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(input_grammar)?;
let variable_info =
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
let node_types_json = node_types::generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
);
let tables = build_tables(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
&inlines,
report_symbol_name,
)?;
let c_code = render_c_code(
&input_grammar.name,
tables,
syntax_grammar,
lexical_grammar,
simple_aliases,
abi_version,
);
Ok(GeneratedParser {
c_code,
node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
})
}
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
if grammar_path.is_dir() {
return Err(anyhow!(
"Path to a grammar file with `.js` or `.json` extension is required"
));
}
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)
.with_context(|| "Failed to load grammar.js")?),
Some("json") => {
Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
}
_ => Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",)),
}
}
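// Protocol with the JS runtime: dsl.js imports the grammar file named by the
// TREE_SITTER_GRAMMAR_PATH environment variable and prints the grammar JSON as
// the final line of its stdout. Everything before that last newline is user
// output (e.g. console.log calls in grammar.js) and is forwarded to our own
// stdout when the process exits.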
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
#[cfg(windows)]
let grammar_path = url::Url::from_file_path(grammar_path)
.expect("Failed to convert path to URL")
.to_string();
let js_runtime = js_runtime.unwrap_or("node");
let mut js_command = Command::new(js_runtime);
match js_runtime {
"node" => {
js_command.args(["--input-type=module", "-"]);
}
"bun" => {
js_command.arg("-");
}
"deno" => {
js_command.args(["run", "--allow-all", "-"]);
}
_ => {}
}
let mut js_process = js_command
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.with_context(|| format!("Failed to run `{js_runtime}`"))?;
let mut js_stdin = js_process
.stdin
.take()
.with_context(|| format!("Failed to open stdin for {js_runtime}"))?;
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
.with_context(|| "Could not parse this package's version as semver.")?;
write!(
js_stdin,
"globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
cli_version.major, cli_version.minor, cli_version.patch,
)
.with_context(|| format!("Failed to write tree-sitter version to {js_runtime}'s stdin"))?;
    js_stdin
        .write_all(include_bytes!("./dsl.js"))
        .with_context(|| format!("Failed to write grammar dsl to {js_runtime}'s stdin"))?;
drop(js_stdin);
let output = js_process
.wait_with_output()
.with_context(|| format!("Failed to read output from {js_runtime}"))?;
match output.status.code() {
None => panic!("{js_runtime} process was killed"),
Some(0) => {
let stdout = String::from_utf8(output.stdout)
.with_context(|| format!("Got invalid UTF8 from {js_runtime}"))?;
let mut grammar_json = &stdout[..];
if let Some(pos) = stdout.rfind('\n') {
// If there's a newline, split the last line from the rest of the output
let node_output = &stdout[..pos];
grammar_json = &stdout[pos + 1..];
let mut stdout = std::io::stdout().lock();
stdout.write_all(node_output.as_bytes())?;
stdout.write_all(b"\n")?;
stdout.flush()?;
}
Ok(serde_json::to_string_pretty(
&serde_json::from_str::<serde_json::Value>(grammar_json)
.with_context(|| "Failed to parse grammar JSON")?,
)
.with_context(|| "Failed to serialize grammar JSON")?
+ "\n")
}
Some(code) => Err(anyhow!("{js_runtime} process exited with status {code}")),
}
}
pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> {
fs::write(path, body)
.with_context(|| format!("Failed to write {:?}", path.file_name().unwrap()))
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.

@@ -1,258 +0,0 @@
use anyhow::{anyhow, Result};
use serde::Deserialize;
use serde_json::{Map, Value};
use super::{
grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
rules::{Precedence, Rule},
};
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum RuleJSON {
ALIAS {
content: Box<RuleJSON>,
named: bool,
value: String,
},
BLANK,
STRING {
value: String,
},
PATTERN {
value: String,
flags: Option<String>,
},
SYMBOL {
name: String,
},
CHOICE {
members: Vec<RuleJSON>,
},
FIELD {
name: String,
content: Box<RuleJSON>,
},
SEQ {
members: Vec<RuleJSON>,
},
REPEAT {
content: Box<RuleJSON>,
},
REPEAT1 {
content: Box<RuleJSON>,
},
PREC_DYNAMIC {
value: i32,
content: Box<RuleJSON>,
},
PREC_LEFT {
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
PREC_RIGHT {
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
PREC {
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
TOKEN {
content: Box<RuleJSON>,
},
IMMEDIATE_TOKEN {
content: Box<RuleJSON>,
},
}
#[derive(Deserialize)]
#[serde(untagged)]
enum PrecedenceValueJSON {
Integer(i32),
Name(String),
}
#[derive(Deserialize)]
pub(crate) struct GrammarJSON {
pub(crate) name: String,
rules: Map<String, Value>,
#[serde(default)]
precedences: Vec<Vec<RuleJSON>>,
#[serde(default)]
conflicts: Vec<Vec<String>>,
#[serde(default)]
externals: Vec<RuleJSON>,
#[serde(default)]
extras: Vec<RuleJSON>,
#[serde(default)]
inline: Vec<String>,
#[serde(default)]
supertypes: Vec<String>,
word: Option<String>,
}
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
let grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
let mut variables = Vec::with_capacity(grammar_json.rules.len());
for (name, value) in grammar_json.rules {
variables.push(Variable {
name: name.clone(),
kind: VariableType::Named,
rule: parse_rule(serde_json::from_value(value)?),
});
}
let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len());
for list in grammar_json.precedences {
let mut ordering = Vec::with_capacity(list.len());
for entry in list {
ordering.push(match entry {
RuleJSON::STRING { value } => PrecedenceEntry::Name(value),
RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name),
_ => {
return Err(anyhow!(
"Invalid rule in precedences array. Only strings and symbols are allowed"
))
}
});
}
precedence_orderings.push(ordering);
}
let extra_symbols = grammar_json
.extras
.into_iter()
.try_fold(Vec::new(), |mut acc, item| {
let rule = parse_rule(item);
if let Rule::String(ref value) = rule {
if value.is_empty() {
return Err(anyhow!(
"Rules in the `extras` array must not contain empty strings"
));
}
}
acc.push(rule);
Ok(acc)
})?;
let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect();
Ok(InputGrammar {
name: grammar_json.name,
word_token: grammar_json.word,
expected_conflicts: grammar_json.conflicts,
supertype_symbols: grammar_json.supertypes,
variables_to_inline: grammar_json.inline,
precedence_orderings,
variables,
extra_symbols,
external_tokens,
})
}
fn parse_rule(json: RuleJSON) -> Rule {
match json {
RuleJSON::ALIAS {
content,
value,
named,
} => Rule::alias(parse_rule(*content), value, named),
RuleJSON::BLANK => Rule::Blank,
RuleJSON::STRING { value } => Rule::String(value),
RuleJSON::PATTERN { value, flags } => Rule::Pattern(
value,
flags.map_or(String::new(), |f| {
f.matches(|c| {
if c == 'i' {
true
} else {
// silently ignore unicode flags
if c != 'u' && c != 'v' {
eprintln!("Warning: unsupported flag {c}");
}
false
}
})
.collect()
}),
),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
RuleJSON::REPEAT { content } => {
Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank])
}
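        // `Rule::repeat` has one-or-more semantics, so JSON REPEAT1 maps to it
        // directly, while REPEAT (zero-or-more) is desugared here into
        // `repeat1 | blank`; later stages only ever see the one-or-more form.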
RuleJSON::PREC { value, content } => Rule::prec(value.into(), parse_rule(*content)),
RuleJSON::PREC_LEFT { value, content } => {
Rule::prec_left(value.into(), parse_rule(*content))
}
RuleJSON::PREC_RIGHT { value, content } => {
Rule::prec_right(value.into(), parse_rule(*content))
}
RuleJSON::PREC_DYNAMIC { value, content } => {
Rule::prec_dynamic(value, parse_rule(*content))
}
RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
}
}
impl From<PrecedenceValueJSON> for Precedence {
fn from(val: PrecedenceValueJSON) -> Self {
match val {
PrecedenceValueJSON::Integer(i) => Self::Integer(i),
PrecedenceValueJSON::Name(i) => Self::Name(i),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_grammar() {
let grammar = parse_grammar(
r#"{
"name": "my_lang",
"rules": {
"file": {
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "statement"
}
},
"statement": {
"type": "STRING",
"value": "foo"
}
}
}"#,
)
.unwrap();
assert_eq!(grammar.name, "my_lang");
assert_eq!(
grammar.variables,
vec![
Variable {
name: "file".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
},
Variable {
name: "statement".to_string(),
kind: VariableType::Named,
rule: Rule::String("foo".to_string())
},
]
);
}
}

@@ -1,289 +0,0 @@
use std::{collections::HashMap, mem};
use super::ExtractedSyntaxGrammar;
use crate::generate::{
grammars::{Variable, VariableType},
rules::{Rule, Symbol},
};
struct Expander {
variable_name: String,
repeat_count_in_variable: usize,
preceding_symbol_count: usize,
auxiliary_variables: Vec<Variable>,
existing_repeats: HashMap<Rule, Symbol>,
}
impl Expander {
fn expand_variable(&mut self, index: usize, variable: &mut Variable) -> bool {
self.variable_name.clear();
self.variable_name.push_str(&variable.name);
self.repeat_count_in_variable = 0;
        let rule = mem::replace(&mut variable.rule, Rule::Blank);
// In the special case of a hidden variable with a repetition at its top level,
// convert that rule itself into a binary tree structure instead of introducing
// another auxiliary rule.
if let (VariableType::Hidden, Rule::Repeat(repeated_content)) = (variable.kind, &rule) {
let inner_rule = self.expand_rule(repeated_content);
variable.rule = self.wrap_rule_in_binary_tree(Symbol::non_terminal(index), inner_rule);
variable.kind = VariableType::Auxiliary;
return true;
}
variable.rule = self.expand_rule(&rule);
false
}
fn expand_rule(&mut self, rule: &Rule) -> Rule {
match rule {
// For choices, sequences, and metadata, descend into the child rules,
// replacing any nested repetitions.
Rule::Choice(elements) => Rule::Choice(
elements
.iter()
.map(|element| self.expand_rule(element))
.collect(),
),
Rule::Seq(elements) => Rule::Seq(
elements
.iter()
.map(|element| self.expand_rule(element))
.collect(),
),
Rule::Metadata { rule, params } => Rule::Metadata {
rule: Box::new(self.expand_rule(rule)),
params: params.clone(),
},
// For repetitions, introduce an auxiliary rule that contains the
// repeated content, but can also contain a recursive binary tree structure.
Rule::Repeat(content) => {
let inner_rule = self.expand_rule(content);
if let Some(existing_symbol) = self.existing_repeats.get(&inner_rule) {
return Rule::Symbol(*existing_symbol);
}
self.repeat_count_in_variable += 1;
let rule_name = format!(
"{}_repeat{}",
self.variable_name, self.repeat_count_in_variable
);
let repeat_symbol = Symbol::non_terminal(
self.preceding_symbol_count + self.auxiliary_variables.len(),
);
self.existing_repeats
.insert(inner_rule.clone(), repeat_symbol);
self.auxiliary_variables.push(Variable {
name: rule_name,
kind: VariableType::Auxiliary,
rule: self.wrap_rule_in_binary_tree(repeat_symbol, inner_rule),
});
Rule::Symbol(repeat_symbol)
}
// For primitive rules, don't change anything.
_ => rule.clone(),
}
}
fn wrap_rule_in_binary_tree(&self, symbol: Symbol, rule: Rule) -> Rule {
Rule::choice(vec![
Rule::Seq(vec![Rule::Symbol(symbol), Rule::Symbol(symbol)]),
rule,
])
}
}
pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
let mut expander = Expander {
variable_name: String::new(),
repeat_count_in_variable: 0,
preceding_symbol_count: grammar.variables.len(),
auxiliary_variables: Vec::new(),
existing_repeats: HashMap::new(),
};
for (i, variable) in grammar.variables.iter_mut().enumerate() {
let expanded_top_level_repetition = expander.expand_variable(i, variable);
// If a hidden variable had a top-level repetition and it was converted to
// a recursive rule, then it can't be inlined.
if expanded_top_level_repetition {
grammar
.variables_to_inline
.retain(|symbol| *symbol != Symbol::non_terminal(i));
}
}
grammar.variables.extend(expander.auxiliary_variables);
grammar
}
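// Each repetition becomes an auxiliary rule of the shape `R ::= (R R) | content`,
// a binary tree over the repeated items rather than a left- or right-leaning
// list, which keeps the depth of the resulting syntax tree roughly logarithmic
// in the number of repetitions.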
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_repeat_expansion() {
// Repeats nested inside of sequences and choices are expanded.
let grammar = expand_repeats(build_grammar(vec![Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::choice(vec![
Rule::repeat(Rule::terminal(11)),
Rule::repeat(Rule::terminal(12)),
]),
Rule::terminal(13),
]),
)]));
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]),
Rule::terminal(13),
])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
Rule::terminal(11),
])
),
Variable::auxiliary(
"rule0_repeat2",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::terminal(12),
])
),
]
);
}
#[test]
fn test_repeat_deduplication() {
// Terminal 4 appears inside of a repeat in three different places.
let grammar = expand_repeats(build_grammar(vec![
Variable::named(
"rule0",
Rule::choice(vec![
Rule::seq(vec![Rule::terminal(1), Rule::repeat(Rule::terminal(4))]),
Rule::seq(vec![Rule::terminal(2), Rule::repeat(Rule::terminal(4))]),
]),
),
Variable::named(
"rule1",
Rule::seq(vec![Rule::terminal(3), Rule::repeat(Rule::terminal(4))]),
),
]));
// Only one auxiliary rule is created for repeating terminal 4.
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::choice(vec![
Rule::seq(vec![Rule::terminal(1), Rule::non_terminal(2)]),
Rule::seq(vec![Rule::terminal(2), Rule::non_terminal(2)]),
])
),
Variable::named(
"rule1",
Rule::seq(vec![Rule::terminal(3), Rule::non_terminal(2),])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::terminal(4),
])
)
]
);
}
#[test]
fn test_expansion_of_nested_repeats() {
let grammar = expand_repeats(build_grammar(vec![Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::repeat(Rule::seq(vec![
Rule::terminal(11),
Rule::repeat(Rule::terminal(12)),
])),
]),
)]));
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::seq(vec![Rule::terminal(10), Rule::non_terminal(2),])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
Rule::terminal(12),
])
),
Variable::auxiliary(
"rule0_repeat2",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::seq(vec![Rule::terminal(11), Rule::non_terminal(1),]),
])
),
]
);
}
#[test]
fn test_expansion_of_repeats_at_top_of_hidden_rules() {
let grammar = expand_repeats(build_grammar(vec![
Variable::named("rule0", Rule::non_terminal(1)),
Variable::hidden(
"_rule1",
Rule::repeat(Rule::choice(vec![Rule::terminal(11), Rule::terminal(12)])),
),
]));
assert_eq!(
grammar.variables,
vec![
Variable::named("rule0", Rule::non_terminal(1),),
Variable::auxiliary(
"_rule1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1)]),
Rule::terminal(11),
Rule::terminal(12),
]),
),
]
);
}
fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
ExtractedSyntaxGrammar {
variables,
..Default::default()
}
}
}


@@ -1,940 +0,0 @@
use std::collections::HashMap;
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex_syntax::ast::{
parse, Ast, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem, ClassUnicodeKind,
RepetitionKind, RepetitionRange,
};
use super::ExtractedLexicalGrammar;
use crate::generate::{
grammars::{LexicalGrammar, LexicalVariable},
nfa::{CharacterSet, Nfa, NfaState},
rules::{Precedence, Rule},
};
lazy_static! {
static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
serde_json::from_str(UNICODE_PROPERTIES_JSON).unwrap();
static ref UNICODE_CATEGORY_ALIASES: HashMap<&'static str, String> =
serde_json::from_str(UNICODE_CATEGORY_ALIASES_JSON).unwrap();
static ref UNICODE_PROPERTY_ALIASES: HashMap<&'static str, String> =
serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
}
const UNICODE_CATEGORIES_JSON: &str = include_str!("./unicode-categories.json");
const UNICODE_PROPERTIES_JSON: &str = include_str!("./unicode-properties.json");
const UNICODE_CATEGORY_ALIASES_JSON: &str = include_str!("./unicode-category-aliases.json");
const UNICODE_PROPERTY_ALIASES_JSON: &str = include_str!("./unicode-property-aliases.json");
struct NfaBuilder {
nfa: Nfa,
is_sep: bool,
precedence_stack: Vec<i32>,
}
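// Implicit precedences break ties between overlapping tokens: string literals
// get precedence 2 so they beat patterns matching the same text, and content
// marked as the main token gets one extra point on top of that.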
fn get_implicit_precedence(rule: &Rule) -> i32 {
match rule {
Rule::String(_) => 2,
Rule::Metadata { rule, params } => {
if params.is_main_token {
get_implicit_precedence(rule) + 1
} else {
get_implicit_precedence(rule)
}
}
_ => 0,
}
}
const fn get_completion_precedence(rule: &Rule) -> i32 {
if let Rule::Metadata { params, .. } = rule {
if let Precedence::Integer(p) = params.precedence {
return p;
}
}
0
}
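// Compile every token rule in the extracted lexical grammar into NFA states.
// Each variable gets an accept state plus the states for its rule; unless the
// token is marked as immediate, the separator rule is prepended so whitespace
// and other separators may be skipped before the token begins.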
pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
let mut builder = NfaBuilder {
nfa: Nfa::new(),
is_sep: true,
precedence_stack: vec![0],
};
let separator_rule = if grammar.separators.is_empty() {
Rule::Blank
} else {
grammar.separators.push(Rule::Blank);
Rule::repeat(Rule::choice(grammar.separators))
};
let mut variables = Vec::new();
for (i, variable) in grammar.variables.into_iter().enumerate() {
let is_immediate_token = match &variable.rule {
Rule::Metadata { params, .. } => params.is_main_token,
_ => false,
};
builder.is_sep = false;
builder.nfa.states.push(NfaState::Accept {
variable_index: i,
precedence: get_completion_precedence(&variable.rule),
});
let last_state_id = builder.nfa.last_state_id();
builder
.expand_rule(&variable.rule, last_state_id)
.with_context(|| format!("Error processing rule {}", variable.name))?;
if !is_immediate_token {
builder.is_sep = true;
let last_state_id = builder.nfa.last_state_id();
builder.expand_rule(&separator_rule, last_state_id)?;
}
variables.push(LexicalVariable {
name: variable.name,
kind: variable.kind,
implicit_precedence: get_implicit_precedence(&variable.rule),
start_state: builder.nfa.last_state_id(),
});
}
Ok(LexicalGrammar {
nfa: builder.nfa,
variables,
})
}
impl NfaBuilder {
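    // Compile `rule` into NFA states that lead to `next_state_id` on a match.
    // States are pushed in reverse order (sequences and strings are expanded
    // right-to-left), so `self.nfa.last_state_id()` is the rule's start state
    // after this call. Returns `Ok(true)` if the rule produced a new start
    // state (roughly: it can match something non-empty).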
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
match rule {
Rule::Pattern(s, f) => {
let ast = parse::Parser::new().parse(s)?;
self.expand_regex(&ast, next_state_id, f.contains('i'))
}
Rule::String(s) => {
for c in s.chars().rev() {
self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
next_state_id = self.nfa.last_state_id();
}
Ok(!s.is_empty())
}
Rule::Choice(elements) => {
let mut alternative_state_ids = Vec::new();
for element in elements {
if self.expand_rule(element, next_state_id)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
}
}
alternative_state_ids.sort_unstable();
alternative_state_ids.dedup();
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
for alternative_state_id in alternative_state_ids {
self.push_split(alternative_state_id);
}
Ok(true)
}
Rule::Seq(elements) => {
let mut result = false;
for element in elements.iter().rev() {
if self.expand_rule(element, next_state_id)? {
result = true;
}
next_state_id = self.nfa.last_state_id();
}
Ok(result)
}
Rule::Repeat(rule) => {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_rule(rule, split_state_id)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
} else {
Ok(false)
}
}
Rule::Metadata { rule, params } => {
let has_precedence = if let Precedence::Integer(precedence) = &params.precedence {
self.precedence_stack.push(*precedence);
true
} else {
false
};
let result = self.expand_rule(rule, next_state_id);
if has_precedence {
self.precedence_stack.pop();
}
result
}
Rule::Blank => Ok(false),
_ => Err(anyhow!("Grammar error: Unexpected rule {rule:?}")),
}
}
fn expand_regex(
&mut self,
ast: &Ast,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
const fn inverse_char(c: char) -> char {
match c {
'a'..='z' => (c as u8 - b'a' + b'A') as char,
'A'..='Z' => (c as u8 - b'A' + b'a') as char,
c => c,
}
}
fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet {
for char in chars.clone().chars() {
let inverted = inverse_char(char);
if char != inverted {
chars = chars.add_char(inverted);
}
}
chars
}
match ast {
Ast::Empty(_) => Ok(false),
Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")),
Ast::Literal(literal) => {
let mut char_set = CharacterSet::from_char(literal.c);
if case_insensitive {
let inverted = inverse_char(literal.c);
if literal.c != inverted {
char_set = char_set.add_char(inverted);
}
}
self.push_advance(char_set, next_state_id);
Ok(true)
}
Ast::Dot(_) => {
self.push_advance(CharacterSet::from_char('\n').negate(), next_state_id);
Ok(true)
}
Ast::Assertion(_) => Err(anyhow!("Regex error: Assertions are not supported")),
Ast::ClassUnicode(class) => {
let mut chars = self.expand_unicode_character_class(&class.kind)?;
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
Ast::ClassPerl(class) => {
let mut chars = self.expand_perl_character_class(&class.kind);
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
Ast::ClassBracketed(class) => {
let mut chars = self.translate_class_set(&class.kind)?;
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
Ast::Repetition(repetition) => match repetition.op.kind {
RepetitionKind::ZeroOrOne => {
self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::OneOrMore => {
self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::ZeroOrMore => {
self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
self.expand_count(&repetition.ast, count, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? {
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)
} else {
Ok(false)
}
}
RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
let mut result =
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?;
for _ in min..max {
if result {
next_state_id = self.nfa.last_state_id();
}
if self.expand_zero_or_one(
&repetition.ast,
next_state_id,
case_insensitive,
)? {
result = true;
}
}
Ok(result)
}
},
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive),
Ast::Alternation(alternation) => {
let mut alternative_state_ids = Vec::new();
for ast in &alternation.asts {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
}
}
alternative_state_ids.sort_unstable();
alternative_state_ids.dedup();
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
for alternative_state_id in alternative_state_ids {
self.push_split(alternative_state_id);
}
Ok(true)
}
Ast::Concat(concat) => {
let mut result = false;
for ast in concat.asts.iter().rev() {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
}
Ok(result)
}
}
}
fn translate_class_set(&self, class_set: &ClassSet) -> Result<CharacterSet> {
match &class_set {
ClassSet::Item(item) => self.expand_character_class(item),
ClassSet::BinaryOp(binary_op) => {
let mut lhs_char_class = self.translate_class_set(&binary_op.lhs)?;
let mut rhs_char_class = self.translate_class_set(&binary_op.rhs)?;
match binary_op.kind {
ClassSetBinaryOpKind::Intersection => {
Ok(lhs_char_class.remove_intersection(&mut rhs_char_class))
}
ClassSetBinaryOpKind::Difference => {
Ok(lhs_char_class.difference(rhs_char_class))
}
ClassSetBinaryOpKind::SymmetricDifference => {
Ok(lhs_char_class.symmetric_difference(rhs_char_class))
}
}
}
}
}
fn expand_one_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_regex(ast, split_state_id, case_insensitive)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
} else {
self.nfa.states.pop();
Ok(false)
}
}
fn expand_zero_or_one(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
Ok(false)
}
}
fn expand_zero_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_one_or_more(ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
Ok(false)
}
}
fn expand_count(
&mut self,
ast: &Ast,
count: u32,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
let mut result = false;
for _ in 0..count {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
}
Ok(result)
}
fn expand_character_class(&self, item: &ClassSetItem) -> Result<CharacterSet> {
match item {
ClassSetItem::Empty(_) => Ok(CharacterSet::empty()),
ClassSetItem::Literal(literal) => Ok(CharacterSet::from_char(literal.c)),
ClassSetItem::Range(range) => Ok(CharacterSet::from_range(range.start.c, range.end.c)),
ClassSetItem::Union(union) => {
let mut result = CharacterSet::empty();
for item in &union.items {
result = result.add(&self.expand_character_class(item)?);
}
Ok(result)
}
ClassSetItem::Perl(class) => Ok(self.expand_perl_character_class(&class.kind)),
ClassSetItem::Unicode(class) => {
let mut set = self.expand_unicode_character_class(&class.kind)?;
if class.negated {
set = set.negate();
}
Ok(set)
}
ClassSetItem::Bracketed(class) => {
let mut set = self.translate_class_set(&class.kind)?;
if class.negated {
set = set.negate();
}
Ok(set)
}
ClassSetItem::Ascii(_) => Err(anyhow!(
"Regex error: Unsupported character class syntax {item:?}",
)),
}
}
fn expand_unicode_character_class(&self, class: &ClassUnicodeKind) -> Result<CharacterSet> {
let mut chars = CharacterSet::empty();
let category_letter;
match class {
ClassUnicodeKind::OneLetter(le) => {
category_letter = le.to_string();
}
ClassUnicodeKind::Named(class_name) => {
let actual_class_name = UNICODE_CATEGORY_ALIASES
.get(class_name.as_str())
.or_else(|| UNICODE_PROPERTY_ALIASES.get(class_name.as_str()))
.unwrap_or(class_name);
if actual_class_name.len() == 1 {
category_letter = actual_class_name.clone();
} else {
let code_points =
UNICODE_CATEGORIES
.get(actual_class_name.as_str())
.or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
.ok_or_else(|| {
anyhow!(
"Regex error: Unsupported unicode character class {class_name}",
)
})?;
for c in code_points {
if let Some(c) = char::from_u32(*c) {
chars = chars.add_char(c);
}
}
return Ok(chars);
}
}
ClassUnicodeKind::NamedValue { .. } => {
return Err(anyhow!(
"Regex error: Key-value unicode properties are not supported"
))
}
}
for (category, code_points) in UNICODE_CATEGORIES.iter() {
if category.starts_with(&category_letter) {
for c in code_points {
if let Some(c) = char::from_u32(*c) {
chars = chars.add_char(c);
}
}
}
}
Ok(chars)
}
fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet {
match item {
ClassPerlKind::Digit => CharacterSet::from_range('0', '9'),
ClassPerlKind::Space => CharacterSet::empty()
.add_char(' ')
.add_char('\t')
.add_char('\r')
.add_char('\n')
.add_char('\x0B')
.add_char('\x0C'),
ClassPerlKind::Word => CharacterSet::empty()
.add_char('_')
.add_range('A', 'Z')
.add_range('a', 'z')
.add_range('0', '9'),
}
}
fn push_advance(&mut self, chars: CharacterSet, state_id: u32) {
let precedence = *self.precedence_stack.last().unwrap();
self.nfa.states.push(NfaState::Advance {
chars,
state_id,
precedence,
is_sep: self.is_sep,
});
}
fn push_split(&mut self, state_id: u32) {
let last_state_id = self.nfa.last_state_id();
self.nfa
.states
.push(NfaState::Split(state_id, last_state_id));
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
grammars::Variable,
nfa::{NfaCursor, NfaTransition},
};
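    // Simulate the NFA against `s`, starting from every token's start state
    // at once. Returns the matched variable's index and the matched text,
    // preferring longer matches but never advancing through transitions whose
    // precedence is lower than that of an already-completed match.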
fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
let start_states = grammar.variables.iter().map(|v| v.start_state).collect();
let mut cursor = NfaCursor::new(&grammar.nfa, start_states);
let mut result = None;
let mut result_precedence = i32::MIN;
let mut start_char = 0;
let mut end_char = 0;
for c in s.chars() {
for (id, precedence) in cursor.completions() {
if result.is_none() || result_precedence <= precedence {
result = Some((id, &s[start_char..end_char]));
result_precedence = precedence;
}
}
if let Some(NfaTransition {
states,
is_separator,
..
}) = cursor
.transitions()
.into_iter()
.find(|t| t.characters.contains(c) && t.precedence >= result_precedence)
{
cursor.reset(states);
end_char += c.len_utf8();
if is_separator {
start_char = end_char;
}
} else {
break;
}
}
for (id, precedence) in cursor.completions() {
if result.is_none() || result_precedence <= precedence {
result = Some((id, &s[start_char..end_char]));
result_precedence = precedence;
}
}
result
}
#[test]
fn test_rule_expansion() {
struct Row {
rules: Vec<Rule>,
separators: Vec<Rule>,
examples: Vec<(&'static str, Option<(usize, &'static str)>)>,
}
let table = [
// regex with sequences and alternatives
Row {
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")],
separators: vec![],
examples: vec![
("ade1", Some((0, "ade"))),
("bdf1", Some((0, "bdf"))),
("bdfh1", Some((0, "bdfh"))),
("ad1", None),
],
},
// regex with repeats
Row {
rules: vec![Rule::pattern("a*", "")],
separators: vec![],
examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
},
// regex with repeats in sequences
Row {
rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")],
separators: vec![],
examples: vec![
("af1", Some((0, "af"))),
("adedef1", Some((0, "adedef"))),
("abcbcbcf1", Some((0, "abcbcbcf"))),
("a", None),
],
},
// regex with character ranges
Row {
rules: vec![Rule::pattern("[a-fA-F0-9]+", "")],
separators: vec![],
examples: vec![("A1ff0.", Some((0, "A1ff0")))],
},
// regex with perl character classes
Row {
rules: vec![Rule::pattern("\\w\\d\\s", "")],
separators: vec![],
examples: vec![("_0 ", Some((0, "_0 ")))],
},
// string
Row {
rules: vec![Rule::string("abc")],
separators: vec![],
examples: vec![("abcd", Some((0, "abc"))), ("ab", None)],
},
// complex rule containing strings and regexes
Row {
rules: vec![Rule::repeat(Rule::seq(vec![
Rule::string("{"),
Rule::pattern("[a-f]+", ""),
Rule::string("}"),
]))],
separators: vec![],
examples: vec![
("{a}{", Some((0, "{a}"))),
("{a}{d", Some((0, "{a}"))),
("ab", None),
],
},
// longest match rule
Row {
rules: vec![
Rule::pattern("a|bc", ""),
Rule::pattern("aa", ""),
Rule::pattern("bcd", ""),
],
separators: vec![],
examples: vec![
("a.", Some((0, "a"))),
("bc.", Some((0, "bc"))),
("aa.", Some((1, "aa"))),
("bcd?", Some((2, "bcd"))),
("b.", None),
("c.", None),
],
},
// regex with an alternative including the empty string
Row {
rules: vec![Rule::pattern("a(b|)+c", "")],
separators: vec![],
examples: vec![
("ac.", Some((0, "ac"))),
("abc.", Some((0, "abc"))),
("abbc.", Some((0, "abbc"))),
],
},
// separators
Row {
rules: vec![Rule::pattern("[a-f]+", "")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
(" a", Some((0, "a"))),
(" \nb", Some((0, "b"))),
(" \\a", None),
(" \\\na", Some((0, "a"))),
],
},
// shorter tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")),
Rule::pattern("[a-e]+", ""),
],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
("abceef", Some((0, "abc"))),
("abdeef", Some((1, "abde"))),
("aeeeef", Some((2, "aeeee"))),
],
},
// immediate tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")),
Rule::immediate_token(Rule::prec(
Precedence::Integer(2),
Rule::pattern("[^ab]+", ""),
)),
],
separators: vec![Rule::pattern("\\s", "")],
examples: vec![("cccb", Some((1, "ccc")))],
},
Row {
rules: vec![Rule::seq(vec![
Rule::string("a"),
Rule::choice(vec![Rule::string("b"), Rule::string("c")]),
Rule::string("d"),
])],
separators: vec![],
examples: vec![
("abd", Some((0, "abd"))),
("acd", Some((0, "acd"))),
("abc", None),
("ad", None),
("d", None),
("a", None),
],
},
// nested choices within sequences
Row {
rules: vec![Rule::seq(vec![
Rule::pattern("[0-9]+", ""),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::seq(vec![
Rule::choice(vec![Rule::string("e"), Rule::string("E")]),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
]),
Rule::pattern("[0-9]+", ""),
])]),
]),
])],
separators: vec![],
examples: vec![
("12", Some((0, "12"))),
("12e", Some((0, "12"))),
("12g", Some((0, "12"))),
("12e3", Some((0, "12e3"))),
("12e+", Some((0, "12"))),
("12E+34 +", Some((0, "12E+34"))),
("12e34", Some((0, "12e34"))),
],
},
// nested groups
Row {
rules: vec![Rule::seq(vec![Rule::pattern(r"([^x\\]|\\(.|\n))+", "")])],
separators: vec![],
examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
},
// allowing unrecognized escape sequences
Row {
rules: vec![
// Escaped forward slash (used in JS because '/' is the regex delimiter)
Rule::pattern(r"\/", ""),
// Escaped quotes
Rule::pattern(r#"\"\'"#, ""),
// Quote preceded by a literal backslash
Rule::pattern(r"[\\']+", ""),
],
separators: vec![],
examples: vec![
("/", Some((0, "/"))),
("\"\'", Some((1, "\"\'"))),
(r"'\'a", Some((2, r"'\'"))),
],
},
// unicode property escapes
Row {
rules: vec![
Rule::pattern(r"\p{L}+\P{L}+", ""),
Rule::pattern(r"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*", ""),
],
separators: vec![],
examples: vec![
(" 123 abc", Some((1, " 123 "))),
("ბΨƁ___ƀƔ", Some((0, "ბΨƁ___"))),
],
},
// unicode property escapes in bracketed sets
Row {
rules: vec![Rule::pattern(r"[\p{L}\p{Nd}]+", "")],
separators: vec![],
examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))],
},
// unicode character escapes
Row {
rules: vec![
Rule::pattern(r"\u{00dc}", ""),
Rule::pattern(r"\U{000000dd}", ""),
Rule::pattern(r"\u00de", ""),
Rule::pattern(r"\U000000df", ""),
],
separators: vec![],
examples: vec![
("\u{00dc}", Some((0, "\u{00dc}"))),
("\u{00dd}", Some((1, "\u{00dd}"))),
("\u{00de}", Some((2, "\u{00de}"))),
("\u{00df}", Some((3, "\u{00df}"))),
],
},
Row {
rules: vec![
Rule::pattern(r"u\{[0-9a-fA-F]+\}", ""),
// Already-escaped curly braces
Rule::pattern(r"\{[ab]{3}\}", ""),
// Unicode codepoints
Rule::pattern(r"\u{1000A}", ""),
// Unicode codepoints (lowercase)
Rule::pattern(r"\u{1000b}", ""),
],
separators: vec![],
examples: vec![
("u{1234} ok", Some((0, "u{1234}"))),
("{aba}}", Some((1, "{aba}"))),
("\u{1000A}", Some((2, "\u{1000A}"))),
("\u{1000b}", Some((3, "\u{1000b}"))),
],
},
// Emojis
Row {
rules: vec![Rule::pattern(r"\p{Emoji}+", "")],
separators: vec![],
examples: vec![
("🐎", Some((0, "🐎"))),
("🐴🐴", Some((0, "🐴🐴"))),
("#0", Some((0, "#0"))), // These chars are technically emojis!
("", None),
("", None),
("horse", None),
],
},
// Intersection
Row {
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")],
separators: vec![],
examples: vec![
("456", Some((0, "456"))),
("64", Some((0, "64"))),
("452", Some((0, "45"))),
("91", None),
("8", None),
("3", None),
],
},
// Difference
Row {
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
("83", Some((0, "83"))),
("9", Some((0, "9"))),
("124", Some((0, "12"))),
("67", None),
("4", None),
],
},
// Symmetric difference
Row {
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
("83", Some((0, "83"))),
("9", Some((0, "9"))),
("124", Some((0, "12"))),
("67", None),
("4", None),
],
},
// Nested set operations
Row {
//                  0 1 2 3 4 5 6 7 8 9
// [0-5]:           y y y y y y
// [2-4]:               y y y
// [0-5]--[2-4]:    y y       y
// [3-9]:                 y y y y y y y
// [6-7]:                       y y
// [3-9]--[6-7]:          y y y     y y
// final regex:     y y   y y       y y
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")],
separators: vec![],
examples: vec![
("01", Some((0, "01"))),
("432", Some((0, "43"))),
("8", Some((0, "8"))),
("9", Some((0, "9"))),
("2", None),
("567", None),
],
},
];
for Row {
rules,
separators,
examples,
} in &table
{
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: separators.clone(),
variables: rules
.iter()
.map(|rule| Variable::named("", rule.clone()))
.collect(),
})
.unwrap();
for (haystack, needle) in examples {
assert_eq!(simulate_nfa(&grammar, haystack), *needle);
}
}
}
}

@@ -1,304 +0,0 @@
use crate::generate::{
grammars::{LexicalGrammar, SyntaxGrammar},
rules::{Alias, AliasMap, Symbol, SymbolType},
};
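// For one symbol: every alias it appears under (with a usage count for each),
// and whether it ever appears without an alias.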
#[derive(Clone, Default)]
struct SymbolStatus {
aliases: Vec<(Alias, usize)>,
appears_unaliased: bool,
}
// Update the grammar by finding symbols that are always aliased and, for each such symbol,
// promoting one of its aliases to a "default alias", which is applied globally instead
// of in a context-specific way.
//
// This has two benefits:
// * It reduces the overhead of storing production-specific alias info in the parse table.
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
// ensures that the children of an `ERROR` node have symbols that are consistent with the way that
// they would appear in a valid syntax tree.
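//
// For example, if a terminal only ever appears under one alias, that alias
// becomes its default alias, the now-redundant per-production alias entries
// are cleared, and the mapping is recorded in the returned `AliasMap` (see
// the test at the bottom of this file).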
pub(super) fn extract_default_aliases(
syntax_grammar: &mut SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> AliasMap {
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
let mut non_terminal_status_list =
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
let mut external_status_list =
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
// For each grammar symbol, find all of the aliases under which the symbol appears,
// and determine whether or not the symbol ever appears *unaliased*.
for variable in &syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
let status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
panic!("Unexpected end token")
}
};
// Default aliases don't work for inlined variables.
if syntax_grammar.variables_to_inline.contains(&step.symbol) {
continue;
}
if let Some(alias) = &step.alias {
if let Some(count_for_alias) = status
.aliases
.iter_mut()
.find_map(|(a, count)| if a == alias { Some(count) } else { None })
{
*count_for_alias += 1;
} else {
status.aliases.push((alias.clone(), 1));
}
} else {
status.appears_unaliased = true;
}
}
}
}
for symbol in &syntax_grammar.extra_symbols {
let status = match symbol.kind {
SymbolType::External => &mut external_status_list[symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
SymbolType::Terminal => &mut terminal_status_list[symbol.index],
SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
panic!("Unexpected end token")
}
};
status.appears_unaliased = true;
}
let symbols_with_statuses = (terminal_status_list
.iter_mut()
.enumerate()
.map(|(i, status)| (Symbol::terminal(i), status)))
.chain(
non_terminal_status_list
.iter_mut()
.enumerate()
.map(|(i, status)| (Symbol::non_terminal(i), status)),
)
.chain(
external_status_list
.iter_mut()
.enumerate()
.map(|(i, status)| (Symbol::external(i), status)),
);
// For each symbol that always appears aliased, find the alias that occurs most often,
// and designate that alias as the symbol's "default alias". Store all of these
// default aliases in a map that will be returned.
let mut result = AliasMap::new();
for (symbol, status) in symbols_with_statuses {
if status.appears_unaliased {
status.aliases.clear();
} else if let Some(default_entry) = status
.aliases
.iter()
.enumerate()
.max_by_key(|(i, (_, count))| (count, -(*i as i64)))
.map(|(_, entry)| entry.clone())
{
status.aliases.clear();
status.aliases.push(default_entry.clone());
result.insert(symbol, default_entry.0);
}
}
// Wherever a symbol is aliased as its default alias, remove the usage of the alias,
// because it will now be redundant.
let mut alias_positions_to_clear = Vec::new();
for variable in &mut syntax_grammar.variables {
alias_positions_to_clear.clear();
for (i, production) in variable.productions.iter().enumerate() {
for (j, step) in production.steps.iter().enumerate() {
let status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
panic!("Unexpected end token")
}
};
// If this step is aliased as the symbol's default alias, then remove that alias.
if step.alias.is_some()
&& step.alias.as_ref() == status.aliases.first().map(|t| &t.0)
{
let mut other_productions_must_use_this_alias_at_this_index = false;
for (other_i, other_production) in variable.productions.iter().enumerate() {
if other_i != i
&& other_production.steps.len() > j
&& other_production.steps[j].alias == step.alias
&& result.get(&other_production.steps[j].symbol) != step.alias.as_ref()
{
other_productions_must_use_this_alias_at_this_index = true;
break;
}
}
if !other_productions_must_use_this_alias_at_this_index {
alias_positions_to_clear.push((i, j));
}
}
}
}
for (production_index, step_index) in &alias_positions_to_clear {
variable.productions[*production_index].steps[*step_index].alias = None;
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
grammars::{LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType},
nfa::Nfa,
};
#[test]
fn test_extract_simple_aliases() {
let mut syntax_grammar = SyntaxGrammar {
variables: vec![
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
],
}],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
// Token 0 is always aliased as "a1".
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
// Token 1 is aliased within rule `v1` above, but not here.
ProductionStep::new(Symbol::terminal(1)),
// Token 2 is aliased differently here than in `v1`. The alias from
// `v1` should be promoted to the default alias, because `v1` appears
// first in the grammar.
ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
// Token 3 is also aliased differently here than in `v1`. In this case,
// this alias should be promoted to the default alias, because it is
// used a greater number of times (twice).
ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
],
}],
},
],
..Default::default()
};
let lexical_grammar = LexicalGrammar {
nfa: Nfa::new(),
variables: vec![
LexicalVariable {
name: "t0".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
LexicalVariable {
name: "t1".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
LexicalVariable {
name: "t2".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
LexicalVariable {
name: "t3".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
],
};
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
assert_eq!(default_aliases.len(), 3);
assert_eq!(
default_aliases.get(&Symbol::terminal(0)),
Some(&Alias {
value: "a1".to_string(),
is_named: true,
})
);
assert_eq!(
default_aliases.get(&Symbol::terminal(2)),
Some(&Alias {
value: "a3".to_string(),
is_named: true,
})
);
assert_eq!(
default_aliases.get(&Symbol::terminal(3)),
Some(&Alias {
value: "a6".to_string(),
is_named: true,
})
);
assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);
assert_eq!(
syntax_grammar.variables,
vec![
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)),
ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
],
},],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(1)),
ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
ProductionStep::new(Symbol::terminal(3)),
ProductionStep::new(Symbol::terminal(3)),
],
},],
},
]
);
}
}

@@ -1,554 +0,0 @@
use std::{collections::HashMap, mem};
use anyhow::{anyhow, Result};
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::generate::{
grammars::{ExternalToken, Variable, VariableType},
rules::{MetadataParams, Rule, Symbol, SymbolType},
};
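// Move all of the grammar's terminals (strings, patterns, and rules marked
// with `token(...)`) out of the syntax grammar and into a separate lexical
// grammar, replacing each occurrence with a terminal symbol. Identical rules
// are extracted only once and shared between their usage sites.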
pub(super) fn extract_tokens(
mut grammar: InternedGrammar,
) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
let mut extractor = TokenExtractor {
current_variable_name: String::new(),
current_variable_token_count: 0,
is_first_rule: false,
extracted_variables: Vec::new(),
extracted_usage_counts: Vec::new(),
};
for (i, variable) in grammar.variables.iter_mut().enumerate() {
extractor.extract_tokens_in_variable(i == 0, variable)?;
}
for variable in &mut grammar.external_tokens {
extractor.extract_tokens_in_variable(false, variable)?;
}
let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len());
for variable in extractor.extracted_variables {
lexical_variables.push(variable);
}
// If a variable's entire rule was extracted as a token and that token didn't
// appear within any other rule, then remove that variable from the syntax
// grammar, giving its name to the token in the lexical grammar. Any symbols
// that pointed to that variable will need to be updated to point to the
// variable in the lexical grammar. Symbols that pointed to later variables
// will need to have their indices decremented.
let mut variables = Vec::new();
let mut symbol_replacer = SymbolReplacer {
replacements: HashMap::new(),
};
for (i, variable) in grammar.variables.into_iter().enumerate() {
if let Rule::Symbol(Symbol {
kind: SymbolType::Terminal,
index,
}) = variable.rule
{
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
let lexical_variable = &mut lexical_variables[index];
if lexical_variable.kind == VariableType::Auxiliary
|| variable.kind != VariableType::Hidden
{
lexical_variable.kind = variable.kind;
lexical_variable.name = variable.name;
symbol_replacer.replacements.insert(i, index);
continue;
}
}
}
variables.push(variable);
}
for variable in &mut variables {
variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule);
}
let expected_conflicts = grammar
.expected_conflicts
.into_iter()
.map(|conflict| {
let mut result = conflict
.iter()
.map(|symbol| symbol_replacer.replace_symbol(*symbol))
.collect::<Vec<_>>();
result.sort_unstable();
result.dedup();
result
})
.collect();
let supertype_symbols = grammar
.supertype_symbols
.into_iter()
.map(|symbol| symbol_replacer.replace_symbol(symbol))
.collect();
let variables_to_inline = grammar
.variables_to_inline
.into_iter()
.map(|symbol| symbol_replacer.replace_symbol(symbol))
.collect();
let mut separators = Vec::new();
let mut extra_symbols = Vec::new();
for rule in grammar.extra_symbols {
if let Rule::Symbol(symbol) = rule {
extra_symbols.push(symbol_replacer.replace_symbol(symbol));
} else if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
extra_symbols.push(Symbol::terminal(index));
} else {
separators.push(rule);
}
}
let mut external_tokens = Vec::new();
for external_token in grammar.external_tokens {
let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
if let Rule::Symbol(symbol) = rule {
if symbol.is_non_terminal() {
return Err(anyhow!(
"Rule '{}' cannot be used as both an external token and a non-terminal rule",
&variables[symbol.index].name,
));
}
if symbol.is_external() {
external_tokens.push(ExternalToken {
name: external_token.name,
kind: external_token.kind,
corresponding_internal_token: None,
});
} else {
external_tokens.push(ExternalToken {
name: lexical_variables[symbol.index].name.clone(),
kind: external_token.kind,
corresponding_internal_token: Some(symbol),
});
}
} else {
return Err(anyhow!(
"Non-symbol rules cannot be used as external tokens"
));
}
}
let mut word_token = None;
if let Some(token) = grammar.word_token {
let token = symbol_replacer.replace_symbol(token);
if token.is_non_terminal() {
return Err(anyhow!(
"Non-terminal symbol '{}' cannot be used as the word token",
&variables[token.index].name
));
}
word_token = Some(token);
}
Ok((
ExtractedSyntaxGrammar {
variables,
expected_conflicts,
extra_symbols,
variables_to_inline,
supertype_symbols,
external_tokens,
word_token,
precedence_orderings: grammar.precedence_orderings,
},
ExtractedLexicalGrammar {
variables: lexical_variables,
separators,
},
))
}
struct TokenExtractor {
current_variable_name: String,
current_variable_token_count: usize,
is_first_rule: bool,
extracted_variables: Vec<Variable>,
extracted_usage_counts: Vec<usize>,
}
struct SymbolReplacer {
replacements: HashMap<usize, usize>,
}
impl TokenExtractor {
fn extract_tokens_in_variable(
&mut self,
is_first: bool,
variable: &mut Variable,
) -> Result<()> {
self.current_variable_name.clear();
self.current_variable_name.push_str(&variable.name);
self.current_variable_token_count = 0;
self.is_first_rule = is_first;
let mut rule = Rule::Blank;
mem::swap(&mut rule, &mut variable.rule);
variable.rule = self.extract_tokens_in_rule(&rule)?;
Ok(())
}
fn extract_tokens_in_rule(&mut self, input: &Rule) -> Result<Rule> {
match input {
Rule::String(name) => Ok(self.extract_token(input, Some(name))?.into()),
Rule::Pattern(..) => Ok(self.extract_token(input, None)?.into()),
Rule::Metadata { params, rule } => {
if params.is_token {
let mut params = params.clone();
params.is_token = false;
let mut string_value = None;
if let Rule::String(value) = rule.as_ref() {
string_value = Some(value);
}
let rule_to_extract = if params == MetadataParams::default() {
rule.as_ref()
} else {
input
};
Ok(self.extract_token(rule_to_extract, string_value)?.into())
} else {
Ok(Rule::Metadata {
params: params.clone(),
rule: Box::new(self.extract_tokens_in_rule(rule)?),
})
}
}
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(
self.extract_tokens_in_rule(content)?,
))),
Rule::Seq(elements) => Ok(Rule::Seq(
elements
.iter()
.map(|e| self.extract_tokens_in_rule(e))
.collect::<Result<Vec<_>>>()?,
)),
Rule::Choice(elements) => Ok(Rule::Choice(
elements
.iter()
.map(|e| self.extract_tokens_in_rule(e))
.collect::<Result<Vec<_>>>()?,
)),
_ => Ok(input.clone()),
}
}
fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Result<Symbol> {
for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
if variable.rule == *rule {
self.extracted_usage_counts[i] += 1;
return Ok(Symbol::terminal(i));
}
}
let index = self.extracted_variables.len();
let variable = if let Some(string_value) = string_value {
if string_value.is_empty() && !self.is_first_rule {
return Err(anyhow!(
"The rule `{}` contains an empty string.
Tree-sitter does not support syntactic rules that contain an empty string
unless they are used only as the grammar's start rule.
",
self.current_variable_name
));
}
Variable {
name: string_value.clone(),
kind: VariableType::Anonymous,
rule: rule.clone(),
}
} else {
self.current_variable_token_count += 1;
Variable {
name: format!(
"{}_token{}",
&self.current_variable_name, self.current_variable_token_count
),
kind: VariableType::Auxiliary,
rule: rule.clone(),
}
};
self.extracted_variables.push(variable);
self.extracted_usage_counts.push(1);
Ok(Symbol::terminal(index))
}
}
impl SymbolReplacer {
fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
match rule {
Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
Rule::Choice(elements) => Rule::Choice(
elements
.iter()
.map(|e| self.replace_symbols_in_rule(e))
.collect(),
),
Rule::Seq(elements) => Rule::Seq(
elements
.iter()
.map(|e| self.replace_symbols_in_rule(e))
.collect(),
),
Rule::Repeat(content) => Rule::Repeat(Box::new(self.replace_symbols_in_rule(content))),
Rule::Metadata { rule, params } => Rule::Metadata {
params: params.clone(),
rule: Box::new(self.replace_symbols_in_rule(rule)),
},
_ => rule.clone(),
}
}
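    // Map a symbol from the original grammar to the extracted grammars: a
    // non-terminal whose definition was moved wholesale into the lexical
    // grammar becomes a terminal, and any later non-terminal has its index
    // decremented once for each earlier variable that was removed.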
fn replace_symbol(&self, symbol: Symbol) -> Symbol {
if !symbol.is_non_terminal() {
return symbol;
}
if let Some(replacement) = self.replacements.get(&symbol.index) {
return Symbol::terminal(*replacement);
}
let mut adjusted_index = symbol.index;
for replaced_index in self.replacements.keys() {
if *replaced_index < symbol.index {
adjusted_index -= 1;
}
}
Symbol::non_terminal(adjusted_index)
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_extraction() {
let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
Variable::named(
"rule_0",
Rule::repeat(Rule::seq(vec![
Rule::string("a"),
Rule::pattern("b", ""),
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
Rule::token(Rule::repeat(Rule::choice(vec![
Rule::string("c"),
Rule::string("d"),
]))),
]),
])),
),
Variable::named("rule_1", Rule::pattern("e", "")),
Variable::named("rule_2", Rule::pattern("b", "")),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
),
]))
.unwrap();
assert_eq!(
syntax_grammar.variables,
vec![
Variable::named(
"rule_0",
Rule::repeat(Rule::seq(vec![
// The string "a" was replaced by a symbol referencing the lexical grammar
Rule::terminal(0),
// The pattern "b" was replaced by a symbol referencing the lexical grammar
Rule::terminal(1),
Rule::choice(vec![
// The symbol referencing `rule_1` was replaced by a symbol referencing
// the lexical grammar.
Rule::terminal(3),
// The symbol referencing `rule_2` had its index decremented because
// `rule_1` was moved to the lexical grammar.
Rule::non_terminal(1),
// The rule wrapped in `token` was replaced by a symbol referencing
// the lexical grammar.
Rule::terminal(2),
])
]))
),
// The pattern "e" was only used in once place: as the definition of `rule_1`,
// so that rule was moved to the lexical grammar. The pattern "b" appeared in
// two places, so it was not moved into the lexical grammar.
Variable::named("rule_2", Rule::terminal(1)),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(1), Rule::Blank,])
),
]
);
assert_eq!(
lexical_grammar.variables,
vec![
Variable::anonymous("a", Rule::string("a")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")),
Variable::auxiliary(
"rule_0_token2",
Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
),
Variable::named("rule_1", Rule::pattern("e", "")),
]
);
}
#[test]
fn test_start_rule_is_token() {
let (syntax_grammar, lexical_grammar) =
extract_tokens(build_grammar(vec![Variable::named(
"rule_0",
Rule::string("hello"),
)]))
.unwrap();
assert_eq!(
syntax_grammar.variables,
vec![Variable::named("rule_0", Rule::terminal(0)),]
);
assert_eq!(
lexical_grammar.variables,
vec![Variable::anonymous("hello", Rule::string("hello")),]
);
}
#[test]
fn test_extracting_extra_symbols() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*", "")),
]);
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];
let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]);
assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
}
#[test]
fn test_extract_externals() {
let mut grammar = build_grammar(vec![
Variable::named(
"rule_0",
Rule::seq(vec![
Rule::external(0),
Rule::string("a"),
Rule::non_terminal(1),
Rule::non_terminal(2),
]),
),
Variable::named("rule_1", Rule::string("b")),
Variable::named("rule_2", Rule::string("c")),
]);
grammar.external_tokens = vec![
Variable::named("external_0", Rule::external(0)),
Variable::anonymous("a", Rule::string("a")),
Variable::named("rule_2", Rule::non_terminal(2)),
];
let (syntax_grammar, _) = extract_tokens(grammar).unwrap();
assert_eq!(
syntax_grammar.external_tokens,
vec![
ExternalToken {
name: "external_0".to_string(),
kind: VariableType::Named,
corresponding_internal_token: None,
},
ExternalToken {
name: "a".to_string(),
kind: VariableType::Anonymous,
corresponding_internal_token: Some(Symbol::terminal(0)),
},
ExternalToken {
name: "rule_2".to_string(),
kind: VariableType::Named,
corresponding_internal_token: Some(Symbol::terminal(2)),
},
]
);
}
#[test]
fn test_error_on_external_with_same_name_as_non_terminal() {
let mut grammar = build_grammar(vec![
Variable::named(
"rule_0",
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
),
Variable::named(
"rule_1",
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2)]),
),
Variable::named("rule_2", Rule::string("a")),
]);
grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))];
match extract_tokens(grammar) {
Err(e) => {
assert_eq!(e.to_string(), "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
}
_ => {
panic!("Expected an error but got no error");
}
}
}
#[test]
fn test_extraction_on_hidden_terminal() {
let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
Variable::named("rule_0", Rule::non_terminal(1)),
Variable::hidden("_rule_1", Rule::string("a")),
]))
.unwrap();
// Because `_rule_1` is hidden, it should not "absorb" the terminal "a",
// so both variables remain in the syntax grammar.
assert_eq!(
syntax_grammar.variables,
vec![
Variable::named("rule_0", Rule::non_terminal(1)),
Variable::hidden("_rule_1", Rule::terminal(0)),
]
);
// We should not have a hidden rule in our lexical grammar, only the terminal "a"
assert_eq!(
lexical_grammar.variables,
vec![Variable::anonymous("a", Rule::string("a"))]
);
}
#[test]
fn test_extraction_with_empty_string() {
assert!(extract_tokens(build_grammar(vec![
Variable::named("rule_0", Rule::non_terminal(1)),
Variable::hidden("_rule_1", Rule::string("")),
]))
.is_err());
}
fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
InternedGrammar {
variables,
..Default::default()
}
}
}

@@ -1,453 +0,0 @@
use anyhow::{anyhow, Result};
use super::ExtractedSyntaxGrammar;
use crate::generate::{
grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable},
rules::{Alias, Associativity, Precedence, Rule, Symbol},
};
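// Flattening turns one (possibly nested) rule into a flat `Production`: a
// linear sequence of steps, where each step records its symbol together with
// the precedence, associativity, alias, and field name that were in scope at
// that point in the rule tree.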
struct RuleFlattener {
production: Production,
precedence_stack: Vec<Precedence>,
associativity_stack: Vec<Associativity>,
alias_stack: Vec<Alias>,
field_name_stack: Vec<String>,
}
impl RuleFlattener {
const fn new() -> Self {
Self {
production: Production {
steps: Vec::new(),
dynamic_precedence: 0,
},
precedence_stack: Vec::new(),
associativity_stack: Vec::new(),
alias_stack: Vec::new(),
field_name_stack: Vec::new(),
}
}
fn flatten(mut self, rule: Rule) -> Production {
self.apply(rule, true);
self.production
}
fn apply(&mut self, rule: Rule, at_end: bool) -> bool {
match rule {
Rule::Seq(members) => {
let mut result = false;
let last_index = members.len() - 1;
for (i, member) in members.into_iter().enumerate() {
result |= self.apply(member, i == last_index && at_end);
}
result
}
Rule::Metadata { rule, params } => {
let mut has_precedence = false;
if !params.precedence.is_none() {
has_precedence = true;
self.precedence_stack.push(params.precedence);
}
let mut has_associativity = false;
if let Some(associativity) = params.associativity {
has_associativity = true;
self.associativity_stack.push(associativity);
}
let mut has_alias = false;
if let Some(alias) = params.alias {
has_alias = true;
self.alias_stack.push(alias);
}
let mut has_field_name = false;
if let Some(field_name) = params.field_name {
has_field_name = true;
self.field_name_stack.push(field_name);
}
if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
self.production.dynamic_precedence = params.dynamic_precedence;
}
let did_push = self.apply(*rule, at_end);
if has_precedence {
self.precedence_stack.pop();
if did_push && !at_end {
self.production.steps.last_mut().unwrap().precedence = self
.precedence_stack
.last()
.cloned()
.unwrap_or(Precedence::None);
}
}
if has_associativity {
self.associativity_stack.pop();
if did_push && !at_end {
self.production.steps.last_mut().unwrap().associativity =
self.associativity_stack.last().copied();
}
}
if has_alias {
self.alias_stack.pop();
}
if has_field_name {
self.field_name_stack.pop();
}
did_push
}
Rule::Symbol(symbol) => {
self.production.steps.push(ProductionStep {
symbol,
precedence: self
.precedence_stack
.last()
.cloned()
.unwrap_or(Precedence::None),
associativity: self.associativity_stack.last().copied(),
alias: self.alias_stack.last().cloned(),
field_name: self.field_name_stack.last().cloned(),
});
true
}
_ => false,
}
}
}
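// Expand every `choice` within `rule` into separate top-level alternatives.
// For sequences this takes a cross product: `seq(a, choice(b, c))` expands to
// the two rules `seq(a, b)` and `seq(a, c)` (built as nested pair sequences,
// which flattening later linearizes).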
fn extract_choices(rule: Rule) -> Vec<Rule> {
match rule {
Rule::Seq(elements) => {
let mut result = vec![Rule::Blank];
for element in elements {
let extraction = extract_choices(element);
let mut next_result = Vec::new();
for entry in result {
for extraction_entry in &extraction {
next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()]));
}
}
result = next_result;
}
result
}
Rule::Choice(elements) => {
let mut result = Vec::new();
for element in elements {
for rule in extract_choices(element) {
result.push(rule);
}
}
result
}
Rule::Metadata { rule, params } => extract_choices(*rule)
.into_iter()
.map(|rule| Rule::Metadata {
rule: Box::new(rule),
params: params.clone(),
})
.collect(),
_ => vec![rule],
}
}
fn flatten_variable(variable: Variable) -> SyntaxVariable {
let mut productions = Vec::new();
for rule in extract_choices(variable.rule) {
let production = RuleFlattener::new().flatten(rule);
if !productions.contains(&production) {
productions.push(production);
}
}
SyntaxVariable {
name: variable.name,
kind: variable.kind,
productions,
}
}
pub fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
for variable in variables {
for production in &variable.productions {
for step in &production.steps {
if step.symbol == symbol {
return true;
}
}
}
}
false
}
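// Flatten every variable into a set of productions and validate the result:
// a production matching the empty string is rejected unless no other rule
// refers to its variable (e.g. the start rule), and a variable marked for
// inlining must not refer to itself.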
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
let mut variables = Vec::new();
for variable in grammar.variables {
variables.push(flatten_variable(variable));
}
for (i, variable) in variables.iter().enumerate() {
let symbol = Symbol::non_terminal(i);
for production in &variable.productions {
if production.steps.is_empty() && symbol_is_used(&variables, symbol) {
return Err(anyhow!(
"The rule `{}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
",
variable.name
));
}
if grammar.variables_to_inline.contains(&symbol)
&& production.steps.iter().any(|step| step.symbol == symbol)
{
return Err(anyhow!(
"Rule `{}` cannot be inlined because it contains a reference to itself.",
variable.name,
));
}
}
}
Ok(SyntaxGrammar {
extra_symbols: grammar.extra_symbols,
expected_conflicts: grammar.expected_conflicts,
variables_to_inline: grammar.variables_to_inline,
precedence_orderings: grammar.precedence_orderings,
external_tokens: grammar.external_tokens,
supertype_symbols: grammar.supertype_symbols,
word_token: grammar.word_token,
variables,
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::VariableType;
#[test]
fn test_flatten_grammar() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_left(
Precedence::Integer(101),
Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_right(
Precedence::Integer(102),
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
]),
),
Rule::non_terminal(7),
]),
});
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(3))
.with_prec(Precedence::Integer(102), Some(Associativity::Right)),
ProductionStep::new(Symbol::non_terminal(4))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
]
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(5))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
]
},
]
);
}
#[test]
fn test_flatten_grammar_with_maximum_dynamic_precedence() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_dynamic(
101,
Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_dynamic(
102,
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
]),
),
Rule::non_terminal(7),
]),
});
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 102,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(3)),
ProductionStep::new(Symbol::non_terminal(4)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
Production {
dynamic_precedence: 101,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(5)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
]
);
}
#[test]
fn test_flatten_grammar_with_final_precedence() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(
Precedence::Integer(101),
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
),
});
assert_eq!(
result.productions,
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
]
}]
);
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(
Precedence::Integer(101),
Rule::seq(vec![Rule::non_terminal(1)]),
),
});
assert_eq!(
result.productions,
vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::non_terminal(1))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),]
}]
);
}
#[test]
fn test_flatten_grammar_with_field_names() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("first-thing".to_string(), Rule::terminal(1)),
Rule::terminal(2),
Rule::choice(vec![
Rule::Blank,
Rule::field("second-thing".to_string(), Rule::terminal(3)),
]),
]),
});
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
ProductionStep::new(Symbol::terminal(2))
]
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
ProductionStep::new(Symbol::terminal(2)),
ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing"),
]
},
]
);
}
#[test]
fn test_flatten_grammar_with_recursive_inline_variable() {
let result = flatten_grammar(ExtractedSyntaxGrammar {
extra_symbols: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: vec![Symbol::non_terminal(0)],
precedence_orderings: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
variables: vec![Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(0),
Rule::non_terminal(1),
Rule::non_terminal(2),
]),
}],
});
assert_eq!(
result.unwrap_err().to_string(),
"Rule `test` cannot be inlined because it contains a reference to itself.",
);
}
}

@@ -1,260 +0,0 @@
use anyhow::{anyhow, Result};
use super::InternedGrammar;
use crate::generate::{
grammars::{InputGrammar, Variable, VariableType},
rules::{Rule, Symbol},
};
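// Replace every `Rule::NamedSymbol` in the grammar with a numeric `Symbol`:
// an index into the grammar's variables or into its external tokens. Each
// variable is classified as named or hidden based on its name, and supertype
// symbols are forced to be hidden.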
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
let interner = Interner { grammar };
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
return Err(anyhow!("A grammar's start rule must be visible."));
}
let mut variables = Vec::with_capacity(grammar.variables.len());
for variable in &grammar.variables {
variables.push(Variable {
name: variable.name.clone(),
kind: variable_type_for_name(&variable.name),
rule: interner.intern_rule(&variable.rule, Some(&variable.name))?,
});
}
let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
for external_token in &grammar.external_tokens {
let rule = interner.intern_rule(external_token, None)?;
let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
(name.clone(), variable_type_for_name(name))
} else {
(String::new(), VariableType::Anonymous)
};
external_tokens.push(Variable { name, kind, rule });
}
let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
for extra_token in &grammar.extra_symbols {
extra_symbols.push(interner.intern_rule(extra_token, None)?);
}
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
for supertype_symbol_name in &grammar.supertype_symbols {
supertype_symbols.push(
interner
.intern_name(supertype_symbol_name)
.ok_or_else(|| anyhow!("Undefined symbol `{supertype_symbol_name}`"))?,
);
}
let mut expected_conflicts = Vec::new();
for conflict in &grammar.expected_conflicts {
let mut interned_conflict = Vec::with_capacity(conflict.len());
for name in conflict {
interned_conflict.push(
interner
.intern_name(name)
.ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?,
);
}
expected_conflicts.push(interned_conflict);
}
let mut variables_to_inline = Vec::new();
for name in &grammar.variables_to_inline {
if let Some(symbol) = interner.intern_name(name) {
variables_to_inline.push(symbol);
}
}
let mut word_token = None;
if let Some(name) = grammar.word_token.as_ref() {
word_token = Some(
interner
.intern_name(name)
.ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?,
);
}
for (i, variable) in variables.iter_mut().enumerate() {
if supertype_symbols.contains(&Symbol::non_terminal(i)) {
variable.kind = VariableType::Hidden;
}
}
Ok(InternedGrammar {
variables,
external_tokens,
extra_symbols,
expected_conflicts,
variables_to_inline,
supertype_symbols,
word_token,
precedence_orderings: grammar.precedence_orderings.clone(),
})
}
struct Interner<'a> {
grammar: &'a InputGrammar,
}
impl<'a> Interner<'a> {
fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> Result<Rule> {
match rule {
Rule::Choice(elements) => {
self.check_single(elements, name);
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element, name)?);
}
Ok(Rule::Choice(result))
}
Rule::Seq(elements) => {
self.check_single(elements, name);
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element, name)?);
}
Ok(Rule::Seq(result))
}
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content, name)?))),
Rule::Metadata { rule, params } => Ok(Rule::Metadata {
rule: Box::new(self.intern_rule(rule, name)?),
params: params.clone(),
}),
Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
|| Err(anyhow!("Undefined symbol `{name}`")),
|symbol| Ok(Rule::Symbol(symbol)),
),
_ => Ok(rule.clone()),
}
}
fn intern_name(&self, symbol: &str) -> Option<Symbol> {
for (i, variable) in self.grammar.variables.iter().enumerate() {
if variable.name == symbol {
return Some(Symbol::non_terminal(i));
}
}
for (i, external_token) in self.grammar.external_tokens.iter().enumerate() {
if let Rule::NamedSymbol(name) = external_token {
if name == symbol {
return Some(Symbol::external(i));
}
}
}
None
}
// A hidden rule whose body is a `seq` or `choice` with a single element can
// cause weird, inconsistent behavior with queries, so we warn the user about it.
fn check_single(&self, elements: &[Rule], name: Option<&str>) {
if elements.len() == 1 && matches!(elements[0], Rule::String(_) | Rule::Pattern(_, _)) {
eprintln!(
"Warning: rule {} is just a `seq` or `choice` rule with a single element. This is unnecessary.",
name.unwrap_or_default()
);
}
}
}
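// For example (an illustrative case, not from the original file), a grammar rule
// whose body is `Rule::Seq(vec![Rule::String("a".to_string())])` would trigger the
// `check_single` warning above: the single-element `seq` wrapper adds nothing.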
fn variable_type_for_name(name: &str) -> VariableType {
if name.starts_with('_') {
VariableType::Hidden
} else {
VariableType::Named
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_interning_symbol_names() {
let grammar = intern_symbols(&build_grammar(vec![
Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])),
Variable::named("y", Rule::named("_z")),
Variable::named("_z", Rule::string("a")),
]))
.unwrap();
assert_eq!(
grammar.variables,
vec![
Variable::named(
"x",
Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),])
),
Variable::named("y", Rule::non_terminal(2)),
Variable::hidden("_z", Rule::string("a")),
]
);
}
#[test]
fn test_interning_external_token_names() {
// Variable `y` is both an internal and an external token.
// Variable `z` is just an external token.
let mut input_grammar = build_grammar(vec![
Variable::named(
"w",
Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]),
),
Variable::named("x", Rule::string("a")),
Variable::named("y", Rule::string("b")),
]);
input_grammar
.external_tokens
.extend(vec![Rule::named("y"), Rule::named("z")]);
let grammar = intern_symbols(&input_grammar).unwrap();
// Variable `y` is referred to by its internal index.
// Variable `z` is referred to by its external index.
assert_eq!(
grammar.variables,
vec![
Variable::named(
"w",
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
Rule::external(1),
])
),
Variable::named("x", Rule::string("a")),
Variable::named("y", Rule::string("b")),
]
);
// The external token for `y` refers back to its internal index.
assert_eq!(
grammar.external_tokens,
vec![
Variable::named("y", Rule::non_terminal(2)),
Variable::named("z", Rule::external(1)),
]
);
}
#[test]
fn test_grammar_with_undefined_symbols() {
let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))]));
match result {
Err(e) => assert_eq!(e.to_string(), "Undefined symbol `y`"),
_ => panic!("Expected an error but got none"),
}
}
fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
InputGrammar {
variables,
name: "the_language".to_string(),
..Default::default()
}
}
}


@@ -1,252 +0,0 @@
mod expand_repeats;
mod expand_tokens;
mod extract_default_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;
mod process_inlines;
use std::{
cmp::Ordering,
collections::{hash_map, HashMap, HashSet},
mem,
};
use anyhow::{anyhow, Result};
pub(super) use flatten_grammar::symbol_is_used;
pub use self::expand_tokens::expand_tokens;
use self::{
expand_repeats::expand_repeats, extract_default_aliases::extract_default_aliases,
extract_tokens::extract_tokens, flatten_grammar::flatten_grammar,
intern_symbols::intern_symbols, process_inlines::process_inlines,
};
use super::{
grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
SyntaxGrammar, Variable,
},
rules::{AliasMap, Precedence, Rule, Symbol},
};
pub struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,
extra_symbols: Vec<T>,
expected_conflicts: Vec<Vec<Symbol>>,
precedence_orderings: Vec<Vec<PrecedenceEntry>>,
external_tokens: Vec<U>,
variables_to_inline: Vec<Symbol>,
supertype_symbols: Vec<Symbol>,
word_token: Option<Symbol>,
}
pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;
pub type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
#[derive(Debug, PartialEq, Eq)]
pub struct ExtractedLexicalGrammar {
pub variables: Vec<Variable>,
pub separators: Vec<Rule>,
}
impl<T, U> Default for IntermediateGrammar<T, U> {
fn default() -> Self {
Self {
variables: Vec::default(),
extra_symbols: Vec::default(),
expected_conflicts: Vec::default(),
precedence_orderings: Vec::default(),
external_tokens: Vec::default(),
variables_to_inline: Vec::default(),
supertype_symbols: Vec::default(),
word_token: Option::default(),
}
}
}
/// Transform an input grammar into separate components that are ready
/// for parse table construction.
pub fn prepare_grammar(
input_grammar: &InputGrammar,
) -> Result<(
SyntaxGrammar,
LexicalGrammar,
InlinedProductionMap,
AliasMap,
)> {
validate_precedences(input_grammar)?;
let interned_grammar = intern_symbols(input_grammar)?;
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
let syntax_grammar = expand_repeats(syntax_grammar);
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
let lexical_grammar = expand_tokens(lexical_grammar)?;
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
let inlines = process_inlines(&syntax_grammar, &lexical_grammar)?;
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
}
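// A minimal usage sketch (illustrative only, not part of the original file; it
// assumes a `Result` context for the `?` operator):
//
//     let input = InputGrammar {
//         name: "my_language".to_string(),
//         variables: vec![Variable {
//             name: "word".to_string(),
//             kind: VariableType::Named,
//             rule: Rule::String("hello".to_string()),
//         }],
//         ..Default::default()
//     };
//     let (syntax, lexical, inlines, aliases) = prepare_grammar(&input)?;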
/// Check that all of the named precedences used in the grammar are declared
/// within the `precedences` lists, and also that there are no conflicting
/// precedence orderings declared in those lists.
fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
// Check that no rule contains a named precedence that is not present in
// any of the `precedences` lists.
fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> {
match rule {
Rule::Repeat(rule) => validate(rule_name, rule, names),
Rule::Seq(elements) | Rule::Choice(elements) => elements
.iter()
.try_for_each(|e| validate(rule_name, e, names)),
Rule::Metadata { rule, params } => {
if let Precedence::Name(n) = &params.precedence {
if !names.contains(n) {
return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'"));
}
}
validate(rule_name, rule, names)?;
Ok(())
}
_ => Ok(()),
}
}
// For any two precedence names `a` and `b`, if `a` comes before `b`
// in some list, then it cannot come *after* `b` in any list.
let mut pairs = HashMap::new();
for list in &grammar.precedence_orderings {
for (i, mut entry1) in list.iter().enumerate() {
for mut entry2 in list.iter().skip(i + 1) {
if entry2 == entry1 {
continue;
}
let mut ordering = Ordering::Greater;
if entry1 > entry2 {
ordering = Ordering::Less;
mem::swap(&mut entry1, &mut entry2);
}
match pairs.entry((entry1, entry2)) {
hash_map::Entry::Vacant(e) => {
e.insert(ordering);
}
hash_map::Entry::Occupied(e) => {
if e.get() != &ordering {
return Err(anyhow!(
"Conflicting orderings for precedences {entry1} and {entry2}",
));
}
}
}
}
}
}
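// Worked example (illustrative): given the lists `[a, b]` and `[b, a]`, the
// first list stores the canonical pair (a, b) with `Ordering::Greater`; the
// second visits (b, a), which is swapped to (a, b) with `Ordering::Less`, so
// the occupied entry disagrees and the conflict error above is returned.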
let precedence_names = grammar
.precedence_orderings
.iter()
.flat_map(|l| l.iter())
.filter_map(|p| {
if let PrecedenceEntry::Name(n) = p {
Some(n)
} else {
None
}
})
.collect::<HashSet<&String>>();
for variable in &grammar.variables {
validate(&variable.name, &variable.rule, &precedence_names)?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::VariableType;
#[test]
fn test_validate_precedences_with_undeclared_precedence() {
let grammar = InputGrammar {
precedence_orderings: vec![
vec![
PrecedenceEntry::Name("a".to_string()),
PrecedenceEntry::Name("b".to_string()),
],
vec![
PrecedenceEntry::Name("b".to_string()),
PrecedenceEntry::Name("c".to_string()),
PrecedenceEntry::Name("d".to_string()),
],
],
variables: vec![
Variable {
name: "v1".to_string(),
kind: VariableType::Named,
rule: Rule::Seq(vec![
Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
]),
},
Variable {
name: "v2".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::Choice(vec![
Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")),
Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
])),
},
],
..Default::default()
};
let result = validate_precedences(&grammar);
assert_eq!(
result.unwrap_err().to_string(),
"Undeclared precedence 'omg' in rule 'v2'",
);
}
#[test]
fn test_validate_precedences_with_conflicting_order() {
let grammar = InputGrammar {
precedence_orderings: vec![
vec![
PrecedenceEntry::Name("a".to_string()),
PrecedenceEntry::Name("b".to_string()),
],
vec![
PrecedenceEntry::Name("b".to_string()),
PrecedenceEntry::Name("c".to_string()),
PrecedenceEntry::Name("a".to_string()),
],
],
variables: vec![
Variable {
name: "v1".to_string(),
kind: VariableType::Named,
rule: Rule::Seq(vec![
Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
]),
},
Variable {
name: "v2".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::Choice(vec![
Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")),
Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
])),
},
],
..Default::default()
};
let result = validate_precedences(&grammar);
assert_eq!(
result.unwrap_err().to_string(),
"Conflicting orderings for precedences 'a' and 'b'",
);
}
}


@@ -1,547 +0,0 @@
use std::collections::HashMap;
use anyhow::{anyhow, Result};
use crate::generate::{
grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
rules::SymbolType,
};
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {
// A `None` value here means that the production itself was produced via inlining,
// and is stored in the builder's `productions` vector, as opposed to being
// stored in one of the grammar's variables.
variable_index: Option<usize>,
production_index: usize,
step_index: usize,
}
struct InlinedProductionMapBuilder {
production_indices_by_step_id: HashMap<ProductionStepId, Vec<usize>>,
productions: Vec<Production>,
}
impl InlinedProductionMapBuilder {
fn build(mut self, grammar: &SyntaxGrammar) -> InlinedProductionMap {
let mut step_ids_to_process = Vec::new();
for (variable_index, variable) in grammar.variables.iter().enumerate() {
for production_index in 0..variable.productions.len() {
step_ids_to_process.push(ProductionStepId {
variable_index: Some(variable_index),
production_index,
step_index: 0,
});
while !step_ids_to_process.is_empty() {
let mut i = 0;
while i < step_ids_to_process.len() {
let step_id = step_ids_to_process[i];
if let Some(step) = self.production_step_for_id(step_id, grammar) {
if grammar.variables_to_inline.contains(&step.symbol) {
let inlined_step_ids = self
.inline_production_at_step(step_id, grammar)
.iter()
.copied()
.map(|production_index| ProductionStepId {
variable_index: None,
production_index,
step_index: step_id.step_index,
});
step_ids_to_process.splice(i..=i, inlined_step_ids);
} else {
step_ids_to_process[i] = ProductionStepId {
variable_index: step_id.variable_index,
production_index: step_id.production_index,
step_index: step_id.step_index + 1,
};
i += 1;
}
} else {
step_ids_to_process.remove(i);
}
}
}
}
}
let productions = self.productions;
let production_indices_by_step_id = self.production_indices_by_step_id;
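// The final map is keyed by the production's address (hence the
// `*const Production` cast below) paired with a step index, so that
// `inlined_productions` lookups can be made directly from a `&Production`
// without re-deriving variable and production indices.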
let production_map = production_indices_by_step_id
.into_iter()
.map(|(step_id, production_indices)| {
let production = step_id.variable_index.map_or_else(
|| &productions[step_id.production_index],
|variable_index| {
&grammar.variables[variable_index].productions[step_id.production_index]
},
) as *const Production;
((production, step_id.step_index as u32), production_indices)
})
.collect();
InlinedProductionMap {
productions,
production_map,
}
}
fn inline_production_at_step<'a>(
&'a mut self,
step_id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a [usize] {
// Build a list of productions produced by inlining rules.
let mut i = 0;
let step_index = step_id.step_index;
let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()];
while i < productions_to_add.len() {
if let Some(step) = productions_to_add[i].steps.get(step_index) {
let symbol = step.symbol;
if grammar.variables_to_inline.contains(&symbol) {
// Remove the production from the vector, replacing it with a placeholder.
let production = productions_to_add
.splice(i..=i, std::iter::once(&Production::default()).cloned())
.next()
.unwrap();
// Replace the placeholder with the inlined productions.
productions_to_add.splice(
i..=i,
grammar.variables[symbol.index].productions.iter().map(|p| {
let mut production = production.clone();
let removed_step = production
.steps
.splice(step_index..=step_index, p.steps.iter().cloned())
.next()
.unwrap();
let inserted_steps =
&mut production.steps[step_index..(step_index + p.steps.len())];
if let Some(alias) = removed_step.alias {
for inserted_step in inserted_steps.iter_mut() {
inserted_step.alias = Some(alias.clone());
}
}
if let Some(field_name) = removed_step.field_name {
for inserted_step in inserted_steps.iter_mut() {
inserted_step.field_name = Some(field_name.clone());
}
}
if let Some(last_inserted_step) = inserted_steps.last_mut() {
if last_inserted_step.precedence.is_none() {
last_inserted_step.precedence = removed_step.precedence;
}
if last_inserted_step.associativity.is_none() {
last_inserted_step.associativity = removed_step.associativity;
}
}
if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
production.dynamic_precedence = p.dynamic_precedence;
}
production
}),
);
continue;
}
}
i += 1;
}
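// Worked example (illustrative): inlining `B` at step 1 of `A -> x B y`, where
// `B` has productions `B -> p q` and `B -> r`, replaces the one production with
// `A -> x p q y` and `A -> x r y`. The `continue` above re-checks the same index
// `i`, in case the first newly spliced-in step must itself be inlined.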
// Store all the computed productions.
let result = productions_to_add
.into_iter()
.map(|production| {
self.productions
.iter()
.position(|p| *p == production)
.unwrap_or_else(|| {
self.productions.push(production);
self.productions.len() - 1
})
})
.collect();
// Cache these productions based on the original production step.
self.production_indices_by_step_id
.entry(step_id)
.or_insert(result)
}
fn production_for_id<'a>(
&'a self,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Production {
id.variable_index.map_or_else(
|| &self.productions[id.production_index],
|variable_index| &grammar.variables[variable_index].productions[id.production_index],
)
}
fn production_step_for_id<'a>(
&'a self,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> Option<&'a ProductionStep> {
self.production_for_id(id, grammar).steps.get(id.step_index)
}
}
pub(super) fn process_inlines(
grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> Result<InlinedProductionMap> {
for symbol in &grammar.variables_to_inline {
match symbol.kind {
SymbolType::External => {
return Err(anyhow!(
"External token `{}` cannot be inlined",
grammar.external_tokens[symbol.index].name
))
}
SymbolType::Terminal => {
return Err(anyhow!(
"Token `{}` cannot be inlined",
lexical_grammar.variables[symbol.index].name,
))
}
SymbolType::NonTerminal if symbol.index == 0 => {
return Err(anyhow!(
"Rule `{}` cannot be inlined because it is the first rule",
grammar.variables[symbol.index].name,
))
}
_ => {}
}
}
Ok(InlinedProductionMapBuilder {
productions: Vec::new(),
production_indices_by_step_id: HashMap::new(),
}
.build(grammar))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
grammars::{LexicalVariable, SyntaxVariable, VariableType},
rules::{Associativity, Precedence, Symbol},
};
#[test]
fn test_basic_inlining() {
let grammar = SyntaxGrammar {
variables_to_inline: vec![Symbol::non_terminal(1)],
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(12)),
ProductionStep::new(Symbol::terminal(13)),
],
},
Production {
dynamic_precedence: -2,
steps: vec![ProductionStep::new(Symbol::terminal(14))],
},
],
},
],
..Default::default()
};
let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap();
// Nothing to inline at step 0.
assert!(inline_map
.inlined_productions(&grammar.variables[0].productions[0], 0)
.is_none());
// Inlining variable 1 yields two productions.
assert_eq!(
inline_map
.inlined_productions(&grammar.variables[0].productions[0], 1)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(12)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
],
},
Production {
dynamic_precedence: -2,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(14)),
ProductionStep::new(Symbol::terminal(11)),
],
},
]
);
}
#[test]
fn test_nested_inlining() {
let grammar = SyntaxGrammar {
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)), // inlined
ProductionStep::new(Symbol::terminal(12)),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(13))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(3)), // inlined
ProductionStep::new(Symbol::terminal(14)),
],
},
],
},
SyntaxVariable {
name: "non-terminal-2".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(15))],
}],
},
SyntaxVariable {
name: "non-terminal-3".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(16))],
}],
},
],
variables_to_inline: vec![
Symbol::non_terminal(1),
Symbol::non_terminal(2),
Symbol::non_terminal(3),
],
..Default::default()
};
let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap();
let productions = inline_map
.inlined_productions(&grammar.variables[0].productions[0], 1)
.unwrap()
.collect::<Vec<_>>();
assert_eq!(
productions.iter().copied().cloned().collect::<Vec<_>>(),
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::terminal(12)),
],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(16)),
ProductionStep::new(Symbol::terminal(14)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::terminal(12)),
],
},
]
);
assert_eq!(
inline_map
.inlined_productions(productions[0], 3)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::terminal(15)),
ProductionStep::new(Symbol::terminal(12)),
],
},]
);
}
#[test]
fn test_inlining_with_precedence_and_alias() {
let grammar = SyntaxGrammar {
variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)],
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
// inlined
ProductionStep::new(Symbol::non_terminal(1))
.with_prec(Precedence::Integer(1), Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
// inlined
ProductionStep::new(Symbol::non_terminal(2))
.with_alias("outer_alias", true),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(11))
.with_prec(Precedence::Integer(2), None)
.with_alias("inner_alias", true),
ProductionStep::new(Symbol::terminal(12)),
],
}],
},
SyntaxVariable {
name: "non-terminal-2".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(13))],
}],
},
],
..Default::default()
};
let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap();
let productions = inline_map
.inlined_productions(&grammar.variables[0].productions[0], 0)
.unwrap()
.collect::<Vec<_>>();
assert_eq!(
productions.iter().copied().cloned().collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
// The first step in the inlined production retains its precedence
// and alias.
ProductionStep::new(Symbol::terminal(11))
.with_prec(Precedence::Integer(2), None)
.with_alias("inner_alias", true),
// The final step of the inlined production inherits the precedence of
// the inlined step.
ProductionStep::new(Symbol::terminal(12))
.with_prec(Precedence::Integer(1), Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true),
]
}],
);
assert_eq!(
inline_map
.inlined_productions(productions[0], 3)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(11))
.with_prec(Precedence::Integer(2), None)
.with_alias("inner_alias", true),
ProductionStep::new(Symbol::terminal(12))
.with_prec(Precedence::Integer(1), Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
// All steps of the inlined production inherit their alias from the
// inlined step.
ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true),
]
}],
);
}
#[test]
fn test_error_when_inlining_tokens() {
let lexical_grammar = LexicalGrammar {
variables: vec![LexicalVariable {
name: "something".to_string(),
kind: VariableType::Named,
implicit_precedence: 0,
start_state: 0,
}],
..Default::default()
};
let grammar = SyntaxGrammar {
variables_to_inline: vec![Symbol::terminal(0)],
variables: vec![SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(0))],
}],
}],
..Default::default()
};
if let Err(error) = process_inlines(&grammar, &lexical_grammar) {
assert_eq!(error.to_string(), "Token `something` cannot be inlined");
} else {
panic!("expected an error, but got none");
}
}
}

File diff suppressed because one or more lines are too long


@@ -1 +0,0 @@
{"Other":"C","Control":"Cc","cntrl":"Cc","Format":"Cf","Unassigned":"Cn","Private_Use":"Co","Surrogate":"Cs","Letter":"L","Cased_Letter":"LC","Lowercase_Letter":"Ll","Modifier_Letter":"Lm","Other_Letter":"Lo","Titlecase_Letter":"Lt","Uppercase_Letter":"Lu","Mark":"M","Combining_Mark":"M","Spacing_Mark":"Mc","Enclosing_Mark":"Me","Nonspacing_Mark":"Mn","Number":"N","Decimal_Number":"Nd","digit":"Nd","Letter_Number":"Nl","Other_Number":"No","Punctuation":"P","punct":"P","Connector_Punctuation":"Pc","Dash_Punctuation":"Pd","Close_Punctuation":"Pe","Final_Punctuation":"Pf","Initial_Punctuation":"Pi","Other_Punctuation":"Po","Open_Punctuation":"Ps","Symbol":"S","Currency_Symbol":"Sc","Modifier_Symbol":"Sk","Math_Symbol":"Sm","Other_Symbol":"So","Separator":"Z","Line_Separator":"Zl","Paragraph_Separator":"Zp","Space_Separator":"Zs"}

File diff suppressed because one or more lines are too long


@@ -1 +0,0 @@
{"cjkAccountingNumeric":"kAccountingNumeric","cjkOtherNumeric":"kOtherNumeric","cjkPrimaryNumeric":"kPrimaryNumeric","nv":"Numeric_Value","bmg":"Bidi_Mirroring_Glyph","bpb":"Bidi_Paired_Bracket","cf":"Case_Folding","cjkCompatibilityVariant":"kCompatibilityVariant","dm":"Decomposition_Mapping","EqUIdeo":"Equivalent_Unified_Ideograph","FC_NFKC":"FC_NFKC_Closure","lc":"Lowercase_Mapping","NFKC_CF":"NFKC_Casefold","NFKC_SCF":"NFKC_Simple_Casefold","scf":"Simple_Case_Folding","sfc":"Simple_Case_Folding","slc":"Simple_Lowercase_Mapping","stc":"Simple_Titlecase_Mapping","suc":"Simple_Uppercase_Mapping","tc":"Titlecase_Mapping","uc":"Uppercase_Mapping","cjkIICore":"kIICore","cjkIRG_GSource":"kIRG_GSource","cjkIRG_HSource":"kIRG_HSource","cjkIRG_JSource":"kIRG_JSource","cjkIRG_KPSource":"kIRG_KPSource","cjkIRG_KSource":"kIRG_KSource","cjkIRG_MSource":"kIRG_MSource","cjkIRG_SSource":"kIRG_SSource","cjkIRG_TSource":"kIRG_TSource","cjkIRG_UKSource":"kIRG_UKSource","cjkIRG_USource":"kIRG_USource","cjkIRG_VSource":"kIRG_VSource","cjkRSUnicode":"kRSUnicode","Unicode_Radical_Stroke":"kRSUnicode","URS":"kRSUnicode","isc":"ISO_Comment","JSN":"Jamo_Short_Name","na":"Name","na1":"Unicode_1_Name","Name_Alias":"Name_Alias","scx":"Script_Extensions","age":"Age","blk":"Block","sc":"Script","bc":"Bidi_Class","bpt":"Bidi_Paired_Bracket_Type","ccc":"Canonical_Combining_Class","dt":"Decomposition_Type","ea":"East_Asian_Width","gc":"General_Category","GCB":"Grapheme_Cluster_Break","hst":"Hangul_Syllable_Type","InCB":"Indic_Conjunct_Break","InPC":"Indic_Positional_Category","InSC":"Indic_Syllabic_Category","jg":"Joining_Group","jt":"Joining_Type","lb":"Line_Break","NFC_QC":"NFC_Quick_Check","NFD_QC":"NFD_Quick_Check","NFKC_QC":"NFKC_Quick_Check","NFKD_QC":"NFKD_Quick_Check","nt":"Numeric_Type","SB":"Sentence_Break","vo":"Vertical_Orientation","WB":"Word_Break","AHex":"ASCII_Hex_Digit","Alpha":"Alphabetic","Bidi_C":"Bidi_Control","Bidi_M":"Bidi_Mirrored","Cased":"Cased","CE":"Composition_Exclusion","CI":"Case_Ignorable","Comp_Ex":"Full_Composition_Exclusion","CWCF":"Changes_When_Casefolded","CWCM":"Changes_When_Casemapped","CWKCF":"Changes_When_NFKC_Casefolded","CWL":"Changes_When_Lowercased","CWT":"Changes_When_Titlecased","CWU":"Changes_When_Uppercased","Dash":"Dash","Dep":"Deprecated","DI":"Default_Ignorable_Code_Point","Dia":"Diacritic","EBase":"Emoji_Modifier_Base","EComp":"Emoji_Component","EMod":"Emoji_Modifier","Emoji":"Emoji","EPres":"Emoji_Presentation","Ext":"Extender","ExtPict":"Extended_Pictographic","Gr_Base":"Grapheme_Base","Gr_Ext":"Grapheme_Extend","Gr_Link":"Grapheme_Link","Hex":"Hex_Digit","Hyphen":"Hyphen","ID_Compat_Math_Continue":"ID_Compat_Math_Continue","ID_Compat_Math_Start":"ID_Compat_Math_Start","IDC":"ID_Continue","Ideo":"Ideographic","IDS":"ID_Start","IDSB":"IDS_Binary_Operator","IDST":"IDS_Trinary_Operator","IDSU":"IDS_Unary_Operator","Join_C":"Join_Control","LOE":"Logical_Order_Exception","Lower":"Lowercase","Math":"Math","NChar":"Noncharacter_Code_Point","OAlpha":"Other_Alphabetic","ODI":"Other_Default_Ignorable_Code_Point","OGr_Ext":"Other_Grapheme_Extend","OIDC":"Other_ID_Continue","OIDS":"Other_ID_Start","OLower":"Other_Lowercase","OMath":"Other_Math","OUpper":"Other_Uppercase","Pat_Syn":"Pattern_Syntax","Pat_WS":"Pattern_White_Space","PCM":"Prepended_Concatenation_Mark","QMark":"Quotation_Mark","Radical":"Radical","RI":"Regional_Indicator","SD":"Soft_Dotted","STerm":"Sentence_Terminal","Term":"Terminal_Punctuation","UIdeo":"Unified_Ideograph","Upper":"Uppercase","VS":"Variation_Selecto
r","WSpace":"White_Space","space":"White_Space","XIDC":"XID_Continue","XIDS":"XID_Start","XO_NFC":"Expands_On_NFC","XO_NFD":"Expands_On_NFD","XO_NFKC":"Expands_On_NFKC","XO_NFKD":"Expands_On_NFKD"}

File diff suppressed because it is too large


@@ -1,493 +0,0 @@
use std::{collections::HashMap, fmt};
use smallbitvec::SmallBitVec;
use super::grammars::VariableType;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum SymbolType {
External,
End,
EndOfNonTerminalExtra,
Terminal,
NonTerminal,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Associativity {
Left,
Right,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Alias {
pub value: String,
pub is_named: bool,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum Precedence {
#[default]
None,
Integer(i32),
Name(String),
}
pub type AliasMap = HashMap<Symbol, Alias>;
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct MetadataParams {
pub precedence: Precedence,
pub dynamic_precedence: i32,
pub associativity: Option<Associativity>,
pub is_token: bool,
pub is_string: bool,
pub is_active: bool,
pub is_main_token: bool,
pub alias: Option<Alias>,
pub field_name: Option<String>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Symbol {
pub kind: SymbolType,
pub index: usize,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Rule {
Blank,
String(String),
Pattern(String, String),
NamedSymbol(String),
Symbol(Symbol),
Choice(Vec<Rule>),
Metadata {
params: MetadataParams,
rule: Box<Rule>,
},
Repeat(Box<Rule>),
Seq(Vec<Rule>),
}
// Because tokens are represented as small (~400 max) unsigned integers,
// sets of tokens can be efficiently represented as bit vectors with each
// index corresponding to a token, and each value representing whether or not
// the token is present in the set.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TokenSet {
terminal_bits: SmallBitVec,
external_bits: SmallBitVec,
eof: bool,
end_of_nonterminal_extra: bool,
}
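// A small usage sketch (illustrative, not part of the original file):
//
//     let mut set = TokenSet::new();
//     set.insert(Symbol::terminal(3)); // grows `terminal_bits` to 4 bits and sets bit 3
//     assert!(set.contains(&Symbol::terminal(3)));
//     assert!(!set.contains(&Symbol::terminal(2))); // unset bit: absent, in O(1)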
impl Rule {
pub fn field(name: String, content: Self) -> Self {
add_metadata(content, move |params| {
params.field_name = Some(name);
})
}
pub fn alias(content: Self, value: String, is_named: bool) -> Self {
add_metadata(content, move |params| {
params.alias = Some(Alias { value, is_named });
})
}
pub fn token(content: Self) -> Self {
add_metadata(content, |params| {
params.is_token = true;
})
}
pub fn immediate_token(content: Self) -> Self {
add_metadata(content, |params| {
params.is_token = true;
params.is_main_token = true;
})
}
pub fn prec(value: Precedence, content: Self) -> Self {
add_metadata(content, |params| {
params.precedence = value;
})
}
pub fn prec_left(value: Precedence, content: Self) -> Self {
add_metadata(content, |params| {
params.associativity = Some(Associativity::Left);
params.precedence = value;
})
}
pub fn prec_right(value: Precedence, content: Self) -> Self {
add_metadata(content, |params| {
params.associativity = Some(Associativity::Right);
params.precedence = value;
})
}
pub fn prec_dynamic(value: i32, content: Self) -> Self {
add_metadata(content, |params| {
params.dynamic_precedence = value;
})
}
pub fn repeat(rule: Self) -> Self {
Self::Repeat(Box::new(rule))
}
pub fn choice(rules: Vec<Self>) -> Self {
let mut elements = Vec::with_capacity(rules.len());
for rule in rules {
choice_helper(&mut elements, rule);
}
Self::Choice(elements)
}
pub const fn seq(rules: Vec<Self>) -> Self {
Self::Seq(rules)
}
}
impl Alias {
#[must_use]
pub const fn kind(&self) -> VariableType {
if self.is_named {
VariableType::Named
} else {
VariableType::Anonymous
}
}
}
impl Precedence {
#[must_use]
pub const fn is_none(&self) -> bool {
matches!(self, Self::None)
}
}
#[cfg(test)]
impl Rule {
#[must_use]
pub const fn terminal(index: usize) -> Self {
Self::Symbol(Symbol::terminal(index))
}
#[must_use]
pub const fn non_terminal(index: usize) -> Self {
Self::Symbol(Symbol::non_terminal(index))
}
#[must_use]
pub const fn external(index: usize) -> Self {
Self::Symbol(Symbol::external(index))
}
#[must_use]
pub fn named(name: &'static str) -> Self {
Self::NamedSymbol(name.to_string())
}
#[must_use]
pub fn string(value: &'static str) -> Self {
Self::String(value.to_string())
}
#[must_use]
pub fn pattern(value: &'static str, flags: &'static str) -> Self {
Self::Pattern(value.to_string(), flags.to_string())
}
}
impl Symbol {
#[must_use]
pub fn is_terminal(&self) -> bool {
self.kind == SymbolType::Terminal
}
#[must_use]
pub fn is_non_terminal(&self) -> bool {
self.kind == SymbolType::NonTerminal
}
#[must_use]
pub fn is_external(&self) -> bool {
self.kind == SymbolType::External
}
#[must_use]
pub fn is_eof(&self) -> bool {
self.kind == SymbolType::End
}
#[must_use]
pub const fn non_terminal(index: usize) -> Self {
Self {
kind: SymbolType::NonTerminal,
index,
}
}
#[must_use]
pub const fn terminal(index: usize) -> Self {
Self {
kind: SymbolType::Terminal,
index,
}
}
#[must_use]
pub const fn external(index: usize) -> Self {
Self {
kind: SymbolType::External,
index,
}
}
#[must_use]
pub const fn end() -> Self {
Self {
kind: SymbolType::End,
index: 0,
}
}
#[must_use]
pub const fn end_of_nonterminal_extra() -> Self {
Self {
kind: SymbolType::EndOfNonTerminalExtra,
index: 0,
}
}
}
impl From<Symbol> for Rule {
#[must_use]
fn from(symbol: Symbol) -> Self {
Self::Symbol(symbol)
}
}
impl TokenSet {
#[must_use]
pub const fn new() -> Self {
Self {
terminal_bits: SmallBitVec::new(),
external_bits: SmallBitVec::new(),
eof: false,
end_of_nonterminal_extra: false,
}
}
pub fn iter(&self) -> impl Iterator<Item = Symbol> + '_ {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
.chain(
self.external_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::external(i))
} else {
None
}
}),
)
.chain(if self.eof { Some(Symbol::end()) } else { None })
.chain(if self.end_of_nonterminal_extra {
Some(Symbol::end_of_nonterminal_extra())
} else {
None
})
}
pub fn terminals(&self) -> impl Iterator<Item = Symbol> + '_ {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
}
pub fn contains(&self, symbol: &Symbol) -> bool {
match symbol.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
SymbolType::End => self.eof,
SymbolType::EndOfNonTerminalExtra => self.end_of_nonterminal_extra,
}
}
pub fn contains_terminal(&self, index: usize) -> bool {
self.terminal_bits.get(index).unwrap_or(false)
}
pub fn insert(&mut self, other: Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = true;
return;
}
SymbolType::EndOfNonTerminalExtra => {
self.end_of_nonterminal_extra = true;
return;
}
};
if other.index >= vec.len() {
vec.resize(other.index + 1, false);
}
vec.set(other.index, true);
}
pub fn remove(&mut self, other: &Symbol) -> bool {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
return if self.eof {
self.eof = false;
true
} else {
false
}
}
SymbolType::EndOfNonTerminalExtra => {
return if self.end_of_nonterminal_extra {
self.end_of_nonterminal_extra = false;
true
} else {
false
};
}
};
if other.index < vec.len() && vec[other.index] {
vec.set(other.index, false);
return true;
}
false
}
pub fn is_empty(&self) -> bool {
!self.eof
&& !self.end_of_nonterminal_extra
&& !self.terminal_bits.iter().any(|a| a)
&& !self.external_bits.iter().any(|a| a)
}
pub fn insert_all_terminals(&mut self, other: &Self) -> bool {
let mut result = false;
if other.terminal_bits.len() > self.terminal_bits.len() {
self.terminal_bits.resize(other.terminal_bits.len(), false);
}
for (i, element) in other.terminal_bits.iter().enumerate() {
if element {
result |= !self.terminal_bits[i];
self.terminal_bits.set(i, element);
}
}
result
}
fn insert_all_externals(&mut self, other: &Self) -> bool {
let mut result = false;
if other.external_bits.len() > self.external_bits.len() {
self.external_bits.resize(other.external_bits.len(), false);
}
for (i, element) in other.external_bits.iter().enumerate() {
if element {
result |= !self.external_bits[i];
self.external_bits.set(i, element);
}
}
result
}
pub fn insert_all(&mut self, other: &Self) -> bool {
let mut result = false;
if other.eof {
result |= !self.eof;
self.eof = true;
}
if other.end_of_nonterminal_extra {
result |= !self.end_of_nonterminal_extra;
self.end_of_nonterminal_extra = true;
}
result |= self.insert_all_terminals(other);
result |= self.insert_all_externals(other);
result
}
}
impl FromIterator<Symbol> for TokenSet {
fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
let mut result = Self::new();
for symbol in iter {
result.insert(symbol);
}
result
}
}
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
match input {
Rule::Metadata { rule, mut params } if !params.is_token => {
f(&mut params);
Rule::Metadata { rule, params }
}
_ => {
let mut params = MetadataParams::default();
f(&mut params);
Rule::Metadata {
rule: Box::new(input),
params,
}
}
}
}
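// For example (illustrative): `Rule::prec(p, Rule::field(name, rule))` merges the
// precedence into the `Rule::Metadata` wrapper created by `field`, while wrapping a
// `Rule::token(...)` result (whose params have `is_token` set) adds a fresh outer
// `Rule::Metadata` instead of mutating the token's own params.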
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
match rule {
Rule::Choice(elements) => {
for element in elements {
choice_helper(result, element);
}
}
_ => {
if !result.contains(&rule) {
result.push(rule);
}
}
}
}
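// For example: `Rule::choice(vec![Rule::choice(vec![a.clone(), b]), a])` is
// flattened and deduplicated by `choice_helper` into `Rule::Choice(vec![a, b])`.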
impl fmt::Display for Precedence {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Integer(i) => write!(f, "{i}"),
Self::Name(s) => write!(f, "'{s}'"),
Self::None => write!(f, "none"),
}
}
}


@@ -1,166 +0,0 @@
use std::collections::BTreeMap;
use super::{
nfa::CharacterSet,
rules::{Alias, Symbol, TokenSet},
};
pub type ProductionInfoId = usize;
pub type ParseStateId = usize;
pub type LexStateId = usize;
use std::hash::BuildHasherDefault;
use indexmap::IndexMap;
use rustc_hash::FxHasher;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ParseAction {
Accept,
Shift {
state: ParseStateId,
is_repetition: bool,
},
ShiftExtra,
Recover,
Reduce {
symbol: Symbol,
child_count: usize,
dynamic_precedence: i32,
production_id: ProductionInfoId,
},
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GotoAction {
Goto(ParseStateId),
ShiftExtra,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ParseTableEntry {
pub actions: Vec<ParseAction>,
pub reusable: bool,
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ParseState {
pub id: ParseStateId,
pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
pub lex_state_id: usize,
pub external_lex_state_id: usize,
pub core_id: usize,
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct FieldLocation {
pub index: usize,
pub inherited: bool,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub struct ProductionInfo {
pub alias_sequence: Vec<Option<Alias>>,
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
}
#[derive(Debug, PartialEq, Eq)]
pub struct ParseTable {
pub states: Vec<ParseState>,
pub symbols: Vec<Symbol>,
pub production_infos: Vec<ProductionInfo>,
pub max_aliased_production_length: usize,
pub external_lex_states: Vec<TokenSet>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct AdvanceAction {
pub state: LexStateId,
pub in_main_token: bool,
}
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct LexState {
pub accept_action: Option<Symbol>,
pub eof_action: Option<AdvanceAction>,
pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
}
#[derive(Debug, PartialEq, Eq, Default)]
pub struct LexTable {
pub states: Vec<LexState>,
}
impl ParseTableEntry {
#[must_use]
pub const fn new() -> Self {
Self {
reusable: true,
actions: Vec::new(),
}
}
}
impl ParseState {
pub fn is_end_of_non_terminal_extra(&self) -> bool {
self.terminal_entries
.contains_key(&Symbol::end_of_nonterminal_extra())
}
pub fn referenced_states(&self) -> impl Iterator<Item = ParseStateId> + '_ {
self.terminal_entries
.iter()
.flat_map(|(_, entry)| {
entry.actions.iter().filter_map(|action| match action {
ParseAction::Shift { state, .. } => Some(*state),
_ => None,
})
})
.chain(self.nonterminal_entries.iter().filter_map(|(_, action)| {
if let GotoAction::Goto(state) = action {
Some(*state)
} else {
None
}
}))
}
pub fn update_referenced_states<F>(&mut self, mut f: F)
where
F: FnMut(usize, &Self) -> usize,
{
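// Collect all updates first and apply them below, since the entry maps
// cannot be modified while they are being iterated.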
let mut updates = Vec::new();
for (symbol, entry) in &self.terminal_entries {
for (i, action) in entry.actions.iter().enumerate() {
if let ParseAction::Shift { state, .. } = action {
let result = f(*state, self);
if result != *state {
updates.push((*symbol, i, result));
}
}
}
}
for (symbol, action) in &self.nonterminal_entries {
if let GotoAction::Goto(other_state) = action {
let result = f(*other_state, self);
if result != *other_state {
updates.push((*symbol, 0, result));
}
}
}
for (symbol, action_index, new_state) in updates {
if symbol.is_non_terminal() {
self.nonterminal_entries
.insert(symbol, GotoAction::Goto(new_state));
} else {
let entry = self.terminal_entries.get_mut(&symbol).unwrap();
if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {
entry.actions[action_index] = ParseAction::Shift {
state: new_state,
is_repetition,
};
}
}
}
}
}


@@ -11,8 +11,7 @@ use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
use indoc::indoc;
use serde::Deserialize;
use serde_json::{json, Map, Value};
use crate::generate::write_file;
use tree_sitter_generate::write_file;
const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION";


@@ -1,7 +1,6 @@
#![doc = include_str!("../README.md")]
pub mod fuzz;
pub mod generate;
pub mod highlight;
pub mod init;
pub mod logger;


@@ -16,7 +16,7 @@ use tree_sitter_cli::{
fuzz_language_corpus, FuzzOptions, EDIT_COUNT, ITERATION_COUNT, LOG_ENABLED,
LOG_GRAPH_ENABLED, START_SEED,
},
generate, highlight,
highlight,
init::{generate_grammar_files, lookup_package_json_for_path},
logger,
parse::{self, ParseFileOptions, ParseOutput},
@@ -461,7 +461,7 @@ impl Generate {
version.parse().expect("invalid abi version flag")
}
});
generate::generate_parser_in_directory(
tree_sitter_generate::generate_parser_in_directory(
current_dir,
self.grammar_path.as_deref(),
abi_version,


@@ -14,7 +14,6 @@ use crate::{
EDIT_COUNT, EXAMPLE_EXCLUDE, EXAMPLE_INCLUDE, ITERATION_COUNT, LANGUAGE_FILTER,
LOG_GRAPH_ENABLED, START_SEED,
},
generate,
parse::perform_edit,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
tests::{
@@ -353,8 +352,8 @@ fn test_feature_corpus_files() {
grammar_path = test_path.join("grammar.json");
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = tree_sitter_generate::generate_parser_for_grammar(&grammar_json);
if error_message_path.exists() {
if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {


@@ -6,12 +6,11 @@ use std::{
use anyhow::Context;
use lazy_static::lazy_static;
use tree_sitter::Language;
use tree_sitter_generate::{ALLOC_HEADER, ARRAY_HEADER};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::{CompileConfig, Loader};
use tree_sitter_tags::TagsConfiguration;
use crate::generate::{ALLOC_HEADER, ARRAY_HEADER};
include!("./dirs.rs");
lazy_static! {


@@ -1,14 +1,12 @@
use tree_sitter::{Node, Parser, Point, Tree};
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use super::{
get_random_edit,
helpers::fixtures::{fixtures_dir, get_language, get_test_language},
Rand,
};
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
parse::perform_edit,
};
use crate::parse::perform_edit;
const JSON_EXAMPLE: &str = r#"


@@ -7,11 +7,9 @@ use std::{
};
use tree_sitter::Parser;
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
tests::helpers::fixtures::{fixtures_dir, get_test_language},
};
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};
// The `sanitizing` cfg is required to avoid running these tests under specific
// sanitizers, because they don't work well with subprocesses _(an assumption)_.


@@ -4,6 +4,7 @@ use std::{
};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use tree_sitter_proc_macro::retry;
use super::helpers::{
@@ -13,7 +14,6 @@ use super::helpers::{
};
use crate::{
fuzz::edits::Edit,
generate::{generate_parser_for_grammar, load_grammar_file},
parse::perform_edit,
tests::{helpers::fixtures::fixtures_dir, invert_edit},
};


@@ -7,6 +7,7 @@ use tree_sitter::{
CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCursor, QueryError,
QueryErrorKind, QueryPredicate, QueryPredicateArg, QueryProperty,
};
use tree_sitter_generate::generate_parser_for_grammar;
use unindent::Unindent;
use super::helpers::{
@@ -14,12 +15,9 @@ use super::helpers::{
fixtures::{get_language, get_test_language},
query_helpers::{assert_query_matches, Match, Pattern},
};
use crate::{
generate::generate_parser_for_grammar,
tests::{
helpers::query_helpers::{collect_captures, collect_matches},
ITERATION_COUNT,
},
use crate::tests::{
helpers::query_helpers::{collect_captures, collect_matches},
ITERATION_COUNT,
};
lazy_static! {


@@ -5,11 +5,10 @@ use std::{
use anyhow::{anyhow, Context, Result};
use tree_sitter::wasm_stdlib_symbols;
use tree_sitter_generate::parse_grammar::GrammarJSON;
use tree_sitter_loader::Loader;
use wasmparser::Parser;
use super::generate::parse_grammar::GrammarJSON;
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
let grammar_name = get_grammar_name(language_dir)
.with_context(|| "Failed to get wasm filename")