Move code into cli directory

Max Brunsfeld 2019-01-04 16:50:52 -08:00
parent b8dd5d2640
commit 5b0e12ea33
29 changed files with 32 additions and 26 deletions

cli/src/build_tables/build_lex_table.rs Normal file
@@ -0,0 +1,278 @@
use super::item::LookaheadSet;
use super::token_conflicts::TokenConflictMap;
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::rules::Symbol;
use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable};
use std::collections::hash_map::Entry;
use std::collections::{BTreeMap, HashMap, VecDeque};
pub(crate) fn build_lex_table(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
keywords: &LookaheadSet,
minimize: bool,
) -> (LexTable, LexTable) {
let keyword_lex_table;
if syntax_grammar.word_token.is_some() {
let mut builder = LexTableBuilder::new(lexical_grammar);
builder.add_state_for_tokens(keywords);
keyword_lex_table = builder.table;
} else {
keyword_lex_table = LexTable::default();
}
let mut builder = LexTableBuilder::new(lexical_grammar);
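// For each parse state, compute the set of tokens that can appear as the next
// lookahead. Keywords are replaced with the word token here: a keyword is lexed
// by first matching the word token, then re-checking the result against the
// separate keyword lex table built above.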
for state in parse_table.states.iter_mut() {
let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| {
if token.is_terminal() {
if keywords.contains(&token) {
syntax_grammar.word_token
} else {
Some(*token)
}
} else if token.is_eof() {
Some(*token)
} else {
None
}
}));
state.lex_state_id = builder.add_state_for_tokens(&tokens);
}
let mut table = builder.table;
if minimize {
minimize_lex_table(&mut table, parse_table);
}
(table, keyword_lex_table)
}
struct QueueEntry {
state_id: usize,
nfa_states: Vec<u32>,
eof_valid: bool,
}
struct LexTableBuilder<'a> {
lexical_grammar: &'a LexicalGrammar,
cursor: NfaCursor<'a>,
table: LexTable,
state_queue: VecDeque<QueueEntry>,
state_ids_by_nfa_state_set: HashMap<(Vec<u32>, bool), usize>,
}
impl<'a> LexTableBuilder<'a> {
fn new(lexical_grammar: &'a LexicalGrammar) -> Self {
Self {
lexical_grammar,
cursor: NfaCursor::new(&lexical_grammar.nfa, vec![]),
table: LexTable::default(),
state_queue: VecDeque::new(),
state_ids_by_nfa_state_set: HashMap::new(),
}
}
fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize {
let mut eof_valid = false;
let nfa_states = tokens
.iter()
.filter_map(|token| {
if token.is_terminal() {
Some(self.lexical_grammar.variables[token.index].start_state)
} else {
eof_valid = true;
None
}
})
.collect();
let (state_id, is_new) = self.add_state(nfa_states, eof_valid);
if is_new {
info!(
"entry point state: {}, tokens: {:?}",
state_id,
tokens
.iter()
.map(|t| &self.lexical_grammar.variables[t.index].name)
.collect::<Vec<_>>()
);
}
while let Some(QueueEntry {
state_id,
nfa_states,
eof_valid,
}) = self.state_queue.pop_front()
{
self.populate_state(state_id, nfa_states, eof_valid);
}
state_id
}
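// Add a lex state for the given set of NFA states, reusing an existing state if
// one was already created for the same (NFA state set, eof_valid) pair. Returns
// the state id and whether or not the state is new.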
fn add_state(&mut self, nfa_states: Vec<u32>, eof_valid: bool) -> (usize, bool) {
self.cursor.reset(nfa_states);
match self
.state_ids_by_nfa_state_set
.entry((self.cursor.state_ids.clone(), eof_valid))
{
Entry::Occupied(o) => (*o.get(), false),
Entry::Vacant(v) => {
let state_id = self.table.states.len();
self.table.states.push(LexState::default());
self.state_queue.push_back(QueueEntry {
state_id,
nfa_states: v.key().0.clone(),
eof_valid,
});
v.insert(state_id);
(state_id, true)
}
}
}
fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>, eof_valid: bool) {
self.cursor.force_reset(nfa_states);
// The EOF state is represented as an empty list of NFA states.
let mut completion = None;
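// Find the single completed token that should be preferred, based on precedence
// and the grammar's token-preference rules.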
for (id, prec) in self.cursor.completions() {
if let Some((prev_id, prev_precedence)) = completion {
if TokenConflictMap::prefer_token(
self.lexical_grammar,
(prev_precedence, prev_id),
(prec, id),
) {
continue;
}
}
completion = Some((id, prec));
}
info!(
"lex state: {}, completion: {:?}",
state_id,
completion.map(|(id, prec)| (&self.lexical_grammar.variables[id].name, prec))
);
let transitions = self.cursor.transitions();
info!("lex state: {}, transitions: {:?}", state_id, transitions);
// If EOF is a valid lookahead token, add a transition predicated on the null
// character that leads to the empty set of NFA states.
if eof_valid {
let (next_state_id, _) = self.add_state(Vec::new(), false);
info!("lex state: {}, successor: EOF", state_id);
self.table.states[state_id].advance_actions.push((
CharacterSet::empty().add_char('\0'),
AdvanceAction {
state: Some(next_state_id),
in_main_token: true,
},
));
}
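// Add an advance action for each transition, skipping any transition that
// cannot take precedence over a token that has already been completed.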
for NfaTransition {
characters,
precedence,
states,
is_separator,
} in transitions
{
if let Some((_, completed_precedence)) = completion {
if precedence < completed_precedence
|| (precedence == completed_precedence && is_separator)
{
continue;
}
}
let (next_state_id, _) = self.add_state(states, eof_valid && is_separator);
let next_state = if next_state_id == state_id {
None
} else {
Some(next_state_id)
};
self.table.states[state_id].advance_actions.push((
characters,
AdvanceAction {
state: next_state,
in_main_token: !is_separator,
},
));
}
if let Some((complete_id, _)) = completion {
self.table.states[state_id].accept_action = Some(Symbol::terminal(complete_id));
} else if self.cursor.state_ids.is_empty() {
self.table.states[state_id].accept_action = Some(Symbol::end());
}
}
}
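// Repeatedly merge lex states that are exactly identical until a fixed point is
// reached, then renumber the surviving states and rewrite all references to the
// removed states in both the lex table and the parse table.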
fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
let mut state_replacements = BTreeMap::new();
let mut done = false;
while !done {
done = true;
for (i, state_i) in table.states.iter().enumerate() {
if state_replacements.contains_key(&i) {
continue;
}
for (j, state_j) in table.states.iter().enumerate() {
if j == i {
break;
}
if state_replacements.contains_key(&j) {
continue;
}
if state_i == state_j {
info!("replace state {} with state {}", i, j);
state_replacements.insert(i, j);
done = false;
break;
}
}
}
for state in table.states.iter_mut() {
for (_, advance_action) in state.advance_actions.iter_mut() {
advance_action.state = advance_action
.state
.map(|s| state_replacements.get(&s).cloned().unwrap_or(s))
}
}
}
let final_state_replacements = (0..table.states.len())
.into_iter()
.map(|state_id| {
let replacement = state_replacements
.get(&state_id)
.cloned()
.unwrap_or(state_id);
let prior_removed = state_replacements
.iter()
.take_while(|i| *i.0 < replacement)
.count();
replacement - prior_removed
})
.collect::<Vec<_>>();
for state in parse_table.states.iter_mut() {
state.lex_state_id = final_state_replacements[state.lex_state_id];
}
for state in table.states.iter_mut() {
for (_, advance_action) in state.advance_actions.iter_mut() {
advance_action.state = advance_action.state.map(|s| final_state_replacements[s]);
}
}
let mut i = 0;
table.states.retain(|_| {
let result = !state_replacements.contains_key(&i);
i += 1;
result
});
}

cli/src/build_tables/build_parse_table.rs Normal file
@@ -0,0 +1,735 @@
use super::item::{LookaheadSet, ParseItem, ParseItemSet};
use super::item_set_builder::ParseItemSetBuilder;
use crate::error::{Error, Result};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::rules::{Alias, Associativity, Symbol, SymbolType};
use crate::tables::{
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
};
use core::ops::Range;
use hashbrown::hash_map::Entry;
use hashbrown::{HashMap, HashSet};
use std::collections::hash_map::DefaultHasher;
use std::collections::VecDeque;
use std::fmt::Write;
use std::hash::Hasher;
#[derive(Clone)]
struct AuxiliarySymbolInfo {
auxiliary_symbol: Symbol,
parent_symbols: Vec<Symbol>,
}
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
struct ParseStateQueueEntry {
preceding_symbols: SymbolSequence,
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
state_id: ParseStateId,
}
struct ParseTableBuilder<'a> {
item_set_builder: ParseItemSetBuilder<'a>,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
parse_table: ParseTable,
following_tokens: Vec<LookaheadSet>,
state_ids_to_log: Vec<ParseStateId>,
}
impl<'a> ParseTableBuilder<'a> {
fn build(mut self) -> Result<(ParseTable, Vec<LookaheadSet>)> {
// Ensure that the empty alias sequence has index 0.
self.parse_table.alias_sequences.push(Vec::new());
// Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
// Add the starting state at index 1.
self.add_parse_state(
&Vec::new(),
&Vec::new(),
ParseItemSet::with(
[(
ParseItem::start(),
LookaheadSet::with([Symbol::end()].iter().cloned()),
)]
.iter()
.cloned(),
),
);
while let Some(entry) = self.parse_state_queue.pop_front() {
let item_set = self
.item_set_builder
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
if self.state_ids_to_log.contains(&entry.state_id) {
eprintln!(
"state: {}\n\ninitial item set:\n\n{}closed item set:\n\n{}",
entry.state_id,
super::item::ParseItemSetDisplay(
&self.item_sets_by_state_id[entry.state_id],
self.syntax_grammar,
self.lexical_grammar,
),
super::item::ParseItemSetDisplay(
&item_set,
self.syntax_grammar,
self.lexical_grammar,
)
);
}
self.add_actions(
entry.preceding_symbols,
entry.preceding_auxiliary_symbols,
entry.state_id,
item_set,
)?;
}
self.populate_used_symbols();
self.remove_precedences();
Ok((self.parse_table, self.following_tokens))
}
fn add_parse_state(
&mut self,
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
item_set: ParseItemSet<'a>,
) -> ParseStateId {
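// Track which tokens can follow one another: each token in the LAST set of the
// second-to-last preceding symbol can be followed by each token in the FIRST
// set of the last one. This information feeds the token conflict analysis.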
if preceding_symbols.len() > 1 {
let left_tokens = self
.item_set_builder
.last_set(&preceding_symbols[preceding_symbols.len() - 2]);
let right_tokens = self
.item_set_builder
.first_set(&preceding_symbols[preceding_symbols.len() - 1]);
for left_token in left_tokens.iter() {
if left_token.is_terminal() {
self.following_tokens[left_token.index].insert_all(right_tokens);
}
}
}
let mut hasher = DefaultHasher::new();
item_set.hash_unfinished_items(&mut hasher);
let unfinished_item_signature = hasher.finish();
match self.state_ids_by_item_set.entry(item_set) {
Entry::Occupied(o) => *o.get(),
Entry::Vacant(v) => {
let state_id = self.parse_table.states.len();
self.item_sets_by_state_id.push(v.key().clone());
self.parse_table.states.push(ParseState {
lex_state_id: 0,
terminal_entries: HashMap::new(),
nonterminal_entries: HashMap::new(),
unfinished_item_signature,
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
state_id,
preceding_symbols: preceding_symbols.clone(),
preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
});
v.insert(state_id);
state_id
}
}
}
fn add_actions(
&mut self,
mut preceding_symbols: SymbolSequence,
mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
state_id: ParseStateId,
item_set: ParseItemSet<'a>,
) -> Result<()> {
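// Group the items in this set by their next symbol, accumulating the core item
// set for each successor state, and add a REDUCE or ACCEPT action for each item
// whose production is already complete.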
let mut terminal_successors = HashMap::new();
let mut non_terminal_successors = HashMap::new();
let mut lookaheads_with_conflicts = HashSet::new();
for (item, lookaheads) in &item_set.entries {
if let Some(next_symbol) = item.symbol() {
let successor = item.successor();
if next_symbol.is_non_terminal() {
// Keep track of where auxiliary non-terminals (repeat symbols) are
// used within visible symbols. This information may be needed later
// for conflict resolution.
if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
preceding_auxiliary_symbols
.push(self.get_auxiliary_node_info(&item_set, next_symbol));
}
non_terminal_successors
.entry(next_symbol)
.or_insert_with(|| ParseItemSet::default())
.entries
.entry(successor)
.or_insert_with(|| LookaheadSet::new())
.insert_all(lookaheads);
} else {
terminal_successors
.entry(next_symbol)
.or_insert_with(|| ParseItemSet::default())
.entries
.entry(successor)
.or_insert_with(|| LookaheadSet::new())
.insert_all(lookaheads);
}
} else {
let action = if item.is_augmented() {
ParseAction::Accept
} else {
ParseAction::Reduce {
symbol: Symbol::non_terminal(item.variable_index as usize),
child_count: item.step_index as usize,
precedence: item.precedence(),
associativity: item.associativity(),
dynamic_precedence: item.production.dynamic_precedence,
alias_sequence_id: self.get_alias_sequence_id(item),
}
};
for lookahead in lookaheads.iter() {
let entry = self.parse_table.states[state_id]
.terminal_entries
.entry(lookahead);
let entry = entry.or_insert_with(|| ParseTableEntry::new());
if entry.actions.is_empty() {
entry.actions.push(action);
} else if action.precedence() > entry.actions[0].precedence() {
entry.actions.clear();
entry.actions.push(action);
lookaheads_with_conflicts.remove(&lookahead);
} else if action.precedence() == entry.actions[0].precedence() {
entry.actions.push(action);
lookaheads_with_conflicts.insert(lookahead);
}
}
}
}
for (symbol, next_item_set) in terminal_successors {
preceding_symbols.push(symbol);
let next_state_id = self.add_parse_state(
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
);
preceding_symbols.pop();
let entry = self.parse_table.states[state_id]
.terminal_entries
.entry(symbol);
if let Entry::Occupied(e) = &entry {
if !e.get().actions.is_empty() {
lookaheads_with_conflicts.insert(symbol);
}
}
entry
.or_insert_with(|| ParseTableEntry::new())
.actions
.push(ParseAction::Shift {
state: next_state_id,
is_repetition: false,
});
}
for (symbol, next_item_set) in non_terminal_successors {
preceding_symbols.push(symbol);
let next_state_id = self.add_parse_state(
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
);
preceding_symbols.pop();
self.parse_table.states[state_id]
.nonterminal_entries
.insert(symbol, next_state_id);
}
for symbol in lookaheads_with_conflicts {
self.handle_conflict(
&item_set,
state_id,
&preceding_symbols,
&preceding_auxiliary_symbols,
symbol,
)?;
}
let state = &mut self.parse_table.states[state_id];
for extra_token in &self.syntax_grammar.extra_tokens {
state
.terminal_entries
.entry(*extra_token)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::ShiftExtra],
});
}
Ok(())
}
fn handle_conflict(
&mut self,
item_set: &ParseItemSet,
state_id: ParseStateId,
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
conflicting_lookahead: Symbol,
) -> Result<()> {
let entry = self.parse_table.states[state_id]
.terminal_entries
.get_mut(&conflicting_lookahead)
.unwrap();
// Determine which items in the set conflict with each other, and the
// precedences associated with SHIFT vs REDUCE actions. There won't
// be multiple REDUCE actions with different precedences; that is
// sorted out ahead of time in `add_actions`. But there can still be
// REDUCE-REDUCE conflicts where all actions have the *same*
// precedence, and there can still be SHIFT/REDUCE conflicts.
let reduce_precedence = entry.actions[0].precedence();
let mut considered_associativity = false;
let mut shift_precedence: Option<Range<i32>> = None;
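// The shift precedence is a range because several different unfinished items
// can contribute to the same SHIFT action, each with its own precedence.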
let mut conflicting_items = HashSet::new();
for (item, lookaheads) in &item_set.entries {
if let Some(step) = item.step() {
if item.step_index > 0 {
if self
.item_set_builder
.first_set(&step.symbol)
.contains(&conflicting_lookahead)
{
conflicting_items.insert(item);
let precedence = item.precedence();
if let Some(range) = &mut shift_precedence {
if precedence < range.start {
range.start = precedence;
} else if precedence > range.end {
range.end = precedence;
}
} else {
shift_precedence = Some(precedence..precedence);
}
}
}
} else if lookaheads.contains(&conflicting_lookahead) {
conflicting_items.insert(item);
}
}
if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
let shift_precedence = shift_precedence.unwrap_or(0..0);
// If all of the items in the conflict have the same parent symbol,
// and that parent symbol is auxiliary, then this is just the intentional
// ambiguity associated with a repeat rule. Resolve that class of ambiguity
// by leaving it in the parse table, but marking the SHIFT action with
// an `is_repetition` flag.
let conflicting_variable_index =
conflicting_items.iter().next().unwrap().variable_index;
if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() {
if conflicting_items
.iter()
.all(|item| item.variable_index == conflicting_variable_index)
{
*is_repetition = true;
return Ok(());
}
}
// If the SHIFT action has higher precedence, remove all the REDUCE actions.
if shift_precedence.start > reduce_precedence
|| (shift_precedence.start == reduce_precedence
&& shift_precedence.end > reduce_precedence)
{
entry.actions.drain(0..entry.actions.len() - 1);
}
// If the REDUCE actions have higher precedence, remove the SHIFT action.
else if shift_precedence.end < reduce_precedence
|| (shift_precedence.end == reduce_precedence
&& shift_precedence.start < reduce_precedence)
{
entry.actions.pop();
conflicting_items.retain(|item| item.is_done());
}
// If the SHIFT and REDUCE actions have the same precedence, consider
// the REDUCE actions' associativity.
else if shift_precedence == (reduce_precedence..reduce_precedence) {
considered_associativity = true;
let mut has_left = false;
let mut has_right = false;
let mut has_non = false;
for action in &entry.actions {
if let ParseAction::Reduce { associativity, .. } = action {
match associativity {
Some(Associativity::Left) => has_left = true,
Some(Associativity::Right) => has_right = true,
None => has_non = true,
}
}
}
// If all reduce actions are left associative, remove the SHIFT action.
// If all reduce actions are right associative, remove the REDUCE actions.
match (has_left, has_non, has_right) {
(true, false, false) => {
entry.actions.pop();
conflicting_items.retain(|item| item.is_done());
}
(false, false, true) => {
entry.actions.drain(0..entry.actions.len() - 1);
}
_ => {}
}
}
}
// If all of the actions but one have been eliminated, then there's no problem.
let entry = self.parse_table.states[state_id]
.terminal_entries
.get_mut(&conflicting_lookahead)
.unwrap();
if entry.actions.len() == 1 {
return Ok(());
}
// Determine the set of parent symbols involved in this conflict.
let mut actual_conflict = Vec::new();
for item in &conflicting_items {
let symbol = Symbol::non_terminal(item.variable_index as usize);
if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
actual_conflict.extend(
preceding_auxiliary_symbols
.iter()
.rev()
.find_map(|info| {
if info.auxiliary_symbol == symbol {
Some(&info.parent_symbols)
} else {
None
}
})
.unwrap()
.iter(),
);
} else {
actual_conflict.push(symbol);
}
}
actual_conflict.sort_unstable();
actual_conflict.dedup();
// If this set of symbols has been whitelisted, then there's no error.
if self
.syntax_grammar
.expected_conflicts
.contains(&actual_conflict)
{
return Ok(());
}
let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
for symbol in preceding_symbols {
write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
}
write!(
&mut msg,
" • {} …\n\n",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
for (i, item) in conflicting_items.iter().enumerate() {
write!(&mut msg, " {}:", i + 1).unwrap();
for preceding_symbol in preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index as usize)
{
write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap();
}
write!(
&mut msg,
" ({}",
&self.syntax_grammar.variables[item.variable_index as usize].name
)
.unwrap();
for (j, step) in item.production.steps.iter().enumerate() {
if j as u32 == item.step_index {
write!(&mut msg, "").unwrap();
}
write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap();
}
write!(&mut msg, ")").unwrap();
if item.is_done() {
write!(
&mut msg,
" • {}",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
}
let precedence = item.precedence();
let associativity = item.associativity();
if precedence != 0 || associativity.is_some() {
write!(
&mut msg,
"(precedence: {}, associativity: {:?})",
precedence, associativity
)
.unwrap();
}
write!(&mut msg, "\n").unwrap();
}
let mut resolution_count = 0;
write!(&mut msg, "\nPossible resolutions:\n\n").unwrap();
let shift_items = conflicting_items
.iter()
.filter(|i| !i.is_done())
.cloned()
.collect::<Vec<_>>();
if shift_items.len() > 0 {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in",
resolution_count
)
.unwrap();
for (i, item) in shift_items.iter().enumerate() {
if i > 0 {
write!(&mut msg, " and").unwrap();
}
write!(
&mut msg,
" `{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
write!(&mut msg, " than in the other rules.\n").unwrap();
}
if considered_associativity {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a left or right associativity in ",
resolution_count
)
.unwrap();
for (i, item) in conflicting_items.iter().filter(|i| i.is_done()).enumerate() {
if i > 0 {
write!(&mut msg, " and ").unwrap();
}
write!(
&mut msg,
"{}",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
write!(&mut msg, "\n").unwrap();
}
for item in &conflicting_items {
if item.is_done() {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
resolution_count,
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
}
resolution_count += 1;
write!(
&mut msg,
" {}: Add a conflict for these rules: ",
resolution_count
)
.unwrap();
for (i, symbol) in actual_conflict.iter().enumerate() {
if i > 0 {
write!(&mut msg, ", ").unwrap();
}
write!(&mut msg, "{}", self.symbol_name(symbol)).unwrap();
}
write!(&mut msg, "\n").unwrap();
Err(Error(msg))
}
fn get_auxiliary_node_info(
&self,
item_set: &ParseItemSet,
symbol: Symbol,
) -> AuxiliarySymbolInfo {
let parent_symbols = item_set
.entries
.keys()
.filter_map(|item| {
let variable_index = item.variable_index as usize;
if item.symbol() == Some(symbol)
&& !self.syntax_grammar.variables[variable_index].is_auxiliary()
{
Some(Symbol::non_terminal(variable_index))
} else {
None
}
})
.collect();
AuxiliarySymbolInfo {
auxiliary_symbol: symbol,
parent_symbols,
}
}
fn populate_used_symbols(&mut self) {
let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()];
let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()];
let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()];
for state in &self.parse_table.states {
for symbol in state.terminal_entries.keys() {
match symbol.kind {
SymbolType::Terminal => terminal_usages[symbol.index] = true,
SymbolType::External => external_usages[symbol.index] = true,
_ => {}
}
}
for symbol in state.nonterminal_entries.keys() {
non_terminal_usages[symbol.index] = true;
}
}
for (i, value) in external_usages.into_iter().enumerate() {
if value {
self.parse_table.symbols.push(Symbol::external(i));
}
}
self.parse_table.symbols.push(Symbol::end());
for (i, value) in terminal_usages.into_iter().enumerate() {
if value {
self.parse_table.symbols.push(Symbol::terminal(i));
}
}
for (i, value) in non_terminal_usages.into_iter().enumerate() {
if value {
self.parse_table.symbols.push(Symbol::non_terminal(i));
}
}
}
fn remove_precedences(&mut self) {
for state in self.parse_table.states.iter_mut() {
for (_, entry) in state.terminal_entries.iter_mut() {
for action in entry.actions.iter_mut() {
match action {
ParseAction::Reduce {
precedence,
associativity,
..
} => {
*precedence = 0;
*associativity = None;
}
_ => {}
}
}
}
}
}
fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
let mut alias_sequence: Vec<Option<Alias>> = item
.production
.steps
.iter()
.map(|s| s.alias.clone())
.collect();
while alias_sequence.last() == Some(&None) {
alias_sequence.pop();
}
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
self.parse_table.max_aliased_production_length = item.production.steps.len()
}
if let Some(index) = self
.parse_table
.alias_sequences
.iter()
.position(|seq| *seq == alias_sequence)
{
index
} else {
self.parse_table.alias_sequences.push(alias_sequence);
self.parse_table.alias_sequences.len() - 1
}
}
fn symbol_name(&self, symbol: &Symbol) -> String {
match symbol.kind {
SymbolType::End => "EOF".to_string(),
SymbolType::External => self.syntax_grammar.external_tokens[symbol.index]
.name
.clone(),
SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(),
SymbolType::Terminal => {
let variable = &self.lexical_grammar.variables[symbol.index];
if variable.kind == VariableType::Named {
variable.name.clone()
} else {
format!("\"{}\"", &variable.name)
}
}
}
}
}
pub(crate) fn build_parse_table(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
inlines: &InlinedProductionMap,
state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, Vec<LookaheadSet>)> {
ParseTableBuilder {
syntax_grammar,
lexical_grammar,
state_ids_to_log,
item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines),
state_ids_by_item_set: HashMap::new(),
item_sets_by_state_id: Vec::new(),
parse_state_queue: VecDeque::new(),
parse_table: ParseTable {
states: Vec::new(),
symbols: Vec::new(),
alias_sequences: Vec::new(),
max_aliased_production_length: 0,
},
following_tokens: vec![LookaheadSet::new(); lexical_grammar.variables.len()],
}
.build()
}

cli/src/build_tables/coincident_tokens.rs Normal file
@@ -0,0 +1,71 @@
use crate::grammars::LexicalGrammar;
use crate::rules::Symbol;
use crate::tables::{ParseStateId, ParseTable};
use std::fmt;
pub(crate) struct CoincidentTokenIndex<'a> {
entries: Vec<Vec<ParseStateId>>,
grammar: &'a LexicalGrammar,
n: usize,
}
impl<'a> CoincidentTokenIndex<'a> {
pub fn new(table: &ParseTable, lexical_grammar: &'a LexicalGrammar) -> Self {
let n = lexical_grammar.variables.len();
let mut result = Self {
n,
grammar: lexical_grammar,
entries: vec![Vec::new(); n * n],
};
for (i, state) in table.states.iter().enumerate() {
for symbol in state.terminal_entries.keys() {
for other_symbol in state.terminal_entries.keys() {
let index = result.index(symbol.index, other_symbol.index);
if result.entries[index].last().cloned() != Some(i) {
result.entries[index].push(i);
}
}
}
}
result
}
pub fn states_with(&self, a: Symbol, b: Symbol) -> &Vec<ParseStateId> {
&self.entries[self.index(a.index, b.index)]
}
pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
!self.entries[self.index(a.index, b.index)].is_empty()
}
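// Index token pairs symmetrically, so that (a, b) and (b, a) share an entry.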
fn index(&self, a: usize, b: usize) -> usize {
if a < b {
a * self.n + b
} else {
b * self.n + a
}
}
}
impl<'a> fmt::Debug for CoincidentTokenIndex<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "CoincidentTokenIndex {{\n")?;
write!(f, " entries: {{\n")?;
for i in 0..self.n {
write!(f, " {}: {{\n", self.grammar.variables[i].name)?;
for j in 0..self.n {
write!(
f,
" {}: {:?},\n",
self.grammar.variables[j].name,
self.entries[self.index(i, j)].len()
)?;
}
write!(f, " }},\n")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}

cli/src/build_tables/item.rs Normal file
@@ -0,0 +1,446 @@
use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::rules::Associativity;
use crate::rules::{Symbol, SymbolType};
use smallbitvec::SmallBitVec;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::u32;
lazy_static! {
static ref START_PRODUCTION: Production = Production {
dynamic_precedence: 0,
steps: vec![ProductionStep {
symbol: Symbol {
index: 0,
kind: SymbolType::NonTerminal,
},
precedence: 0,
associativity: None,
alias: None,
}],
};
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct LookaheadSet {
terminal_bits: SmallBitVec,
external_bits: SmallBitVec,
eof: bool,
}
#[derive(Clone, Copy, Debug)]
pub(crate) struct ParseItem<'a> {
pub variable_index: u32,
pub step_index: u32,
pub production: &'a Production,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSet<'a> {
pub entries: BTreeMap<ParseItem<'a>, LookaheadSet>,
}
pub(crate) struct ParseItemDisplay<'a>(
pub &'a ParseItem<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar);
#[allow(dead_code)]
pub(crate) struct ParseItemSetDisplay<'a>(
pub &'a ParseItemSet<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
impl LookaheadSet {
pub fn new() -> Self {
Self {
terminal_bits: SmallBitVec::new(),
external_bits: SmallBitVec::new(),
eof: false,
}
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
.chain(
self.external_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::external(i))
} else {
None
}
}),
)
.chain(if self.eof { Some(Symbol::end()) } else { None })
}
pub fn with(symbols: impl IntoIterator<Item = Symbol>) -> Self {
let mut result = Self::new();
for symbol in symbols {
result.insert(symbol);
}
result
}
pub fn contains(&self, symbol: &Symbol) -> bool {
match symbol.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
SymbolType::End => self.eof,
}
}
pub fn insert(&mut self, other: Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = true;
return;
}
};
if other.index >= vec.len() {
vec.resize(other.index + 1, false);
}
vec.set(other.index, true);
}
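// Insert every symbol from `other` into this set, returning true if any symbol
// was newly added.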
pub fn insert_all(&mut self, other: &LookaheadSet) -> bool {
let mut result = false;
if other.terminal_bits.len() > self.terminal_bits.len() {
self.terminal_bits.resize(other.terminal_bits.len(), false);
}
if other.external_bits.len() > self.external_bits.len() {
self.external_bits.resize(other.external_bits.len(), false);
}
for (i, element) in other.terminal_bits.iter().enumerate() {
if element {
result |= !self.terminal_bits[i];
self.terminal_bits.set(i, element);
}
}
for (i, element) in other.external_bits.iter().enumerate() {
if element {
result |= !self.external_bits[i];
self.external_bits.set(i, element);
}
}
if other.eof {
result |= !self.eof;
self.eof = true;
}
result
}
}
impl<'a> ParseItem<'a> {
pub fn start() -> Self {
ParseItem {
variable_index: u32::MAX,
production: &START_PRODUCTION,
step_index: 0,
}
}
pub fn step(&self) -> Option<&'a ProductionStep> {
self.production.steps.get(self.step_index as usize)
}
pub fn symbol(&self) -> Option<Symbol> {
self.step().map(|step| step.symbol)
}
pub fn associativity(&self) -> Option<Associativity> {
self.prev_step().and_then(|step| step.associativity)
}
pub fn precedence(&self) -> i32 {
self.prev_step().map_or(0, |step| step.precedence)
}
pub fn prev_step(&self) -> Option<&'a ProductionStep> {
if self.step_index > 0 {
Some(&self.production.steps[self.step_index as usize - 1])
} else {
None
}
}
pub fn is_done(&self) -> bool {
self.step_index as usize == self.production.steps.len()
}
pub fn is_augmented(&self) -> bool {
self.variable_index == u32::MAX
}
pub fn successor(&self) -> ParseItem<'a> {
ParseItem {
variable_index: self.variable_index,
production: self.production,
step_index: self.step_index + 1,
}
}
}
impl<'a> ParseItemSet<'a> {
pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, LookaheadSet)>) -> Self {
let mut result = Self::default();
for (item, lookaheads) in elements {
result.entries.insert(item, lookaheads);
}
result
}
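// Produce a signature for the items in this set, ignoring their lookaheads.
// This signature is used later to group parse states that may be mergeable.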
pub fn hash_unfinished_items(&self, h: &mut impl Hasher) {
let mut previous_variable_index = u32::MAX;
let mut previous_step_index = u32::MAX;
for item in self.entries.keys() {
if item.step().is_none() && item.variable_index != previous_variable_index
|| item.step_index != previous_step_index
{
h.write_u32(item.variable_index);
h.write_u32(item.step_index);
previous_variable_index = item.variable_index;
previous_step_index = item.step_index;
}
}
}
}
impl<'a> Default for ParseItemSet<'a> {
fn default() -> Self {
Self {
entries: BTreeMap::new(),
}
}
}
#[allow(dead_code)]
impl<'a> fmt::Display for ParseItemDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
if self.0.is_augmented() {
write!(f, "START →")?;
} else {
write!(
f,
"{} →",
&self.1.variables[self.0.variable_index as usize].name
)?;
}
for (i, step) in self.0.production.steps.iter().enumerate() {
if i == self.0.step_index as usize {
write!(f, "")?;
if step.precedence != 0 || step.associativity.is_some() {
write!(
f,
" (prec {:?} assoc {:?})",
step.precedence, step.associativity
)?;
}
}
write!(f, " ")?;
if step.symbol.is_terminal() {
if let Some(variable) = self.2.variables.get(step.symbol.index) {
write!(f, "{}", &variable.name)?;
} else {
write!(f, "{}-{}", "terminal", step.symbol.index)?;
}
} else if step.symbol.is_external() {
write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
} else {
write!(f, "{}", &self.1.variables[step.symbol.index].name)?;
}
if let Some(alias) = &step.alias {
write!(f, " (alias {})", alias.value)?;
}
}
if self.0.is_done() {
write!(f, "")?;
if let Some(step) = self.0.production.steps.last() {
if step.precedence != 0 || step.associativity.is_some() {
write!(
f,
" (prec {:?} assoc {:?})",
step.precedence, step.associativity
)?;
}
}
}
Ok(())
}
}
impl<'a> fmt::Display for LookaheadSetDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "[")?;
for (i, symbol) in self.0.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
if symbol.is_terminal() {
if let Some(variable) = self.2.variables.get(symbol.index) {
write!(f, "{}", &variable.name)?;
} else {
write!(f, "{}-{}", "terminal", symbol.index)?;
}
} else if symbol.is_external() {
write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
} else {
write!(f, "{}", &self.1.variables[symbol.index].name)?;
}
}
write!(f, "]")?;
Ok(())
}
}
impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
for (item, lookaheads) in self.0.entries.iter() {
writeln!(
f,
"{}\t{}",
ParseItemDisplay(item, self.1, self.2),
LookaheadSetDisplay(lookaheads, self.1, self.2)
)?;
}
Ok(())
}
}
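// Parse items are hashed and compared by content rather than by the identity of
// their production. For steps before the current position, only the alias
// matters; steps at or after the position are compared in full.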
impl<'a> Hash for ParseItem<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_u32(self.variable_index);
hasher.write_u32(self.step_index);
hasher.write_i32(self.production.dynamic_precedence);
hasher.write_usize(self.production.steps.len());
hasher.write_i32(self.precedence());
self.associativity().hash(hasher);
for step in &self.production.steps[0..self.step_index as usize] {
step.alias.hash(hasher);
}
for step in &self.production.steps[self.step_index as usize..] {
step.hash(hasher);
}
}
}
impl<'a> PartialEq for ParseItem<'a> {
fn eq(&self, other: &Self) -> bool {
if self.variable_index != other.variable_index
|| self.step_index != other.step_index
|| self.production.dynamic_precedence != other.production.dynamic_precedence
|| self.production.steps.len() != other.production.steps.len()
|| self.precedence() != other.precedence()
|| self.associativity() != other.associativity()
{
return false;
}
for (i, step) in self.production.steps.iter().enumerate() {
if i < self.step_index as usize {
if step.alias != other.production.steps[i].alias {
return false;
}
} else {
if *step != other.production.steps[i] {
return false;
}
}
}
return true;
}
}
impl<'a> Ord for ParseItem<'a> {
fn cmp(&self, other: &Self) -> Ordering {
let o = self.variable_index.cmp(&other.variable_index);
if o != Ordering::Equal {
return o;
}
let o = self.step_index.cmp(&other.step_index);
if o != Ordering::Equal {
return o;
}
let o = self
.production
.dynamic_precedence
.cmp(&other.production.dynamic_precedence);
if o != Ordering::Equal {
return o;
}
let o = self
.production
.steps
.len()
.cmp(&other.production.steps.len());
if o != Ordering::Equal {
return o;
}
let o = self.precedence().cmp(&other.precedence());
if o != Ordering::Equal {
return o;
}
let o = self.associativity().cmp(&other.associativity());
if o != Ordering::Equal {
return o;
}
for (i, step) in self.production.steps.iter().enumerate() {
let o = if i < self.step_index as usize {
step.alias.cmp(&other.production.steps[i].alias)
} else {
step.cmp(&other.production.steps[i])
};
if o != Ordering::Equal {
return o;
}
}
return Ordering::Equal;
}
}
impl<'a> PartialOrd for ParseItem<'a> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<'a> Eq for ParseItem<'a> {}
impl<'a> Hash for ParseItemSet<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_usize(self.entries.len());
for (item, lookaheads) in self.entries.iter() {
item.hash(hasher);
lookaheads.hash(hasher);
}
}
}

cli/src/build_tables/item_set_builder.rs Normal file
@@ -0,0 +1,330 @@
use super::item::{LookaheadSet, ParseItem, ParseItemDisplay, ParseItemSet};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::rules::Symbol;
use hashbrown::{HashMap, HashSet};
use std::fmt;
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition<'a> {
item: ParseItem<'a>,
info: FollowSetInfo,
}
#[derive(Clone, Debug, PartialEq, Eq)]
struct FollowSetInfo {
lookaheads: LookaheadSet,
propagates_lookaheads: bool,
}
pub(crate) struct ParseItemSetBuilder<'a> {
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
first_sets: HashMap<Symbol, LookaheadSet>,
last_sets: HashMap<Symbol, LookaheadSet>,
inlines: &'a InlinedProductionMap,
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
}
fn find_or_push<T: Eq>(vector: &mut Vec<T>, value: T) {
if !vector.contains(&value) {
vector.push(value);
}
}
impl<'a> ParseItemSetBuilder<'a> {
pub fn new(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
) -> Self {
let mut result = Self {
syntax_grammar,
lexical_grammar,
first_sets: HashMap::new(),
last_sets: HashMap::new(),
inlines,
transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
};
// For each grammar symbol, populate the FIRST and LAST sets: the set of
// terminals that appear at the beginning and end of that symbol's productions,
// respectively.
//
// For a terminal symbol, the FIRST and LAST set just consists of the
// terminal itself.
for i in 0..lexical_grammar.variables.len() {
let symbol = Symbol::terminal(i);
let mut set = LookaheadSet::new();
set.insert(symbol);
result.first_sets.insert(symbol, set.clone());
result.last_sets.insert(symbol, set);
}
for i in 0..syntax_grammar.external_tokens.len() {
let symbol = Symbol::external(i);
let mut set = LookaheadSet::new();
set.insert(symbol);
result.first_sets.insert(symbol, set.clone());
result.last_sets.insert(symbol, set);
}
// The FIRST set of a non-terminal `i` is the union of the following sets:
// * the set of all terminals that appear at the beginnings of i's productions
// * the FIRST sets of all the non-terminals that appear at the beginnings
// of i's productions
//
// Rather than computing these sets using recursion, we use an explicit stack
// called `symbols_to_process`.
let mut symbols_to_process = Vec::new();
let mut processed_non_terminals = HashSet::new();
for i in 0..syntax_grammar.variables.len() {
let symbol = Symbol::non_terminal(i);
let first_set = &mut result
.first_sets
.entry(symbol)
.or_insert(LookaheadSet::new());
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push(symbol);
while let Some(current_symbol) = symbols_to_process.pop() {
if current_symbol.is_terminal() || current_symbol.is_external() {
first_set.insert(current_symbol);
} else if processed_non_terminals.insert(current_symbol) {
for production in syntax_grammar.variables[current_symbol.index]
.productions
.iter()
{
if let Some(step) = production.steps.first() {
symbols_to_process.push(step.symbol);
}
}
}
}
// The LAST set is defined in a similar way to the FIRST set.
let last_set = &mut result
.last_sets
.entry(symbol)
.or_insert(LookaheadSet::new());
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push(symbol);
while let Some(current_symbol) = symbols_to_process.pop() {
if current_symbol.is_terminal() || current_symbol.is_external() {
last_set.insert(current_symbol);
} else if processed_non_terminals.insert(current_symbol) {
for production in syntax_grammar.variables[current_symbol.index]
.productions
.iter()
{
if let Some(step) = production.steps.last() {
symbols_to_process.push(step.symbol);
}
}
}
}
}
// To compute an item set's transitive closure, we find each item in the set
// whose next symbol is a non-terminal, and we add new items to the set for
// each of that symbol's productions. These productions might themselves begin
// with non-terminals, so the process continues recursively. In this process,
// the total set of entries that get added depends only on two things:
// * the set of non-terminal symbols that occur at each item's current position
// * the set of terminals that occur after each of these non-terminal symbols
//
// So we can avoid a lot of duplicated recursive work by precomputing, for each
// non-terminal symbol `i`, a final list of *additions* that must be made to an
// item set when `i` occurs as the next symbol in one of its core items. The
// structure of an *addition* is as follows:
// * `item` - the new item that must be added as part of the expansion of `i`
// * `lookaheads` - lookahead tokens that can always come after that item in
// the expansion of `i`
// * `propagates_lookaheads` - a boolean indicating whether or not `item` can
// occur at the *end* of the expansion of `i`, so that i's own current
// lookahead tokens can occur after `item`.
//
// Again, rather than computing these additions recursively, we use an explicit
// stack called `entries_to_process`.
for i in 0..syntax_grammar.variables.len() {
let empty_lookaheads = LookaheadSet::new();
let mut entries_to_process = vec![(i, &empty_lookaheads, true)];
// First, build up a map whose keys are all of the non-terminals that can
// appear at the beginning of non-terminal `i`, and whose values store
// information about the tokens that can follow each non-terminal.
let mut follow_set_info_by_non_terminal = HashMap::new();
while let Some(entry) = entries_to_process.pop() {
let (variable_index, lookaheads, propagates_lookaheads) = entry;
let existing_info = follow_set_info_by_non_terminal
.entry(variable_index)
.or_insert_with(|| FollowSetInfo {
lookaheads: LookaheadSet::new(),
propagates_lookaheads: false,
});
let did_add_follow_set_info;
if propagates_lookaheads {
did_add_follow_set_info = !existing_info.propagates_lookaheads;
existing_info.propagates_lookaheads = true;
} else {
did_add_follow_set_info = existing_info.lookaheads.insert_all(lookaheads);
}
if did_add_follow_set_info {
for production in &syntax_grammar.variables[variable_index].productions {
if let Some(symbol) = production.first_symbol() {
if symbol.is_non_terminal() {
if production.steps.len() == 1 {
entries_to_process.push((
symbol.index,
lookaheads,
propagates_lookaheads,
));
} else {
entries_to_process.push((
symbol.index,
&result.first_sets[&production.steps[1].symbol],
false,
));
}
}
}
}
}
}
// Store all of those non-terminals' productions, along with their associated
// lookahead info, as *additions* associated with non-terminal `i`.
let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
let variable = &syntax_grammar.variables[variable_index];
let non_terminal = Symbol::non_terminal(variable_index);
let variable_index = variable_index as u32;
if syntax_grammar.variables_to_inline.contains(&non_terminal) {
continue;
}
for production in &variable.productions {
let item = ParseItem {
variable_index,
production,
step_index: 0,
};
if let Some(inlined_productions) =
inlines.inlined_productions(item.production, item.step_index)
{
for production in inlined_productions {
find_or_push(
additions_for_non_terminal,
TransitiveClosureAddition {
item: ParseItem {
variable_index,
production,
step_index: item.step_index,
},
info: follow_set_info.clone(),
},
);
}
} else {
find_or_push(
additions_for_non_terminal,
TransitiveClosureAddition {
item,
info: follow_set_info.clone(),
},
);
}
}
}
}
result
}
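// Expand a core item set into its transitive closure, substituting inlined
// productions and applying the precomputed additions for each non-terminal.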
pub(crate) fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
let mut result = ParseItemSet::default();
for (item, lookaheads) in &item_set.entries {
if let Some(productions) = self
.inlines
.inlined_productions(item.production, item.step_index)
{
for production in productions {
self.add_item(
&mut result,
ParseItem {
variable_index: item.variable_index,
production,
step_index: item.step_index,
},
lookaheads,
);
}
} else {
self.add_item(&mut result, *item, lookaheads);
}
}
result
}
pub fn first_set(&self, symbol: &Symbol) -> &LookaheadSet {
&self.first_sets[symbol]
}
pub fn last_set(&self, symbol: &Symbol) -> &LookaheadSet {
&self.last_sets[symbol]
}
fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &LookaheadSet) {
if let Some(step) = item.step() {
if step.symbol.is_non_terminal() {
let next_step = item.successor().step();
// Determine which tokens can follow this non-terminal.
let following_tokens = if let Some(next_step) = next_step {
self.first_sets.get(&next_step.symbol).unwrap()
} else {
lookaheads
};
// Use the pre-computed *additions* to expand the non-terminal.
for addition in &self.transitive_closure_additions[step.symbol.index] {
let lookaheads = set
.entries
.entry(addition.item)
.or_insert_with(|| LookaheadSet::new());
lookaheads.insert_all(&addition.info.lookaheads);
if addition.info.propagates_lookaheads {
lookaheads.insert_all(following_tokens);
}
}
}
}
set.entries.insert(item, lookaheads.clone());
}
}
impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ParseItemSetBuilder {{\n")?;
write!(f, " additions: {{\n")?;
for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
write!(f, " {}: {{\n", variable.name)?;
for addition in &self.transitive_closure_additions[i] {
write!(
f,
" {}\n",
ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar)
)?;
}
write!(f, " }},\n")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}

cli/src/build_tables/minimize_parse_table.rs Normal file
@@ -0,0 +1,281 @@
use super::item::LookaheadSet;
use super::token_conflicts::TokenConflictMap;
use crate::grammars::{SyntaxGrammar, VariableType};
use crate::rules::{AliasMap, Symbol};
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
use hashbrown::{HashMap, HashSet};
pub(crate) fn minimize_parse_table(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
simple_aliases: &AliasMap,
token_conflict_map: &TokenConflictMap,
keywords: &LookaheadSet,
) {
let mut minimizer = Minimizer {
parse_table,
syntax_grammar,
token_conflict_map,
keywords,
simple_aliases,
};
minimizer.remove_unit_reductions();
minimizer.merge_compatible_states();
minimizer.remove_unused_states();
}
struct Minimizer<'a> {
parse_table: &'a mut ParseTable,
syntax_grammar: &'a SyntaxGrammar,
token_conflict_map: &'a TokenConflictMap<'a>,
keywords: &'a LookaheadSet,
simple_aliases: &'a AliasMap,
}
impl<'a> Minimizer<'a> {
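// Find states whose only actions are unit reductions of hidden, unaliased
// symbols, and redirect every reference to such a state straight to the state
// that would be reached after the reduction.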
fn remove_unit_reductions(&mut self) {
let mut aliased_symbols = HashSet::new();
for variable in &self.syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if step.alias.is_some() {
aliased_symbols.insert(step.symbol);
}
}
}
}
let mut unit_reduction_symbols_by_state = HashMap::new();
for (i, state) in self.parse_table.states.iter().enumerate() {
let mut only_unit_reductions = true;
let mut unit_reduction_symbol = None;
for (_, entry) in &state.terminal_entries {
for action in &entry.actions {
match action {
ParseAction::ShiftExtra => continue,
ParseAction::Reduce {
child_count: 1,
alias_sequence_id: 0,
symbol,
..
} => {
if !self.simple_aliases.contains_key(&symbol)
&& !aliased_symbols.contains(&symbol)
&& self.syntax_grammar.variables[symbol.index].kind
!= VariableType::Named
&& (unit_reduction_symbol.is_none()
|| unit_reduction_symbol == Some(symbol))
{
unit_reduction_symbol = Some(symbol);
continue;
}
}
_ => {}
}
only_unit_reductions = false;
break;
}
if !only_unit_reductions {
break;
}
}
if let Some(symbol) = unit_reduction_symbol {
if only_unit_reductions {
unit_reduction_symbols_by_state.insert(i, *symbol);
}
}
}
for state in self.parse_table.states.iter_mut() {
let mut done = false;
while !done {
done = true;
state.update_referenced_states(|other_state_id, state| {
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
done = false;
state.nonterminal_entries[symbol]
} else {
other_state_id
}
})
}
}
}
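// Group states by the signature of their unfinished items, then repeatedly try
// to merge states within each group, rewriting references after every pass.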
fn merge_compatible_states(&mut self) {
let mut state_ids_by_signature = HashMap::new();
for (i, state) in self.parse_table.states.iter().enumerate() {
state_ids_by_signature
.entry(state.unfinished_item_signature)
.or_insert(Vec::new())
.push(i);
}
let mut deleted_states = HashSet::new();
loop {
let mut state_replacements = HashMap::new();
for (_, state_ids) in &state_ids_by_signature {
for i in state_ids {
for j in state_ids {
if j == i {
break;
}
if deleted_states.contains(j) || deleted_states.contains(i) {
continue;
}
if self.merge_parse_state(*j, *i) {
deleted_states.insert(*i);
state_replacements.insert(*i, *j);
}
}
}
}
if state_replacements.is_empty() {
break;
}
for state in self.parse_table.states.iter_mut() {
state.update_referenced_states(|other_state_id, _| {
*state_replacements
.get(&other_state_id)
.unwrap_or(&other_state_id)
});
}
}
}
fn merge_parse_state(&mut self, left: usize, right: usize) -> bool {
let left_state = &self.parse_table.states[left];
let right_state = &self.parse_table.states[right];
if left_state.nonterminal_entries != right_state.nonterminal_entries {
return false;
}
for (symbol, left_entry) in &left_state.terminal_entries {
if let Some(right_entry) = right_state.terminal_entries.get(symbol) {
if right_entry.actions != left_entry.actions {
return false;
}
} else if !self.can_add_entry_to_state(right_state, *symbol, left_entry) {
return false;
}
}
let mut symbols_to_add = Vec::new();
for (symbol, right_entry) in &right_state.terminal_entries {
if !left_state.terminal_entries.contains_key(&symbol) {
if !self.can_add_entry_to_state(left_state, *symbol, right_entry) {
return false;
}
symbols_to_add.push(*symbol);
}
}
for symbol in symbols_to_add {
let entry = self.parse_table.states[right].terminal_entries[&symbol].clone();
self.parse_table.states[left]
.terminal_entries
.insert(symbol, entry);
}
true
}
fn can_add_entry_to_state(
&self,
state: &ParseState,
token: Symbol,
entry: &ParseTableEntry,
) -> bool {
// Do not add external tokens; they could conflict lexically with any of the state's
// existing lookahead tokens.
if token.is_external() {
return false;
}
// Only merge parse states by allowing existing reductions to happen with
// additional lookahead tokens. Do not alter parse states in ways that allow
// entirely new types of actions to happen.
if state.terminal_entries.iter().all(|(_, e)| e != entry) {
return false;
}
match entry.actions.last() {
Some(ParseAction::Reduce { .. }) => {}
_ => return false,
}
// Do not add tokens which are both internal and external. Their validity could
// influence the behavior of the external scanner.
if self
.syntax_grammar
.external_tokens
.iter()
.any(|t| t.corresponding_internal_token == Some(token))
{
return false;
}
let is_word_token = self.syntax_grammar.word_token == Some(token);
let is_keyword = self.keywords.contains(&token);
// Do not add a token if it conflicts with an existing token.
if token.is_terminal() {
for existing_token in state.terminal_entries.keys() {
if (is_word_token && self.keywords.contains(existing_token))
|| is_keyword && self.syntax_grammar.word_token.as_ref() == Some(existing_token)
{
continue;
}
if self
.token_conflict_map
.does_conflict(token.index, existing_token.index)
|| self
.token_conflict_map
.does_match_same_string(token.index, existing_token.index)
{
return false;
}
}
}
true
}
fn remove_unused_states(&mut self) {
let mut state_usage_map = vec![false; self.parse_table.states.len()];
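// The error state (0) and the start state (1) are always considered used.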
state_usage_map[0] = true;
state_usage_map[1] = true;
for state in &self.parse_table.states {
for referenced_state in state.referenced_states() {
state_usage_map[referenced_state] = true;
}
}
let mut removed_predecessor_count = 0;
let mut state_replacement_map = vec![0; self.parse_table.states.len()];
for state_id in 0..self.parse_table.states.len() {
state_replacement_map[state_id] = state_id - removed_predecessor_count;
if !state_usage_map[state_id] {
removed_predecessor_count += 1;
}
}
let mut state_id = 0;
let mut original_state_id = 0;
while state_id < self.parse_table.states.len() {
if state_usage_map[original_state_id] {
self.parse_table.states[state_id].update_referenced_states(|other_state_id, _| {
state_replacement_map[other_state_id]
});
state_id += 1;
} else {
self.parse_table.states.remove(state_id);
}
original_state_id += 1;
}
}
}

cli/src/build_tables/mod.rs Normal file
@@ -0,0 +1,285 @@
mod build_lex_table;
mod build_parse_table;
mod coincident_tokens;
mod item;
mod item_set_builder;
mod minimize_parse_table;
mod token_conflicts;
use self::build_lex_table::build_lex_table;
use self::build_parse_table::build_parse_table;
use self::coincident_tokens::CoincidentTokenIndex;
use self::item::LookaheadSet;
use self::minimize_parse_table::minimize_parse_table;
use self::token_conflicts::TokenConflictMap;
use crate::error::Result;
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::nfa::{CharacterSet, NfaCursor};
use crate::rules::{AliasMap, Symbol};
use crate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
inlines: &InlinedProductionMap,
minimize: bool,
state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
let (mut parse_table, following_tokens) =
build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?;
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
let keywords = identify_keywords(
lexical_grammar,
&parse_table,
syntax_grammar.word_token,
&token_conflict_map,
&coincident_token_index,
);
populate_error_state(
&mut parse_table,
syntax_grammar,
lexical_grammar,
&coincident_token_index,
&token_conflict_map,
);
mark_fragile_tokens(
&mut parse_table,
lexical_grammar,
&token_conflict_map,
);
if minimize {
minimize_parse_table(
&mut parse_table,
syntax_grammar,
simple_aliases,
&token_conflict_map,
&keywords,
);
}
let (main_lex_table, keyword_lex_table) = build_lex_table(
&mut parse_table,
syntax_grammar,
lexical_grammar,
&keywords,
minimize,
);
Ok((
parse_table,
main_lex_table,
keyword_lex_table,
syntax_grammar.word_token,
))
}
fn populate_error_state(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
coincident_token_index: &CoincidentTokenIndex,
token_conflict_map: &TokenConflictMap,
) {
let state = &mut parse_table.states[0];
let n = lexical_grammar.variables.len();
// First identify the *conflict-free tokens*: tokens that do not overlap with
// any other token in any way.
let conflict_free_tokens = LookaheadSet::with((0..n).into_iter().filter_map(|i| {
let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
j != i
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
&& token_conflict_map.does_conflict(i, j)
});
if conflicts_with_other_tokens {
None
} else {
info!(
"error recovery - token {} has no conflicts",
lexical_grammar.variables[i].name
);
Some(Symbol::terminal(i))
}
}));
let recover_entry = ParseTableEntry {
reusable: false,
actions: vec![ParseAction::Recover],
};
// Exclude from the error-recovery state any token that conflicts with one of
// the *conflict-free tokens* identified above.
for i in 0..n {
let symbol = Symbol::terminal(i);
if !conflict_free_tokens.contains(&symbol) {
if syntax_grammar.word_token != Some(symbol) {
if let Some(t) = conflict_free_tokens.iter().find(|t| {
!coincident_token_index.contains(symbol, *t)
&& token_conflict_map.does_conflict(symbol.index, t.index)
}) {
info!(
"error recovery - exclude token {} because of conflict with {}",
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
);
continue;
}
}
}
info!(
"error recovery - include token {}",
lexical_grammar.variables[i].name
);
state
.terminal_entries
.entry(symbol)
.or_insert_with(|| recover_entry.clone());
}
for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() {
if external_token.corresponding_internal_token.is_none() {
state
.terminal_entries
.entry(Symbol::external(i))
.or_insert_with(|| recover_entry.clone());
}
}
state.terminal_entries.insert(Symbol::end(), recover_entry);
}
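/// Find the tokens that can be lexed by the keyword lex function instead of the
/// main lex function. Candidates must consume only letters and underscores and
/// must match some string that the word token also matches; candidates are then
/// dropped if they shadow another candidate, or if replacing them with the word
/// token would change their conflict status with some non-keyword token.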
fn identify_keywords(
lexical_grammar: &LexicalGrammar,
parse_table: &ParseTable,
word_token: Option<Symbol>,
token_conflict_map: &TokenConflictMap,
coincident_token_index: &CoincidentTokenIndex,
) -> LookaheadSet {
if word_token.is_none() {
return LookaheadSet::new();
}
let word_token = word_token.unwrap();
let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());
// First find all of the candidate keyword tokens: tokens that start with
// letters or underscore and can match the same string as a word token.
let keywords = LookaheadSet::with(lexical_grammar.variables.iter().enumerate().filter_map(
|(i, variable)| {
cursor.reset(vec![variable.start_state]);
if all_chars_are_alphabetical(&cursor)
&& token_conflict_map.does_match_same_string(i, word_token.index)
{
info!(
"Keywords - add candidate {}",
lexical_grammar.variables[i].name
);
Some(Symbol::terminal(i))
} else {
None
}
},
));
// Exclude keyword candidates that shadow another keyword candidate.
let keywords = LookaheadSet::with(keywords.iter().filter(|token| {
for other_token in keywords.iter() {
if other_token != *token
&& token_conflict_map.does_match_same_string(token.index, other_token.index)
{
info!(
"Keywords - exclude {} because it matches the same string as {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_token.index].name
);
return false;
}
}
true
}));
// Exclude keyword candidates for which substituting the keyword capture
// token would introduce new lexical conflicts with other tokens.
let keywords = LookaheadSet::with(keywords.iter().filter(|token| {
for other_index in 0..lexical_grammar.variables.len() {
if keywords.contains(&Symbol::terminal(other_index)) {
continue;
}
// If the word token was already valid in every state containing
// this keyword candidate, then substituting the word token won't
// introduce any new lexical conflicts.
if coincident_token_index
.states_with(*token, Symbol::terminal(other_index))
.iter()
.all(|state_id| {
parse_table.states[*state_id]
.terminal_entries
.contains_key(&word_token)
})
{
continue;
}
if !token_conflict_map.has_same_conflict_status(
token.index,
word_token.index,
other_index,
) {
info!(
"Keywords - exclude {} because of conflict with {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_index].name
);
return false;
}
}
info!(
"Keywords - include {}",
lexical_grammar.variables[token.index].name,
);
true
}));
keywords
}
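/// Mark a parse-table entry as non-reusable if its token overlaps with some
/// other token that is valid in the same state, since the token could then be
/// scanned differently depending on which lookahead tokens are valid.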
fn mark_fragile_tokens(
parse_table: &mut ParseTable,
lexical_grammar: &LexicalGrammar,
token_conflict_map: &TokenConflictMap,
) {
let n = lexical_grammar.variables.len();
let mut valid_tokens_mask = Vec::with_capacity(n);
for state in parse_table.states.iter_mut() {
valid_tokens_mask.clear();
valid_tokens_mask.resize(n, false);
for token in state.terminal_entries.keys() {
if token.is_terminal() {
valid_tokens_mask[token.index] = true;
}
}
for (token, entry) in state.terminal_entries.iter_mut() {
for i in 0..n {
if token_conflict_map.does_overlap(i, token.index) {
if valid_tokens_mask[i] {
entry.reusable = false;
break;
}
}
}
}
}
}
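// True if every non-separator transition from the cursor's states consumes only
// alphabetic characters or underscores. Negated (Exclude) character sets are
// conservatively rejected.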
fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
cursor.transition_chars().all(|(chars, is_sep)| {
if is_sep {
true
} else if let CharacterSet::Include(chars) = chars {
chars.iter().all(|c| c.is_alphabetic() || *c == '_')
} else {
false
}
})
}

@@ -0,0 +1,382 @@
use crate::build_tables::item::LookaheadSet;
use crate::grammars::LexicalGrammar;
use crate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use hashbrown::HashSet;
use std::cmp::Ordering;
use std::fmt;
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct TokenConflictStatus {
does_overlap: bool,
does_match_valid_continuation: bool,
does_match_separators: bool,
matches_same_string: bool,
}
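/// Pairwise conflict information for all tokens in a lexical grammar.
/// `status_matrix` is an n x n row-major matrix: the entry at (i, j) describes
/// how token i can interfere with token j. The two character-set vectors record
/// which characters can start each token and which characters can follow it.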
pub(crate) struct TokenConflictMap<'a> {
n: usize,
status_matrix: Vec<TokenConflictStatus>,
starting_chars_by_index: Vec<CharacterSet>,
following_chars_by_index: Vec<CharacterSet>,
grammar: &'a LexicalGrammar,
}
impl<'a> TokenConflictMap<'a> {
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<LookaheadSet>) -> Self {
let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
let starting_chars = get_starting_chars(&mut cursor, grammar);
let following_chars = get_following_chars(&starting_chars, following_tokens);
let n = grammar.variables.len();
let mut status_matrix = vec![TokenConflictStatus::default(); n * n];
for i in 0..grammar.variables.len() {
for j in 0..i {
let status = compute_conflict_status(&mut cursor, grammar, &following_chars, i, j);
status_matrix[matrix_index(n, i, j)] = status.0;
status_matrix[matrix_index(n, j, i)] = status.1;
}
}
TokenConflictMap {
n,
status_matrix,
starting_chars_by_index: starting_chars,
following_chars_by_index: following_chars,
grammar,
}
}
pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool {
let left = &self.status_matrix[matrix_index(self.n, a, other)];
let right = &self.status_matrix[matrix_index(self.n, b, other)];
left == right
}
pub fn does_match_same_string(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
}
pub fn does_conflict(&self, i: usize, j: usize) -> bool {
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
entry.does_match_valid_continuation || entry.does_match_separators
}
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].does_overlap
}
pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool {
if left.0 > right.0 {
return true;
} else if left.0 < right.0 {
return false;
}
match grammar.variables[left.1]
.implicit_precedence
.cmp(&grammar.variables[right.1].implicit_precedence)
{
Ordering::Less => false,
Ordering::Greater => true,
Ordering::Equal => left.1 < right.1,
}
}
}
impl<'a> fmt::Debug for TokenConflictMap<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TokenConflictMap {{\n")?;
write!(f, " starting_characters: {{\n")?;
for i in 0..self.n {
write!(f, " {}: {:?},\n", i, self.starting_chars_by_index[i])?;
}
write!(f, " }},\n")?;
write!(f, " following_characters: {{\n")?;
for i in 0..self.n {
write!(
f,
" {}: {:?},\n",
self.grammar.variables[i].name, self.following_chars_by_index[i]
)?;
}
write!(f, " }},\n")?;
write!(f, " status_matrix: {{\n")?;
for i in 0..self.n {
write!(f, " {}: {{\n", self.grammar.variables[i].name)?;
for j in 0..self.n {
write!(
f,
" {}: {:?},\n",
self.grammar.variables[j].name,
self.status_matrix[matrix_index(self.n, i, j)]
)?;
}
write!(f, " }},\n")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}
fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
variable_count * i + j
}
fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<CharacterSet> {
let mut result = Vec::with_capacity(grammar.variables.len());
for variable in &grammar.variables {
cursor.reset(vec![variable.start_state]);
let mut all_chars = CharacterSet::empty();
for (chars, _) in cursor.transition_chars() {
all_chars = all_chars.add(chars);
}
result.push(all_chars);
}
result
}
fn get_following_chars(
starting_chars: &Vec<CharacterSet>,
following_tokens: Vec<LookaheadSet>,
) -> Vec<CharacterSet> {
following_tokens
.into_iter()
.map(|following_tokens| {
let mut chars = CharacterSet::empty();
for token in following_tokens.iter() {
if token.is_terminal() {
chars = chars.add(&starting_chars[token.index]);
}
}
chars
})
.collect()
}
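// Walk the NFAs for tokens `i` and `j` simultaneously, visiting every reachable
// set of states. Along the way, record in each direction whether the two tokens
// can match the same string, and whether one token can keep matching beyond the
// point where the other would finish, via separators or via characters that
// could validly follow the finished token.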
fn compute_conflict_status(
cursor: &mut NfaCursor,
grammar: &LexicalGrammar,
following_chars: &Vec<CharacterSet>,
i: usize,
j: usize,
) -> (TokenConflictStatus, TokenConflictStatus) {
let mut visited_state_sets = HashSet::new();
let mut state_set_queue = vec![vec![
grammar.variables[i].start_state,
grammar.variables[j].start_state,
]];
let mut result = (
TokenConflictStatus::default(),
TokenConflictStatus::default(),
);
while let Some(state_set) = state_set_queue.pop() {
// Don't pursue states where there's no potential for conflict.
if variable_ids_for_states(&state_set, grammar).count() > 1 {
cursor.reset(state_set);
} else {
continue;
}
let mut completion = None;
for (id, precedence) in cursor.completions() {
if let Some((prev_id, prev_precedence)) = completion {
if id == prev_id {
continue;
}
// Prefer tokens with higher precedence. For tokens with equal precedence,
// prefer those listed earlier in the grammar.
let winning_id;
if TokenConflictMap::prefer_token(
grammar,
(prev_precedence, prev_id),
(precedence, id),
) {
winning_id = prev_id;
} else {
winning_id = id;
completion = Some((id, precedence));
}
if winning_id == i {
result.0.matches_same_string = true;
result.0.does_overlap = true;
} else {
result.1.matches_same_string = true;
result.1.does_overlap = true;
}
} else {
completion = Some((id, precedence));
}
}
for NfaTransition {
characters,
precedence,
states,
is_separator,
} in cursor.transitions()
{
let mut can_advance = true;
if let Some((completed_id, completed_precedence)) = completion {
let mut other_id = None;
let mut successor_contains_completed_id = false;
for variable_id in variable_ids_for_states(&states, grammar) {
if variable_id == completed_id {
successor_contains_completed_id = true;
break;
} else {
other_id = Some(variable_id);
}
}
if let (Some(other_id), false) = (other_id, successor_contains_completed_id) {
let winning_id;
if precedence < completed_precedence {
winning_id = completed_id;
can_advance = false;
} else {
winning_id = other_id;
}
if winning_id == i {
result.0.does_overlap = true;
if characters.does_intersect(&following_chars[j]) {
result.0.does_match_valid_continuation = true;
}
if is_separator {
result.0.does_match_separators = true;
}
} else {
result.1.does_overlap = true;
if characters.does_intersect(&following_chars[i]) {
result.1.does_match_valid_continuation = true;
}
}
}
}
if can_advance && visited_state_sets.insert(states.clone()) {
state_set_queue.push(states);
}
}
}
result
}
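// Yield the ids of the distinct lexical variables that own the given sorted
// list of NFA state ids, skipping consecutive duplicates.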
fn variable_ids_for_states<'a>(
state_ids: &'a Vec<u32>,
grammar: &'a LexicalGrammar,
) -> impl Iterator<Item = usize> + 'a {
let mut prev = None;
state_ids.iter().filter_map(move |state_id| {
let variable_id = grammar.variable_index_for_nfa_state(*state_id);
if prev != Some(variable_id) {
prev = Some(variable_id);
prev
} else {
None
}
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::{Variable, VariableType};
use crate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar};
use crate::rules::{Rule, Symbol};
#[test]
fn test_starting_characters() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: Vec::new(),
variables: vec![
Variable {
name: "token_0".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[a-f]1|0x\\d"),
},
Variable {
name: "token_1".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("d*ef"),
},
],
})
.unwrap();
let token_map = TokenConflictMap::new(&grammar, Vec::new());
assert_eq!(
token_map.starting_chars_by_index[0],
CharacterSet::empty().add_range('a', 'f').add_char('0')
);
assert_eq!(
token_map.starting_chars_by_index[1],
CharacterSet::empty().add_range('d', 'e')
);
}
#[test]
fn test_token_conflicts() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: Vec::new(),
variables: vec![
Variable {
name: "in".to_string(),
kind: VariableType::Named,
rule: Rule::string("in"),
},
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
},
Variable {
name: "instanceof".to_string(),
kind: VariableType::Named,
rule: Rule::string("instanceof"),
},
],
})
.unwrap();
let var = |name| index_of_var(&grammar, name);
let token_map = TokenConflictMap::new(
&grammar,
vec![
LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()),
LookaheadSet::with([Symbol::terminal(var("in"))].iter().cloned()),
LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()),
],
);
// Given the string "in", the `in` token is preferred over the `identifier` token
assert!(token_map.does_match_same_string(var("in"), var("identifier")));
assert!(!token_map.does_match_same_string(var("identifier"), var("in")));
// Depending on what character follows, the string "in" may be treated as part of an
// `identifier` token.
assert!(token_map.does_conflict(var("identifier"), var("in")));
// Depending on what character follows, the string "instanceof" may be treated as part of
// an `identifier` token.
assert!(token_map.does_conflict(var("identifier"), var("instanceof")));
assert!(token_map.does_conflict(var("instanceof"), var("in")));
}
fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize {
grammar
.variables
.iter()
.position(|v| v.name == name)
.unwrap()
}
}

cli/src/error.rs Normal file
@@ -0,0 +1,24 @@
#[derive(Debug)]
pub struct Error(pub String);
pub type Result<T> = std::result::Result<T, Error>;
impl Error {
pub fn grammar(message: &str) -> Self {
Error(format!("Grammar error: {}", message))
}
pub fn regex(message: &str) -> Self {
Error(format!("Regex error: {}", message))
}
pub fn undefined_symbol(name: &str) -> Self {
Error(format!("Undefined symbol `{}`", name))
}
}
impl From<serde_json::Error> for Error {
fn from(error: serde_json::Error) -> Self {
Error(error.to_string())
}
}

cli/src/generate.rs Normal file
@@ -0,0 +1,34 @@
use crate::build_tables::build_tables;
use crate::error::Result;
use crate::parse_grammar::parse_grammar;
use crate::prepare_grammar::prepare_grammar;
use crate::render::render_c_code;
pub fn generate_parser_for_grammar(
input: &str,
minimize: bool,
state_ids_to_log: Vec<usize>,
) -> Result<String> {
let input_grammar = parse_grammar(input)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&inlines,
minimize,
state_ids_to_log,
)?;
let c_code = render_c_code(
&input_grammar.name,
parse_table,
main_lex_table,
keyword_lex_table,
keyword_capture_token,
syntax_grammar,
lexical_grammar,
simple_aliases,
);
Ok(c_code)
}

cli/src/grammars.rs Normal file
@@ -0,0 +1,204 @@
use crate::nfa::Nfa;
use crate::rules::{Alias, Associativity, Rule, Symbol};
use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VariableType {
Hidden,
Auxiliary,
Anonymous,
Named,
}
// Input grammar
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Variable {
pub name: String,
pub kind: VariableType,
pub rule: Rule,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct InputGrammar {
pub name: String,
pub variables: Vec<Variable>,
pub extra_tokens: Vec<Rule>,
pub expected_conflicts: Vec<Vec<String>>,
pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>,
pub word_token: Option<String>,
}
// Extracted lexical grammar
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct LexicalVariable {
pub name: String,
pub kind: VariableType,
pub implicit_precedence: i32,
pub start_state: u32,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct LexicalGrammar {
pub nfa: Nfa,
pub variables: Vec<LexicalVariable>,
}
// Extracted syntax grammar
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct ProductionStep {
pub symbol: Symbol,
pub precedence: i32,
pub associativity: Option<Associativity>,
pub alias: Option<Alias>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Production {
pub steps: Vec<ProductionStep>,
pub dynamic_precedence: i32,
}
pub(crate) struct InlinedProductionMap {
pub productions: Vec<Production>,
pub production_map: HashMap<(*const Production, u32), Vec<usize>>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SyntaxVariable {
pub name: String,
pub kind: VariableType,
pub productions: Vec<Production>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ExternalToken {
pub name: String,
pub kind: VariableType,
pub corresponding_internal_token: Option<Symbol>,
}
#[derive(Debug)]
pub(crate) struct SyntaxGrammar {
pub variables: Vec<SyntaxVariable>,
pub extra_tokens: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>,
pub external_tokens: Vec<ExternalToken>,
pub variables_to_inline: Vec<Symbol>,
pub word_token: Option<Symbol>,
}
#[cfg(test)]
impl ProductionStep {
pub(crate) fn new(symbol: Symbol) -> Self {
Self {
symbol,
precedence: 0,
associativity: None,
alias: None,
}
}
pub(crate) fn with_prec(self, precedence: i32, associativity: Option<Associativity>) -> Self {
Self {
symbol: self.symbol,
precedence,
associativity,
alias: self.alias,
}
}
pub(crate) fn with_alias(self, value: &str, is_named: bool) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
associativity: self.associativity,
alias: Some(Alias {
value: value.to_string(),
is_named,
}),
}
}
}
impl Production {
pub fn first_symbol(&self) -> Option<Symbol> {
self.steps.first().map(|s| s.symbol.clone())
}
}
impl Default for Production {
fn default() -> Self {
Production {
dynamic_precedence: 0,
steps: Vec::new(),
}
}
}
#[cfg(test)]
impl Variable {
pub fn named(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Named,
rule,
}
}
pub fn auxiliary(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Auxiliary,
rule,
}
}
pub fn hidden(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Hidden,
rule,
}
}
pub fn anonymous(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Anonymous,
rule,
}
}
}
impl LexicalGrammar {
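// Each variable's NFA states occupy a contiguous range of ids ending at its
// `start_state` (states are pushed sequentially in `expand_tokens`), so a state
// belongs to the first variable whose `start_state` is at least the given id.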
pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
self.variables.iter().position(|v| v.start_state >= state_id).unwrap()
}
}
impl SyntaxVariable {
pub fn is_auxiliary(&self) -> bool {
self.kind == VariableType::Auxiliary
}
}
impl InlinedProductionMap {
pub fn inlined_productions<'a>(
&'a self,
production: &Production,
step_index: u32,
) -> Option<impl Iterator<Item = &'a Production> + 'a> {
self.production_map
.get(&(production as *const Production, step_index))
.map(|production_indices| {
production_indices
.iter()
.cloned()
.map(move |index| &self.productions[index])
})
}
}

cli/src/js/dsl.js Normal file
@@ -0,0 +1,334 @@
const UNICODE_ESCAPE_PATTERN = /\\u([0-9a-f]{4})/gi;
const DELIMITER_ESCAPE_PATTERN = /\\\//g;
function alias(rule, value) {
const result = {
type: "ALIAS",
content: normalize(rule),
named: false,
value: null
};
switch (value.constructor) {
case String:
result.named = false;
result.value = value;
return result;
case ReferenceError:
result.named = true;
result.value = value.symbol.name;
return result;
case Object:
if (typeof value.type === 'string' && value.type === 'SYMBOL') {
result.named = true;
result.value = value.name;
return result;
}
}
throw new Error('Invalid alias value ' + value);
}
function blank() {
return {
type: "BLANK"
};
}
function choice(...elements) {
return {
type: "CHOICE",
members: elements.map(normalize)
};
}
function optional(value) {
return choice(value, blank());
}
function prec(number, rule) {
if (rule == null) {
rule = number;
number = 0;
}
return {
type: "PREC",
value: number,
content: normalize(rule)
};
}
prec.left = function(number, rule) {
if (rule == null) {
rule = number;
number = 0;
}
return {
type: "PREC_LEFT",
value: number,
content: normalize(rule)
};
}
prec.right = function(number, rule) {
if (rule == null) {
rule = number;
number = 0;
}
return {
type: "PREC_RIGHT",
value: number,
content: normalize(rule)
};
}
prec.dynamic = function(number, rule) {
return {
type: "PREC_DYNAMIC",
value: number,
content: normalize(rule)
};
}
function repeat(rule) {
return {
type: "REPEAT",
content: normalize(rule)
};
}
function repeat1(rule) {
return {
type: "REPEAT1",
content: normalize(rule)
};
}
function seq(...elements) {
return {
type: "SEQ",
members: elements.map(normalize)
};
}
function sym(name) {
return {
type: "SYMBOL",
name: name
};
}
function token(value) {
return {
type: "TOKEN",
content: normalize(value)
};
}
token.immediate = function(value) {
return {
type: "IMMEDIATE_TOKEN",
content: normalize(value)
};
}
function normalize(value) {
if (typeof value == "undefined")
throw new Error("Undefined symbol");
switch (value.constructor) {
case String:
return {
type: 'STRING',
value
};
case RegExp:
return {
type: 'PATTERN',
value: value.source
.replace(
DELIMITER_ESCAPE_PATTERN,
'/'
)
.replace(
UNICODE_ESCAPE_PATTERN,
(match, group) => String.fromCharCode(parseInt(group, 16))
)
};
case ReferenceError:
throw value
default:
if (typeof value.type === 'string') {
return value;
} else {
throw new TypeError("Invalid rule: " + value.toString());
}
}
}
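// A Proxy that turns property accesses like `$.statement` into SYMBOL rule
// nodes. When a rule map is supplied and the name is not a known rule, it
// returns a ReferenceError carrying the symbol; `normalize` will throw that
// error unless `alias` consumes it first.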
function RuleBuilder(ruleMap) {
return new Proxy({}, {
get(target, propertyName) {
const symbol = {
type: 'SYMBOL',
name: propertyName
};
if (!ruleMap || ruleMap.hasOwnProperty(propertyName)) {
return symbol;
} else {
const error = new ReferenceError(`Undefined symbol '${propertyName}'`);
error.symbol = symbol;
return error;
}
}
})
}
function grammar(baseGrammar, options) {
if (!options) {
options = baseGrammar;
baseGrammar = {
name: null,
rules: {},
extras: [normalize(/\s/)],
conflicts: [],
externals: [],
inline: []
};
}
let externals = baseGrammar.externals;
if (options.externals) {
if (typeof options.externals !== "function") {
throw new Error("Grammar's 'externals' property must be a function.");
}
const externalsRuleBuilder = RuleBuilder(null)
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
if (!Array.isArray(externalRules)) {
throw new Error("Grammar's 'externals' property must return an array of rules.");
}
externals = externalRules.map(normalize);
}
const ruleMap = {};
for (const key in options.rules) {
ruleMap[key] = true;
}
for (const key in baseGrammar.rules) {
ruleMap[key] = true;
}
for (const external of externals) {
if (typeof external.name === 'string') {
ruleMap[external.name] = true;
}
}
const ruleBuilder = RuleBuilder(ruleMap);
const name = options.name;
if (typeof name !== "string") {
throw new Error("Grammar's 'name' property must be a string.");
}
if (!/^[a-zA-Z_]\w*$/.test(name)) {
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
}
let rules = Object.assign({}, baseGrammar.rules);
if (options.rules) {
if (typeof options.rules !== "object") {
throw new Error("Grammar's 'rules' property must be an object.");
}
for (const ruleName in options.rules) {
const ruleFn = options.rules[ruleName];
if (typeof ruleFn !== "function") {
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
}
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
}
}
let extras = baseGrammar.extras.slice();
if (options.extras) {
if (typeof options.extras !== "function") {
throw new Error("Grammar's 'extras' property must be a function.");
}
extras = options.extras
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
.map(normalize);
}
let word = baseGrammar.word;
if (options.word) {
word = options.word.call(ruleBuilder, ruleBuilder).name;
if (typeof word != 'string') {
throw new Error("Grammar's 'word' property must be a named rule.");
}
}
let conflicts = baseGrammar.conflicts;
if (options.conflicts) {
if (typeof options.conflicts !== "function") {
throw new Error("Grammar's 'conflicts' property must be a function.");
}
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
if (!Array.isArray(conflictRules)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
conflicts = conflictRules.map(conflictSet => {
if (!Array.isArray(conflictSet)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
return conflictSet.map(symbol => symbol.name);
});
}
let inline = baseGrammar.inline;
if (options.inline) {
if (typeof options.inline !== "function") {
throw new Error("Grammar's 'inline' property must be a function.");
}
const baseInlineRules = baseGrammar.inline.map(sym);
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
if (!Array.isArray(inlineRules)) {
throw new Error("Grammar's inline must be an array of rules.");
}
inline = inlineRules.map(symbol => symbol.name);
}
if (Object.keys(rules).length == 0) {
throw new Error("Grammar must have at least one rule.");
}
return {name, word, rules, extras, conflicts, externals, inline};
}
global.alias = alias;
global.blank = blank;
global.choice = choice;
global.optional = optional;
global.prec = prec;
global.repeat = repeat;
global.repeat1 = repeat1;
global.seq = seq;
global.sym = sym;
global.token = token;
global.grammar = grammar;

cli/src/logger.rs Normal file
@@ -0,0 +1,29 @@
use log::{LevelFilter, Log, Metadata, Record};
struct Logger {
pub filter: Option<String>,
}
impl Log for Logger {
fn enabled(&self, _: &Metadata) -> bool {
true
}
fn log(&self, record: &Record) {
eprintln!(
"[{}] {}",
record
.module_path()
.unwrap_or_default()
.trim_start_matches("rust_tree_sitter_cli::"),
record.args()
);
}
fn flush(&self) {}
}
pub(crate) fn init() {
log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap();
log::set_max_level(LevelFilter::Info);
}

cli/src/main.rs Normal file
@@ -0,0 +1,119 @@
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
#[macro_use]
extern crate serde_derive;
extern crate hashbrown;
extern crate serde_json;
use clap::{App, Arg, SubCommand};
use std::env;
use std::io::Write;
use std::path::PathBuf;
use std::process::{exit, Command, Stdio};
use std::usize;
mod build_tables;
mod error;
mod generate;
mod grammars;
mod logger;
mod nfa;
mod parse_grammar;
mod prepare_grammar;
mod render;
mod rules;
mod tables;
fn main() {
if let Err(e) = run() {
eprintln!("{}", e.0);
exit(1);
}
}
fn run() -> error::Result<()> {
let matches = App::new("tree-sitter")
.version("0.1")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
.about("Generates and tests parsers")
.subcommand(
SubCommand::with_name("generate")
.about("Generate a parser")
.arg(Arg::with_name("log").long("log"))
.arg(
Arg::with_name("state-ids-to-log")
.long("log-state")
.takes_value(true),
)
.arg(Arg::with_name("no-minimize").long("no-minimize")),
)
.subcommand(
SubCommand::with_name("parse")
.about("Parse a file")
.arg(Arg::with_name("path").index(1)),
)
.subcommand(
SubCommand::with_name("test")
.about("Run a parser's tests")
.arg(Arg::with_name("path").index(1).required(true))
.arg(Arg::with_name("line").index(2).required(true))
.arg(Arg::with_name("column").index(3).required(true)),
)
.get_matches();
if let Some(matches) = matches.subcommand_matches("generate") {
if matches.is_present("log") {
logger::init();
}
let minimize = !matches.is_present("no-minimize");
let state_ids_to_log = matches
.values_of("state-ids-to-log")
.map_or(Vec::new(), |ids| {
ids.filter_map(|id| usize::from_str_radix(id, 10).ok())
.collect()
});
let mut grammar_path = env::current_dir().expect("Failed to read CWD");
grammar_path.push("grammar.js");
let grammar_json = load_js_grammar_file(grammar_path);
let code =
generate::generate_parser_for_grammar(&grammar_json, minimize, state_ids_to_log)?;
println!("{}", code);
}
Ok(())
}
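// Spawn `node`, pipe in the rule-building DSL followed by a `require` of the
// grammar file, and read the grammar's JSON representation from stdout.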
fn load_js_grammar_file(grammar_path: PathBuf) -> String {
let mut node_process = Command::new("node")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to run `node`");
let js_prelude = include_str!("./js/dsl.js");
let mut node_stdin = node_process
.stdin
.take()
.expect("Failed to open stdin for node");
write!(
node_stdin,
"{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n",
js_prelude,
grammar_path.to_str().unwrap()
)
.expect("Failed to write to node's stdin");
drop(node_stdin);
let output = node_process
.wait_with_output()
.expect("Failed to read output from node");
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => panic!("Node process exited with status {}", code),
}
String::from_utf8(output.stdout).expect("Got invalid UTF8 from node")
}

cli/src/nfa.rs Normal file
@@ -0,0 +1,771 @@
use std::char;
use std::cmp::max;
use std::cmp::Ordering;
use std::fmt;
use std::mem::swap;
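// A set of characters, stored either as a sorted list of included characters or
// as the complement of a sorted list of excluded characters.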
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum CharacterSet {
Include(Vec<char>),
Exclude(Vec<char>),
}
#[derive(Debug, PartialEq, Eq)]
pub enum NfaState {
Advance {
chars: CharacterSet,
state_id: u32,
is_sep: bool,
precedence: i32,
},
Split(u32, u32),
Accept {
variable_index: usize,
precedence: i32,
},
}
#[derive(PartialEq, Eq)]
pub struct Nfa {
pub states: Vec<NfaState>,
}
#[derive(Debug)]
pub struct NfaCursor<'a> {
pub(crate) state_ids: Vec<u32>,
nfa: &'a Nfa,
}
#[derive(Debug, PartialEq, Eq)]
pub struct NfaTransition {
pub characters: CharacterSet,
pub is_separator: bool,
pub precedence: i32,
pub states: Vec<u32>,
}
impl Default for Nfa {
fn default() -> Self {
Self { states: Vec::new() }
}
}
impl CharacterSet {
pub fn empty() -> Self {
CharacterSet::Include(Vec::new())
}
pub fn all() -> Self {
CharacterSet::Exclude(Vec::new())
}
pub fn negate(self) -> CharacterSet {
match self {
CharacterSet::Include(chars) => CharacterSet::Exclude(chars),
CharacterSet::Exclude(chars) => CharacterSet::Include(chars),
}
}
pub fn add_char(self, c: char) -> Self {
if let CharacterSet::Include(mut chars) = self {
if let Err(i) = chars.binary_search(&c) {
chars.insert(i, c);
}
CharacterSet::Include(chars)
} else {
panic!("Called add with a negated character set");
}
}
pub fn add_range(self, start: char, end: char) -> Self {
if let CharacterSet::Include(mut chars) = self {
let mut c = start as u32;
while c <= end as u32 {
chars.push(char::from_u32(c).unwrap());
c += 1;
}
chars.sort_unstable();
chars.dedup();
CharacterSet::Include(chars)
} else {
panic!("Called add with a negated character set");
}
}
pub fn add(self, other: &CharacterSet) -> Self {
match self {
CharacterSet::Include(mut chars) => match other {
CharacterSet::Include(other_chars) => {
chars.extend(other_chars);
chars.sort_unstable();
chars.dedup();
CharacterSet::Include(chars)
}
CharacterSet::Exclude(other_chars) => {
let excluded_chars = other_chars
.iter()
.cloned()
.filter(|c| !chars.contains(&c))
.collect();
CharacterSet::Exclude(excluded_chars)
}
},
CharacterSet::Exclude(mut chars) => match other {
CharacterSet::Include(other_chars) => {
chars.retain(|c| !other_chars.contains(&c));
CharacterSet::Exclude(chars)
}
CharacterSet::Exclude(other_chars) => {
chars.retain(|c| other_chars.contains(&c));
CharacterSet::Exclude(chars)
}
},
}
}
pub fn does_intersect(&self, other: &CharacterSet) -> bool {
match self {
CharacterSet::Include(chars) => match other {
CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).common,
CharacterSet::Exclude(other_chars) => compare_chars(chars, other_chars).left_only,
},
CharacterSet::Exclude(chars) => match other {
CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).right_only,
CharacterSet::Exclude(_) => true,
},
}
}
pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet {
match self {
CharacterSet::Include(chars) => match other {
CharacterSet::Include(other_chars) => {
CharacterSet::Include(remove_chars(chars, other_chars, true))
}
CharacterSet::Exclude(other_chars) => {
let mut removed = remove_chars(chars, other_chars, false);
add_chars(other_chars, chars);
swap(&mut removed, chars);
CharacterSet::Include(removed)
}
},
CharacterSet::Exclude(chars) => match other {
CharacterSet::Include(other_chars) => {
let mut removed = remove_chars(other_chars, chars, false);
add_chars(chars, other_chars);
swap(&mut removed, other_chars);
CharacterSet::Include(removed)
}
CharacterSet::Exclude(other_chars) => {
let mut result_exclusion = chars.clone();
result_exclusion.extend(other_chars.iter().cloned());
result_exclusion.sort_unstable();
result_exclusion.dedup();
remove_chars(chars, other_chars, true);
let mut included_characters = Vec::new();
let mut other_included_characters = Vec::new();
swap(&mut included_characters, other_chars);
swap(&mut other_included_characters, chars);
*self = CharacterSet::Include(included_characters);
*other = CharacterSet::Include(other_included_characters);
CharacterSet::Exclude(result_exclusion)
}
},
}
}
pub fn is_empty(&self) -> bool {
if let CharacterSet::Include(c) = self {
c.is_empty()
} else {
false
}
}
pub fn contains(&self, c: char) -> bool {
match self {
CharacterSet::Include(chars) => chars.contains(&c),
CharacterSet::Exclude(chars) => !chars.contains(&c),
}
}
}
impl Ord for CharacterSet {
fn cmp(&self, other: &CharacterSet) -> Ordering {
match self {
CharacterSet::Include(chars) => {
if let CharacterSet::Include(other_chars) = other {
order_chars(chars, other_chars)
} else {
Ordering::Less
}
}
CharacterSet::Exclude(chars) => {
if let CharacterSet::Exclude(other_chars) = other {
order_chars(chars, other_chars)
} else {
Ordering::Greater
}
}
}
}
}
impl PartialOrd for CharacterSet {
fn partial_cmp(&self, other: &CharacterSet) -> Option<Ordering> {
Some(self.cmp(other))
}
}
fn add_chars(left: &mut Vec<char>, right: &Vec<char>) {
for c in right {
if let Err(i) = left.binary_search(c) {
left.insert(i, *c);
}
}
}
fn remove_chars(left: &mut Vec<char>, right: &mut Vec<char>, mutate_right: bool) -> Vec<char> {
let mut result = Vec::new();
right.retain(|right_char| {
if let Some(index) = left.iter().position(|left_char| *left_char == *right_char) {
left.remove(index);
result.push(*right_char);
!mutate_right
} else {
true
}
});
result
}
struct SetComparison {
left_only: bool,
common: bool,
right_only: bool,
}
fn compare_chars(left: &Vec<char>, right: &Vec<char>) -> SetComparison {
let mut result = SetComparison {
left_only: false,
common: false,
right_only: false,
};
let mut left = left.iter().cloned();
let mut right = right.iter().cloned();
let mut i = left.next();
let mut j = right.next();
while let (Some(left_char), Some(right_char)) = (i, j) {
if left_char < right_char {
i = left.next();
result.left_only = true;
} else if left_char > right_char {
j = right.next();
result.right_only = true;
} else {
i = left.next();
j = right.next();
result.common = true;
}
}
result
}
fn order_chars(chars: &Vec<char>, other_chars: &Vec<char>) -> Ordering {
if chars.is_empty() {
if other_chars.is_empty() {
Ordering::Equal
} else {
Ordering::Less
}
} else if other_chars.is_empty() {
Ordering::Greater
} else {
let cmp = chars.len().cmp(&other_chars.len());
if cmp != Ordering::Equal {
return cmp;
}
for (c, other_c) in chars.iter().zip(other_chars.iter()) {
let cmp = c.cmp(other_c);
if cmp != Ordering::Equal {
return cmp;
}
}
Ordering::Equal
}
}
impl Nfa {
pub fn new() -> Self {
Nfa { states: Vec::new() }
}
pub fn last_state_id(&self) -> u32 {
self.states.len() as u32 - 1
}
}
impl fmt::Debug for Nfa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Nfa {{ states: {{\n")?;
for (i, state) in self.states.iter().enumerate() {
write!(f, " {}: {:?},\n", i, state)?;
}
write!(f, "}} }}")?;
Ok(())
}
}
impl<'a> NfaCursor<'a> {
pub fn new(nfa: &'a Nfa, mut states: Vec<u32>) -> Self {
let mut result = Self {
nfa,
state_ids: Vec::new(),
};
result.add_states(&mut states);
result
}
pub fn reset(&mut self, mut states: Vec<u32>) {
self.state_ids.clear();
self.add_states(&mut states);
}
pub fn force_reset(&mut self, states: Vec<u32>) {
self.state_ids = states
}
pub fn transition_chars(&self) -> impl Iterator<Item = (&CharacterSet, bool)> {
self.raw_transitions().map(|t| (t.0, t.1))
}
pub fn transitions(&self) -> Vec<NfaTransition> {
Self::group_transitions(self.raw_transitions())
}
fn raw_transitions(&self) -> impl Iterator<Item = (&CharacterSet, bool, i32, u32)> {
self.state_ids.iter().filter_map(move |id| {
if let NfaState::Advance {
chars,
state_id,
precedence,
is_sep,
} = &self.nfa.states[*id as usize]
{
Some((chars, *is_sep, *precedence, *state_id))
} else {
None
}
})
}
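// Combine the raw transitions into transitions over disjoint character sets.
// Wherever two transitions' character sets intersect, the intersection becomes
// its own transition whose successor states are merged and whose precedence is
// the maximum of the two.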
fn group_transitions<'b>(
iter: impl Iterator<Item = (&'b CharacterSet, bool, i32, u32)>,
) -> Vec<NfaTransition> {
let mut result: Vec<NfaTransition> = Vec::new();
for (chars, is_sep, prec, state) in iter {
let mut chars = chars.clone();
let mut i = 0;
while i < result.len() && !chars.is_empty() {
let intersection = result[i].characters.remove_intersection(&mut chars);
if !intersection.is_empty() {
let mut intersection_states = result[i].states.clone();
if let Err(j) = intersection_states.binary_search(&state) {
intersection_states.insert(j, state);
}
let intersection_transition = NfaTransition {
characters: intersection,
is_separator: result[i].is_separator || is_sep,
precedence: max(result[i].precedence, prec),
states: intersection_states,
};
if result[i].characters.is_empty() {
result[i] = intersection_transition;
} else {
result.insert(i, intersection_transition);
i += 1;
}
}
i += 1;
}
if !chars.is_empty() {
result.push(NfaTransition {
characters: chars,
precedence: prec,
states: vec![state],
is_separator: is_sep,
});
}
}
result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters));
result
}
pub fn completions(&self) -> impl Iterator<Item = (usize, i32)> + '_ {
self.state_ids.iter().filter_map(move |state_id| {
if let NfaState::Accept {
variable_index,
precedence,
} = self.nfa.states[*state_id as usize]
{
Some((variable_index, precedence))
} else {
None
}
})
}
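// Insert new states into the cursor's sorted list of state ids, expanding each
// `Split` state into both of its targets (an epsilon-closure over splits).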
pub fn add_states(&mut self, new_state_ids: &mut Vec<u32>) {
let mut i = 0;
while i < new_state_ids.len() {
let state_id = new_state_ids[i];
let state = &self.nfa.states[state_id as usize];
if let NfaState::Split(left, right) = state {
let mut has_left = false;
let mut has_right = false;
for new_state_id in new_state_ids.iter() {
if *new_state_id == *left {
has_left = true;
}
if *new_state_id == *right {
has_right = true;
}
}
if !has_left {
new_state_ids.push(*left);
}
if !has_right {
new_state_ids.push(*right);
}
} else if let Err(i) = self.state_ids.binary_search(&state_id) {
self.state_ids.insert(i, state_id);
}
i += 1;
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_group_transitions() {
let table = [
// overlapping character classes
(
vec![
(CharacterSet::empty().add_range('a', 'f'), false, 0, 1),
(CharacterSet::empty().add_range('d', 'i'), false, 1, 2),
],
vec![
NfaTransition {
characters: CharacterSet::empty().add_range('a', 'c'),
is_separator: false,
precedence: 0,
states: vec![1],
},
NfaTransition {
characters: CharacterSet::empty().add_range('d', 'f'),
is_separator: false,
precedence: 1,
states: vec![1, 2],
},
NfaTransition {
characters: CharacterSet::empty().add_range('g', 'i'),
is_separator: false,
precedence: 1,
states: vec![2],
},
],
),
// large character class followed by many individual characters
(
vec![
(CharacterSet::empty().add_range('a', 'z'), false, 0, 1),
(CharacterSet::empty().add_char('d'), false, 0, 2),
(CharacterSet::empty().add_char('i'), false, 0, 3),
(CharacterSet::empty().add_char('f'), false, 0, 4),
],
vec![
NfaTransition {
characters: CharacterSet::empty().add_char('d'),
is_separator: false,
precedence: 0,
states: vec![1, 2],
},
NfaTransition {
characters: CharacterSet::empty().add_char('f'),
is_separator: false,
precedence: 0,
states: vec![1, 4],
},
NfaTransition {
characters: CharacterSet::empty().add_char('i'),
is_separator: false,
precedence: 0,
states: vec![1, 3],
},
NfaTransition {
characters: CharacterSet::empty()
.add_range('a', 'c')
.add_char('e')
.add_range('g', 'h')
.add_range('j', 'z'),
is_separator: false,
precedence: 0,
states: vec![1],
},
],
),
// negated character class followed by an individual character
(
vec![
(CharacterSet::empty().add_char('0'), false, 0, 1),
(CharacterSet::empty().add_char('b'), false, 0, 2),
(
CharacterSet::empty().add_range('a', 'f').negate(),
false,
0,
3,
),
(CharacterSet::empty().add_char('c'), false, 0, 4),
],
vec![
NfaTransition {
characters: CharacterSet::empty().add_char('0'),
precedence: 0,
states: vec![1, 3],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::empty().add_char('b'),
precedence: 0,
states: vec![2],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::empty().add_char('c'),
precedence: 0,
states: vec![4],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::empty()
.add_range('a', 'f')
.add_char('0')
.negate(),
precedence: 0,
states: vec![3],
is_separator: false,
},
],
),
// multiple negated character classes
(
vec![
(CharacterSet::Include(vec!['a']), false, 0, 1),
(CharacterSet::Exclude(vec!['a', 'b', 'c']), false, 0, 2),
(CharacterSet::Include(vec!['g']), false, 0, 6),
(CharacterSet::Exclude(vec!['d', 'e', 'f']), false, 0, 3),
(CharacterSet::Exclude(vec!['g', 'h', 'i']), false, 0, 4),
(CharacterSet::Include(vec!['g']), false, 0, 5),
],
vec![
NfaTransition {
characters: CharacterSet::Include(vec!['a']),
precedence: 0,
states: vec![1, 3, 4],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Include(vec!['g']),
precedence: 0,
states: vec![2, 3, 5, 6],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Include(vec!['b', 'c']),
precedence: 0,
states: vec![3, 4],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Include(vec!['h', 'i']),
precedence: 0,
states: vec![2, 3],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Include(vec!['d', 'e', 'f']),
precedence: 0,
states: vec![2, 4],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Exclude(vec![
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
]),
precedence: 0,
states: vec![2, 3, 4],
is_separator: false,
},
],
),
];
for row in table.iter() {
assert_eq!(
NfaCursor::group_transitions(row.0.iter().map(|(c, sep, p, s)| (c, *sep, *p, *s))),
row.1
);
}
}
#[test]
fn test_character_set_remove_intersection() {
// A whitelist and an overlapping whitelist.
// Both sets contain 'c', 'd', 'e', and 'f'
let mut a = CharacterSet::empty().add_range('a', 'f');
let mut b = CharacterSet::empty().add_range('c', 'h');
assert_eq!(
a.remove_intersection(&mut b),
CharacterSet::empty().add_range('c', 'f')
);
assert_eq!(a, CharacterSet::empty().add_range('a', 'b'));
assert_eq!(b, CharacterSet::empty().add_range('g', 'h'));
let mut a = CharacterSet::empty().add_range('a', 'f');
let mut b = CharacterSet::empty().add_range('c', 'h');
assert_eq!(
b.remove_intersection(&mut a),
CharacterSet::empty().add_range('c', 'f')
);
assert_eq!(a, CharacterSet::empty().add_range('a', 'b'));
assert_eq!(b, CharacterSet::empty().add_range('g', 'h'));
// A whitelist and a larger whitelist.
let mut a = CharacterSet::empty().add_char('c');
let mut b = CharacterSet::empty().add_range('a', 'e');
assert_eq!(
a.remove_intersection(&mut b),
CharacterSet::empty().add_char('c')
);
assert_eq!(a, CharacterSet::empty());
assert_eq!(
b,
CharacterSet::empty()
.add_range('a', 'b')
.add_range('d', 'e')
);
let mut a = CharacterSet::empty().add_char('c');
let mut b = CharacterSet::empty().add_range('a', 'e');
assert_eq!(
b.remove_intersection(&mut a),
CharacterSet::empty().add_char('c')
);
assert_eq!(a, CharacterSet::empty());
assert_eq!(
b,
CharacterSet::empty()
.add_range('a', 'b')
.add_range('d', 'e')
);
// A whitelist and an intersecting blacklist.
// Both sets contain 'e', 'f', and 'm'
let mut a = CharacterSet::empty()
.add_range('c', 'h')
.add_range('k', 'm');
let mut b = CharacterSet::empty()
.add_range('a', 'd')
.add_range('g', 'l')
.negate();
assert_eq!(
a.remove_intersection(&mut b),
CharacterSet::Include(vec!['e', 'f', 'm'])
);
assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
let mut a = CharacterSet::empty()
.add_range('c', 'h')
.add_range('k', 'm');
let mut b = CharacterSet::empty()
.add_range('a', 'd')
.add_range('g', 'l')
.negate();
assert_eq!(
b.remove_intersection(&mut a),
CharacterSet::Include(vec!['e', 'f', 'm'])
);
assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
// A blacklist and an overlapping blacklist.
// Both sets exclude 'c', 'd', and 'e'
let mut a = CharacterSet::empty().add_range('a', 'e').negate();
let mut b = CharacterSet::empty().add_range('c', 'h').negate();
assert_eq!(
a.remove_intersection(&mut b),
CharacterSet::empty().add_range('a', 'h').negate(),
);
assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h']));
assert_eq!(b, CharacterSet::Include(vec!['a', 'b']));
// A blacklist and a larger blacklist.
let mut a = CharacterSet::empty().add_range('b', 'c').negate();
let mut b = CharacterSet::empty().add_range('a', 'd').negate();
assert_eq!(
a.remove_intersection(&mut b),
CharacterSet::empty().add_range('a', 'd').negate(),
);
assert_eq!(a, CharacterSet::empty().add_char('a').add_char('d'));
assert_eq!(b, CharacterSet::empty());
}
#[test]
fn test_character_set_does_intersect() {
let (a, b) = (CharacterSet::empty(), CharacterSet::empty());
assert!(!a.does_intersect(&b));
assert!(!b.does_intersect(&a));
let (a, b) = (
CharacterSet::empty().add_char('a'),
CharacterSet::empty().add_char('a'),
);
assert!(a.does_intersect(&b));
assert!(b.does_intersect(&a));
let (a, b) = (
CharacterSet::empty().add_char('b'),
CharacterSet::empty().add_char('a').add_char('c'),
);
assert!(!a.does_intersect(&b));
assert!(!b.does_intersect(&a));
let (a, b) = (
CharacterSet::Include(vec!['b']),
CharacterSet::Exclude(vec!['a', 'b', 'c']),
);
assert!(!a.does_intersect(&b));
assert!(!b.does_intersect(&a));
let (a, b) = (
CharacterSet::Include(vec!['b']),
CharacterSet::Exclude(vec!['a', 'c']),
);
assert!(a.does_intersect(&b));
assert!(b.does_intersect(&a));
let (a, b) = (
CharacterSet::Exclude(vec!['a']),
CharacterSet::Exclude(vec!['a']),
);
assert!(a.does_intersect(&b));
assert!(b.does_intersect(&a));
}
}

cli/src/parse_grammar.rs Normal file
@@ -0,0 +1,167 @@
use serde_json::{Map, Value};
use crate::error::Result;
use crate::grammars::{InputGrammar, Variable, VariableType};
use crate::rules::Rule;
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
enum RuleJSON {
ALIAS {
content: Box<RuleJSON>,
named: bool,
value: String,
},
BLANK,
STRING {
value: String,
},
PATTERN {
value: String,
},
SYMBOL {
name: String,
},
CHOICE {
members: Vec<RuleJSON>,
},
SEQ {
members: Vec<RuleJSON>,
},
REPEAT {
content: Box<RuleJSON>,
},
REPEAT1 {
content: Box<RuleJSON>,
},
PREC_DYNAMIC {
value: i32,
content: Box<RuleJSON>,
},
PREC_LEFT {
value: i32,
content: Box<RuleJSON>,
},
PREC_RIGHT {
value: i32,
content: Box<RuleJSON>,
},
PREC {
value: i32,
content: Box<RuleJSON>,
},
TOKEN {
content: Box<RuleJSON>,
},
IMMEDIATE_TOKEN {
content: Box<RuleJSON>,
},
}
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
rules: Map<String, Value>,
conflicts: Option<Vec<Vec<String>>>,
externals: Option<Vec<RuleJSON>>,
extras: Option<Vec<RuleJSON>>,
inline: Option<Vec<String>>,
word: Option<String>,
}
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
let grammar_json: GrammarJSON = serde_json::from_str(&input)?;
let mut variables = Vec::with_capacity(grammar_json.rules.len());
for (name, value) in grammar_json.rules {
variables.push(Variable {
name: name.to_owned(),
kind: VariableType::Named,
rule: parse_rule(serde_json::from_value(value)?),
})
}
let extra_tokens = grammar_json.extras
.unwrap_or(Vec::new())
.into_iter()
.map(parse_rule)
.collect();
let external_tokens = grammar_json.externals
.unwrap_or(Vec::new())
.into_iter()
.map(parse_rule)
.collect();
let expected_conflicts = grammar_json.conflicts
.unwrap_or(Vec::new());
let variables_to_inline = grammar_json.inline
.unwrap_or(Vec::new());
Ok(InputGrammar {
name: grammar_json.name,
word_token: grammar_json.word,
variables,
extra_tokens,
expected_conflicts,
external_tokens,
variables_to_inline,
})
}
fn parse_rule(json: RuleJSON) -> Rule {
match json {
RuleJSON::ALIAS { content, value, named } => Rule::alias(parse_rule(*content), value, named),
RuleJSON::BLANK => Rule::Blank,
RuleJSON::STRING { value } => Rule::String(value),
RuleJSON::PATTERN { value } => Rule::Pattern(value),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
RuleJSON::REPEAT { content } => Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank]),
RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)),
RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)),
RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)),
RuleJSON::PREC_DYNAMIC { value, content } => Rule::prec_dynamic(value, parse_rule(*content)),
RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_grammar() {
let grammar = parse_grammar(r#"{
"name": "my_lang",
"rules": {
"file": {
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "statement"
}
},
"statement": {
"type": "STRING",
"value": "foo"
}
}
}"#).unwrap();
assert_eq!(grammar.name, "my_lang");
assert_eq!(grammar.variables, vec![
Variable {
name: "file".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
},
Variable {
name: "statement".to_string(),
kind: VariableType::Named,
rule: Rule::String("foo".to_string())
},
]);
}
}

@@ -0,0 +1,241 @@
use super::ExtractedSyntaxGrammar;
use crate::grammars::{Variable, VariableType};
use crate::rules::{Rule, Symbol};
use hashbrown::HashMap;
use std::mem;
struct Expander {
variable_name: String,
repeat_count_in_variable: usize,
preceding_symbol_count: usize,
auxiliary_variables: Vec<Variable>,
existing_repeats: HashMap<Rule, Symbol>,
}
impl Expander {
fn expand_variable(&mut self, variable: &mut Variable) {
self.variable_name.clear();
self.variable_name.push_str(&variable.name);
self.repeat_count_in_variable = 0;
let mut rule = Rule::Blank;
mem::swap(&mut rule, &mut variable.rule);
variable.rule = self.expand_rule(&rule);
}
fn expand_rule(&mut self, rule: &Rule) -> Rule {
match rule {
Rule::Choice(elements) => Rule::Choice(
elements
.iter()
.map(|element| self.expand_rule(element))
.collect(),
),
Rule::Seq(elements) => Rule::Seq(
elements
.iter()
.map(|element| self.expand_rule(element))
.collect(),
),
Rule::Repeat(content) => {
let inner_rule = self.expand_rule(content);
if let Some(existing_symbol) = self.existing_repeats.get(&inner_rule) {
return Rule::Symbol(*existing_symbol);
}
self.repeat_count_in_variable += 1;
let rule_name = format!(
"{}_repeat{}",
self.variable_name, self.repeat_count_in_variable
);
let repeat_symbol = Symbol::non_terminal(
self.preceding_symbol_count + self.auxiliary_variables.len(),
);
self.existing_repeats
.insert(inner_rule.clone(), repeat_symbol);
self.auxiliary_variables.push(Variable {
name: rule_name,
kind: VariableType::Auxiliary,
rule: Rule::Choice(vec![
Rule::Seq(vec![
Rule::Symbol(repeat_symbol),
Rule::Symbol(repeat_symbol),
]),
inner_rule,
]),
});
Rule::Symbol(repeat_symbol)
}
Rule::Metadata { rule, params } => Rule::Metadata {
rule: Box::new(self.expand_rule(rule)),
params: params.clone(),
},
_ => rule.clone(),
}
}
}
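/// Replace each `Rule::Repeat` in the grammar with a new auxiliary variable
/// named `<rule>_repeat<n>`, defined as a choice between the repeated content
/// and a pair of recursive references to itself. Identical repeated content is
/// expanded only once and the resulting symbol is reused.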
pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
let mut expander = Expander {
variable_name: String::new(),
repeat_count_in_variable: 0,
preceding_symbol_count: grammar.variables.len(),
auxiliary_variables: Vec::new(),
existing_repeats: HashMap::new(),
};
for mut variable in grammar.variables.iter_mut() {
expander.expand_variable(&mut variable);
}
grammar
.variables
.extend(expander.auxiliary_variables.into_iter());
grammar
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_repeat_expansion() {
// Repeats nested inside of sequences and choices are expanded.
let grammar = expand_repeats(build_grammar(vec![Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::choice(vec![
Rule::repeat(Rule::terminal(11)),
Rule::repeat(Rule::terminal(12)),
]),
Rule::terminal(13),
]),
)]));
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]),
Rule::terminal(13),
])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
Rule::terminal(11),
])
),
Variable::auxiliary(
"rule0_repeat2",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::terminal(12),
])
),
]
);
}
#[test]
fn test_repeat_deduplication() {
// Terminal 4 appears inside of a repeat in three different places.
let grammar = expand_repeats(build_grammar(vec![
Variable::named(
"rule0",
Rule::choice(vec![
Rule::seq(vec![Rule::terminal(1), Rule::repeat(Rule::terminal(4))]),
Rule::seq(vec![Rule::terminal(2), Rule::repeat(Rule::terminal(4))]),
]),
),
Variable::named(
"rule1",
Rule::seq(vec![Rule::terminal(3), Rule::repeat(Rule::terminal(4))]),
),
]));
// Only one auxiliary rule is created for repeating terminal 4.
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::choice(vec![
Rule::seq(vec![Rule::terminal(1), Rule::non_terminal(2)]),
Rule::seq(vec![Rule::terminal(2), Rule::non_terminal(2)]),
])
),
Variable::named(
"rule1",
Rule::seq(vec![Rule::terminal(3), Rule::non_terminal(2),])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::terminal(4),
])
)
]
);
}
#[test]
fn test_expansion_of_nested_repeats() {
let grammar = expand_repeats(build_grammar(vec![Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::repeat(Rule::seq(vec![
Rule::terminal(11),
Rule::repeat(Rule::terminal(12)),
])),
]),
)]));
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::seq(vec![Rule::terminal(10), Rule::non_terminal(2),])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
Rule::terminal(12),
])
),
Variable::auxiliary(
"rule0_repeat2",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::seq(vec![Rule::terminal(11), Rule::non_terminal(1),]),
])
),
]
);
}
fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
ExtractedSyntaxGrammar {
variables,
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
}
}
}

@@ -0,0 +1,611 @@
use super::ExtractedLexicalGrammar;
use crate::error::{Error, Result};
use crate::grammars::{LexicalGrammar, LexicalVariable};
use crate::nfa::{CharacterSet, Nfa, NfaState};
use crate::rules::Rule;
use regex_syntax::ast::{
parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange,
};
use std::i32;
struct NfaBuilder {
nfa: Nfa,
is_sep: bool,
precedence_stack: Vec<i32>,
}
fn get_implicit_precedence(rule: &Rule) -> i32 {
match rule {
Rule::String(_) => 1,
Rule::Metadata { rule, params } => {
if params.is_main_token {
get_implicit_precedence(rule) + 2
} else {
get_implicit_precedence(rule)
}
}
_ => 0,
}
}
fn get_completion_precedence(rule: &Rule) -> i32 {
match rule {
Rule::Metadata { params, .. } => params.precedence.unwrap_or(0),
_ => 0,
}
}
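/// Compile each lexical variable's rule into NFA states. States are built in
/// reverse: an Accept state is pushed first and the rule is expanded from its
/// end toward its beginning, so each variable's `start_state` is the last state
/// pushed. For non-immediate tokens, the separator rule is expanded in front of
/// the token so that separators can be skipped before it.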
pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
let mut builder = NfaBuilder {
nfa: Nfa::new(),
is_sep: true,
precedence_stack: vec![0],
};
let separator_rule = if grammar.separators.len() > 0 {
grammar.separators.push(Rule::Blank);
Rule::repeat(Rule::choice(grammar.separators))
} else {
Rule::Blank
};
let mut variables = Vec::new();
for (i, variable) in grammar.variables.into_iter().enumerate() {
let is_immediate_token = match &variable.rule {
Rule::Metadata { params, .. } => params.is_main_token,
_ => false,
};
builder.is_sep = false;
builder.nfa.states.push(NfaState::Accept {
variable_index: i,
precedence: get_completion_precedence(&variable.rule),
});
let last_state_id = builder.nfa.last_state_id();
builder
.expand_rule(&variable.rule, last_state_id)
.map_err(|Error(msg)| Error(format!("Rule {} {}", variable.name, msg)))?;
if !is_immediate_token {
builder.is_sep = true;
let last_state_id = builder.nfa.last_state_id();
builder.expand_rule(&separator_rule, last_state_id)?;
}
variables.push(LexicalVariable {
name: variable.name,
kind: variable.kind,
implicit_precedence: get_implicit_precedence(&variable.rule),
start_state: builder.nfa.last_state_id(),
});
}
Ok(LexicalGrammar {
nfa: builder.nfa,
variables,
})
}
impl NfaBuilder {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
match rule {
Rule::Pattern(s) => {
let ast = parse::Parser::new()
.parse(&s)
.map_err(|e| Error(e.to_string()))?;
self.expand_regex(&ast, next_state_id)
}
Rule::String(s) => {
for c in s.chars().rev() {
self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
next_state_id = self.nfa.last_state_id();
}
                Ok(!s.is_empty())
}
Rule::Choice(elements) => {
let mut alternative_state_ids = Vec::new();
for element in elements {
if self.expand_rule(element, next_state_id)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
}
}
alternative_state_ids.sort_unstable();
alternative_state_ids.dedup();
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
for alternative_state_id in alternative_state_ids {
self.push_split(alternative_state_id);
}
Ok(true)
}
Rule::Seq(elements) => {
let mut result = false;
for element in elements.into_iter().rev() {
if self.expand_rule(element, next_state_id)? {
result = true;
}
next_state_id = self.nfa.last_state_id();
}
Ok(result)
}
Rule::Repeat(rule) => {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_rule(rule, split_state_id)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
                } else {
                    // Nothing was expanded, so remove the placeholder state.
                    self.nfa.states.pop();
                    Ok(false)
                }
}
Rule::Metadata { rule, params } => {
if let Some(precedence) = params.precedence {
self.precedence_stack.push(precedence);
}
let result = self.expand_rule(rule, next_state_id);
if params.precedence.is_some() {
self.precedence_stack.pop();
}
result
}
Rule::Blank => Ok(false),
_ => Err(Error::grammar(&format!("Unexpected rule {:?}", rule))),
}
}
fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result<bool> {
match ast {
Ast::Empty(_) => Ok(false),
Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
Ast::Literal(literal) => {
self.push_advance(CharacterSet::Include(vec![literal.c]), next_state_id);
Ok(true)
}
Ast::Dot(_) => {
self.push_advance(CharacterSet::Exclude(vec!['\n']), next_state_id);
Ok(true)
}
Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")),
Ast::Class(class) => match class {
Class::Unicode(_) => {
Err(Error::regex("Unicode character classes are not supported"))
}
Class::Perl(class) => {
let mut chars = self.expand_perl_character_class(&class.kind);
if class.negated {
chars = chars.negate();
}
self.push_advance(chars, next_state_id);
Ok(true)
}
Class::Bracketed(class) => match &class.kind {
ClassSet::Item(item) => {
let mut chars = self.expand_character_class(&item)?;
if class.negated {
chars = chars.negate();
}
self.push_advance(chars, next_state_id);
Ok(true)
}
ClassSet::BinaryOp(_) => Err(Error::regex(
"Binary operators in character classes aren't supported",
)),
},
},
Ast::Repetition(repetition) => match repetition.op.kind {
RepetitionKind::ZeroOrOne => {
self.expand_zero_or_one(&repetition.ast, next_state_id)
}
RepetitionKind::OneOrMore => {
self.expand_one_or_more(&repetition.ast, next_state_id)
}
RepetitionKind::ZeroOrMore => {
self.expand_zero_or_more(&repetition.ast, next_state_id)
}
RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
self.expand_count(&repetition.ast, count, next_state_id)
}
                RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
                    // `a{n,}` is built (in reverse) as `a{n}` chained into `a*`,
                    // so the required repetitions must advance to the loop's
                    // entry state, not directly to `next_state_id`.
                    if self.expand_zero_or_more(&repetition.ast, next_state_id)? {
                        self.expand_count(&repetition.ast, min, self.nfa.last_state_id())
                    } else {
                        Ok(false)
                    }
                }
RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
let mut result = self.expand_count(&repetition.ast, min, next_state_id)?;
for _ in min..max {
if result {
next_state_id = self.nfa.last_state_id();
}
if self.expand_zero_or_one(&repetition.ast, next_state_id)? {
result = true;
}
}
Ok(result)
}
},
            Ast::Group(group) => self.expand_regex(&group.ast, next_state_id),
Ast::Alternation(alternation) => {
let mut alternative_state_ids = Vec::new();
for ast in alternation.asts.iter() {
if self.expand_regex(&ast, next_state_id)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
}
}
alternative_state_ids.sort_unstable();
alternative_state_ids.dedup();
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
for alternative_state_id in alternative_state_ids {
self.push_split(alternative_state_id);
}
Ok(true)
}
Ast::Concat(concat) => {
let mut result = false;
for ast in concat.asts.iter().rev() {
if self.expand_regex(&ast, next_state_id)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
}
Ok(result)
}
}
}
fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_regex(&ast, split_state_id)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
} else {
self.nfa.states.pop();
Ok(false)
}
}
fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_regex(ast, next_state_id)? {
self.push_split(next_state_id);
Ok(true)
} else {
Ok(false)
}
}
fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id)? {
self.push_split(next_state_id);
Ok(true)
} else {
Ok(false)
}
}
fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result<bool> {
let mut result = false;
for _ in 0..count {
if self.expand_regex(ast, next_state_id)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
}
Ok(result)
}
fn expand_character_class(&self, item: &ClassSetItem) -> Result<CharacterSet> {
match item {
ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
ClassSetItem::Range(range) => {
Ok(CharacterSet::empty().add_range(range.start.c, range.end.c))
}
ClassSetItem::Union(union) => {
let mut result = CharacterSet::empty();
for item in &union.items {
result = result.add(&self.expand_character_class(&item)?);
}
Ok(result)
}
ClassSetItem::Perl(class) => Ok(self.expand_perl_character_class(&class.kind)),
_ => Err(Error::regex(&format!(
"Unsupported character class syntax {:?}",
item
))),
}
}
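    // Perl-style classes (`\d`, `\s`, `\w`) are approximated by their ASCII ranges.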
fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet {
match item {
ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'),
ClassPerlKind::Space => CharacterSet::empty()
.add_char(' ')
.add_char('\t')
.add_char('\r')
.add_char('\n'),
ClassPerlKind::Word => CharacterSet::empty()
.add_char('_')
.add_range('A', 'Z')
.add_range('a', 'z')
.add_range('0', '9'),
}
}
fn push_advance(&mut self, chars: CharacterSet, state_id: u32) {
let precedence = *self.precedence_stack.last().unwrap();
self.nfa.states.push(NfaState::Advance {
chars,
state_id,
precedence,
is_sep: self.is_sep,
});
}
fn push_split(&mut self, state_id: u32) {
let last_state_id = self.nfa.last_state_id();
self.nfa
.states
.push(NfaState::Split(state_id, last_state_id));
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::Variable;
use crate::nfa::{NfaCursor, NfaTransition};
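    // Walks the NFA over `s` and returns the index of the longest,
    // highest-precedence matching variable along with the matched slice.
    // Byte offsets and character counts are used interchangeably here,
    // which is safe because all of the test inputs are ASCII.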
fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
let start_states = grammar.variables.iter().map(|v| v.start_state).collect();
let mut cursor = NfaCursor::new(&grammar.nfa, start_states);
let mut result = None;
let mut result_precedence = i32::MIN;
let mut start_char = 0;
let mut end_char = 0;
for c in s.chars() {
for (id, precedence) in cursor.completions() {
if result.is_none() || result_precedence <= precedence {
result = Some((id, &s[start_char..end_char]));
result_precedence = precedence;
}
}
if let Some(NfaTransition {
states,
is_separator,
..
}) = cursor
.transitions()
.into_iter()
.find(|t| t.characters.contains(c) && t.precedence >= result_precedence)
{
cursor.reset(states);
end_char += 1;
if is_separator {
start_char = end_char;
}
} else {
break;
}
}
for (id, precedence) in cursor.completions() {
if result.is_none() || result_precedence <= precedence {
result = Some((id, &s[start_char..end_char]));
result_precedence = precedence;
}
}
result
}
#[test]
fn test_rule_expansion() {
struct Row {
rules: Vec<Rule>,
separators: Vec<Rule>,
examples: Vec<(&'static str, Option<(usize, &'static str)>)>,
}
let table = [
// regex with sequences and alternatives
Row {
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
separators: vec![],
examples: vec![
("ade1", Some((0, "ade"))),
("bdf1", Some((0, "bdf"))),
("bdfh1", Some((0, "bdfh"))),
("ad1", None),
],
},
// regex with repeats
Row {
rules: vec![Rule::pattern("a*")],
separators: vec![],
examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
},
// regex with repeats in sequences
Row {
rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
separators: vec![],
examples: vec![
("af1", Some((0, "af"))),
("adedef1", Some((0, "adedef"))),
("abcbcbcf1", Some((0, "abcbcbcf"))),
("a", None),
],
},
// regex with character ranges
Row {
rules: vec![Rule::pattern("[a-fA-F0-9]+")],
separators: vec![],
examples: vec![("A1ff0.", Some((0, "A1ff0")))],
},
// regex with perl character classes
Row {
rules: vec![Rule::pattern("\\w\\d\\s")],
separators: vec![],
examples: vec![("_0 ", Some((0, "_0 ")))],
},
// string
Row {
rules: vec![Rule::string("abc")],
separators: vec![],
examples: vec![("abcd", Some((0, "abc"))), ("ab", None)],
},
// complex rule containing strings and regexes
Row {
rules: vec![Rule::repeat(Rule::seq(vec![
Rule::string("{"),
Rule::pattern("[a-f]+"),
Rule::string("}"),
]))],
separators: vec![],
examples: vec![
("{a}{", Some((0, "{a}"))),
("{a}{d", Some((0, "{a}"))),
("ab", None),
],
},
// longest match rule
Row {
rules: vec![
Rule::pattern("a|bc"),
Rule::pattern("aa"),
Rule::pattern("bcd"),
],
separators: vec![],
examples: vec![
("a.", Some((0, "a"))),
("bc.", Some((0, "bc"))),
("aa.", Some((1, "aa"))),
("bcd?", Some((2, "bcd"))),
("b.", None),
("c.", None),
],
},
// regex with an alternative including the empty string
Row {
rules: vec![Rule::pattern("a(b|)+c")],
separators: vec![],
examples: vec![
("ac.", Some((0, "ac"))),
("abc.", Some((0, "abc"))),
("abbc.", Some((0, "abbc"))),
],
},
// separators
Row {
rules: vec![Rule::pattern("[a-f]+")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
examples: vec![
(" a", Some((0, "a"))),
(" \nb", Some((0, "b"))),
(" \\a", None),
(" \\\na", Some((0, "a"))),
],
},
// shorter tokens with higher precedence
Row {
rules: vec![
Rule::prec(2, Rule::pattern("abc")),
Rule::prec(1, Rule::pattern("ab[cd]e")),
Rule::pattern("[a-e]+"),
],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
examples: vec![
("abceef", Some((0, "abc"))),
("abdeef", Some((1, "abde"))),
("aeeeef", Some((2, "aeeee"))),
],
},
// immediate tokens with higher precedence
Row {
rules: vec![
Rule::prec(1, Rule::pattern("[^a]+")),
Rule::immediate_token(Rule::prec(2, Rule::pattern("[^ab]+"))),
],
separators: vec![Rule::pattern("\\s")],
examples: vec![("cccb", Some((1, "ccc")))],
},
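            // sequences containing choices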
Row {
rules: vec![Rule::seq(vec![
Rule::string("a"),
Rule::choice(vec![Rule::string("b"), Rule::string("c")]),
Rule::string("d"),
])],
separators: vec![],
examples: vec![
("abd", Some((0, "abd"))),
("acd", Some((0, "acd"))),
("abc", None),
("ad", None),
("d", None),
("a", None),
],
},
// nested choices within sequences
Row {
rules: vec![Rule::seq(vec![
Rule::pattern("[0-9]+"),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::seq(vec![
Rule::choice(vec![Rule::string("e"), Rule::string("E")]),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
]),
Rule::pattern("[0-9]+"),
])]),
]),
])],
separators: vec![],
examples: vec![
("12", Some((0, "12"))),
("12e", Some((0, "12"))),
("12g", Some((0, "12"))),
("12e3", Some((0, "12e3"))),
("12e+", Some((0, "12"))),
("12E+34 +", Some((0, "12E+34"))),
("12e34", Some((0, "12e34"))),
],
},
];
for Row {
rules,
separators,
examples,
} in &table
{
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: separators.clone(),
variables: rules
.into_iter()
.map(|rule| Variable::named("", rule.clone()))
.collect(),
})
.unwrap();
for (haystack, needle) in examples.iter() {
assert_eq!(simulate_nfa(&grammar, haystack), *needle);
}
}
}
}

View file

@ -0,0 +1,199 @@
use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
#[derive(Clone, Default)]
struct SymbolStatus {
alias: Option<Alias>,
conflicting: bool,
}
pub(super) fn extract_simple_aliases(
syntax_grammar: &mut SyntaxGrammar,
lexical_grammar: &LexicalGrammar
) -> AliasMap {
// Determine which symbols in the grammars are *always* aliased to a single name.
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
let mut non_terminal_status_list = vec![SymbolStatus::default(); syntax_grammar.variables.len()];
let mut external_status_list = vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
for variable in syntax_grammar.variables.iter() {
for production in variable.productions.iter() {
for step in production.steps.iter() {
                let status = match step.symbol {
Symbol { kind: SymbolType::External, index} => &mut external_status_list[index],
Symbol { kind: SymbolType::NonTerminal, index} => &mut non_terminal_status_list[index],
Symbol { kind: SymbolType::Terminal, index} => &mut terminal_status_list[index],
Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"),
};
if step.alias.is_none() {
status.alias = None;
status.conflicting = true;
}
if !status.conflicting {
if status.alias.is_none() {
status.alias = step.alias.clone();
} else if status.alias != step.alias {
status.alias = None;
status.conflicting = true;
}
}
}
}
}
// Remove the aliases for those symbols.
for variable in syntax_grammar.variables.iter_mut() {
for production in variable.productions.iter_mut() {
for step in production.steps.iter_mut() {
let status = match step.symbol {
Symbol { kind: SymbolType::External, index} => &external_status_list[index],
Symbol { kind: SymbolType::NonTerminal, index} => &non_terminal_status_list[index],
Symbol { kind: SymbolType::Terminal, index} => &terminal_status_list[index],
Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"),
};
if status.alias.is_some() {
step.alias = None;
}
}
}
}
// Populate a map of the symbols to their aliases.
let mut result = AliasMap::new();
for (i, status) in terminal_status_list.into_iter().enumerate() {
if let Some(alias) = status.alias {
result.insert(Symbol::terminal(i), alias);
}
}
for (i, status) in non_terminal_status_list.into_iter().enumerate() {
if let Some(alias) = status.alias {
result.insert(Symbol::non_terminal(i), alias);
}
}
for (i, status) in external_status_list.into_iter().enumerate() {
if let Some(alias) = status.alias {
result.insert(Symbol::external(i), alias);
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep};
use crate::nfa::Nfa;
#[test]
fn test_extract_simple_aliases() {
let mut syntax_grammar = SyntaxGrammar {
variables: vec![
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
],
},
],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
// Token 0 is always aliased as "a1".
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
// Token 1 is aliased above, but not here.
ProductionStep::new(Symbol::terminal(1)),
// Token 2 is aliased differently than above.
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
],
},
],
},
],
extra_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
};
let lexical_grammar = LexicalGrammar {
nfa: Nfa::new(),
variables: vec![
LexicalVariable {
name: "t1".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
LexicalVariable {
name: "t2".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
LexicalVariable {
name: "t3".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
}
],
};
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
assert_eq!(simple_aliases.len(), 1);
assert_eq!(simple_aliases[&Symbol::terminal(0)], Alias {
value: "a1".to_string(),
is_named: true,
});
assert_eq!(syntax_grammar.variables, vec![
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
// 'Simple' alias removed
ProductionStep::new(Symbol::terminal(0)),
// Other aliases unchanged
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
],
},
],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(1)),
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
],
},
],
},
]);
}
}

View file

@ -0,0 +1,525 @@
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::error::{Error, Result};
use crate::grammars::{ExternalToken, Variable, VariableType};
use crate::rules::{MetadataParams, Rule, Symbol, SymbolType};
use hashbrown::HashMap;
use std::mem;
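// Move all terminal rules (strings, patterns, and rules wrapped in `token`)
// out of the syntax grammar and into a separate lexical grammar.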
pub(super) fn extract_tokens(
mut grammar: InternedGrammar,
) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
let mut extractor = TokenExtractor {
current_variable_name: String::new(),
current_variable_token_count: 0,
extracted_variables: Vec::new(),
extracted_usage_counts: Vec::new(),
};
    for variable in grammar.variables.iter_mut() {
        extractor.extract_tokens_in_variable(variable);
    }
    for variable in grammar.external_tokens.iter_mut() {
        extractor.extract_tokens_in_variable(variable);
    }
let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len());
for variable in extractor.extracted_variables {
lexical_variables.push(Variable {
name: variable.name,
kind: variable.kind,
rule: variable.rule,
});
}
// If a variable's entire rule was extracted as a token and that token didn't
// appear within any other rule, then remove that variable from the syntax
// grammar, giving its name to the token in the lexical grammar. Any symbols
// that pointed to that variable will need to be updated to point to the
// variable in the lexical grammar. Symbols that pointed to later variables
// will need to have their indices decremented.
let mut variables = Vec::new();
let mut symbol_replacer = SymbolReplacer {
replacements: HashMap::new(),
};
for (i, variable) in grammar.variables.into_iter().enumerate() {
if let Rule::Symbol(Symbol {
kind: SymbolType::Terminal,
index,
}) = variable.rule
{
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
                let lexical_variable = &mut lexical_variables[index];
lexical_variable.kind = variable.kind;
lexical_variable.name = variable.name;
symbol_replacer.replacements.insert(i, index);
continue;
}
}
variables.push(variable);
}
for variable in variables.iter_mut() {
variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule);
}
let expected_conflicts = grammar
.expected_conflicts
.into_iter()
.map(|conflict| {
let mut result: Vec<_> = conflict
.iter()
.map(|symbol| symbol_replacer.replace_symbol(*symbol))
.collect();
result.sort_unstable();
result.dedup();
result
})
.collect();
let variables_to_inline = grammar
.variables_to_inline
.into_iter()
.map(|symbol| symbol_replacer.replace_symbol(symbol))
.collect();
let mut separators = Vec::new();
let mut extra_tokens = Vec::new();
for rule in grammar.extra_tokens {
if let Rule::Symbol(symbol) = rule {
let new_symbol = symbol_replacer.replace_symbol(symbol);
if new_symbol.is_non_terminal() {
return Err(Error(format!(
"Non-token symbol '{}' cannot be used as an extra token",
&variables[new_symbol.index].name
)));
} else {
extra_tokens.push(new_symbol);
}
} else {
if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
extra_tokens.push(Symbol::terminal(index));
} else {
separators.push(rule);
}
}
}
let mut external_tokens = Vec::new();
for external_token in grammar.external_tokens {
let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
if let Rule::Symbol(symbol) = rule {
if symbol.is_non_terminal() {
return Err(Error(format!(
"Rule '{}' cannot be used as both an external token and a non-terminal rule",
&variables[symbol.index].name,
)));
}
if symbol.is_external() {
external_tokens.push(ExternalToken {
name: external_token.name,
kind: external_token.kind,
corresponding_internal_token: None,
})
} else {
external_tokens.push(ExternalToken {
name: lexical_variables[symbol.index].name.clone(),
kind: external_token.kind,
corresponding_internal_token: Some(symbol),
})
}
        } else {
            return Err(Error(
                "Non-symbol rules cannot be used as external tokens".to_string(),
            ));
        }
}
let mut word_token = None;
if let Some(token) = grammar.word_token {
let token = symbol_replacer.replace_symbol(token);
if token.is_non_terminal() {
return Err(Error(format!(
"Non-terminal symbol '{}' cannot be used as the word token",
&variables[token.index].name
)));
}
word_token = Some(token);
}
Ok((
ExtractedSyntaxGrammar {
variables,
expected_conflicts,
extra_tokens,
variables_to_inline,
external_tokens,
word_token,
},
ExtractedLexicalGrammar {
variables: lexical_variables,
separators,
},
))
}
struct TokenExtractor {
current_variable_name: String,
current_variable_token_count: usize,
extracted_variables: Vec<Variable>,
extracted_usage_counts: Vec<usize>,
}
struct SymbolReplacer {
replacements: HashMap<usize, usize>,
}
impl TokenExtractor {
fn extract_tokens_in_variable(&mut self, variable: &mut Variable) {
self.current_variable_name.clear();
self.current_variable_name.push_str(&variable.name);
self.current_variable_token_count = 0;
let mut rule = Rule::Blank;
mem::swap(&mut rule, &mut variable.rule);
variable.rule = self.extract_tokens_in_rule(&rule);
}
fn extract_tokens_in_rule(&mut self, input: &Rule) -> Rule {
match input {
Rule::String(name) => self.extract_token(input, Some(name)).into(),
Rule::Pattern(..) => self.extract_token(input, None).into(),
Rule::Metadata { params, rule } => {
if params.is_token {
let mut params = params.clone();
params.is_token = false;
let mut string_value = None;
if let Rule::String(value) = rule.as_ref() {
string_value = Some(value);
}
let rule_to_extract = if params == MetadataParams::default() {
rule.as_ref()
} else {
input
};
self.extract_token(rule_to_extract, string_value).into()
} else {
Rule::Metadata {
params: params.clone(),
                    rule: Box::new(self.extract_tokens_in_rule(rule)),
}
}
}
Rule::Repeat(content) => Rule::Repeat(Box::new(self.extract_tokens_in_rule(content))),
Rule::Seq(elements) => Rule::Seq(
elements
.iter()
.map(|e| self.extract_tokens_in_rule(e))
.collect(),
),
Rule::Choice(elements) => Rule::Choice(
elements
.iter()
.map(|e| self.extract_tokens_in_rule(e))
.collect(),
),
_ => input.clone(),
}
}
fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Symbol {
for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
if variable.rule == *rule {
self.extracted_usage_counts[i] += 1;
return Symbol::terminal(i);
}
}
let index = self.extracted_variables.len();
let variable = if let Some(string_value) = string_value {
Variable {
name: string_value.clone(),
kind: VariableType::Anonymous,
rule: rule.clone()
}
} else {
self.current_variable_token_count += 1;
Variable {
name: format!(
"{}_token{}",
&self.current_variable_name, self.current_variable_token_count
),
kind: VariableType::Auxiliary,
rule: rule.clone(),
}
};
self.extracted_variables.push(variable);
self.extracted_usage_counts.push(1);
Symbol::terminal(index)
}
}
impl SymbolReplacer {
fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
match rule {
Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
Rule::Choice(elements) => Rule::Choice(
elements
.iter()
.map(|e| self.replace_symbols_in_rule(e))
.collect(),
),
Rule::Seq(elements) => Rule::Seq(
elements
.iter()
.map(|e| self.replace_symbols_in_rule(e))
.collect(),
),
Rule::Repeat(content) => Rule::Repeat(Box::new(self.replace_symbols_in_rule(content))),
Rule::Metadata { rule, params } => Rule::Metadata {
params: params.clone(),
rule: Box::new(self.replace_symbols_in_rule(rule)),
},
_ => rule.clone(),
}
}
fn replace_symbol(&self, symbol: Symbol) -> Symbol {
if !symbol.is_non_terminal() {
return symbol;
}
if let Some(replacement) = self.replacements.get(&symbol.index) {
return Symbol::terminal(*replacement);
}
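        // This variable was *not* moved to the lexical grammar, but variables
        // before it may have been, so shift its index down accordingly.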
let mut adjusted_index = symbol.index;
for (replaced_index, _) in self.replacements.iter() {
if *replaced_index < symbol.index {
adjusted_index -= 1;
}
}
        Symbol::non_terminal(adjusted_index)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::VariableType;
#[test]
fn test_extraction() {
let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
Variable::named(
"rule_0",
Rule::repeat(Rule::seq(vec![
Rule::string("a"),
Rule::pattern("b"),
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
Rule::token(Rule::repeat(Rule::choice(vec![
Rule::string("c"),
Rule::string("d"),
]))),
]),
])),
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_2", Rule::pattern("b")),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
),
]))
.unwrap();
assert_eq!(
syntax_grammar.variables,
vec![
Variable::named(
"rule_0",
Rule::repeat(Rule::seq(vec![
// The string "a" was replaced by a symbol referencing the lexical grammar
Rule::terminal(0),
// The pattern "b" was replaced by a symbol referencing the lexical grammar
Rule::terminal(1),
Rule::choice(vec![
// The symbol referencing `rule_1` was replaced by a symbol referencing
// the lexical grammar.
Rule::terminal(3),
// The symbol referencing `rule_2` had its index decremented because
// `rule_1` was moved to the lexical grammar.
Rule::non_terminal(1),
// The rule wrapped in `token` was replaced by a symbol referencing
// the lexical grammar.
Rule::terminal(2),
])
]))
),
// The pattern "e" was only used in once place: as the definition of `rule_1`,
// so that rule was moved to the lexical grammar. The pattern "b" appeared in
// two places, so it was not moved into the lexical grammar.
Variable::named("rule_2", Rule::terminal(1)),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(1), Rule::Blank,])
),
]
);
assert_eq!(
lexical_grammar.variables,
vec![
Variable::anonymous("a", Rule::string("a")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
Variable::auxiliary(
"rule_0_token2",
Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
),
Variable::named("rule_1", Rule::pattern("e")),
]
);
}
#[test]
fn test_start_rule_is_token() {
let (syntax_grammar, lexical_grammar) =
extract_tokens(build_grammar(vec![Variable::named(
"rule_0",
Rule::string("hello"),
)]))
.unwrap();
assert_eq!(
syntax_grammar.variables,
vec![Variable::named("rule_0", Rule::terminal(0)),]
);
assert_eq!(
lexical_grammar.variables,
vec![Variable::anonymous("hello", Rule::string("hello")),]
)
}
#[test]
fn test_extracting_extra_tokens() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*")),
]);
grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)];
let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]);
assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
}
#[test]
fn test_extract_externals() {
let mut grammar = build_grammar(vec![
Variable::named(
"rule_0",
Rule::seq(vec![
Rule::external(0),
Rule::string("a"),
Rule::non_terminal(1),
Rule::non_terminal(2),
]),
),
Variable::named("rule_1", Rule::string("b")),
Variable::named("rule_2", Rule::string("c")),
]);
grammar.external_tokens = vec![
Variable::named("external_0", Rule::external(0)),
Variable::anonymous("a", Rule::string("a")),
Variable::named("rule_2", Rule::non_terminal(2)),
];
let (syntax_grammar, _) = extract_tokens(grammar).unwrap();
assert_eq!(
syntax_grammar.external_tokens,
vec![
ExternalToken {
name: "external_0".to_string(),
kind: VariableType::Named,
corresponding_internal_token: None,
},
ExternalToken {
name: "a".to_string(),
kind: VariableType::Anonymous,
corresponding_internal_token: Some(Symbol::terminal(0)),
},
ExternalToken {
name: "rule_2".to_string(),
kind: VariableType::Named,
corresponding_internal_token: Some(Symbol::terminal(2)),
},
]
);
}
#[test]
fn test_error_on_non_terminal_symbol_extras() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::non_terminal(1)),
Variable::named("rule_1", Rule::non_terminal(2)),
Variable::named("rule_2", Rule::string("x")),
]);
grammar.extra_tokens = vec![Rule::non_terminal(1)];
match extract_tokens(grammar) {
Err(Error(s)) => {
assert_eq!(
s,
"Non-token symbol 'rule_1' cannot be used as an extra token"
);
}
_ => {
panic!("Expected an error but got no error");
}
}
}
#[test]
fn test_error_on_external_with_same_name_as_non_terminal() {
let mut grammar = build_grammar(vec![
Variable::named(
"rule_0",
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
),
Variable::named(
"rule_1",
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2)]),
),
Variable::named("rule_2", Rule::string("a")),
]);
grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))];
match extract_tokens(grammar) {
Err(Error(s)) => {
assert_eq!(s, "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
}
_ => {
panic!("Expected an error but got no error");
}
}
}
fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
InternedGrammar {
variables,
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
}
}
}

View file

@ -0,0 +1,313 @@
use super::ExtractedSyntaxGrammar;
use crate::error::Result;
use crate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable};
use crate::rules::{Alias, Associativity, Rule};
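// Flattens a single choice-free rule tree into one `Production`, using stacks
// to track which precedence, associativity, and alias metadata apply to each
// symbol as it is visited.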
struct RuleFlattener {
production: Production,
precedence_stack: Vec<i32>,
associativity_stack: Vec<Associativity>,
alias_stack: Vec<Alias>,
}
impl RuleFlattener {
fn new() -> Self {
Self {
production: Production {
steps: Vec::new(),
dynamic_precedence: 0,
},
precedence_stack: Vec::new(),
associativity_stack: Vec::new(),
alias_stack: Vec::new(),
}
}
fn flatten(mut self, rule: Rule) -> Production {
self.apply(rule, true);
self.production
}
fn apply(&mut self, rule: Rule, at_end: bool) {
match rule {
Rule::Seq(members) => {
let last_index = members.len() - 1;
for (i, member) in members.into_iter().enumerate() {
self.apply(member, i == last_index && at_end);
}
}
Rule::Metadata { rule, params } => {
let mut has_precedence = false;
if let Some(precedence) = params.precedence {
has_precedence = true;
self.precedence_stack.push(precedence);
}
let mut has_associativity = false;
if let Some(associativity) = params.associativity {
has_associativity = true;
self.associativity_stack.push(associativity);
}
let mut has_alias = false;
if let Some(alias) = params.alias {
has_alias = true;
self.alias_stack.push(alias);
}
if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
self.production.dynamic_precedence = params.dynamic_precedence;
}
self.apply(*rule, at_end);
if has_precedence {
self.precedence_stack.pop();
if !at_end {
self.production.steps.last_mut().unwrap().precedence =
self.precedence_stack.last().cloned().unwrap_or(0);
}
}
if has_associativity {
self.associativity_stack.pop();
if !at_end {
self.production.steps.last_mut().unwrap().associativity =
self.associativity_stack.last().cloned();
}
}
if has_alias {
self.alias_stack.pop();
}
}
Rule::Symbol(symbol) => {
self.production.steps.push(ProductionStep {
symbol,
precedence: self.precedence_stack.last().cloned().unwrap_or(0),
associativity: self.associativity_stack.last().cloned(),
alias: self.alias_stack.last().cloned(),
});
}
_ => (),
}
}
}
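// Distribute choices over sequences so that every returned rule is choice-free.
// For example, `seq(a, choice(b, c))` yields `seq(a, b)` and `seq(a, c)`.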
fn extract_choices(rule: Rule) -> Vec<Rule> {
match rule {
Rule::Seq(elements) => {
let mut result = vec![Rule::Blank];
for element in elements {
let extraction = extract_choices(element);
let mut next_result = Vec::new();
for entry in result {
for extraction_entry in extraction.iter() {
next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()]));
}
}
result = next_result;
}
result
}
Rule::Choice(elements) => {
let mut result = Vec::new();
for element in elements {
for rule in extract_choices(element) {
result.push(rule);
}
}
result
}
Rule::Metadata { rule, params } => extract_choices(*rule)
.into_iter()
.map(|rule| Rule::Metadata {
rule: Box::new(rule),
params: params.clone(),
})
.collect(),
_ => vec![rule],
}
}
fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
let mut productions = Vec::new();
for rule in extract_choices(variable.rule) {
let production = RuleFlattener::new().flatten(rule);
if !productions.contains(&production) {
productions.push(production);
}
}
Ok(SyntaxVariable {
name: variable.name,
kind: variable.kind,
productions,
})
}
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
let mut variables = Vec::new();
for variable in grammar.variables {
variables.push(flatten_variable(variable)?);
}
Ok(SyntaxGrammar {
extra_tokens: grammar.extra_tokens,
expected_conflicts: grammar.expected_conflicts,
variables_to_inline: grammar.variables_to_inline,
external_tokens: grammar.external_tokens,
word_token: grammar.word_token,
variables,
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::VariableType;
use crate::rules::Symbol;
#[test]
fn test_flatten_grammar() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_left(
101,
Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_right(
102,
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
]),
),
Rule::non_terminal(7),
]),
})
.unwrap();
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(3))
.with_prec(102, Some(Associativity::Right)),
ProductionStep::new(Symbol::non_terminal(4))
.with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
]
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(5))
.with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
]
},
]
);
}
#[test]
fn test_flatten_grammar_with_maximum_dynamic_precedence() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_dynamic(101, Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_dynamic(102, Rule::seq(vec![
Rule::non_terminal(3),
Rule::non_terminal(4)
])),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
])),
Rule::non_terminal(7),
])
}).unwrap();
assert_eq!(result.productions, vec![
Production {
dynamic_precedence: 102,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(3)),
ProductionStep::new(Symbol::non_terminal(4)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
Production {
dynamic_precedence: 101,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(5)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
]);
}
#[test]
fn test_flatten_grammar_with_final_precedence() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(101, Rule::seq(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
])),
}).unwrap();
assert_eq!(result.productions, vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(2)).with_prec(101, Some(Associativity::Left)),
]
}
]);
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(101, Rule::seq(vec![
Rule::non_terminal(1),
])),
}).unwrap();
assert_eq!(result.productions, vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)),
]
}
]);
}
}

View file

@ -0,0 +1,238 @@
use super::InternedGrammar;
use crate::error::{Error, Result};
use crate::grammars::{InputGrammar, Variable, VariableType};
use crate::rules::{Rule, Symbol};
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
let interner = Interner { grammar };
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
return Err(Error(
"Grammar's start rule must be visible".to_string(),
));
}
let mut variables = Vec::with_capacity(grammar.variables.len());
for variable in grammar.variables.iter() {
variables.push(Variable {
name: variable.name.clone(),
kind: variable_type_for_name(&variable.name),
rule: interner.intern_rule(&variable.rule)?,
});
}
let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
for external_token in grammar.external_tokens.iter() {
let rule = interner.intern_rule(&external_token)?;
let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
(name.clone(), variable_type_for_name(&name))
} else {
(String::new(), VariableType::Anonymous)
};
external_tokens.push(Variable { name, kind, rule });
}
let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len());
for extra_token in grammar.extra_tokens.iter() {
extra_tokens.push(interner.intern_rule(extra_token)?);
}
let mut expected_conflicts = Vec::new();
for conflict in grammar.expected_conflicts.iter() {
let mut interned_conflict = Vec::with_capacity(conflict.len());
for name in conflict {
interned_conflict.push(
interner
.intern_name(&name)
.ok_or_else(|| Error::undefined_symbol(name))?,
);
}
expected_conflicts.push(interned_conflict);
}
let mut variables_to_inline = Vec::new();
for name in grammar.variables_to_inline.iter() {
if let Some(symbol) = interner.intern_name(&name) {
variables_to_inline.push(symbol);
}
}
let mut word_token = None;
if let Some(name) = grammar.word_token.as_ref() {
word_token = Some(
interner
.intern_name(&name)
.ok_or_else(|| Error::undefined_symbol(&name))?,
);
}
Ok(InternedGrammar {
variables,
external_tokens,
extra_tokens,
expected_conflicts,
variables_to_inline,
word_token,
})
}
struct Interner<'a> {
grammar: &'a InputGrammar,
}
impl<'a> Interner<'a> {
fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
match rule {
Rule::Choice(elements) => {
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element)?);
}
Ok(Rule::Choice(result))
}
Rule::Seq(elements) => {
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element)?);
}
Ok(Rule::Seq(result))
}
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))),
Rule::Metadata { rule, params } => Ok(Rule::Metadata {
rule: Box::new(self.intern_rule(rule)?),
params: params.clone(),
}),
Rule::NamedSymbol(name) => {
if let Some(symbol) = self.intern_name(&name) {
Ok(Rule::Symbol(symbol))
} else {
Err(Error::undefined_symbol(name))
}
}
_ => Ok(rule.clone()),
}
}
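    // Non-terminal variables take precedence over external tokens that share
    // the same name; such an external token resolves to the non-terminal's index.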
fn intern_name(&self, symbol: &str) -> Option<Symbol> {
for (i, variable) in self.grammar.variables.iter().enumerate() {
if variable.name == symbol {
return Some(Symbol::non_terminal(i));
}
}
for (i, external_token) in self.grammar.external_tokens.iter().enumerate() {
if let Rule::NamedSymbol(name) = external_token {
if name == symbol {
return Some(Symbol::external(i));
}
}
}
        None
}
}
fn variable_type_for_name(name: &str) -> VariableType {
if name.starts_with("_") {
VariableType::Hidden
} else {
VariableType::Named
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
    fn test_symbol_interning() {
let grammar = intern_symbols(&build_grammar(vec![
Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])),
Variable::named("y", Rule::named("_z")),
Variable::named("_z", Rule::string("a")),
]))
.unwrap();
assert_eq!(
grammar.variables,
vec![
Variable::named(
"x",
Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),])
),
Variable::named("y", Rule::non_terminal(2)),
Variable::hidden("_z", Rule::string("a")),
]
);
}
#[test]
fn test_interning_external_token_names() {
// Variable `y` is both an internal and an external token.
// Variable `z` is just an external token.
let mut input_grammar = build_grammar(vec![
Variable::named(
"w",
Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]),
),
Variable::named("x", Rule::string("a")),
Variable::named("y", Rule::string("b")),
]);
input_grammar
.external_tokens
.extend(vec![Rule::named("y"), Rule::named("z")]);
let grammar = intern_symbols(&input_grammar).unwrap();
// Variable `y` is referred to by its internal index.
// Variable `z` is referred to by its external index.
assert_eq!(
grammar.variables,
vec![
Variable::named(
"w",
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
Rule::external(1),
])
),
Variable::named("x", Rule::string("a")),
Variable::named("y", Rule::string("b")),
]
);
// The external token for `y` refers back to its internal index.
assert_eq!(
grammar.external_tokens,
vec![
Variable::named("y", Rule::non_terminal(2)),
Variable::named("z", Rule::external(1)),
]
);
}
#[test]
fn test_grammar_with_undefined_symbols() {
let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))]));
match result {
Err(Error(message)) => assert_eq!(message, "Undefined symbol 'y'"),
_ => panic!("Expected an error but got none"),
}
}
fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
InputGrammar {
variables,
name: "the_language".to_string(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
}
}
}

View file

@ -0,0 +1,57 @@
mod expand_repeats;
mod expand_tokens;
mod extract_simple_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;
mod process_inlines;
use self::expand_repeats::expand_repeats;
pub(crate) use self::expand_tokens::expand_tokens;
use self::extract_simple_aliases::extract_simple_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
use self::process_inlines::process_inlines;
use crate::error::Result;
use crate::grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable,
};
use crate::rules::{AliasMap, Rule, Symbol};
pub(crate) struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,
extra_tokens: Vec<T>,
expected_conflicts: Vec<Vec<Symbol>>,
external_tokens: Vec<U>,
variables_to_inline: Vec<Symbol>,
word_token: Option<Symbol>,
}
pub(crate) type InternedGrammar = IntermediateGrammar<Rule, Variable>;
pub(crate) type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ExtractedLexicalGrammar {
pub variables: Vec<Variable>,
pub separators: Vec<Rule>,
}
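// The full grammar-preparation pipeline: intern symbol names, extract terminal
// rules into a lexical grammar, expand repeats into auxiliary rules, flatten
// choices into productions, compile token rules into an NFA, and finally
// compute the simple aliases and inlined productions.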
pub(crate) fn prepare_grammar(
input_grammar: &InputGrammar,
) -> Result<(
SyntaxGrammar,
LexicalGrammar,
InlinedProductionMap,
AliasMap,
)> {
let interned_grammar = intern_symbols(input_grammar)?;
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
let syntax_grammar = expand_repeats(syntax_grammar);
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
let lexical_grammar = expand_tokens(lexical_grammar)?;
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
let inlines = process_inlines(&syntax_grammar);
Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
}

View file

@ -0,0 +1,479 @@
use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {
// A `None` value here means that the production itself was produced via inlining,
    // and is stored in the builder's `productions` vector, as opposed to being
// stored in one of the grammar's variables.
variable_index: Option<usize>,
production_index: usize,
step_index: usize,
}
struct InlinedProductionMapBuilder {
production_indices_by_step_id: HashMap<ProductionStepId, Vec<usize>>,
productions: Vec<Production>,
}
impl InlinedProductionMapBuilder {
fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap {
let mut step_ids_to_process = Vec::new();
for (variable_index, variable) in grammar.variables.iter().enumerate() {
for production_index in 0..variable.productions.len() {
step_ids_to_process.push(ProductionStepId {
variable_index: Some(variable_index),
production_index,
step_index: 0,
});
while !step_ids_to_process.is_empty() {
let mut i = 0;
while i < step_ids_to_process.len() {
let step_id = step_ids_to_process[i];
if let Some(step) = self.production_step_for_id(step_id, grammar) {
if grammar.variables_to_inline.contains(&step.symbol) {
let inlined_step_ids = self
.inline_production_at_step(step_id, grammar)
.into_iter()
.cloned()
.map(|production_index| ProductionStepId {
variable_index: None,
production_index,
step_index: step_id.step_index,
});
step_ids_to_process.splice(i..i + 1, inlined_step_ids);
} else {
step_ids_to_process[i] = ProductionStepId {
variable_index: step_id.variable_index,
production_index: step_id.production_index,
step_index: step_id.step_index + 1,
};
i += 1;
}
} else {
step_ids_to_process.remove(i);
}
}
}
}
}
let productions = self.productions;
let production_indices_by_step_id = self.production_indices_by_step_id;
let production_map = production_indices_by_step_id
.into_iter()
.map(|(step_id, production_indices)| {
let production = if let Some(variable_index) = step_id.variable_index {
&grammar.variables[variable_index].productions[step_id.production_index]
} else {
&productions[step_id.production_index]
} as *const Production;
((production, step_id.step_index as u32), production_indices)
})
.collect();
InlinedProductionMap {
productions,
production_map,
}
}
fn inline_production_at_step<'a>(
&'a mut self,
step_id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Vec<usize> {
// Build a list of productions produced by inlining rules.
let mut i = 0;
let step_index = step_id.step_index;
let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()];
while i < productions_to_add.len() {
if let Some(step) = productions_to_add[i].steps.get(step_index) {
let symbol = step.symbol.clone();
if grammar.variables_to_inline.contains(&symbol) {
// Remove the production from the vector, replacing it with a placeholder.
let production = productions_to_add
.splice(i..i + 1, [Production::default()].iter().cloned())
.next()
.unwrap();
// Replace the placeholder with the inlined productions.
productions_to_add.splice(
i..i + 1,
grammar.variables[symbol.index].productions.iter().map(|p| {
let mut production = production.clone();
let removed_step = production
.steps
.splice(step_index..(step_index + 1), p.steps.iter().cloned())
.next()
.unwrap();
let inserted_steps =
&mut production.steps[step_index..(step_index + p.steps.len())];
if let Some(alias) = removed_step.alias {
for inserted_step in inserted_steps.iter_mut() {
inserted_step.alias = Some(alias.clone());
}
}
if let Some(last_inserted_step) = inserted_steps.last_mut() {
if last_inserted_step.precedence == 0 {
last_inserted_step.precedence = removed_step.precedence;
}
if last_inserted_step.associativity == None {
last_inserted_step.associativity = removed_step.associativity;
}
}
production
}),
);
continue;
}
}
i += 1;
}
// Store all the computed productions.
let result = productions_to_add
.into_iter()
.map(|production| {
self.productions
.iter()
.position(|p| *p == production)
                    .unwrap_or_else(|| {
                        // `unwrap_or` would evaluate its argument eagerly, pushing a
                        // duplicate copy even when the production is already stored.
                        self.productions.push(production);
                        self.productions.len() - 1
                    })
})
.collect();
// Cache these productions based on the original production step.
self.production_indices_by_step_id
.entry(step_id)
.or_insert(result)
}
fn production_for_id<'a>(
&'a self,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Production {
if let Some(variable_index) = id.variable_index {
&grammar.variables[variable_index].productions[id.production_index]
} else {
&self.productions[id.production_index]
}
}
fn production_step_for_id<'a>(
&'a self,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> Option<&'a ProductionStep> {
self.production_for_id(id, grammar).steps.get(id.step_index)
}
}
pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap {
InlinedProductionMapBuilder {
productions: Vec::new(),
production_indices_by_step_id: HashMap::new(),
}
.build(grammar)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::{ProductionStep, SyntaxVariable, VariableType};
use crate::rules::{Associativity, Symbol};
#[test]
fn test_basic_inlining() {
let grammar = SyntaxGrammar {
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
variables_to_inline: vec![Symbol::non_terminal(1)],
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(12)),
ProductionStep::new(Symbol::terminal(13)),
],
},
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(14))],
},
],
},
],
};
let inline_map = process_inlines(&grammar);
// Nothing to inline at step 0.
assert!(inline_map
.inlined_productions(&grammar.variables[0].productions[0], 0)
.is_none());
// Inlining variable 1 yields two productions.
assert_eq!(
inline_map
.inlined_productions(&grammar.variables[0].productions[0], 1)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(12)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(14)),
ProductionStep::new(Symbol::terminal(11)),
],
},
]
);
}
#[test]
fn test_nested_inlining() {
let grammar = SyntaxGrammar {
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)), // inlined
ProductionStep::new(Symbol::terminal(12)),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(13))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(3)), // inlined
ProductionStep::new(Symbol::terminal(14)),
],
},
],
},
SyntaxVariable {
name: "non-terminal-2".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(15))],
}],
},
SyntaxVariable {
name: "non-terminal-3".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(16))],
}],
},
],
variables_to_inline: vec![
Symbol::non_terminal(1),
Symbol::non_terminal(2),
Symbol::non_terminal(3),
],
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
};
let inline_map = process_inlines(&grammar);
let productions: Vec<&Production> = inline_map
.inlined_productions(&grammar.variables[0].productions[0], 1)
.unwrap()
.collect();
assert_eq!(
productions.iter().cloned().cloned().collect::<Vec<_>>(),
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::terminal(12)),
],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(16)),
ProductionStep::new(Symbol::terminal(14)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::terminal(12)),
],
},
]
);
assert_eq!(
inline_map
.inlined_productions(productions[0], 3)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::terminal(15)),
ProductionStep::new(Symbol::terminal(12)),
],
},]
);
}
#[test]
fn test_inlining_with_precedence_and_alias() {
let grammar = SyntaxGrammar {
variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)],
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
// inlined
ProductionStep::new(Symbol::non_terminal(1))
.with_prec(1, Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
// inlined
ProductionStep::new(Symbol::non_terminal(2))
.with_alias("outer_alias", true),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(11))
.with_prec(2, None)
.with_alias("inner_alias", true),
ProductionStep::new(Symbol::terminal(12)).with_prec(3, None),
],
}],
},
SyntaxVariable {
name: "non-terminal-2".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(13))],
}],
},
],
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
};
let inline_map = process_inlines(&grammar);
let productions: Vec<_> = inline_map
.inlined_productions(&grammar.variables[0].productions[0], 0)
.unwrap()
.collect();
assert_eq!(
productions.iter().cloned().cloned().collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
// The first step in the inlined production retains its precedence
// and alias.
ProductionStep::new(Symbol::terminal(11))
.with_prec(2, None)
.with_alias("inner_alias", true),
// The final step of the inlined production inherits the precedence of
// the inlined step.
ProductionStep::new(Symbol::terminal(12))
.with_prec(1, Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true),
]
}],
);
assert_eq!(
inline_map
.inlined_productions(productions[0], 3)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(11))
.with_prec(2, None)
.with_alias("inner_alias", true),
ProductionStep::new(Symbol::terminal(12))
.with_prec(1, Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
// All steps of the inlined production inherit their alias from the
// inlined step.
ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true),
]
}],
);
}
}

1034
cli/src/render/mod.rs Normal file

File diff suppressed because it is too large

234
cli/src/rules.rs Normal file

@@ -0,0 +1,234 @@
use hashbrown::HashMap;
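// The kinds of symbols that can occur in a grammar: tokens provided by an
// external scanner, the end-of-input marker, terminals, and non-terminals.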
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum SymbolType {
External,
End,
Terminal,
NonTerminal,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum Associativity {
Left,
Right,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Alias {
pub value: String,
pub is_named: bool,
}
pub(crate) type AliasMap = HashMap<Symbol, Alias>;
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub(crate) struct MetadataParams {
pub precedence: Option<i32>,
pub dynamic_precedence: i32,
pub associativity: Option<Associativity>,
pub is_token: bool,
pub is_string: bool,
pub is_active: bool,
pub is_main_token: bool,
pub alias: Option<Alias>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Symbol {
pub kind: SymbolType,
pub index: usize,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Rule {
Blank,
String(String),
Pattern(String),
NamedSymbol(String),
Symbol(Symbol),
Choice(Vec<Rule>),
Metadata {
params: MetadataParams,
rule: Box<Rule>,
},
Repeat(Box<Rule>),
Seq(Vec<Rule>),
}
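// Builder methods that wrap a rule in `Rule::Metadata`, mirroring the
// `alias`, `token`, `prec`, etc. functions of the grammar DSL.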
impl Rule {
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
add_metadata(content, move |params| {
params.alias = Some(Alias { value, is_named });
})
}
pub fn token(content: Rule) -> Self {
add_metadata(content, |params| {
params.is_token = true;
})
}
pub fn immediate_token(content: Rule) -> Self {
add_metadata(content, |params| {
params.is_token = true;
params.is_main_token = true;
})
}
pub fn prec(value: i32, content: Rule) -> Self {
add_metadata(content, |params| {
params.precedence = Some(value);
})
}
pub fn prec_left(value: i32, content: Rule) -> Self {
add_metadata(content, |params| {
params.associativity = Some(Associativity::Left);
params.precedence = Some(value);
})
}
pub fn prec_right(value: i32, content: Rule) -> Self {
add_metadata(content, |params| {
params.associativity = Some(Associativity::Right);
params.precedence = Some(value);
})
}
pub fn prec_dynamic(value: i32, content: Rule) -> Self {
add_metadata(content, |params| {
params.dynamic_precedence = value;
})
}
pub fn repeat(rule: Rule) -> Self {
Rule::Repeat(Box::new(rule))
}
pub fn choice(rules: Vec<Rule>) -> Self {
let mut elements = Vec::with_capacity(rules.len());
for rule in rules {
choice_helper(&mut elements, rule);
}
Rule::Choice(elements)
}
pub fn seq(rules: Vec<Rule>) -> Self {
Rule::Seq(rules)
}
}
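// For example, a grammar fragment like `prec.left(1, seq($.a, $.b))` could be
// built up as the following sketch (using the test-only `named` helper
// defined below):
//
//     Rule::prec_left(1, Rule::seq(vec![Rule::named("a"), Rule::named("b")]))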
#[cfg(test)]
impl Rule {
pub fn terminal(index: usize) -> Self {
Rule::Symbol(Symbol::terminal(index))
}
pub fn non_terminal(index: usize) -> Self {
Rule::Symbol(Symbol::non_terminal(index))
}
pub fn external(index: usize) -> Self {
Rule::Symbol(Symbol::external(index))
}
pub fn named(name: &'static str) -> Self {
Rule::NamedSymbol(name.to_string())
}
pub fn string(value: &'static str) -> Self {
Rule::String(value.to_string())
}
pub fn pattern(value: &'static str) -> Self {
Rule::Pattern(value.to_string())
}
}
impl Symbol {
pub fn is_terminal(&self) -> bool {
self.kind == SymbolType::Terminal
}
pub fn is_non_terminal(&self) -> bool {
self.kind == SymbolType::NonTerminal
}
pub fn is_external(&self) -> bool {
self.kind == SymbolType::External
}
pub fn is_eof(&self) -> bool {
self.kind == SymbolType::End
}
pub fn non_terminal(index: usize) -> Self {
Symbol {
kind: SymbolType::NonTerminal,
index,
}
}
pub fn terminal(index: usize) -> Self {
Symbol {
kind: SymbolType::Terminal,
index,
}
}
pub fn external(index: usize) -> Self {
Symbol {
kind: SymbolType::External,
index,
}
}
pub fn end() -> Self {
Symbol {
kind: SymbolType::End,
index: 0,
}
}
}
impl From<Symbol> for Rule {
fn from(symbol: Symbol) -> Self {
Rule::Symbol(symbol)
}
}
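// Applies `f` to a rule's metadata parameters, reusing an existing
// `Rule::Metadata` wrapper when there is one rather than nesting a second layer.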
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
match input {
Rule::Metadata { rule, mut params } => {
f(&mut params);
Rule::Metadata { rule, params }
}
_ => {
let mut params = MetadataParams::default();
f(&mut params);
Rule::Metadata {
rule: Box::new(input),
params,
}
}
}
}
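// Recursively flattens nested choices into a single list of alternatives,
// skipping duplicates, so that `choice(a, choice(b, a))` yields `Choice([a, b])`.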
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
match rule {
Rule::Choice(elements) => {
for element in elements {
choice_helper(result, element);
}
}
_ => {
if !result.contains(&rule) {
result.push(rule);
}
}
}
}

140
cli/src/tables.rs Normal file

@@ -0,0 +1,140 @@
use crate::nfa::CharacterSet;
use crate::rules::{Alias, Associativity, Symbol};
use hashbrown::HashMap;
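// In-memory representations of the generated parse table and lex table,
// produced during table construction and consumed when rendering the parser.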
pub(crate) type AliasSequenceId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ParseAction {
Accept,
Shift {
state: ParseStateId,
is_repetition: bool,
},
ShiftExtra,
Recover,
Reduce {
symbol: Symbol,
child_count: usize,
precedence: i32,
dynamic_precedence: i32,
associativity: Option<Associativity>,
alias_sequence_id: AliasSequenceId,
},
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseTableEntry {
pub actions: Vec<ParseAction>,
pub reusable: bool,
}
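// A single state in the parse table: `terminal_entries` maps lookahead tokens
// to actions, `nonterminal_entries` is the goto table, and
// `unfinished_item_signature` is a hash summarizing the state's unfinished items.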
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseState {
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
pub nonterminal_entries: HashMap<Symbol, ParseStateId>,
pub lex_state_id: usize,
pub unfinished_item_signature: u64,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ParseTable {
pub states: Vec<ParseState>,
pub symbols: Vec<Symbol>,
pub alias_sequences: Vec<Vec<Option<Alias>>>,
pub max_aliased_production_length: usize,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct AdvanceAction {
pub state: Option<LexStateId>,
pub in_main_token: bool,
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct LexState {
pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
pub accept_action: Option<Symbol>,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct LexTable {
pub states: Vec<LexState>,
}
impl ParseTableEntry {
pub fn new() -> Self {
Self {
reusable: true,
actions: Vec::new(),
}
}
}
impl Default for LexTable {
fn default() -> Self {
LexTable { states: Vec::new() }
}
}
impl ParseState {
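// Returns the id of every parse state that this state can transition to,
// via shift actions on terminals or goto entries on non-terminals.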
pub fn referenced_states<'a>(&'a self) -> impl Iterator<Item = ParseStateId> + 'a {
self.terminal_entries
.iter()
.flat_map(|(_, entry)| {
entry.actions.iter().filter_map(|action| match action {
ParseAction::Shift { state, .. } => Some(*state),
_ => None,
})
})
.chain(self.nonterminal_entries.iter().map(|(_, state)| *state))
}
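// Rewrites every referenced state id using `f`, so that states can be
// renumbered (e.g. after deduplication) without breaking shift actions or
// goto entries.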
pub fn update_referenced_states<F>(&mut self, mut f: F)
where
F: FnMut(usize, &ParseState) -> usize,
{
let mut updates = Vec::new();
for (symbol, entry) in &self.terminal_entries {
for (i, action) in entry.actions.iter().enumerate() {
if let ParseAction::Shift { state, .. } = action {
let result = f(*state, self);
if result != *state {
updates.push((*symbol, i, result));
}
}
}
}
for (symbol, other_state) in &self.nonterminal_entries {
let result = f(*other_state, self);
if result != *other_state {
updates.push((*symbol, 0, result));
}
}
for (symbol, action_index, new_state) in updates {
if symbol.is_non_terminal() {
self.nonterminal_entries.insert(symbol, new_state);
} else {
let entry = self.terminal_entries.get_mut(&symbol).unwrap();
if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {
entry.actions[action_index] = ParseAction::Shift {
state: new_state,
is_repetition,
};
}
}
}
}
}
impl ParseAction {
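// Only `Reduce` actions carry a static precedence; every other action
// defaults to 0.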
pub fn precedence(&self) -> i32 {
if let ParseAction::Reduce { precedence, .. } = self {
*precedence
} else {
0
}
}
}