Start work on shrinking parse table

2018-12-29 13:57:34 -08:00 · 2018-12-29 13:57:34 -08:00 · 605b50e58b
commit 605b50e58b
parent 479400e5d3
5 changed files with 866 additions and 619 deletions
--- a/src/build_tables/build_parse_table.rs
+++ b/src/build_tables/build_parse_table.rs
@ -0,0 +1,605 @@
+use super::item::{LookaheadSet, ParseItem, ParseItemSet};
+use super::item_set_builder::ParseItemSetBuilder;
+use crate::error::{Error, Result};
+use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
+use crate::rules::{Alias, AliasMap, Associativity, Symbol, SymbolType};
+use crate::tables::{
+    AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
+};
+use core::ops::Range;
+use std::collections::hash_map::Entry;
+use std::collections::{HashMap, HashSet, VecDeque};
+use std::fmt::Write;
+
+/// Associates an auxiliary symbol with a list of parent symbols.
+///
+/// Records of this type are accumulated while actions are added to a state
+/// and are consulted during conflict handling, where an auxiliary symbol is
+/// replaced by its recorded parent symbols.
+#[derive(Clone)]
+struct AuxiliarySymbolInfo {
+    // The auxiliary non-terminal that this record describes.
+    auxiliary_symbol: Symbol,
+    // The symbols reported in place of `auxiliary_symbol` when a conflict
+    // involving it is described.
+    parent_symbols: Vec<Symbol>,
+}
+
+// The sequence of symbols recorded on the way to a parse state; it is printed
+// in conflict error messages.
+type SymbolSequence = Vec<Symbol>;
+// The auxiliary-symbol records accumulated on the way to a parse state.
+type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
+
+/// A unit of pending work: a parse state that has been allocated in the
+/// table but whose actions have not been computed yet.
+struct ParseStateQueueEntry {
+    // Symbols recorded when the state was first created.
+    preceding_symbols: SymbolSequence,
+    // Auxiliary-symbol records that were in effect when the state was created.
+    preceding_auxiliary_symbols: AuxiliarySymbolSequence,
+    // Index of the state in `ParseTable::states`.
+    state_id: ParseStateId,
+}
+
+/// Holds the grammar inputs and the working state used while constructing a
+/// `ParseTable`.
+struct ParseTableBuilder<'a> {
+    // Computes transitive closures of item sets and first sets of symbols.
+    item_set_builder: ParseItemSetBuilder<'a>,
+    syntax_grammar: &'a SyntaxGrammar,
+    lexical_grammar: &'a LexicalGrammar,
+    inlines: &'a InlinedProductionMap,
+    // Maps each item set seen so far to the id of the state created for it,
+    // so that identical item sets share a single state.
+    state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
+    // The item set of each state, indexed by state id.
+    item_sets_by_state_id: Vec<ParseItemSet<'a>>,
+    // States that have been created but not yet processed, in FIFO order.
+    parse_state_queue: VecDeque<ParseStateQueueEntry>,
+    // The table under construction; returned by `build`.
+    parse_table: ParseTable,
+}
+
+impl<'a> ParseTableBuilder<'a> {
+    /// Builds the parse table, consuming the builder.
+    ///
+    /// Seeds the table with the empty alias sequence, the error state and the
+    /// start state, then processes queued states until none remain and
+    /// finally records which symbols the table uses.
+    ///
+    /// Returns the finished table, or the first error reported while
+    /// processing the state queue.
+    fn build(mut self) -> Result<ParseTable> {
+        // Ensure that the empty alias sequence has index 0.
+        self.parse_table.alias_sequences.push(Vec::new());
+
+        // Ensure that the error state has index 0. It is created from the
+        // empty item set; the returned id is not needed here.
+        self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
+
+        // The start state: the start item with end-of-input as its lookahead.
+        self.add_parse_state(
+            &Vec::new(),
+            &Vec::new(),
+            ParseItemSet::with(
+                [(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]
+                    .iter()
+                    .cloned(),
+            ),
+        );
+
+        self.process_part_state_queue()?;
+        self.populate_used_symbols();
+        Ok(self.parse_table)
+    }
+
+    /// Returns the id of the state for `item_set`, creating it if needed.
+    ///
+    /// When the item set has been seen before, the existing id is returned
+    /// and nothing is queued. Otherwise an empty state is appended to the
+    /// table, the item set is remembered under the new id, and a queue entry
+    /// carrying copies of the two preceding-symbol sequences is enqueued.
+    fn add_parse_state(
+        &mut self,
+        preceding_symbols: &SymbolSequence,
+        preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
+        item_set: ParseItemSet<'a>,
+    ) -> ParseStateId {
+        // A known item set maps straight to its existing state.
+        let slot = match self.state_ids_by_item_set.entry(item_set) {
+            Entry::Occupied(existing) => return *existing.get(),
+            Entry::Vacant(slot) => slot,
+        };
+
+        // The new state takes the next free index in the table.
+        let new_id = self.parse_table.states.len();
+        self.item_sets_by_state_id.push(slot.key().clone());
+        self.parse_table.states.push(ParseState {
+            lex_state_id: 0,
+            terminal_entries: HashMap::new(),
+            nonterminal_entries: HashMap::new(),
+        });
+        self.parse_state_queue.push_back(ParseStateQueueEntry {
+            state_id: new_id,
+            preceding_symbols: preceding_symbols.clone(),
+            preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
+        });
+        slot.insert(new_id);
+        new_id
+    }
+
+    /// Drains the queue of pending parse states.
+    ///
+    /// For each queued state, computes the transitive closure of its item
+    /// set and adds the resulting actions to the table. Processing a state
+    /// may enqueue further states; the loop ends when the queue is empty or
+    /// when adding actions reports an error, which is returned.
+    fn process_part_state_queue(&mut self) -> Result<()> {
+        // Switch to `true` to print each item set and its closure.
+        const DEBUG: bool = false;
+
+        while let Some(ParseStateQueueEntry {
+            preceding_symbols,
+            preceding_auxiliary_symbols,
+            state_id,
+        }) = self.parse_state_queue.pop_front()
+        {
+            if DEBUG {
+                println!(
+                    "ITEM SET {}:\n{}",
+                    state_id,
+                    self.item_sets_by_state_id[state_id]
+                        .display_with(&self.syntax_grammar, &self.lexical_grammar)
+                );
+            }
+
+            let closure = self.item_set_builder.transitive_closure(
+                &self.item_sets_by_state_id[state_id],
+                self.syntax_grammar,
+                self.inlines,
+            );
+
+            if DEBUG {
+                println!(
+                    "TRANSITIVE CLOSURE:\n{}",
+                    closure.display_with(&self.syntax_grammar, &self.lexical_grammar)
+                );
+            }
+
+            self.add_actions(
+                preceding_symbols,
+                preceding_auxiliary_symbols,
+                closure,
+                state_id,
+            )?;
+        }
+
+        Ok(())
+    }
+
+    /// Fills in the table entries of state `state_id` from its (closed)
+    /// item set.
+    ///
+    /// Items that still have a next symbol are grouped by that symbol into
+    /// successor item sets, which become SHIFT actions (terminals) or
+    /// non-terminal entries. Items with no next symbol produce an ACCEPT or
+    /// REDUCE action for each of their lookaheads. Lookaheads that end up
+    /// with more than one candidate action are passed to `handle_conflict`.
+    /// Finally, every extra token that has no entry yet gets a SHIFT-EXTRA
+    /// entry.
+    ///
+    /// Returns the first error reported by `handle_conflict`, if any.
+    fn add_actions(
+        &mut self,
+        mut preceding_symbols: SymbolSequence,
+        mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
+        item_set: ParseItemSet<'a>,
+        state_id: ParseStateId,
+    ) -> Result<()> {
+        // Successor item sets, keyed by the symbol that leads to them.
+        let mut terminal_successors = HashMap::new();
+        let mut non_terminal_successors = HashMap::new();
+        // Lookaheads whose entry holds more than one candidate action.
+        let mut lookaheads_with_conflicts = HashSet::new();
+
+        for (item, lookaheads) in &item_set.entries {
+            if let Some(next_symbol) = item.symbol() {
+                let successor = item.successor();
+                if next_symbol.is_non_terminal() {
+                    // Keep track of where auxiliary non-terminals (repeat symbols) are
+                    // used within visible symbols. This information may be needed later
+                    // for conflict resolution.
+                    if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
+                        preceding_auxiliary_symbols
+                            .push(self.get_auxiliary_node_info(&item_set, next_symbol));
+                    }
+
+                    // Merge this item's successor (and its lookaheads) into the
+                    // item set reached via `next_symbol`.
+                    non_terminal_successors
+                        .entry(next_symbol)
+                        .or_insert_with(|| ParseItemSet::default())
+                        .entries
+                        .entry(successor)
+                        .or_insert_with(|| LookaheadSet::new())
+                        .insert_all(lookaheads);
+                } else {
+                    terminal_successors
+                        .entry(next_symbol)
+                        .or_insert_with(|| ParseItemSet::default())
+                        .entries
+                        .entry(successor)
+                        .or_insert_with(|| LookaheadSet::new())
+                        .insert_all(lookaheads);
+                }
+            } else {
+                // The item has no next symbol: it yields ACCEPT (augmented
+                // item) or a REDUCE of its production.
+                let action = if item.is_augmented() {
+                    ParseAction::Accept
+                } else {
+                    ParseAction::Reduce {
+                        symbol: Symbol::non_terminal(item.variable_index as usize),
+                        child_count: item.step_index as usize,
+                        precedence: item.precedence(),
+                        associativity: item.associativity(),
+                        dynamic_precedence: item.production.dynamic_precedence,
+                        alias_sequence_id: self.get_alias_sequence_id(item),
+                    }
+                };
+
+                // Only the actions with the highest precedence are kept for a
+                // lookahead: a higher precedence replaces what is there, an
+                // equal one is appended (and marks a conflict), a lower one
+                // is dropped.
+                for lookahead in lookaheads.iter() {
+                    let entry = self.parse_table.states[state_id]
+                        .terminal_entries
+                        .entry(lookahead);
+                    let entry = entry.or_insert_with(|| ParseTableEntry::new());
+                    if entry.actions.is_empty() {
+                        entry.actions.push(action);
+                    } else if action.precedence() > entry.actions[0].precedence() {
+                        entry.actions.clear();
+                        entry.actions.push(action);
+                        lookaheads_with_conflicts.remove(&lookahead);
+                    } else if action.precedence() == entry.actions[0].precedence() {
+                        entry.actions.push(action);
+                        lookaheads_with_conflicts.insert(lookahead);
+                    }
+                }
+            }
+        }
+
+        for (symbol, next_item_set) in terminal_successors {
+            // The successor state is recorded as being preceded by the current
+            // sequence plus `symbol`; the push is undone right after.
+            preceding_symbols.push(symbol);
+            let next_state_id = self.add_parse_state(
+                &preceding_symbols,
+                &preceding_auxiliary_symbols,
+                next_item_set,
+            );
+            preceding_symbols.pop();
+
+            let entry = self.parse_table.states[state_id]
+                .terminal_entries
+                .entry(symbol);
+            // An entry that already holds actions means the SHIFT added below
+            // competes with them.
+            if let Entry::Occupied(e) = &entry {
+                if !e.get().actions.is_empty() {
+                    lookaheads_with_conflicts.insert(symbol);
+                }
+            }
+
+            // The SHIFT is appended, so it is always the last action of the entry.
+            entry
+                .or_insert_with(|| ParseTableEntry::new())
+                .actions
+                .push(ParseAction::Shift {
+                    state: next_state_id,
+                    is_repetition: false,
+                });
+        }
+
+        for (symbol, next_item_set) in non_terminal_successors {
+            preceding_symbols.push(symbol);
+            let next_state_id = self.add_parse_state(
+                &preceding_symbols,
+                &preceding_auxiliary_symbols,
+                next_item_set,
+            );
+            preceding_symbols.pop();
+            self.parse_table.states[state_id]
+                .nonterminal_entries
+                .insert(symbol, next_state_id);
+        }
+
+        // Try to resolve every recorded conflict; the first unresolved one
+        // aborts with an error.
+        for symbol in lookaheads_with_conflicts {
+            self.handle_conflict(
+                &item_set,
+                state_id,
+                &preceding_symbols,
+                &preceding_auxiliary_symbols,
+                symbol,
+            )?;
+        }
+
+        // Extra tokens get a SHIFT-EXTRA entry, but only where the state has
+        // no entry for that token yet (`or_insert` keeps existing entries).
+        let state = &mut self.parse_table.states[state_id];
+        for extra_token in &self.syntax_grammar.extra_tokens {
+            state
+                .terminal_entries
+                .entry(*extra_token)
+                .or_insert(ParseTableEntry {
+                    reusable: true,
+                    actions: vec![ParseAction::ShiftExtra],
+                });
+        }
+
+        Ok(())
+    }
+
+    /// Tries to resolve the conflict on `conflicting_lookahead` in state
+    /// `state_id`.
+    ///
+    /// Resolution is attempted, in order, by: recognising the ambiguity of an
+    /// auxiliary (repeat) rule, comparing SHIFT and REDUCE precedences,
+    /// applying the REDUCE actions' associativity, and finally checking the
+    /// grammar's expected conflicts.
+    ///
+    /// Returns `Ok(())` when the conflict is resolved or expected, and
+    /// `Err(Error::ConflictError(..))` with a description of the competing
+    /// interpretations otherwise.
+    ///
+    /// Panics if the state has no entry for `conflicting_lookahead`.
+    fn handle_conflict(
+        &mut self,
+        item_set: &ParseItemSet,
+        state_id: ParseStateId,
+        preceding_symbols: &SymbolSequence,
+        preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
+        conflicting_lookahead: Symbol,
+    ) -> Result<()> {
+        let entry = self.parse_table.states[state_id]
+            .terminal_entries
+            .get_mut(&conflicting_lookahead)
+            .unwrap();
+
+        // Determine which items in the set conflict with each other, and the
+        // precedences associated with SHIFT vs REDUCE actions. There won't
+        // be multiple REDUCE actions with different precedences; that is
+        // sorted out ahead of time in `add_actions`. But there can still be
+        // REDUCE-REDUCE conflicts where all actions have the *same*
+        // precedence, and there can still be SHIFT/REDUCE conflicts.
+        let reduce_precedence = entry.actions[0].precedence();
+        // NOTE(review): assigned below but never read within this function.
+        let mut considered_associativity = false;
+        // Used as a (lowest, highest) pair of precedences rather than as a
+        // half-open range.
+        let mut shift_precedence: Option<Range<i32>> = None;
+        let mut conflicting_items = HashSet::new();
+        for (item, lookaheads) in &item_set.entries {
+            if let Some(step) = item.step() {
+                // Only items that have already advanced past their first step
+                // and whose next symbol can start with the lookahead are
+                // counted on the SHIFT side.
+                if item.step_index > 0 {
+                    if self
+                        .item_set_builder
+                        .first_set(&step.symbol)
+                        .contains(&conflicting_lookahead)
+                    {
+                        conflicting_items.insert(item);
+                        let precedence = item.precedence();
+                        if let Some(range) = &mut shift_precedence {
+                            if precedence < range.start {
+                                range.start = precedence;
+                            } else if precedence > range.end {
+                                range.end = precedence;
+                            }
+                        } else {
+                            shift_precedence = Some(precedence..precedence);
+                        }
+                    }
+                }
+            } else if lookaheads.contains(&conflicting_lookahead) {
+                // Items with no remaining step conflict when the lookahead is
+                // one of theirs.
+                conflicting_items.insert(item);
+            }
+        }
+
+        // `add_actions` appends the SHIFT action last, so a SHIFT/REDUCE
+        // conflict is recognised by looking at the final action.
+        if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
+            let shift_precedence = shift_precedence.unwrap_or(0..0);
+
+            // If all of the items in the conflict have the same parent symbol,
+            // and that parent symbol is auxiliary, then this is just the intentional
+            // ambiguity associated with a repeat rule. Resolve that class of ambiguity
+            // by leaving it in the parse table, but marking the SHIFT action with
+            // an `is_repetition` flag.
+            let conflicting_variable_index =
+                conflicting_items.iter().next().unwrap().variable_index;
+            if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() {
+                if conflicting_items
+                    .iter()
+                    .all(|item| item.variable_index == conflicting_variable_index)
+                {
+                    *is_repetition = true;
+                    return Ok(());
+                }
+            }
+
+            // If the SHIFT action has higher precedence, remove all the REDUCE actions.
+            if shift_precedence.start > reduce_precedence
+                || (shift_precedence.start == reduce_precedence
+                    && shift_precedence.end > reduce_precedence)
+            {
+                // Drops every action except the last one (the SHIFT).
+                entry.actions.drain(0..entry.actions.len() - 1);
+            }
+            // If the REDUCE actions have higher precedence, remove the SHIFT action.
+            else if shift_precedence.end < reduce_precedence
+                || (shift_precedence.end == reduce_precedence
+                    && shift_precedence.start < reduce_precedence)
+            {
+                entry.actions.pop();
+                // Only the completed items remain part of the conflict.
+                conflicting_items.retain(|item| item.is_done());
+            }
+            // If the SHIFT and REDUCE actions have the same precedence, consider
+            // the REDUCE actions' associativity.
+            else if shift_precedence == (reduce_precedence..reduce_precedence) {
+                considered_associativity = true;
+                let mut has_left = false;
+                let mut has_right = false;
+                let mut has_non = false;
+                for action in &entry.actions {
+                    if let ParseAction::Reduce { associativity, .. } = action {
+                        match associativity {
+                            Some(Associativity::Left) => has_left = true,
+                            Some(Associativity::Right) => has_right = true,
+                            None => has_non = true,
+                        }
+                    }
+                }
+
+                // If all reduce actions are left associative, remove the SHIFT action.
+                // If all reduce actions are right associative, remove the REDUCE actions.
+                // Any other mix leaves the entry untouched.
+                match (has_left, has_non, has_right) {
+                    (true, false, false) => {
+                        entry.actions.pop();
+                        conflicting_items.retain(|item| item.is_done());
+                    }
+                    (false, false, true) => {
+                        entry.actions.drain(0..entry.actions.len() - 1);
+                    }
+                    _ => {}
+                }
+            }
+        }
+
+        // If all of the actions but one have been eliminated, then there's no problem.
+        let entry = self.parse_table.states[state_id]
+            .terminal_entries
+            .get_mut(&conflicting_lookahead)
+            .unwrap();
+        if entry.actions.len() == 1 {
+            return Ok(());
+        }
+
+        // Determine the set of parent symbols involved in this conflict.
+        let mut actual_conflict = Vec::new();
+        for item in &conflicting_items {
+            let symbol = Symbol::non_terminal(item.variable_index as usize);
+            if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
+                // An auxiliary symbol is replaced by the parent symbols of its
+                // most recently recorded usage. The `unwrap` panics if no
+                // usage of this symbol was recorded.
+                actual_conflict.extend(
+                    preceding_auxiliary_symbols
+                        .iter()
+                        .rev()
+                        .find_map(|info| {
+                            if info.auxiliary_symbol == symbol {
+                                Some(&info.parent_symbols)
+                            } else {
+                                None
+                            }
+                        })
+                        .unwrap()
+                        .iter(),
+                );
+            } else {
+                actual_conflict.push(symbol);
+            }
+        }
+        // Sort and deduplicate before the lookup in `expected_conflicts`.
+        actual_conflict.sort_unstable();
+        actual_conflict.dedup();
+
+        // If this set of symbols has been whitelisted, then there's no error.
+        if self
+            .syntax_grammar
+            .expected_conflicts
+            .contains(&actual_conflict)
+        {
+            return Ok(());
+        }
+
+        // Build the error message: first the symbols seen so far, followed by
+        // the conflicting lookahead.
+        let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
+        for symbol in preceding_symbols {
+            write!(&mut msg, "  {}", self.symbol_name(symbol)).unwrap();
+        }
+
+        write!(
+            &mut msg,
+            "  •  {}  …\n\n",
+            self.symbol_name(&conflicting_lookahead)
+        )
+        .unwrap();
+        write!(&mut msg, "Possible interpretations:\n").unwrap();
+        // Then one numbered line per conflicting item.
+        for (i, item) in conflicting_items.iter().enumerate() {
+            write!(&mut msg, "\n  {}:", i).unwrap();
+
+            // The preceding symbols that lie outside this item: everything
+            // except the last `step_index` symbols.
+            for preceding_symbol in preceding_symbols
+                .iter()
+                .take(preceding_symbols.len() - item.step_index as usize)
+            {
+                write!(&mut msg, "  {}", self.symbol_name(preceding_symbol)).unwrap();
+            }
+
+            write!(
+                &mut msg,
+                "  ({}",
+                &self.syntax_grammar.variables[item.variable_index as usize].name
+            )
+            .unwrap();
+
+            // The production's steps, with a bullet marking the item's position.
+            for (j, step) in item.production.steps.iter().enumerate() {
+                if j as u32 == item.step_index {
+                    write!(&mut msg, "  •").unwrap();
+                }
+                write!(&mut msg, "  {}", self.symbol_name(&step.symbol)).unwrap();
+            }
+
+            write!(&mut msg, ")").unwrap();
+
+            // For a completed item the bullet goes after the closing
+            // parenthesis, followed by the lookahead.
+            if item.is_done() {
+                write!(
+                    &mut msg,
+                    "  •  {}",
+                    self.symbol_name(&conflicting_lookahead)
+                )
+                .unwrap();
+            }
+
+            // Precedence details are printed only when they are not the
+            // defaults (0 and no associativity).
+            let precedence = item.precedence();
+            let associativity = item.associativity();
+            if precedence != 0 || associativity.is_some() {
+                write!(
+                    &mut msg,
+                    "(precedence: {}, associativity: {:?})",
+                    precedence, associativity
+                )
+                .unwrap();
+            }
+        }
+
+        // TODO - generate suggested resolutions
+
+        Err(Error::ConflictError(msg))
+    }
+
+    /// Describes where the auxiliary non-terminal `symbol` is used within
+    /// `item_set`.
+    ///
+    /// The parent symbols are the non-auxiliary variables that own an item
+    /// whose next symbol is `symbol`. Conflict handling later substitutes
+    /// these parents for the auxiliary symbol when it works out which
+    /// symbols take part in a conflict.
+    ///
+    /// Returns the auxiliary symbol paired with its parent symbols; the
+    /// list is empty when no such item exists in `item_set`.
+    fn get_auxiliary_node_info(
+        &self,
+        item_set: &ParseItemSet,
+        symbol: Symbol,
+    ) -> AuxiliarySymbolInfo {
+        let parent_symbols = item_set
+            .entries
+            .keys()
+            .filter_map(|item| {
+                let variable_index = item.variable_index as usize;
+                // Only items that are about to consume `symbol` use it, and
+                // only non-auxiliary owners count as parents.
+                if item.symbol() == Some(symbol)
+                    && !self.syntax_grammar.variables[variable_index].is_auxiliary()
+                {
+                    Some(Symbol::non_terminal(variable_index))
+                } else {
+                    None
+                }
+            })
+            .collect();
+        AuxiliarySymbolInfo {
+            auxiliary_symbol: symbol,
+            parent_symbols,
+        }
+    }
+
+    /// Fills `parse_table.symbols` with every symbol that the table uses.
+    ///
+    /// A symbol counts as used when it keys a terminal or non-terminal entry
+    /// of some state. The end symbol is always listed first, followed by the
+    /// used terminals, non-terminals and external tokens, each group in
+    /// ascending index order.
+    fn populate_used_symbols(&mut self) {
+        let mut used_terminals = vec![false; self.lexical_grammar.variables.len()];
+        let mut used_non_terminals = vec![false; self.syntax_grammar.variables.len()];
+        let mut used_externals = vec![false; self.syntax_grammar.external_tokens.len()];
+
+        // Flag the symbols that key a terminal entry in any state.
+        let terminal_keys = self
+            .parse_table
+            .states
+            .iter()
+            .flat_map(|state| state.terminal_entries.keys());
+        for key in terminal_keys {
+            match key.kind {
+                SymbolType::Terminal => used_terminals[key.index] = true,
+                SymbolType::External => used_externals[key.index] = true,
+                _ => {}
+            }
+        }
+
+        // Flag the symbols that key a non-terminal entry in any state.
+        let non_terminal_keys = self
+            .parse_table
+            .states
+            .iter()
+            .flat_map(|state| state.nonterminal_entries.keys());
+        for key in non_terminal_keys {
+            used_non_terminals[key.index] = true;
+        }
+
+        // Emit the flagged symbols group by group.
+        let symbols = &mut self.parse_table.symbols;
+        symbols.push(Symbol::end());
+        symbols.extend(
+            used_terminals
+                .into_iter()
+                .enumerate()
+                .filter_map(|(i, used)| if used { Some(Symbol::terminal(i)) } else { None }),
+        );
+        symbols.extend(
+            used_non_terminals
+                .into_iter()
+                .enumerate()
+                .filter_map(|(i, used)| if used { Some(Symbol::non_terminal(i)) } else { None }),
+        );
+        symbols.extend(
+            used_externals
+                .into_iter()
+                .enumerate()
+                .filter_map(|(i, used)| if used { Some(Symbol::external(i)) } else { None }),
+        );
+    }
+
+    /// Returns the id of the alias sequence of `item`'s production,
+    /// registering the sequence in the table if it is not there yet.
+    ///
+    /// The sequence holds one optional alias per production step, with
+    /// trailing `None`s removed; a production without aliases therefore maps
+    /// to the empty sequence.
+    fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
+        let mut aliases: Vec<Option<Alias>> = item
+            .production
+            .steps
+            .iter()
+            .map(|step| step.alias.clone())
+            .collect();
+
+        // Cut the sequence right after its last aliased step.
+        let kept_len = aliases
+            .iter()
+            .rposition(|alias| alias.is_some())
+            .map_or(0, |last| last + 1);
+        aliases.truncate(kept_len);
+
+        // Reuse an identical sequence when one exists; append otherwise.
+        let known_sequences = &mut self.parse_table.alias_sequences;
+        match known_sequences.iter().position(|known| *known == aliases) {
+            Some(existing_id) => existing_id,
+            None => {
+                known_sequences.push(aliases);
+                known_sequences.len() - 1
+            }
+        }
+    }
+
+    /// Returns the display name of `symbol` for use in messages.
+    ///
+    /// The end symbol is shown as `EOF`. External tokens and non-terminals
+    /// use the name from the syntax grammar. Terminals use the name from the
+    /// lexical grammar, wrapped in double quotes unless the variable is of
+    /// the `Named` kind.
+    fn symbol_name(&self, symbol: &Symbol) -> String {
+        let index = symbol.index;
+        match symbol.kind {
+            SymbolType::End => String::from("EOF"),
+            SymbolType::External => {
+                let token = &self.syntax_grammar.external_tokens[index];
+                token.name.clone()
+            }
+            SymbolType::NonTerminal => {
+                let variable = &self.syntax_grammar.variables[index];
+                variable.name.clone()
+            }
+            SymbolType::Terminal => {
+                let token = &self.lexical_grammar.variables[index];
+                match token.kind {
+                    VariableType::Named => token.name.clone(),
+                    _ => format!("\"{}\"", &token.name),
+                }
+            }
+        }
+    }
+}
+
+/// Builds the parse table for the given grammars.
+///
+/// Sets up a `ParseTableBuilder` with empty working state and an empty
+/// table, then runs it. Returns the finished table or the error reported by
+/// the builder.
+pub(crate) fn build_parse_table(
+    syntax_grammar: &SyntaxGrammar,
+    lexical_grammar: &LexicalGrammar,
+    inlines: &InlinedProductionMap,
+) -> Result<ParseTable> {
+    let empty_table = ParseTable {
+        states: Vec::new(),
+        alias_sequences: Vec::new(),
+        symbols: Vec::new(),
+    };
+    let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
+
+    let builder = ParseTableBuilder {
+        item_set_builder,
+        syntax_grammar,
+        lexical_grammar,
+        inlines,
+        state_ids_by_item_set: HashMap::new(),
+        item_sets_by_state_id: Vec::new(),
+        parse_state_queue: VecDeque::new(),
+        parse_table: empty_table,
+    };
+    builder.build()
+}
--- a/src/build_tables/mod.rs
+++ b/src/build_tables/mod.rs
@ -1,607 +1,17 @@
+use crate::error::Result;
+use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
+use crate::rules::{AliasMap, Symbol};
+use crate::tables::{LexTable, ParseTable};
+
+mod build_parse_table;
 mod item;
 mod item_set_builder;
 mod lex_table_builder;
+mod shrink_parse_table;
+mod token_conflict_map;

-use self::item::{LookaheadSet, ParseItem, ParseItemSet};
-use self::item_set_builder::ParseItemSetBuilder;
-use self::lex_table_builder::LexTableBuilder;
-use crate::error::{Error, Result};
-use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
-use crate::rules::Alias;
-use crate::rules::{AliasMap, Associativity, Symbol, SymbolType};
-use crate::tables::{
-    AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
-};
-use core::ops::Range;
-use std::collections::hash_map::Entry;
-use std::collections::{HashMap, HashSet, VecDeque};
-use std::fmt::Write;
-
-#[derive(Clone)]
-struct AuxiliarySymbolInfo {
-    auxiliary_symbol: Symbol,
-    parent_symbols: Vec<Symbol>,
-}
-
-type SymbolSequence = Vec<Symbol>;
-type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
-
-struct ParseStateQueueEntry {
-    preceding_symbols: SymbolSequence,
-    preceding_auxiliary_symbols: AuxiliarySymbolSequence,
-    state_id: ParseStateId,
-}
-
-struct ParseTableBuilder<'a> {
-    item_set_builder: ParseItemSetBuilder<'a>,
-    syntax_grammar: &'a SyntaxGrammar,
-    lexical_grammar: &'a LexicalGrammar,
-    inlines: &'a InlinedProductionMap,
-    simple_aliases: &'a AliasMap,
-    state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
-    item_sets_by_state_id: Vec<ParseItemSet<'a>>,
-    parse_state_queue: VecDeque<ParseStateQueueEntry>,
-    parse_table: ParseTable,
-}
-
-impl<'a> ParseTableBuilder<'a> {
-    fn build(mut self) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
-        // Ensure that the empty alias sequence has index 0.
-        self.parse_table.alias_sequences.push(Vec::new());
-
-        // Ensure that the error state has index 0.
-        let error_state_id =
-            self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
-
-        self.add_parse_state(
-            &Vec::new(),
-            &Vec::new(),
-            ParseItemSet::with(
-                [(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]
-                    .iter()
-                    .cloned(),
-            ),
-        );
-
-        self.process_part_state_queue()?;
-
-        let lex_table_builder = LexTableBuilder::new(self.syntax_grammar, self.lexical_grammar);
-
-        self.populate_used_symbols();
-
-        let (main_lex_table, keyword_lex_table, keyword_capture_token) = lex_table_builder.build();
-        Ok((
-            self.parse_table,
-            main_lex_table,
-            keyword_lex_table,
-            keyword_capture_token,
-        ))
-    }
-
-    fn add_parse_state(
-        &mut self,
-        preceding_symbols: &SymbolSequence,
-        preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
-        item_set: ParseItemSet<'a>,
-    ) -> ParseStateId {
-        match self.state_ids_by_item_set.entry(item_set) {
-            Entry::Occupied(o) => {
-                // eprintln!("Item set already processed at state {}", *o.get());
-                *o.get()
-            }
-            Entry::Vacant(v) => {
-                // eprintln!("Item set not yet processed");
-                let state_id = self.parse_table.states.len();
-                self.item_sets_by_state_id.push(v.key().clone());
-                self.parse_table.states.push(ParseState {
-                    lex_state_id: 0,
-                    terminal_entries: HashMap::new(),
-                    nonterminal_entries: HashMap::new(),
-                });
-                self.parse_state_queue.push_back(ParseStateQueueEntry {
-                    state_id,
-                    preceding_symbols: preceding_symbols.clone(),
-                    preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
-                });
-                v.insert(state_id);
-                state_id
-            }
-        }
-    }
-
-    fn process_part_state_queue(&mut self) -> Result<()> {
-        while let Some(entry) = self.parse_state_queue.pop_front() {
-            let debug = false;
-
-            if debug {
-                println!(
-                    "ITEM SET {}:\n{}",
-                    entry.state_id,
-                    self.item_sets_by_state_id[entry.state_id]
-                        .display_with(&self.syntax_grammar, &self.lexical_grammar,)
-                );
-            }
-
-            let item_set = self.item_set_builder.transitive_closure(
-                &self.item_sets_by_state_id[entry.state_id],
-                self.syntax_grammar,
-                self.inlines,
-            );
-
-            if debug {
-                println!(
-                    "TRANSITIVE CLOSURE:\n{}",
-                    item_set.display_with(&self.syntax_grammar, &self.lexical_grammar)
-                );
-            }
-
-            self.add_actions(
-                entry.preceding_symbols,
-                entry.preceding_auxiliary_symbols,
-                item_set,
-                entry.state_id,
-            )?;
-        }
-        Ok(())
-    }
-
-    fn add_actions(
-        &mut self,
-        mut preceding_symbols: SymbolSequence,
-        mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
-        item_set: ParseItemSet<'a>,
-        state_id: ParseStateId,
-    ) -> Result<()> {
-        let mut terminal_successors = HashMap::new();
-        let mut non_terminal_successors = HashMap::new();
-        let mut lookaheads_with_conflicts = HashSet::new();
-
-        for (item, lookaheads) in &item_set.entries {
-            if let Some(next_symbol) = item.symbol() {
-                let successor = item.successor();
-                if next_symbol.is_non_terminal() {
-                    // Keep track of where auxiliary non-terminals (repeat symbols) are
-                    // used within visible symbols. This information may be needed later
-                    // for conflict resolution.
-                    if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
-                        preceding_auxiliary_symbols
-                            .push(self.get_auxiliary_node_info(&item_set, next_symbol));
-                    }
-
-                    non_terminal_successors
-                        .entry(next_symbol)
-                        .or_insert_with(|| ParseItemSet::default())
-                        .entries
-                        .entry(successor)
-                        .or_insert_with(|| LookaheadSet::new())
-                        .insert_all(lookaheads);
-                } else {
-                    terminal_successors
-                        .entry(next_symbol)
-                        .or_insert_with(|| ParseItemSet::default())
-                        .entries
-                        .entry(successor)
-                        .or_insert_with(|| LookaheadSet::new())
-                        .insert_all(lookaheads);
-                }
-            } else {
-                let action = if item.is_augmented() {
-                    ParseAction::Accept
-                } else {
-                    ParseAction::Reduce {
-                        symbol: Symbol::non_terminal(item.variable_index as usize),
-                        child_count: item.step_index as usize,
-                        precedence: item.precedence(),
-                        associativity: item.associativity(),
-                        dynamic_precedence: item.production.dynamic_precedence,
-                        alias_sequence_id: self.get_alias_sequence_id(item),
-                    }
-                };
-
-                for lookahead in lookaheads.iter() {
-                    let entry = self.parse_table.states[state_id]
-                        .terminal_entries
-                        .entry(lookahead);
-                    let entry = entry.or_insert_with(|| ParseTableEntry::new());
-                    if entry.actions.is_empty() {
-                        entry.actions.push(action);
-                    } else if action.precedence() > entry.actions[0].precedence() {
-                        entry.actions.clear();
-                        entry.actions.push(action);
-                        lookaheads_with_conflicts.remove(&lookahead);
-                    } else if action.precedence() == entry.actions[0].precedence() {
-                        entry.actions.push(action);
-                        lookaheads_with_conflicts.insert(lookahead);
-                    }
-                }
-            }
-        }
-
-        for (symbol, next_item_set) in terminal_successors {
-            preceding_symbols.push(symbol);
-            let next_state_id = self.add_parse_state(
-                &preceding_symbols,
-                &preceding_auxiliary_symbols,
-                next_item_set,
-            );
-            preceding_symbols.pop();
-
-            let entry = self.parse_table.states[state_id]
-                .terminal_entries
-                .entry(symbol);
-            if let Entry::Occupied(e) = &entry {
-                if !e.get().actions.is_empty() {
-                    lookaheads_with_conflicts.insert(symbol);
-                }
-            }
-
-            entry
-                .or_insert_with(|| ParseTableEntry::new())
-                .actions
-                .push(ParseAction::Shift {
-                    state: next_state_id,
-                    is_repetition: false,
-                });
-        }
-
-        for (symbol, next_item_set) in non_terminal_successors {
-            preceding_symbols.push(symbol);
-            let next_state_id = self.add_parse_state(
-                &preceding_symbols,
-                &preceding_auxiliary_symbols,
-                next_item_set,
-            );
-            preceding_symbols.pop();
-            self.parse_table.states[state_id]
-                .nonterminal_entries
-                .insert(symbol, next_state_id);
-        }
-
-        for symbol in lookaheads_with_conflicts {
-            self.handle_conflict(
-                &item_set,
-                state_id,
-                &preceding_symbols,
-                &preceding_auxiliary_symbols,
-                symbol,
-            )?;
-        }
-
-        let state = &mut self.parse_table.states[state_id];
-        for extra_token in &self.syntax_grammar.extra_tokens {
-            state
-                .terminal_entries
-                .entry(*extra_token)
-                .or_insert(ParseTableEntry {
-                    reusable: true,
-                    actions: vec![ParseAction::ShiftExtra],
-                });
-        }
-
-        Ok(())
-    }
-
-    /// Resolves, or reports, the conflict recorded for `conflicting_lookahead` in the
-    /// parse state `state_id`.
-    ///
-    /// The table entry for the lookahead is expected to hold one or more REDUCE actions,
-    /// optionally followed by a single SHIFT action as its last element. Resolution is
-    /// attempted in this order:
-    ///
-    /// 1. A SHIFT/REDUCE conflict whose items all belong to one auxiliary variable is
-    ///    kept in the table, with the SHIFT action flagged as `is_repetition`.
-    /// 2. Otherwise precedence decides, and on equal precedence the associativity of the
-    ///    REDUCE actions decides.
-    /// 3. If more than one action remains, the conflict is accepted when the set of
-    ///    symbols involved is listed in `syntax_grammar.expected_conflicts`.
-    ///
-    /// Returns `Ok(())` when the conflict was resolved or is expected, and
-    /// `Err(Error::ConflictError(..))` with a description of the competing
-    /// interpretations otherwise.
-    ///
-    /// Panics if the state has no entry for `conflicting_lookahead`, if the entry ends
-    /// with a SHIFT action but no conflicting item was found, or if an auxiliary symbol
-    /// involved in the conflict has no record in `preceding_auxiliary_symbols`.
-    fn handle_conflict(
-        &mut self,
-        item_set: &ParseItemSet,
-        state_id: ParseStateId,
-        preceding_symbols: &SymbolSequence,
-        preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
-        conflicting_lookahead: Symbol,
-    ) -> Result<()> {
-        let entry = self.parse_table.states[state_id]
-            .terminal_entries
-            .get_mut(&conflicting_lookahead)
-            .unwrap();
-
-        // Determine which items in the set conflict with each other, and the
-        // precedences associated with SHIFT vs REDUCE actions. There won't
-        // be multiple REDUCE actions with different precedences; that is
-        // sorted out ahead of time in `add_actions`. But there can still be
-        // REDUCE-REDUCE conflicts where all actions have the *same*
-        // precedence, and there can still be SHIFT/REDUCE conflicts.
-        let reduce_precedence = entry.actions[0].precedence();
-        // NOTE(review): this flag is assigned below but never read in this function.
-        let mut considered_associativity = false;
-        // Lowest (`start`) and highest (`end`) precedence among the items that could
-        // shift the lookahead. Used as a min/max pair, not as a half-open range.
-        let mut shift_precedence: Option<Range<i32>> = None;
-        let mut conflicting_items = HashSet::new();
-        for (item, lookaheads) in &item_set.entries {
-            if let Some(step) = item.step() {
-                // An unfinished item takes part in the conflict only if it has already
-                // consumed at least one symbol and the lookahead can begin its next symbol.
-                if item.step_index > 0 {
-                    if self
-                        .item_set_builder
-                        .first_set(&step.symbol)
-                        .contains(&conflicting_lookahead)
-                    {
-                        conflicting_items.insert(item);
-                        let precedence = item.precedence();
-                        if let Some(range) = &mut shift_precedence {
-                            if precedence < range.start {
-                                range.start = precedence;
-                            } else if precedence > range.end {
-                                range.end = precedence;
-                            }
-                        } else {
-                            shift_precedence = Some(precedence..precedence);
-                        }
-                    }
-                }
-            } else if lookaheads.contains(&conflicting_lookahead) {
-                // A finished item takes part if it would be reduced on this lookahead.
-                conflicting_items.insert(item);
-            }
-        }
-
-        // The SHIFT action, if there is one, is always the last action of the entry.
-        if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
-            let shift_precedence = shift_precedence.unwrap_or(0..0);
-
-            // If all of the items in the conflict have the same parent symbol,
-            // and that parent symbol is auxiliary, then this is just the intentional
-            // ambiguity associated with a repeat rule. Resolve that class of ambiguity
-            // by leaving it in the parse table, but marking the SHIFT action with
-            // an `is_repetition` flag.
-            let conflicting_variable_index =
-                conflicting_items.iter().next().unwrap().variable_index;
-            if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() {
-                if conflicting_items
-                    .iter()
-                    .all(|item| item.variable_index == conflicting_variable_index)
-                {
-                    *is_repetition = true;
-                    return Ok(());
-                }
-            }
-
-            // If the SHIFT action has higher precedence, remove all the REDUCE actions.
-            if shift_precedence.start > reduce_precedence
-                || (shift_precedence.start == reduce_precedence
-                    && shift_precedence.end > reduce_precedence)
-            {
-                entry.actions.drain(0..entry.actions.len() - 1);
-            }
-            // If the REDUCE actions have higher precedence, remove the SHIFT action.
-            else if shift_precedence.end < reduce_precedence
-                || (shift_precedence.end == reduce_precedence
-                    && shift_precedence.start < reduce_precedence)
-            {
-                entry.actions.pop();
-                conflicting_items.retain(|item| item.is_done());
-            }
-            // If the SHIFT and REDUCE actions have the same precedence, consider
-            // the REDUCE actions' associativity.
-            else if shift_precedence == (reduce_precedence..reduce_precedence) {
-                considered_associativity = true;
-                let mut has_left = false;
-                let mut has_right = false;
-                let mut has_non = false;
-                for action in &entry.actions {
-                    if let ParseAction::Reduce { associativity, .. } = action {
-                        match associativity {
-                            Some(Associativity::Left) => has_left = true,
-                            Some(Associativity::Right) => has_right = true,
-                            None => has_non = true,
-                        }
-                    }
-                }
-
-                // If all reduce actions are left associative, remove the SHIFT action.
-                // If all reduce actions are right associative, remove the REDUCE actions.
-                match (has_left, has_non, has_right) {
-                    (true, false, false) => {
-                        entry.actions.pop();
-                        conflicting_items.retain(|item| item.is_done());
-                    }
-                    (false, false, true) => {
-                        entry.actions.drain(0..entry.actions.len() - 1);
-                    }
-                    _ => {}
-                }
-            }
-        }
-
-        // If all of the actions but one have been eliminated, then there's no problem.
-        let entry = self.parse_table.states[state_id]
-            .terminal_entries
-            .get_mut(&conflicting_lookahead)
-            .unwrap();
-        if entry.actions.len() == 1 {
-            return Ok(());
-        }
-
-        // Determine the set of parent symbols involved in this conflict.
-        let mut actual_conflict = Vec::new();
-        for item in &conflicting_items {
-            let symbol = Symbol::non_terminal(item.variable_index as usize);
-            if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
-                // An auxiliary symbol is replaced by the parent symbols stored in the
-                // most recently recorded entry for it.
-                actual_conflict.extend(
-                    preceding_auxiliary_symbols
-                        .iter()
-                        .rev()
-                        .find_map(|info| {
-                            if info.auxiliary_symbol == symbol {
-                                Some(&info.parent_symbols)
-                            } else {
-                                None
-                            }
-                        })
-                        .unwrap()
-                        .iter(),
-                );
-            } else {
-                actual_conflict.push(symbol);
-            }
-        }
-        // Sort and deduplicate before comparing with the expected conflicts.
-        actual_conflict.sort_unstable();
-        actual_conflict.dedup();
-
-        // If this set of symbols has been whitelisted, then there's no error.
-        if self
-            .syntax_grammar
-            .expected_conflicts
-            .contains(&actual_conflict)
-        {
-            return Ok(());
-        }
-
-        // Build the error message: the symbols leading to this state, then the lookahead.
-        let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
-        for symbol in preceding_symbols {
-            write!(&mut msg, "  {}", self.symbol_name(symbol)).unwrap();
-        }
-
-        write!(
-            &mut msg,
-            "  •  {}  …\n\n",
-            self.symbol_name(&conflicting_lookahead)
-        )
-        .unwrap();
-        write!(&mut msg, "Possible interpretations:\n").unwrap();
-        for (i, item) in conflicting_items.iter().enumerate() {
-            write!(&mut msg, "\n  {}:", i).unwrap();
-
-            // Print the preceding symbols that come before the ones this item has
-            // already consumed (all but the last `step_index` of them).
-            for preceding_symbol in preceding_symbols
-                .iter()
-                .take(preceding_symbols.len() - item.step_index as usize)
-            {
-                write!(&mut msg, "  {}", self.symbol_name(preceding_symbol)).unwrap();
-            }
-
-            write!(
-                &mut msg,
-                "  ({}",
-                &self.syntax_grammar.variables[item.variable_index as usize].name
-            )
-            .unwrap();
-
-            // Print the item's production, with a dot marking its current position.
-            for (j, step) in item.production.steps.iter().enumerate() {
-                if j as u32 == item.step_index {
-                    write!(&mut msg, "  •").unwrap();
-                }
-                write!(&mut msg, "  {}", self.symbol_name(&step.symbol)).unwrap();
-            }
-
-            write!(&mut msg, ")").unwrap();
-
-            // For a finished item the dot and the lookahead follow the closing paren.
-            if item.is_done() {
-                write!(
-                    &mut msg,
-                    "  •  {}",
-                    self.symbol_name(&conflicting_lookahead)
-                )
-                .unwrap();
-            }
-
-            let precedence = item.precedence();
-            let associativity = item.associativity();
-            if precedence != 0 || associativity.is_some() {
-                write!(
-                    &mut msg,
-                    "(precedence: {}, associativity: {:?})",
-                    precedence, associativity
-                )
-                .unwrap();
-            }
-        }
-
-        // TODO - generate suggested resolutions
-
-        Err(Error::ConflictError(msg))
-    }
-
-    /// Builds the bookkeeping record for an auxiliary non-terminal that is about to be
-    /// consumed from `item_set`.
-    ///
-    /// The record pairs `symbol` with its parent symbols: the non-auxiliary variables
-    /// that own an item in `item_set` whose next symbol is `symbol`. `handle_conflict`
-    /// substitutes these parents for the auxiliary symbol when it compares a conflict
-    /// with the grammar's expected conflicts.
-    ///
-    /// The same parent may appear more than once when several of its items qualify;
-    /// the list is not deduplicated here.
-    fn get_auxiliary_node_info(
-        &self,
-        item_set: &ParseItemSet,
-        symbol: Symbol,
-    ) -> AuxiliarySymbolInfo {
-        let parent_symbols = item_set
-            .entries
-            .keys()
-            .filter_map(|item| {
-                let variable_index = item.variable_index as usize;
-                // Only items that are about to consume `symbol` count, and only their
-                // non-auxiliary owners are reported as parents.
-                if item.symbol() == Some(symbol)
-                    && !self.syntax_grammar.variables[variable_index].is_auxiliary()
-                {
-                    Some(Symbol::non_terminal(variable_index))
-                } else {
-                    None
-                }
-            })
-            .collect();
-        AuxiliarySymbolInfo {
-            auxiliary_symbol: symbol,
-            parent_symbols,
-        }
-    }
-
-    /// Fills `parse_table.symbols` with every symbol that occurs as a key in some
-    /// parse state.
-    ///
-    /// The end symbol is pushed first, followed by the used terminals, the used
-    /// non-terminals and the used external tokens, each group in ascending index order.
-    fn populate_used_symbols(&mut self) {
-        let mut used_terminals = vec![false; self.lexical_grammar.variables.len()];
-        let mut used_non_terminals = vec![false; self.syntax_grammar.variables.len()];
-        let mut used_externals = vec![false; self.syntax_grammar.external_tokens.len()];
-
-        // First pass: flag every symbol that keys an entry in any state.
-        for state in &self.parse_table.states {
-            for symbol in state.terminal_entries.keys() {
-                match symbol.kind {
-                    SymbolType::Terminal => used_terminals[symbol.index] = true,
-                    SymbolType::External => used_externals[symbol.index] = true,
-                    _ => {}
-                }
-            }
-            for symbol in state.nonterminal_entries.keys() {
-                used_non_terminals[symbol.index] = true;
-            }
-        }
-
-        // Second pass: emit the flagged symbols group by group.
-        let symbols = &mut self.parse_table.symbols;
-        symbols.push(Symbol::end());
-        symbols.extend(
-            used_terminals
-                .into_iter()
-                .enumerate()
-                .filter_map(|(index, used)| if used { Some(Symbol::terminal(index)) } else { None }),
-        );
-        symbols.extend(
-            used_non_terminals
-                .into_iter()
-                .enumerate()
-                .filter_map(|(index, used)| {
-                    if used {
-                        Some(Symbol::non_terminal(index))
-                    } else {
-                        None
-                    }
-                }),
-        );
-        symbols.extend(
-            used_externals
-                .into_iter()
-                .enumerate()
-                .filter_map(|(index, used)| if used { Some(Symbol::external(index)) } else { None }),
-        );
-    }
-
-    /// Returns the id of the alias sequence used by `item`'s production, registering
-    /// the sequence in `parse_table.alias_sequences` if it is not there yet.
-    ///
-    /// The sequence holds one optional alias per production step, with trailing
-    /// `None`s stripped, so a production without aliases maps to the empty sequence.
-    fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
-        let steps = &item.production.steps;
-        let mut aliases: Vec<Option<Alias>> = Vec::with_capacity(steps.len());
-        for step in steps {
-            aliases.push(step.alias.clone());
-        }
-
-        // Drop the unaliased tail so that equivalent sequences compare equal.
-        while aliases.last().map_or(false, |alias| alias.is_none()) {
-            aliases.pop();
-        }
-
-        let known_sequences = &mut self.parse_table.alias_sequences;
-        match known_sequences
-            .iter()
-            .position(|existing| *existing == aliases)
-        {
-            Some(id) => id,
-            None => {
-                known_sequences.push(aliases);
-                known_sequences.len() - 1
-            }
-        }
-    }
-
-    /// Returns the display name of `symbol` for use in diagnostics.
-    ///
-    /// The end symbol is shown as `EOF`. External tokens and non-terminals use their
-    /// grammar name. Terminals use their name as-is when they are named, and wrapped
-    /// in double quotes otherwise.
-    fn symbol_name(&self, symbol: &Symbol) -> String {
-        let index = symbol.index;
-        match symbol.kind {
-            SymbolType::Terminal => {
-                let token = &self.lexical_grammar.variables[index];
-                if token.kind != VariableType::Named {
-                    format!("\"{}\"", &token.name)
-                } else {
-                    token.name.clone()
-                }
-            }
-            SymbolType::NonTerminal => self.syntax_grammar.variables[index].name.clone(),
-            SymbolType::External => {
-                let external_token = &self.syntax_grammar.external_tokens[index];
-                external_token.name.clone()
-            }
-            SymbolType::End => "EOF".to_string(),
-        }
-    }
-}
+use self::build_parse_table::build_parse_table;
+use self::shrink_parse_table::shrink_parse_table;

 pub(crate) fn build_tables(
    syntax_grammar: &SyntaxGrammar,
@ -609,20 +19,8 @@ pub(crate) fn build_tables(
    simple_aliases: &AliasMap,
    inlines: &InlinedProductionMap,
 ) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
-    ParseTableBuilder {
-        syntax_grammar,
-        lexical_grammar,
-        simple_aliases,
-        inlines,
-        item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines),
-        state_ids_by_item_set: HashMap::new(),
-        item_sets_by_state_id: Vec::new(),
-        parse_state_queue: VecDeque::new(),
-        parse_table: ParseTable {
-            states: Vec::new(),
-            alias_sequences: Vec::new(),
-            symbols: Vec::new(),
-        },
-    }
-    .build()
+
+    let mut parse_table = build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
+    shrink_parse_table(&mut parse_table, syntax_grammar, simple_aliases);
+    Ok((parse_table, LexTable::default(), LexTable::default(), None))
 }
--- a/src/build_tables/shrink_parse_table.rs
+++ b/src/build_tables/shrink_parse_table.rs
@ -0,0 +1,117 @@
+use crate::grammars::{SyntaxGrammar, VariableType};
+use crate::rules::AliasMap;
+use crate::tables::{ParseAction, ParseTable};
+use std::collections::{HashMap, HashSet};
+
+/// Shrinks `parse_table` in place.
+///
+/// Runs two passes in order: `remove_unit_reductions` redirects references away from
+/// states that only perform a unit reduction, then `remove_unused_states` drops the
+/// states that nothing refers to and renumbers the rest.
+pub(crate) fn shrink_parse_table(
+    parse_table: &mut ParseTable,
+    syntax_grammar: &SyntaxGrammar,
+    simple_aliases: &AliasMap,
+) {
+    remove_unit_reductions(parse_table, syntax_grammar, simple_aliases);
+    remove_unused_states(parse_table);
+}
+
+/// Redirects every reference to a "unit reduction" state so that it bypasses that state.
+///
+/// A state counts as a unit reduction state when every action in its terminal entries
+/// is either `ShiftExtra` or a `Reduce` with `child_count: 1` and
+/// `alias_sequence_id: 0`, all of these reductions produce the same symbol, and that
+/// symbol:
+///
+/// * is not a key of `simple_aliases`,
+/// * is not the symbol of any production step that carries an alias, and
+/// * belongs to a syntax variable whose kind is not `VariableType::Named`.
+///
+/// References to such a state are replaced with the referring state's own
+/// `nonterminal_entries` target for the reduced symbol. The bypassed states are
+/// not deleted here; they are merely no longer referenced.
+fn remove_unit_reductions(
+    parse_table: &mut ParseTable,
+    syntax_grammar: &SyntaxGrammar,
+    simple_aliases: &AliasMap,
+) {
+    // Collect every symbol that appears with an alias in some production step.
+    let mut aliased_symbols = HashSet::new();
+    for variable in &syntax_grammar.variables {
+        for production in &variable.productions {
+            for step in &production.steps {
+                if step.alias.is_some() {
+                    aliased_symbols.insert(step.symbol);
+                }
+            }
+        }
+    }
+
+    // Maps the index of each unit reduction state to the symbol it reduces to.
+    let mut unit_reduction_symbols_by_state = HashMap::new();
+    for (i, state) in parse_table.states.iter().enumerate() {
+        let mut only_unit_reductions = true;
+        let mut unit_reduction_symbol = None;
+        for (_, entry) in &state.terminal_entries {
+            for action in &entry.actions {
+                match action {
+                    // Extra tokens do not disqualify a state.
+                    ParseAction::ShiftExtra => continue,
+                    ParseAction::Reduce {
+                        child_count: 1,
+                        alias_sequence_id: 0,
+                        symbol,
+                        ..
+                    } => {
+                        if !simple_aliases.contains_key(&symbol)
+                            && !aliased_symbols.contains(&symbol)
+                            && syntax_grammar.variables[symbol.index].kind != VariableType::Named
+                            && (unit_reduction_symbol.is_none()
+                                || unit_reduction_symbol == Some(symbol))
+                        {
+                            unit_reduction_symbol = Some(symbol);
+                            continue;
+                        }
+                    }
+                    _ => {}
+                }
+                // Reached for any action that is not an acceptable unit reduction.
+                only_unit_reductions = false;
+                break;
+            }
+
+            if !only_unit_reductions {
+                break;
+            }
+        }
+
+        // A state whose actions are all `ShiftExtra` has no symbol and is left alone.
+        if let Some(symbol) = unit_reduction_symbol {
+            if only_unit_reductions {
+                unit_reduction_symbols_by_state.insert(i, *symbol);
+            }
+        }
+    }
+
+    for state in parse_table.states.iter_mut() {
+        // Repeat until a pass finds no reference to a unit reduction state, because a
+        // replacement target may itself be a unit reduction state.
+        let mut done = false;
+        while !done {
+            done = true;
+            state.update_referenced_states(|other_state_id, state| {
+                if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
+                    done = false;
+                    // NOTE(review): indexing presumably panics if this state has no
+                    // nonterminal entry for `symbol` - confirm the entry always exists.
+                    state.nonterminal_entries[symbol]
+                } else {
+                    other_state_id
+                }
+            })
+        }
+    }
+}
+
+/// Deletes every parse state that is not in use and renumbers the remaining states
+/// so that their ids stay contiguous.
+///
+/// A state is in use when some state (used or not) refers to it through a SHIFT
+/// action or a nonterminal entry, or when it is one of the parser's entry points.
+/// Entry points are never the target of a transition, so they are kept
+/// unconditionally: state 0 is the error state, and state 1 is presumably the start
+/// state (TODO confirm against the table builder). Without this, both would be
+/// removed and every other id would shift down.
+fn remove_unused_states(parse_table: &mut ParseTable) {
+    let state_count = parse_table.states.len();
+    let mut state_usage_map = vec![false; state_count];
+
+    // Entry points are roots; `take` keeps this safe for tables with under two states.
+    for is_used in state_usage_map.iter_mut().take(2) {
+        *is_used = true;
+    }
+
+    for state in &parse_table.states {
+        for referenced_state in state.referenced_states() {
+            state_usage_map[referenced_state] = true;
+        }
+    }
+
+    // A state's new id is its old id minus the number of removed states before it.
+    let mut removed_predecessor_count = 0;
+    let mut state_replacement_map = vec![0; state_count];
+    for (state_id, is_used) in state_usage_map.iter().enumerate() {
+        state_replacement_map[state_id] = state_id - removed_predecessor_count;
+        if !*is_used {
+            removed_predecessor_count += 1;
+        }
+    }
+
+    // `retain` visits the elements once, in their original order, so a running
+    // counter recovers each state's original id. This replaces repeated
+    // `Vec::remove` calls, each of which shifts the whole tail of the vector.
+    let mut original_state_id = 0;
+    parse_table.states.retain(|_| {
+        let is_used = state_usage_map[original_state_id];
+        original_state_id += 1;
+        is_used
+    });
+
+    // Every surviving reference points at a used state, so the map covers it.
+    for state in parse_table.states.iter_mut() {
+        state.update_referenced_states(|other_state_id, _| {
+            state_replacement_map[other_state_id]
+        });
+    }
+}
--- a/src/build_tables/token_conflict_map.rs
+++ b/src/build_tables/token_conflict_map.rs
@ -0,0 +1,77 @@
+use crate::grammars::{LexicalGrammar, LexicalVariable};
+use crate::nfa::{CharacterSet, NfaCursor};
+use std::collections::HashSet;
+
+/// A pair of conflict flags, stored as the element type of
+/// `TokenConflictMap::status_matrix`. Both flags default to `false`.
+///
+/// NOTE(review): nothing in this file sets or reads these flags yet. Judging by the
+/// names, they presumably describe how one token's matches relate to another
+/// token's - confirm once the matrix is populated.
+#[derive(Default)]
+struct TokenConflictStatus {
+    // Presumably: both tokens can match the same string - TODO confirm.
+    matches_same_string: bool,
+    // Presumably: one token can match a longer string that extends the other's match
+    // with a character that is valid in context - TODO confirm.
+    matches_longer_string_with_valid_next_char: bool,
+}
+
+/// Per-token lexical information derived from a `LexicalGrammar`.
+pub(crate) struct TokenConflictMap {
+    // For each lexical variable, in grammar order, the set of characters that can
+    // start a match of that variable.
+    starting_chars_by_index: Vec<CharacterSet>,
+    // Reserved with room for one status per ordered pair of variables; `new` leaves
+    // it empty.
+    status_matrix: Vec<TokenConflictStatus>,
+}
+
+impl TokenConflictMap {
+    /// Builds the map for `grammar`.
+    ///
+    /// For each lexical variable, an NFA cursor is reset to the variable's start
+    /// state and the character sets of all its successors are merged into that
+    /// variable's starting character set. The status matrix is only given capacity
+    /// for one entry per ordered pair of variables; no entries are created.
+    pub fn new(grammar: &LexicalGrammar) -> Self {
+        let variable_count = grammar.variables.len();
+        let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
+
+        let starting_chars_by_index = grammar
+            .variables
+            .iter()
+            .map(|variable| {
+                cursor.reset(vec![variable.start_state]);
+                cursor
+                    .successors()
+                    .into_iter()
+                    .fold(CharacterSet::empty(), |merged, (chars, _, _)| {
+                        merged.add(chars)
+                    })
+            })
+            .collect();
+
+        TokenConflictMap {
+            starting_chars_by_index,
+            status_matrix: Vec::with_capacity(variable_count * variable_count),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::grammars::{Variable, VariableType};
+    use crate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar};
+    use crate::rules::Rule;
+
+    // Builds a named lexical variable whose rule is the given pattern.
+    fn named_token(name: &str, pattern: &'static str) -> Variable {
+        Variable {
+            name: name.to_string(),
+            kind: VariableType::Named,
+            rule: Rule::pattern(pattern),
+        }
+    }
+
+    // Checks the starting character set computed for each token.
+    #[test]
+    fn test_starting_characters() {
+        let extracted = ExtractedLexicalGrammar {
+            separators: Vec::new(),
+            variables: vec![
+                named_token("token_0", "[a-f]1|0x\\d"),
+                named_token("token_1", "d*ef"),
+            ],
+        };
+        let grammar = expand_tokens(extracted).unwrap();
+
+        let token_map = TokenConflictMap::new(&grammar);
+
+        // token_0 starts with any of `a`..`f`, or with the `0` of `0x`.
+        let expected_token_0 = CharacterSet::empty().add_range('a', 'f').add_char('0');
+        assert_eq!(token_map.starting_chars_by_index[0], expected_token_0);
+
+        // token_1 starts with `d` (from `d*`) or directly with `e`.
+        let expected_token_1 = CharacterSet::empty().add_range('d', 'e');
+        assert_eq!(token_map.starting_chars_by_index[1], expected_token_1);
+    }
+}
--- a/src/tables.rs
+++ b/src/tables.rs
@ -1,7 +1,7 @@
+use crate::nfa::CharacterSet;
+use crate::rules::{Alias, Associativity, Symbol};
 use std::collections::HashMap;
 use std::ops::Range;
-use crate::rules::{Associativity, Symbol, Alias};
-use crate::nfa::CharacterSet;

 pub(crate) type AliasSequenceId = usize;
 pub(crate) type ParseStateId = usize;
@ -23,7 +23,7 @@ pub(crate) enum ParseAction {
        dynamic_precedence: i32,
        associativity: Option<Associativity>,
        alias_sequence_id: AliasSequenceId,
-    }
+    },
 }

 #[derive(Clone, Debug, PartialEq, Eq)]
@ -86,6 +86,56 @@ impl Default for LexTable {
    }
 }

+impl ParseState {
+    /// Iterates over the ids of every state this state refers to: first the targets
+    /// of all SHIFT actions in the terminal entries, then the targets of all
+    /// nonterminal entries. An id is yielded once per reference, so it may repeat.
+    pub fn referenced_states<'a>(&'a self) -> impl Iterator<Item = ParseStateId> + 'a {
+        let shift_targets = self
+            .terminal_entries
+            .values()
+            .flat_map(|entry| entry.actions.iter())
+            .filter_map(|action| {
+                if let ParseAction::Shift { state, .. } = action {
+                    Some(*state)
+                } else {
+                    None
+                }
+            });
+        let goto_targets = self.nonterminal_entries.values().cloned();
+        shift_targets.chain(goto_targets)
+    }
+
+    /// Rewrites the state ids this state refers to.
+    ///
+    /// `f` is called once per reference, SHIFT targets first and nonterminal targets
+    /// second, with the current target id and this state as it was before any
+    /// change. It returns the id that should replace the target. The replacements
+    /// are applied only after every reference has been visited.
+    pub fn update_referenced_states<F>(&mut self, mut f: F)
+    where
+        F: FnMut(usize, &ParseState) -> usize,
+    {
+        // Changes are collected first, because the maps cannot be modified while
+        // they are being iterated and handed to `f`.
+        let mut pending = Vec::new();
+        for (symbol, entry) in self.terminal_entries.iter() {
+            for (action_index, action) in entry.actions.iter().enumerate() {
+                if let ParseAction::Shift { state, .. } = action {
+                    let old_state = *state;
+                    let new_state = f(old_state, self);
+                    if new_state != old_state {
+                        pending.push((*symbol, action_index, new_state));
+                    }
+                }
+            }
+        }
+        for (symbol, state) in self.nonterminal_entries.iter() {
+            let old_state = *state;
+            let new_state = f(old_state, self);
+            if new_state != old_state {
+                // The action index is unused for nonterminal entries.
+                pending.push((*symbol, 0, new_state));
+            }
+        }
+
+        for (symbol, action_index, new_state) in pending {
+            // The kind of the symbol tells which of the two maps the change belongs to.
+            if symbol.is_non_terminal() {
+                self.nonterminal_entries.insert(symbol, new_state);
+                continue;
+            }
+            let entry = self.terminal_entries.get_mut(&symbol).unwrap();
+            if let ParseAction::Shift { state, .. } = &mut entry.actions[action_index] {
+                *state = new_state;
+            }
+        }
+    }
+}
+
 impl ParseAction {
    pub fn precedence(&self) -> i32 {
        if let ParseAction::Reduce { precedence, .. } = self {