From 3fbaff5e69a1bfd200a7c9979e52412b55a26ba0 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Wed, 2 Jan 2019 16:48:44 -0800
Subject: [PATCH] Fix various logic errors in parse table construction

---
 Cargo.lock                             |  18 ++++
 Cargo.toml                             |   5 ++
 src/build_tables/build_lex_table.rs    | 116 +++++++++++++++++++++----
 src/build_tables/build_parse_table.rs  |  59 +++++++------
 src/build_tables/coincident_tokens.rs  |  38 ++++----
 src/build_tables/item.rs               |   4 +-
 src/build_tables/item_set_builder.rs   |   2 +-
 src/build_tables/mod.rs                |  44 +++++-----
 src/build_tables/shrink_parse_table.rs |   6 +-
 src/build_tables/token_conflicts.rs    |   2 +-
 src/grammars.rs                        |   2 +-
 src/logger.rs                          |  29 +++++++
 src/main.rs                            |  28 ++++--
 src/nfa.rs                             |  26 ++++--
 src/parse_grammar.rs                   |   4 +-
 src/prepare_grammar/expand_repeats.rs  |   2 +-
 src/prepare_grammar/extract_tokens.rs  |   2 +-
 src/prepare_grammar/process_inlines.rs |   2 +-
 src/render/mod.rs                      |  19 ++--
 src/rules.rs                           |   2 +-
 src/tables.rs                          |   2 +-
 21 files changed, 297 insertions(+), 115 deletions(-)
 create mode 100644 src/logger.rs

diff --git a/Cargo.lock b/Cargo.lock
index 538517f1..2312d362 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -76,6 +76,11 @@ dependencies = [
  "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
+[[package]]
+name = "byteorder"
+version = "1.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "cc"
 version = "1.0.25"
@@ -212,6 +217,15 @@ dependencies = [
  "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
+[[package]]
+name = "hashbrown"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "ignore"
 version = "0.4.4"
@@ -463,9 +477,11 @@ version = "0.1.0"
 dependencies = [
  "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -737,6 +753,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0"
 "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
 "checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
+"checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d"
 "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
 "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
 "checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
@@ -753,6 +770,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
 "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
 "checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865"
+"checksum hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "64b7d419d0622ae02fe5da6b9a5e1964b610a65bb37923b976aeebb6dbb8f86e"
 "checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01"
 "checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
 "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
diff --git a/Cargo.toml b/Cargo.toml
index b29bc85e..29b10e17 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,7 @@ lazy_static = "1.2.0"
 smallbitvec = "2.3.0"
 clap = "2.32"
 dirs = "1.0.2"
+hashbrown = "0.1"
 ignore = "0.4.4"
 libloading = "0.5"
 rusqlite = "0.14.0"
@@ -20,3 +21,7 @@ regex-syntax = "0.6.4"
 [dependencies.serde_json]
 version = "1.0"
 features = ["preserve_order"]
+
+[dependencies.log]
+version = "0.4.6"
+features = ["std"]
diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs
index aa929d97..c002f427 100644
--- a/src/build_tables/build_lex_table.rs
+++ b/src/build_tables/build_lex_table.rs
@@ -2,10 +2,9 @@ use super::item::LookaheadSet;
 use super::token_conflicts::TokenConflictMap;
 use crate::grammars::{LexicalGrammar, SyntaxGrammar};
 use crate::nfa::NfaCursor;
-use crate::rules::Symbol;
 use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable};
 use std::collections::hash_map::Entry;
-use std::collections::{HashMap, VecDeque};
+use std::collections::{BTreeMap, HashMap, VecDeque};
 
 pub(crate) fn build_lex_table(
     parse_table: &mut ParseTable,
@@ -16,15 +15,16 @@ pub(crate) fn build_lex_table(
     let keyword_lex_table;
     if syntax_grammar.word_token.is_some() {
         let mut builder = LexTableBuilder::new(lexical_grammar);
-        builder.add_state_for_tokens(keywords.iter());
+        builder.add_state_for_tokens(keywords);
         keyword_lex_table = builder.table;
     } else {
         keyword_lex_table = LexTable::default();
     }
 
     let mut builder = LexTableBuilder::new(lexical_grammar);
-    for state in parse_table.states.iter_mut() {
-        let tokens = state.terminal_entries.keys().filter_map(|token| {
+    for (i, state) in parse_table.states.iter_mut().enumerate() {
+        info!("populate lex state for parse state {}", i);
+        let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| {
             if token.is_terminal() {
                 if keywords.contains(&token) {
                     syntax_grammar.word_token
@@ -34,11 +34,14 @@ pub(crate) fn build_lex_table(
             } else {
                 None
             }
-        });
-        state.lex_state_id = builder.add_state_for_tokens(tokens);
+        }));
+        state.lex_state_id = builder.add_state_for_tokens(&tokens);
     }
 
-    (builder.table, keyword_lex_table)
+    let mut table = builder.table;
+    shrink_lex_table(&mut table, parse_table);
+
+    (table, keyword_lex_table)
 }
 
 struct LexTableBuilder<'a> {
@@ -60,32 +63,49 @@ impl<'a> LexTableBuilder<'a> {
         }
     }
 
-    fn add_state_for_tokens(&mut self, tokens: impl Iterator<Item = Symbol>) -> usize {
+    fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize {
         let nfa_states = tokens
+            .iter()
             .map(|token| self.lexical_grammar.variables[token.index].start_state)
             .collect();
-        let result = self.add_state(nfa_states);
-        while let Some((state_id, nfa_states)) = self.state_queue.pop_front() {
+        let (state_id, is_new) = self.add_state(nfa_states);
+
+        if is_new {
+            info!(
+                "entry point state: {}, tokens: {:?}",
+                state_id,
+                tokens
+                    .iter()
+                    .map(|t| &self.lexical_grammar.variables[t.index].name)
+                    .collect::<Vec<_>>()
+            );
+        }
+
+        while let Some((state_id, nfa_states)) = self.state_queue.pop_back() {
             self.populate_state(state_id, nfa_states);
         }
-        result
+        state_id
     }
 
-    fn add_state(&mut self, nfa_states: Vec<u32>) -> usize {
-        match self.state_ids_by_nfa_state_set.entry(nfa_states) {
-            Entry::Occupied(o) => *o.get(),
+    fn add_state(&mut self, nfa_states: Vec<u32>) -> (usize, bool) {
+        self.cursor.reset(nfa_states);
+        match self
+            .state_ids_by_nfa_state_set
+            .entry(self.cursor.state_ids.clone())
+        {
+            Entry::Occupied(o) => (*o.get(), false),
             Entry::Vacant(v) => {
                 let state_id = self.table.states.len();
                 self.table.states.push(LexState::default());
                 self.state_queue.push_back((state_id, v.key().clone()));
                 v.insert(state_id);
-                state_id
+                (state_id, true)
             }
         }
     }
 
     fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>) {
-        self.cursor.reset(nfa_states);
+        self.cursor.force_reset(nfa_states);
 
         let mut completion = None;
         for (id, prec) in self.cursor.completions() {
@@ -102,12 +122,16 @@ impl<'a> LexTableBuilder<'a> {
         }
 
         for (chars, advance_precedence, next_states, is_sep) in self.cursor.grouped_successors() {
+            info!(
+                "populate state: {}, characters: {:?}, precedence: {:?}",
+                state_id, chars, advance_precedence
+            );
             if let Some((_, completed_precedence)) = completion {
                 if advance_precedence < completed_precedence {
                     continue;
                 }
             }
-            let next_state_id = self.add_state(next_states);
+            let (next_state_id, _) = self.add_state(next_states);
             self.table.states[state_id].advance_actions.push((
                 chars,
                 AdvanceAction {
@@ -122,3 +146,76 @@ impl<'a> LexTableBuilder<'a> {
         }
     }
 }
+
+/// Merges identical lex states and renumbers every reference to them.
+///
+/// A state equal to an earlier surviving state is recorded in
+/// `state_replacements` and the advance actions that target it are redirected.
+/// Redirecting can make further states equal, so the scan repeats until a pass
+/// finds no new duplicate. The surviving states are then compacted, and both
+/// `lex_state_id` in `parse_table` and the advance actions are rewritten to the
+/// compacted indices.
+fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
+    let mut state_replacements = BTreeMap::new();
+    let mut done = false;
+    while !done {
+        done = true;
+        for (i, state_i) in table.states.iter().enumerate() {
+            if state_replacements.contains_key(&i) {
+                continue;
+            }
+            // Only earlier states are candidates, so a replacement always points
+            // at a smaller index and chains of replacements cannot form a cycle.
+            for (j, state_j) in table.states.iter().enumerate().take(i) {
+                if state_replacements.contains_key(&j) {
+                    continue;
+                }
+                if state_i == state_j {
+                    info!("replace state {} with state {}", i, j);
+                    state_replacements.insert(i, j);
+                    done = false;
+                    // Stop at the first match so that this entry is not overwritten.
+                    break;
+                }
+            }
+        }
+        for state in table.states.iter_mut() {
+            for advance_action in state.advance_actions.iter_mut() {
+                if let Some(new_state_id) = state_replacements.get(&advance_action.1.state) {
+                    advance_action.1.state = *new_state_id;
+                }
+            }
+        }
+    }
+
+    // A replacement recorded in one pass may itself be replaced in a later pass,
+    // so follow each chain to the state that actually survives before
+    // subtracting the number of removed states that precede it.
+    let final_state_replacements = (0..table.states.len())
+        .map(|state_id| {
+            let mut replacement = state_id;
+            while let Some(next) = state_replacements.get(&replacement) {
+                replacement = *next;
+            }
+            let prior_removed = state_replacements.range(..replacement).count();
+            replacement - prior_removed
+        })
+        .collect::<Vec<_>>();
+
+    for state in parse_table.states.iter_mut() {
+        state.lex_state_id = final_state_replacements[state.lex_state_id];
+    }
+
+    for state in table.states.iter_mut() {
+        for advance_action in state.advance_actions.iter_mut() {
+            advance_action.1.state = final_state_replacements[advance_action.1.state];
+        }
+    }
+
+    let mut i = 0;
+    table.states.retain(|_| {
+        let result = !state_replacements.contains_key(&i);
+        i += 1;
+        result
+    });
+}
diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs
index c17261dc..ada34dff 100644
--- a/src/build_tables/build_parse_table.rs
+++ b/src/build_tables/build_parse_table.rs
@@ -7,8 +7,11 @@ use crate::tables::{
     AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
 };
 use core::ops::Range;
-use std::collections::hash_map::{DefaultHasher, Entry};
-use std::collections::{HashMap, HashSet, VecDeque};
+use hashbrown::hash_map::Entry;
+use hashbrown::{HashMap, HashSet};
+use std::collections::hash_map::DefaultHasher;
+use std::collections::VecDeque;
+
 use std::fmt::Write;
 use std::hash::Hasher;
 
@@ -43,9 +46,10 @@ impl<'a> ParseTableBuilder<'a> {
         // Ensure that the empty alias sequence has index 0.
         self.parse_table.alias_sequences.push(Vec::new());
 
-        // Ensure that the error state has index 0.
+        // Add the error state at index 0.
         self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
 
+        // Add the starting state at index 1.
         self.add_parse_state(
             &Vec::new(),
             &Vec::new(),
@@ -61,6 +65,8 @@ impl<'a> ParseTableBuilder<'a> {
 
         self.process_part_state_queue()?;
         self.populate_used_symbols();
+        self.remove_precedences();
+
         Ok((self.parse_table, self.following_tokens))
     }
 
@@ -112,28 +118,9 @@ impl<'a> ParseTableBuilder<'a> {
 
     fn process_part_state_queue(&mut self) -> Result<()> {
         while let Some(entry) = self.parse_state_queue.pop_front() {
-            let debug = false;
-
-            if debug {
-                println!(
-                    "ITEM SET {}:\n{}",
-                    entry.state_id,
-                    self.item_sets_by_state_id[entry.state_id]
-                        .display_with(&self.syntax_grammar, &self.lexical_grammar,)
-                );
-            }
-
             let item_set = self
                 .item_set_builder
                 .transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
-
-            if debug {
-                println!(
-                    "TRANSITIVE CLOSURE:\n{}",
-                    item_set.display_with(&self.syntax_grammar, &self.lexical_grammar)
-                );
-            }
-
             self.add_actions(
                 entry.preceding_symbols,
                 entry.preceding_auxiliary_symbols,
@@ -527,6 +514,7 @@ impl<'a> ParseTableBuilder<'a> {
     }
 
     fn populate_used_symbols(&mut self) {
+        self.parse_table.symbols.push(Symbol::end());
         let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()];
         let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()];
         let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()];
@@ -542,20 +530,39 @@ impl<'a> ParseTableBuilder<'a> {
                 non_terminal_usages[symbol.index] = true;
             }
         }
-        self.parse_table.symbols.push(Symbol::end());
         for (i, value) in terminal_usages.into_iter().enumerate() {
             if value {
                 self.parse_table.symbols.push(Symbol::terminal(i));
             }
         }
+        for (i, value) in external_usages.into_iter().enumerate() {
+            if value {
+                self.parse_table.symbols.push(Symbol::external(i));
+            }
+        }
         for (i, value) in non_terminal_usages.into_iter().enumerate() {
             if value {
                 self.parse_table.symbols.push(Symbol::non_terminal(i));
             }
         }
-        for (i, value) in external_usages.into_iter().enumerate() {
-            if value {
-                self.parse_table.symbols.push(Symbol::external(i));
+    }
+
+    fn remove_precedences(&mut self) {
+        for state in self.parse_table.states.iter_mut() {
+            for (_, entry) in state.terminal_entries.iter_mut() {
+                for action in entry.actions.iter_mut() {
+                    match action {
+                        ParseAction::Reduce {
+                            precedence,
+                            associativity,
+                            ..
+                        } => {
+                            *precedence = 0;
+                            *associativity = None;
+                        }
+                        _ => {}
+                    }
+                }
             }
         }
     }
diff --git a/src/build_tables/coincident_tokens.rs b/src/build_tables/coincident_tokens.rs
index 10707489..5f2bb3ec 100644
--- a/src/build_tables/coincident_tokens.rs
+++ b/src/build_tables/coincident_tokens.rs
@@ -1,36 +1,64 @@
+use crate::grammars::LexicalGrammar;
 use crate::rules::Symbol;
 use crate::tables::{ParseStateId, ParseTable};
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 
+/// Records, for each pair of lexical tokens, the parse states whose
+/// `terminal_entries` contain both tokens.
+///
+/// `entries` is a flattened `n * n` table, where `n` is the number of lexical
+/// variables; a pair is stored once, with its smaller index first.
 pub(crate) struct CoincidentTokenIndex {
-    entries: HashMap<(Symbol, Symbol), HashSet<ParseStateId>>,
-    empty: HashSet<ParseStateId>,
+    entries: Vec<HashSet<ParseStateId>>,
+    n: usize,
 }
 
 impl CoincidentTokenIndex {
-    pub fn new(table: &ParseTable) -> Self {
-        let mut entries = HashMap::new();
+    /// Builds the index from the terminal entries of every parse state.
+    ///
+    /// Only symbols for which `is_terminal()` holds are recorded: `index`
+    /// looks at `Symbol::index` alone, so any other symbol kind stored in
+    /// `terminal_entries` would collide with a lexical token of the same
+    /// index or fall outside the `n * n` table.
+    pub fn new(table: &ParseTable, lexical_grammar: &LexicalGrammar) -> Self {
+        let n = lexical_grammar.variables.len();
+        let mut result = Self {
+            n,
+            entries: vec![HashSet::new(); n * n],
+        };
         for (i, state) in table.states.iter().enumerate() {
             for symbol in state.terminal_entries.keys() {
+                if !symbol.is_terminal() {
+                    continue;
+                }
                 for other_symbol in state.terminal_entries.keys() {
-                    entries
-                        .entry((*symbol, *other_symbol))
-                        .or_insert(HashSet::new())
-                        .insert(i);
+                    if other_symbol.is_terminal() {
+                        let index = result.index(*symbol, *other_symbol);
+                        result.entries[index].insert(i);
+                    }
                 }
             }
         }
-        Self {
-            entries,
-            empty: HashSet::new(),
-        }
+        result
     }
 
+    /// Returns the parse states recorded for the pair `a`, `b`.
     pub fn states_with(&self, a: Symbol, b: Symbol) -> &HashSet<ParseStateId> {
-        self.entries.get(&(a, b)).unwrap_or(&self.empty)
+        &self.entries[self.index(a, b)]
     }
 
+    /// Returns true if at least one parse state is recorded for `a`, `b`.
     pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
-        self.entries.contains_key(&(a, b))
+        !self.entries[self.index(a, b)].is_empty()
+    }
+
+    /// Maps an unordered pair of symbols to its slot in `entries`, placing the
+    /// smaller index first so that `(a, b)` and `(b, a)` share a slot.
+    fn index(&self, a: Symbol, b: Symbol) -> usize {
+        if a.index < b.index {
+            a.index * self.n + b.index
+        } else {
+            b.index * self.n + a.index
+        }
     }
 }
diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs
index 4cd2f643..511d7bef 100644
--- a/src/build_tables/item.rs
+++ b/src/build_tables/item.rs
@@ -112,7 +112,9 @@ impl LookaheadSet {
                 return;
             }
         };
-        vec.resize(other.index + 1, false);
+        if other.index >= vec.len() {
+            vec.resize(other.index + 1, false);
+        }
         vec.set(other.index, true);
     }
 
diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs
index 5e61bfcc..5714e7e2 100644
--- a/src/build_tables/item_set_builder.rs
+++ b/src/build_tables/item_set_builder.rs
@@ -1,7 +1,7 @@
 use super::item::{LookaheadSet, ParseItem, ParseItemSet};
 use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
 use crate::rules::Symbol;
-use std::collections::{HashMap, HashSet};
+use hashbrown::{HashMap, HashSet};
 
 #[derive(Clone, Debug, PartialEq, Eq)]
 struct TransitiveClosureAddition<'a> {
diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs
index 8b3a2db4..207431dd 100644
--- a/src/build_tables/mod.rs
+++ b/src/build_tables/mod.rs
@@ -27,22 +27,14 @@ pub(crate) fn build_tables(
     let (mut parse_table, following_tokens) =
         build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
     let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
-
-    eprintln!("{:?}", token_conflict_map);
-
-    let coincident_token_index = CoincidentTokenIndex::new(&parse_table);
-    let keywords = if let Some(word_token) = syntax_grammar.word_token {
-        identify_keywords(
-            lexical_grammar,
-            &parse_table,
-            word_token,
-            &token_conflict_map,
-            &coincident_token_index,
-        )
-    } else {
-        LookaheadSet::new()
-    };
-
+    let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
+    let keywords = identify_keywords(
+        lexical_grammar,
+        &parse_table,
+        syntax_grammar.word_token,
+        &token_conflict_map,
+        &coincident_token_index,
+    );
     populate_error_state(
         &mut parse_table,
         syntax_grammar,
@@ -123,10 +115,15 @@ fn populate_error_state(
 fn identify_keywords(
     lexical_grammar: &LexicalGrammar,
     parse_table: &ParseTable,
-    word_token: Symbol,
+    word_token: Option<Symbol>,
     token_conflict_map: &TokenConflictMap,
     coincident_token_index: &CoincidentTokenIndex,
 ) -> LookaheadSet {
+    if word_token.is_none() {
+        return LookaheadSet::new();
+    }
+
+    let word_token = word_token.unwrap();
     let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());
 
     // First find all of the candidate keyword tokens: tokens that start with
@@ -137,6 +134,7 @@ fn identify_keywords(
             if all_chars_are_alphabetical(&cursor)
                 && token_conflict_map.does_match_same_string(i, word_token.index)
             {
+                info!("Keywords - add candidate {}", lexical_grammar.variables[i].name);
                 Some(Symbol::terminal(i))
             } else {
                 None
@@ -150,8 +148,8 @@ fn identify_keywords(
             if other_token != *token
                 && token_conflict_map.does_match_same_string(token.index, other_token.index)
             {
-                eprintln!(
-                    "Exclude {} from keywords because it matches the same string as {}",
+                info!(
+                    "Keywords - exclude {} because it matches the same string as {}",
                     lexical_grammar.variables[token.index].name,
                     lexical_grammar.variables[other_token.index].name
                 );
@@ -189,8 +187,8 @@ fn identify_keywords(
                 word_token.index,
                 other_index,
             ) {
-                eprintln!(
-                    "Exclude {} from keywords because of conflict with {}",
+                info!(
+                    "Keywords - exclude {} because of conflict with {}",
                     lexical_grammar.variables[token.index].name,
                     lexical_grammar.variables[other_index].name
                 );
@@ -198,8 +196,8 @@ fn identify_keywords(
             }
         }
 
-        eprintln!(
-            "Include {} in keywords",
+        info!(
+            "Keywords - include {}",
             lexical_grammar.variables[token.index].name,
         );
         true
diff --git a/src/build_tables/shrink_parse_table.rs b/src/build_tables/shrink_parse_table.rs
index b943158f..33b72c32 100644
--- a/src/build_tables/shrink_parse_table.rs
+++ b/src/build_tables/shrink_parse_table.rs
@@ -2,7 +2,7 @@ use super::token_conflicts::TokenConflictMap;
 use crate::grammars::{SyntaxGrammar, VariableType};
 use crate::rules::{AliasMap, Symbol};
 use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
-use std::collections::{HashMap, HashSet};
+use hashbrown::{HashMap, HashSet};
 
 pub(crate) fn shrink_parse_table(
     parse_table: &mut ParseTable,
@@ -240,6 +240,10 @@ fn can_add_entry_to_state(
 
 fn remove_unused_states(parse_table: &mut ParseTable) {
     let mut state_usage_map = vec![false; parse_table.states.len()];
+
+    state_usage_map[0] = true;
+    state_usage_map[1] = true;
+
     for state in &parse_table.states {
         for referenced_state in state.referenced_states() {
             state_usage_map[referenced_state] = true;
diff --git a/src/build_tables/token_conflicts.rs b/src/build_tables/token_conflicts.rs
index 9f1c4426..18a80484 100644
--- a/src/build_tables/token_conflicts.rs
+++ b/src/build_tables/token_conflicts.rs
@@ -1,7 +1,7 @@
 use crate::build_tables::item::LookaheadSet;
 use crate::grammars::LexicalGrammar;
 use crate::nfa::{CharacterSet, NfaCursor};
-use std::collections::HashSet;
+use hashbrown::HashSet;
 use std::fmt;
 
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
diff --git a/src/grammars.rs b/src/grammars.rs
index d23e8ca6..7f587a8c 100644
--- a/src/grammars.rs
+++ b/src/grammars.rs
@@ -1,6 +1,6 @@
 use crate::nfa::Nfa;
 use crate::rules::{Alias, Associativity, Rule, Symbol};
-use std::collections::HashMap;
+use hashbrown::HashMap;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub(crate) enum VariableType {
diff --git a/src/logger.rs b/src/logger.rs
new file mode 100644
index 00000000..18df763d
--- /dev/null
+++ b/src/logger.rs
@@ -0,0 +1,41 @@
+use log::{LevelFilter, Log, Metadata, Record};
+
+/// A `log::Log` implementation that prints every record to stderr.
+struct Logger {
+    // NOTE(review): never read in this file; presumably reserved for filtering.
+    pub filter: Option<String>,
+}
+
+impl Log for Logger {
+    /// Accepts every record regardless of its metadata.
+    fn enabled(&self, _: &Metadata) -> bool {
+        true
+    }
+
+    /// Prints the record as `[module path] message`, with the crate prefix
+    /// trimmed from the module path.
+    fn log(&self, record: &Record) {
+        eprintln!(
+            "[{}] {}",
+            record
+                .module_path()
+                .unwrap_or_default()
+                .trim_start_matches("rust_tree_sitter_cli::"),
+            record.args()
+        );
+    }
+
+    /// No-op: `log` writes with `eprintln!` and keeps no buffer of its own.
+    fn flush(&self) {}
+}
+
+/// Installs `Logger` as the global logger and sets the maximum level to `Info`.
+///
+/// # Panics
+///
+/// Panics if a global logger has already been set.
+pub(crate) fn init() {
+    log::set_boxed_logger(Box::new(Logger { filter: None }))
+        .expect("a global logger has already been set");
+    log::set_max_level(LevelFilter::Info);
+}
diff --git a/src/main.rs b/src/main.rs
index cd672186..a08922b7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,20 +1,23 @@
 #[macro_use]
-extern crate serde_derive;
-#[macro_use]
-extern crate serde_json;
-#[macro_use]
 extern crate lazy_static;
+#[macro_use]
+extern crate log;
+#[macro_use]
+extern crate serde_derive;
+extern crate hashbrown;
+extern crate serde_json;
 
-use std::path::PathBuf;
 use clap::{App, Arg, SubCommand};
 use std::env;
 use std::io::Write;
+use std::path::PathBuf;
 use std::process::{Command, Stdio};
 
 mod build_tables;
 mod error;
 mod generate;
 mod grammars;
+mod logger;
 mod nfa;
 mod parse_grammar;
 mod prepare_grammar;
@@ -27,7 +30,11 @@ fn main() -> error::Result<()> {
         .version("0.1")
         .author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
         .about("Generates and tests parsers")
-        .subcommand(SubCommand::with_name("generate").about("Generate a parser"))
+        .subcommand(
+            SubCommand::with_name("generate")
+                .about("Generate a parser")
+                .arg(Arg::with_name("log").long("log")),
+        )
         .subcommand(
             SubCommand::with_name("parse")
                 .about("Parse a file")
@@ -42,7 +49,11 @@ fn main() -> error::Result<()> {
         )
         .get_matches();
 
-    if let Some(_) = matches.subcommand_matches("generate") {
+    if let Some(matches) = matches.subcommand_matches("generate") {
+        if matches.is_present("log") {
+            logger::init();
+        }
+
         let mut grammar_path = env::current_dir().expect("Failed to read CWD");
         grammar_path.push("grammar.js");
         let grammar_json = load_js_grammar_file(grammar_path);
@@ -70,7 +81,8 @@ fn load_js_grammar_file(grammar_path: PathBuf) -> String {
         "{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n",
         js_prelude,
         grammar_path.to_str().unwrap()
-    ).expect("Failed to write to node's stdin");
+    )
+    .expect("Failed to write to node's stdin");
     drop(node_stdin);
     let output = node_process
         .wait_with_output()
diff --git a/src/nfa.rs b/src/nfa.rs
index e14dac44..1c7ff53b 100644
--- a/src/nfa.rs
+++ b/src/nfa.rs
@@ -320,6 +320,10 @@ impl<'a> NfaCursor<'a> {
         self.add_states(&mut states);
     }
 
+    pub fn force_reset(&mut self, states: Vec<u32>) {
+        self.state_ids = states
+    }
+
     pub fn successors(&self) -> impl Iterator<Item = (&CharacterSet, i32, u32, bool)> {
         self.state_ids.iter().filter_map(move |id| {
             if let NfaState::Advance {
@@ -352,16 +356,26 @@ impl<'a> NfaCursor<'a> {
                     result[i].1 = max(result[i].1, prec);
                     result[i].2.push(state);
                     result[i].3 |= is_sep;
-                } else {
-                    let intersection = result[i].0.remove_intersection(&mut chars);
-                    if !intersection.is_empty() {
-                        let mut states = result[i].2.clone();
-                        states.push(state);
+                    chars = CharacterSet::empty();
+                    break;
+                }
+
+                let intersection = result[i].0.remove_intersection(&mut chars);
+                if !intersection.is_empty() {
+                    let mut states = result[i].2.clone();
+                    let max_prec = max(result[i].1, prec);
+                    states.push(state);
+                    if result[i].0.is_empty() {
+                        result[i].0 = intersection;
+                        result[i].1 = max_prec;
+                        result[i].2 = states;
+                        result[i].3 |= is_sep;
+                    } else {
                         result.insert(
                             i,
                             (
                                 intersection,
-                                max(result[i].1, prec),
+                                max_prec,
                                 states,
                                 result[i].3 || is_sep,
                             ),
diff --git a/src/parse_grammar.rs b/src/parse_grammar.rs
index 07396329..6808f402 100644
--- a/src/parse_grammar.rs
+++ b/src/parse_grammar.rs
@@ -133,7 +133,7 @@ mod tests {
 
     #[test]
     fn test_parse_grammar() {
-        let grammar = parse_grammar(&json!({
+        let grammar = parse_grammar(r#"{
             "name": "my_lang",
             "rules": {
                 "file": {
@@ -148,7 +148,7 @@ mod tests {
                     "value": "foo"
                 }
             }
-        }).to_string()).unwrap();
+        }"#).unwrap();
 
         assert_eq!(grammar.name, "my_lang");
         assert_eq!(grammar.variables, vec![
diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs
index f3811c5f..4589bd11 100644
--- a/src/prepare_grammar/expand_repeats.rs
+++ b/src/prepare_grammar/expand_repeats.rs
@@ -1,7 +1,7 @@
 use super::ExtractedSyntaxGrammar;
 use crate::grammars::{Variable, VariableType};
 use crate::rules::{Rule, Symbol};
-use std::collections::HashMap;
+use hashbrown::HashMap;
 use std::mem;
 
 struct Expander {
diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs
index 5f3f6e16..115933ee 100644
--- a/src/prepare_grammar/extract_tokens.rs
+++ b/src/prepare_grammar/extract_tokens.rs
@@ -2,7 +2,7 @@ use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
 use crate::error::{Error, Result};
 use crate::grammars::{ExternalToken, Variable, VariableType};
 use crate::rules::{MetadataParams, Rule, Symbol, SymbolType};
-use std::collections::HashMap;
+use hashbrown::HashMap;
 use std::mem;
 
 pub(super) fn extract_tokens(
diff --git a/src/prepare_grammar/process_inlines.rs b/src/prepare_grammar/process_inlines.rs
index 0d7f6827..24bbc14d 100644
--- a/src/prepare_grammar/process_inlines.rs
+++ b/src/prepare_grammar/process_inlines.rs
@@ -1,5 +1,5 @@
 use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
-use std::collections::HashMap;
+use hashbrown::HashMap;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 struct ProductionStepId {
diff --git a/src/render/mod.rs b/src/render/mod.rs
index cbb8ba0d..250218c1 100644
--- a/src/render/mod.rs
+++ b/src/render/mod.rs
@@ -1,9 +1,9 @@
 use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
 use crate::nfa::CharacterSet;
 use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
-use crate::tables::{LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
+use crate::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
 use core::ops::Range;
-use std::collections::{HashMap, HashSet};
+use hashbrown::{HashMap, HashSet};
 use std::fmt::Write;
 use std::mem::swap;
 
@@ -372,17 +372,14 @@ impl Generator {
             if self.add_character_set_condition(&characters, &ruled_out_characters) {
                 add!(self, ")\n");
                 indent!(self);
-                if action.in_main_token {
-                    add_line!(self, "ADVANCE({});", action.state);
-                } else {
-                    add_line!(self, "SKIP({});", action.state);
-                }
+                self.add_advance_action(&action);
                 if let CharacterSet::Include(chars) = characters {
                     ruled_out_characters.extend(chars.iter().map(|c| *c as u32));
                 }
                 dedent!(self);
             } else {
                 self.buffer.truncate(previous_length);
+                self.add_advance_action(&action);
             }
         }
 
@@ -494,6 +491,14 @@ impl Generator {
             })
     }
 
+    fn add_advance_action(&mut self, action: &AdvanceAction) {
+        if action.in_main_token {
+            add_line!(self, "ADVANCE({});", action.state);
+        } else {
+            add_line!(self, "SKIP({});", action.state);
+        }
+    }
+
     fn add_lex_modes_list(&mut self) {
         self.get_external_scanner_state_id(HashSet::new());
 
diff --git a/src/rules.rs b/src/rules.rs
index 77e50d3c..ad16c632 100644
--- a/src/rules.rs
+++ b/src/rules.rs
@@ -1,4 +1,4 @@
-use std::collections::HashMap;
+use hashbrown::HashMap;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub(crate) enum SymbolType {
diff --git a/src/tables.rs b/src/tables.rs
index 1c125621..21222135 100644
--- a/src/tables.rs
+++ b/src/tables.rs
@@ -1,6 +1,6 @@
 use crate::nfa::CharacterSet;
 use crate::rules::{Alias, Associativity, Symbol};
-use std::collections::HashMap;
+use hashbrown::HashMap;
 
 pub(crate) type AliasSequenceId = usize;
 pub(crate) type ParseStateId = usize;