Fix various logic errors in parse table construction

2019-01-02 16:48:44 -08:00 · 2019-01-02 16:48:44 -08:00 · 3fbaff5e69
commit 3fbaff5e69
parent 9824ebbbc3
21 changed files with 297 additions and 115 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -76,6 +76,11 @@ dependencies = [
 "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

+[[package]]
+name = "byteorder"
+version = "1.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "cc"
 version = "1.0.25"
@@ -212,6 +217,15 @@ dependencies = [
 "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

+[[package]]
+name = "hashbrown"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "ignore"
 version = "0.4.4"
@@ -463,9 +477,11 @@ version = "0.1.0"
 dependencies = [
 "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
 "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
 "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
 "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -737,6 +753,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0"
 "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
 "checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
+"checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d"
 "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
 "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
 "checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
@@ -753,6 +770,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
 "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
 "checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865"
+"checksum hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "64b7d419d0622ae02fe5da6b9a5e1964b610a65bb37923b976aeebb6dbb8f86e"
 "checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01"
 "checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
 "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,7 @@ lazy_static = "1.2.0"
 smallbitvec = "2.3.0"
 clap = "2.32"
 dirs = "1.0.2"
+hashbrown = "0.1"
 ignore = "0.4.4"
 libloading = "0.5"
 rusqlite = "0.14.0"
@@ -20,3 +21,7 @@ regex-syntax = "0.6.4"
 [dependencies.serde_json]
 version = "1.0"
 features = ["preserve_order"]
+
+[dependencies.log]
+version = "0.4.6"
+features = ["std"]
--- a/src/build_tables/build_lex_table.rs
+++ b/src/build_tables/build_lex_table.rs
@@ -2,10 +2,9 @@ use super::item::LookaheadSet;
 use super::token_conflicts::TokenConflictMap;
 use crate::grammars::{LexicalGrammar, SyntaxGrammar};
 use crate::nfa::NfaCursor;
-use crate::rules::Symbol;
 use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable};
 use std::collections::hash_map::Entry;
-use std::collections::{HashMap, VecDeque};
+use std::collections::{BTreeMap, HashMap, VecDeque};

 pub(crate) fn build_lex_table(
    parse_table: &mut ParseTable,
@@ -16,15 +15,16 @@ pub(crate) fn build_lex_table(
    let keyword_lex_table;
    if syntax_grammar.word_token.is_some() {
        let mut builder = LexTableBuilder::new(lexical_grammar);
-        builder.add_state_for_tokens(keywords.iter());
+        builder.add_state_for_tokens(keywords);
        keyword_lex_table = builder.table;
    } else {
        keyword_lex_table = LexTable::default();
    }

    let mut builder = LexTableBuilder::new(lexical_grammar);
-    for state in parse_table.states.iter_mut() {
-        let tokens = state.terminal_entries.keys().filter_map(|token| {
+    for (i, state) in parse_table.states.iter_mut().enumerate() {
+        info!("populate lex state for parse state {}", i);
+        let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| {
            if token.is_terminal() {
                if keywords.contains(&token) {
                    syntax_grammar.word_token
@@ -34,11 +34,14 @@ pub(crate) fn build_lex_table(
            } else {
                None
            }
-        });
-        state.lex_state_id = builder.add_state_for_tokens(tokens);
+        }));
+        state.lex_state_id = builder.add_state_for_tokens(&tokens);
    }

-    (builder.table, keyword_lex_table)
+    let mut table = builder.table;
+    shrink_lex_table(&mut table, parse_table);
+
+    (table, keyword_lex_table)
 }

 struct LexTableBuilder<'a> {
@@ -60,32 +63,49 @@ impl<'a> LexTableBuilder<'a> {
        }
    }

-    fn add_state_for_tokens(&mut self, tokens: impl Iterator<Item = Symbol>) -> usize {
+    fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize {
        let nfa_states = tokens
+            .iter()
            .map(|token| self.lexical_grammar.variables[token.index].start_state)
            .collect();
-        let result = self.add_state(nfa_states);
-        while let Some((state_id, nfa_states)) = self.state_queue.pop_front() {
+        let (state_id, is_new) = self.add_state(nfa_states);
+
+        if is_new {
+            info!(
+                "entry point state: {}, tokens: {:?}",
+                state_id,
+                tokens
+                    .iter()
+                    .map(|t| &self.lexical_grammar.variables[t.index].name)
+                    .collect::<Vec<_>>()
+            );
+        }
+
+        while let Some((state_id, nfa_states)) = self.state_queue.pop_back() {
            self.populate_state(state_id, nfa_states);
        }
-        result
+        state_id
    }

-    fn add_state(&mut self, nfa_states: Vec<u32>) -> usize {
-        match self.state_ids_by_nfa_state_set.entry(nfa_states) {
-            Entry::Occupied(o) => *o.get(),
+    fn add_state(&mut self, nfa_states: Vec<u32>) -> (usize, bool) {
+        self.cursor.reset(nfa_states);
+        match self
+            .state_ids_by_nfa_state_set
+            .entry(self.cursor.state_ids.clone())
+        {
+            Entry::Occupied(o) => (*o.get(), false),
            Entry::Vacant(v) => {
                let state_id = self.table.states.len();
                self.table.states.push(LexState::default());
                self.state_queue.push_back((state_id, v.key().clone()));
                v.insert(state_id);
-                state_id
+                (state_id, true)
            }
        }
    }

    fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>) {
-        self.cursor.reset(nfa_states);
+        self.cursor.force_reset(nfa_states);

        let mut completion = None;
        for (id, prec) in self.cursor.completions() {
@@ -102,12 +122,16 @@ impl<'a> LexTableBuilder<'a> {
        }

        for (chars, advance_precedence, next_states, is_sep) in self.cursor.grouped_successors() {
+            info!(
+                "populate state: {}, characters: {:?}, precedence: {:?}",
+                state_id, chars, advance_precedence
+            );
            if let Some((_, completed_precedence)) = completion {
                if advance_precedence < completed_precedence {
                    continue;
                }
            }
-            let next_state_id = self.add_state(next_states);
+            let (next_state_id, _) = self.add_state(next_states);
            self.table.states[state_id].advance_actions.push((
                chars,
                AdvanceAction {
@@ -122,3 +146,59 @@ impl<'a> LexTableBuilder<'a> {
        }
    }
 }
+
+fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
+    let mut state_replacements = BTreeMap::new();
+    let mut done = false;
+    while !done {
+        done = true;
+        for (i, state_i) in table.states.iter().enumerate() {
+            if state_replacements.contains_key(&i) {
+                continue;
+            }
+            for (j, state_j) in table.states.iter().enumerate() {
+                if state_replacements.contains_key(&j) {
+                    continue;
+                }
+                if j == i {
+                    break;
+                }
+                if state_i == state_j {
+                    info!("replace state {} with state {}", i, j);
+                    state_replacements.insert(i, j);
+                    done = false;
+                }
+            }
+        }
+        for state in table.states.iter_mut() {
+            for advance_action in state.advance_actions.iter_mut() {
+                if let Some(new_state_id) = state_replacements.get(&advance_action.1.state) {
+                    advance_action.1.state = *new_state_id;
+                }
+            }
+        }
+    }
+
+    let final_state_replacements = (0..table.states.len()).into_iter().map(|state_id| {
+        let replacement = state_replacements.get(&state_id).cloned().unwrap_or(state_id);
+        let prior_removed = state_replacements.iter().take_while(|i| *i.0 < replacement).count();
+        replacement - prior_removed
+    }).collect::<Vec<_>>();
+
+    for state in parse_table.states.iter_mut() {
+        state.lex_state_id = final_state_replacements[state.lex_state_id];
+    }
+
+    for state in table.states.iter_mut() {
+        for advance_action in state.advance_actions.iter_mut() {
+            advance_action.1.state = final_state_replacements[advance_action.1.state];
+        }
+    }
+
+    let mut i = 0;
+    table.states.retain(|_| {
+        let result = !state_replacements.contains_key(&i);
+        i += 1;
+        result
+    });
+}
--- a/src/build_tables/build_parse_table.rs
+++ b/src/build_tables/build_parse_table.rs
@@ -7,8 +7,11 @@ use crate::tables::{
    AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
 };
 use core::ops::Range;
-use std::collections::hash_map::{DefaultHasher, Entry};
-use std::collections::{HashMap, HashSet, VecDeque};
+use hashbrown::hash_map::Entry;
+use hashbrown::{HashMap, HashSet};
+use std::collections::hash_map::DefaultHasher;
+use std::collections::VecDeque;
+
 use std::fmt::Write;
 use std::hash::Hasher;

@@ -43,9 +46,10 @@ impl<'a> ParseTableBuilder<'a> {
        // Ensure that the empty alias sequence has index 0.
        self.parse_table.alias_sequences.push(Vec::new());

-        // Ensure that the error state has index 0.
+        // Add the error state at index 0.
        self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());

+        // Add the starting state at index 1.
        self.add_parse_state(
            &Vec::new(),
            &Vec::new(),
@@ -61,6 +65,8 @@ impl<'a> ParseTableBuilder<'a> {

        self.process_part_state_queue()?;
        self.populate_used_symbols();
+        self.remove_precedences();
+
        Ok((self.parse_table, self.following_tokens))
    }

@@ -112,28 +118,9 @@ impl<'a> ParseTableBuilder<'a> {

    fn process_part_state_queue(&mut self) -> Result<()> {
        while let Some(entry) = self.parse_state_queue.pop_front() {
-            let debug = false;
-
-            if debug {
-                println!(
-                    "ITEM SET {}:\n{}",
-                    entry.state_id,
-                    self.item_sets_by_state_id[entry.state_id]
-                        .display_with(&self.syntax_grammar, &self.lexical_grammar,)
-                );
-            }
-
            let item_set = self
                .item_set_builder
                .transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
-
-            if debug {
-                println!(
-                    "TRANSITIVE CLOSURE:\n{}",
-                    item_set.display_with(&self.syntax_grammar, &self.lexical_grammar)
-                );
-            }
-
            self.add_actions(
                entry.preceding_symbols,
                entry.preceding_auxiliary_symbols,
@@ -527,6 +514,7 @@ impl<'a> ParseTableBuilder<'a> {
    }

    fn populate_used_symbols(&mut self) {
+        self.parse_table.symbols.push(Symbol::end());
        let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()];
        let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()];
        let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()];
@@ -542,20 +530,39 @@ impl<'a> ParseTableBuilder<'a> {
                non_terminal_usages[symbol.index] = true;
            }
        }
-        self.parse_table.symbols.push(Symbol::end());
        for (i, value) in terminal_usages.into_iter().enumerate() {
            if value {
                self.parse_table.symbols.push(Symbol::terminal(i));
            }
        }
+        for (i, value) in external_usages.into_iter().enumerate() {
+            if value {
+                self.parse_table.symbols.push(Symbol::external(i));
+            }
+        }
        for (i, value) in non_terminal_usages.into_iter().enumerate() {
            if value {
                self.parse_table.symbols.push(Symbol::non_terminal(i));
            }
        }
-        for (i, value) in external_usages.into_iter().enumerate() {
-            if value {
-                self.parse_table.symbols.push(Symbol::external(i));
+    }
+
+    fn remove_precedences(&mut self) {
+        for state in self.parse_table.states.iter_mut() {
+            for (_, entry) in state.terminal_entries.iter_mut() {
+                for action in entry.actions.iter_mut() {
+                    match action {
+                        ParseAction::Reduce {
+                            precedence,
+                            associativity,
+                            ..
+                        } => {
+                            *precedence = 0;
+                            *associativity = None;
+                        }
+                        _ => {}
+                    }
+                }
            }
        }
    }
--- a/src/build_tables/coincident_tokens.rs
+++ b/src/build_tables/coincident_tokens.rs
@@ -1,36 +1,44 @@
+use crate::grammars::LexicalGrammar;
 use crate::rules::Symbol;
 use crate::tables::{ParseStateId, ParseTable};
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;

 pub(crate) struct CoincidentTokenIndex {
-    entries: HashMap<(Symbol, Symbol), HashSet<ParseStateId>>,
-    empty: HashSet<ParseStateId>,
+    entries: Vec<HashSet<ParseStateId>>,
+    n: usize,
 }

 impl CoincidentTokenIndex {
-    pub fn new(table: &ParseTable) -> Self {
-        let mut entries = HashMap::new();
+    pub fn new(table: &ParseTable, lexical_grammar: &LexicalGrammar) -> Self {
+        let n = lexical_grammar.variables.len();
+        let mut result = Self {
+            n,
+            entries: vec![HashSet::new(); n * n],
+        };
        for (i, state) in table.states.iter().enumerate() {
            for symbol in state.terminal_entries.keys() {
                for other_symbol in state.terminal_entries.keys() {
-                    entries
-                        .entry((*symbol, *other_symbol))
-                        .or_insert(HashSet::new())
-                        .insert(i);
+                    let index = result.index(*symbol, *other_symbol);
+                    result.entries[index].insert(i);
                }
            }
        }
-        Self {
-            entries,
-            empty: HashSet::new(),
-        }
+        result
    }

    pub fn states_with(&self, a: Symbol, b: Symbol) -> &HashSet<ParseStateId> {
-        self.entries.get(&(a, b)).unwrap_or(&self.empty)
+        &self.entries[self.index(a, b)]
    }

    pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
-        self.entries.contains_key(&(a, b))
+        !self.entries[self.index(a, b)].is_empty()
+    }
+
+    fn index(&self, a: Symbol, b: Symbol) -> usize {
+        if a.index < b.index {
+            a.index * self.n + b.index
+        } else {
+            b.index * self.n + a.index
+        }
    }
 }
--- a/src/build_tables/item.rs
+++ b/src/build_tables/item.rs
@@ -112,7 +112,9 @@ impl LookaheadSet {
                return;
            }
        };
-        vec.resize(other.index + 1, false);
+        if other.index >= vec.len() {
+            vec.resize(other.index + 1, false);
+        }
        vec.set(other.index, true);
    }

--- a/src/build_tables/item_set_builder.rs
+++ b/src/build_tables/item_set_builder.rs
@@ -1,7 +1,7 @@
 use super::item::{LookaheadSet, ParseItem, ParseItemSet};
 use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
 use crate::rules::Symbol;
-use std::collections::{HashMap, HashSet};
+use hashbrown::{HashMap, HashSet};

 #[derive(Clone, Debug, PartialEq, Eq)]
 struct TransitiveClosureAddition<'a> {
--- a/src/build_tables/mod.rs
+++ b/src/build_tables/mod.rs
@@ -27,22 +27,14 @@ pub(crate) fn build_tables(
    let (mut parse_table, following_tokens) =
        build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
    let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
-
-    eprintln!("{:?}", token_conflict_map);
-
-    let coincident_token_index = CoincidentTokenIndex::new(&parse_table);
-    let keywords = if let Some(word_token) = syntax_grammar.word_token {
-        identify_keywords(
-            lexical_grammar,
-            &parse_table,
-            word_token,
-            &token_conflict_map,
-            &coincident_token_index,
-        )
-    } else {
-        LookaheadSet::new()
-    };
-
+    let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
+    let keywords = identify_keywords(
+        lexical_grammar,
+        &parse_table,
+        syntax_grammar.word_token,
+        &token_conflict_map,
+        &coincident_token_index,
+    );
    populate_error_state(
        &mut parse_table,
        syntax_grammar,
@@ -123,10 +115,15 @@ fn populate_error_state(
 fn identify_keywords(
    lexical_grammar: &LexicalGrammar,
    parse_table: &ParseTable,
-    word_token: Symbol,
+    word_token: Option<Symbol>,
    token_conflict_map: &TokenConflictMap,
    coincident_token_index: &CoincidentTokenIndex,
 ) -> LookaheadSet {
+    if word_token.is_none() {
+        return LookaheadSet::new();
+    }
+
+    let word_token = word_token.unwrap();
    let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());

    // First find all of the candidate keyword tokens: tokens that start with
@@ -137,6 +134,7 @@ fn identify_keywords(
            if all_chars_are_alphabetical(&cursor)
                && token_conflict_map.does_match_same_string(i, word_token.index)
            {
+                info!("Keywords - add candidate {}", lexical_grammar.variables[i].name);
                Some(Symbol::terminal(i))
            } else {
                None
@@ -150,8 +148,8 @@ fn identify_keywords(
            if other_token != *token
                && token_conflict_map.does_match_same_string(token.index, other_token.index)
            {
-                eprintln!(
-                    "Exclude {} from keywords because it matches the same string as {}",
+                info!(
+                    "Keywords - exclude {} because it matches the same string as {}",
                    lexical_grammar.variables[token.index].name,
                    lexical_grammar.variables[other_token.index].name
                );
@@ -189,8 +187,8 @@ fn identify_keywords(
                word_token.index,
                other_index,
            ) {
-                eprintln!(
-                    "Exclude {} from keywords because of conflict with {}",
+                info!(
+                    "Keywords - exclude {} because of conflict with {}",
                    lexical_grammar.variables[token.index].name,
                    lexical_grammar.variables[other_index].name
                );
@@ -198,8 +196,8 @@ fn identify_keywords(
            }
        }

-        eprintln!(
-            "Include {} in keywords",
+        info!(
+            "Keywords - include {}",
            lexical_grammar.variables[token.index].name,
        );
        true
--- a/src/build_tables/shrink_parse_table.rs
+++ b/src/build_tables/shrink_parse_table.rs
@@ -2,7 +2,7 @@ use super::token_conflicts::TokenConflictMap;
 use crate::grammars::{SyntaxGrammar, VariableType};
 use crate::rules::{AliasMap, Symbol};
 use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
-use std::collections::{HashMap, HashSet};
+use hashbrown::{HashMap, HashSet};

 pub(crate) fn shrink_parse_table(
    parse_table: &mut ParseTable,
@@ -240,6 +240,10 @@ fn can_add_entry_to_state(

 fn remove_unused_states(parse_table: &mut ParseTable) {
    let mut state_usage_map = vec![false; parse_table.states.len()];
+
+    state_usage_map[0] = true;
+    state_usage_map[1] = true;
+
    for state in &parse_table.states {
        for referenced_state in state.referenced_states() {
            state_usage_map[referenced_state] = true;
--- a/src/build_tables/token_conflicts.rs
+++ b/src/build_tables/token_conflicts.rs
@@ -1,7 +1,7 @@
 use crate::build_tables::item::LookaheadSet;
 use crate::grammars::LexicalGrammar;
 use crate::nfa::{CharacterSet, NfaCursor};
-use std::collections::HashSet;
+use hashbrown::HashSet;
 use std::fmt;

 #[derive(Clone, Debug, Default, PartialEq, Eq)]
--- a/src/grammars.rs
+++ b/src/grammars.rs
@@ -1,6 +1,6 @@
 use crate::nfa::Nfa;
 use crate::rules::{Alias, Associativity, Rule, Symbol};
-use std::collections::HashMap;
+use hashbrown::HashMap;

 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub(crate) enum VariableType {
--- a/src/logger.rs
+++ b/src/logger.rs
@@ -0,0 +1,29 @@
+use log::{LevelFilter, Log, Metadata, Record};
+
+struct Logger {
+    pub filter: Option<String>,
+}
+
+impl Log for Logger {
+    fn enabled(&self, _: &Metadata) -> bool {
+        true
+    }
+
+    fn log(&self, record: &Record) {
+        eprintln!(
+            "[{}] {}",
+            record
+                .module_path()
+                .unwrap_or_default()
+                .trim_start_matches("rust_tree_sitter_cli::"),
+            record.args()
+        );
+    }
+
+    fn flush(&self) {}
+}
+
+pub(crate) fn init() {
+    log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap();
+    log::set_max_level(LevelFilter::Info);
+}
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,20 +1,23 @@
 #[macro_use]
-extern crate serde_derive;
-#[macro_use]
-extern crate serde_json;
-#[macro_use]
 extern crate lazy_static;
+#[macro_use]
+extern crate log;
+#[macro_use]
+extern crate serde_derive;
+extern crate hashbrown;
+extern crate serde_json;

-use std::path::PathBuf;
 use clap::{App, Arg, SubCommand};
 use std::env;
 use std::io::Write;
+use std::path::PathBuf;
 use std::process::{Command, Stdio};

 mod build_tables;
 mod error;
 mod generate;
 mod grammars;
+mod logger;
 mod nfa;
 mod parse_grammar;
 mod prepare_grammar;
@@ -27,7 +30,11 @@ fn main() -> error::Result<()> {
        .version("0.1")
        .author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
        .about("Generates and tests parsers")
-        .subcommand(SubCommand::with_name("generate").about("Generate a parser"))
+        .subcommand(
+            SubCommand::with_name("generate")
+                .about("Generate a parser")
+                .arg(Arg::with_name("log").long("log")),
+        )
        .subcommand(
            SubCommand::with_name("parse")
                .about("Parse a file")
@@ -42,7 +49,11 @@ fn main() -> error::Result<()> {
        )
        .get_matches();

-    if let Some(_) = matches.subcommand_matches("generate") {
+    if let Some(matches) = matches.subcommand_matches("generate") {
+        if matches.is_present("log") {
+            logger::init();
+        }
+
        let mut grammar_path = env::current_dir().expect("Failed to read CWD");
        grammar_path.push("grammar.js");
        let grammar_json = load_js_grammar_file(grammar_path);
@@ -70,7 +81,8 @@ fn load_js_grammar_file(grammar_path: PathBuf) -> String {
        "{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n",
        js_prelude,
        grammar_path.to_str().unwrap()
-    ).expect("Failed to write to node's stdin");
+    )
+    .expect("Failed to write to node's stdin");
    drop(node_stdin);
    let output = node_process
        .wait_with_output()
--- a/src/nfa.rs
+++ b/src/nfa.rs
@@ -320,6 +320,10 @@ impl<'a> NfaCursor<'a> {
        self.add_states(&mut states);
    }

+    pub fn force_reset(&mut self, states: Vec<u32>) {
+        self.state_ids = states
+    }
+
    pub fn successors(&self) -> impl Iterator<Item = (&CharacterSet, i32, u32, bool)> {
        self.state_ids.iter().filter_map(move |id| {
            if let NfaState::Advance {
@@ -352,16 +356,26 @@ impl<'a> NfaCursor<'a> {
                    result[i].1 = max(result[i].1, prec);
                    result[i].2.push(state);
                    result[i].3 |= is_sep;
-                } else {
-                    let intersection = result[i].0.remove_intersection(&mut chars);
-                    if !intersection.is_empty() {
-                        let mut states = result[i].2.clone();
-                        states.push(state);
+                    chars = CharacterSet::empty();
+                    break;
+                }
+
+                let intersection = result[i].0.remove_intersection(&mut chars);
+                if !intersection.is_empty() {
+                    let mut states = result[i].2.clone();
+                    let max_prec = max(result[i].1, prec);
+                    states.push(state);
+                    if result[i].0.is_empty() {
+                        result[i].0 = intersection;
+                        result[i].1 = max_prec;
+                        result[i].2 = states;
+                        result[i].3 |= is_sep;
+                    } else {
                        result.insert(
                            i,
                            (
                                intersection,
-                                max(result[i].1, prec),
+                                max_prec,
                                states,
                                result[i].3 || is_sep,
                            ),
--- a/src/parse_grammar.rs
+++ b/src/parse_grammar.rs
@@ -133,7 +133,7 @@ mod tests {

    #[test]
    fn test_parse_grammar() {
-        let grammar = parse_grammar(&json!({
+        let grammar = parse_grammar(r#"{
            "name": "my_lang",
            "rules": {
                "file": {
@@ -148,7 +148,7 @@ mod tests {
                    "value": "foo"
                }
            }
-        }).to_string()).unwrap();
+        }"#).unwrap();

        assert_eq!(grammar.name, "my_lang");
        assert_eq!(grammar.variables, vec![
--- a/src/prepare_grammar/expand_repeats.rs
+++ b/src/prepare_grammar/expand_repeats.rs
@@ -1,7 +1,7 @@
 use super::ExtractedSyntaxGrammar;
 use crate::grammars::{Variable, VariableType};
 use crate::rules::{Rule, Symbol};
-use std::collections::HashMap;
+use hashbrown::HashMap;
 use std::mem;

 struct Expander {
--- a/src/prepare_grammar/extract_tokens.rs
+++ b/src/prepare_grammar/extract_tokens.rs
@@ -2,7 +2,7 @@ use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
 use crate::error::{Error, Result};
 use crate::grammars::{ExternalToken, Variable, VariableType};
 use crate::rules::{MetadataParams, Rule, Symbol, SymbolType};
-use std::collections::HashMap;
+use hashbrown::HashMap;
 use std::mem;

 pub(super) fn extract_tokens(
--- a/src/prepare_grammar/process_inlines.rs
+++ b/src/prepare_grammar/process_inlines.rs
@@ -1,5 +1,5 @@
 use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
-use std::collections::HashMap;
+use hashbrown::HashMap;

 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 struct ProductionStepId {
--- a/src/render/mod.rs
+++ b/src/render/mod.rs
@@ -1,9 +1,9 @@
 use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
 use crate::nfa::CharacterSet;
 use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
-use crate::tables::{LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
+use crate::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
 use core::ops::Range;
-use std::collections::{HashMap, HashSet};
+use hashbrown::{HashMap, HashSet};
 use std::fmt::Write;
 use std::mem::swap;

@@ -372,17 +372,14 @@ impl Generator {
            if self.add_character_set_condition(&characters, &ruled_out_characters) {
                add!(self, ")\n");
                indent!(self);
-                if action.in_main_token {
-                    add_line!(self, "ADVANCE({});", action.state);
-                } else {
-                    add_line!(self, "SKIP({});", action.state);
-                }
+                self.add_advance_action(&action);
                if let CharacterSet::Include(chars) = characters {
                    ruled_out_characters.extend(chars.iter().map(|c| *c as u32));
                }
                dedent!(self);
            } else {
                self.buffer.truncate(previous_length);
+                self.add_advance_action(&action);
            }
        }

@@ -494,6 +491,14 @@ impl Generator {
            })
    }

+    fn add_advance_action(&mut self, action: &AdvanceAction) {
+        if action.in_main_token {
+            add_line!(self, "ADVANCE({});", action.state);
+        } else {
+            add_line!(self, "SKIP({});", action.state);
+        }
+    }
+
    fn add_lex_modes_list(&mut self) {
        self.get_external_scanner_state_id(HashSet::new());

--- a/src/rules.rs
+++ b/src/rules.rs
@@ -1,4 +1,4 @@
-use std::collections::HashMap;
+use hashbrown::HashMap;

 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub(crate) enum SymbolType {
--- a/src/tables.rs
+++ b/src/tables.rs
@@ -1,6 +1,6 @@
 use crate::nfa::CharacterSet;
 use crate::rules::{Alias, Associativity, Symbol};
-use std::collections::HashMap;
+use hashbrown::HashMap;

 pub(crate) type AliasSequenceId = usize;
 pub(crate) type ParseStateId = usize;