From c0f48dff6f3128d94855826e63588847dfcabb61 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Thu, 3 Jan 2019 11:52:57 -0800
Subject: [PATCH] Fix incorrect NFA generation for string rules

---
 src/build_tables/build_lex_table.rs  |  6 +--
 src/prepare_grammar/expand_tokens.rs | 63 +++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs
index 66a4fe43..6cd9a1ce 100644
--- a/src/build_tables/build_lex_table.rs
+++ b/src/build_tables/build_lex_table.rs
@@ -148,8 +148,8 @@ impl<'a> LexTableBuilder<'a> {
             completion = Some((id, prec));
         }
 
-        info!("raw successors: {:?}", self.cursor.successors().collect::<Vec<_>>());
         let successors = self.cursor.grouped_successors();
+        info!("populate state: {}, successors: {:?}", state_id, successors);
 
         // If EOF is a valid lookahead token, add a transition predicated on the null
         // character that leads to the empty set of NFA states.
@@ -166,10 +166,6 @@ impl<'a> LexTableBuilder<'a> {
         }
 
         for (chars, advance_precedence, next_states, is_sep) in successors {
-            info!(
-                "populate state: {}, characters: {:?}, precedence: {:?}",
-                state_id, chars, advance_precedence
-            );
             if let Some((_, completed_precedence)) = completion {
                 if advance_precedence < completed_precedence {
                     continue;
diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs
index fdf085f6..61b1897c 100644
--- a/src/prepare_grammar/expand_tokens.rs
+++ b/src/prepare_grammar/expand_tokens.rs
@@ -89,7 +89,8 @@ impl NfaBuilder {
             }
             Rule::String(s) => {
                 for c in s.chars().rev() {
-                    self.push_advance(CharacterSet::empty().add_char(c), self.nfa.last_state_id());
+                    self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
+                    next_state_id = self.nfa.last_state_id();
                 }
                 Ok(s.len() > 0)
             }
@@ -102,6 +103,8 @@ impl NfaBuilder {
                         alternative_state_ids.push(next_state_id);
                     }
                 }
+                alternative_state_ids.sort_unstable();
+                alternative_state_ids.dedup();
                 alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
                 for alternative_state_id in alternative_state_ids {
                     self.push_split(alternative_state_id);
@@ -542,6 +545,64 @@ mod tests {
                     ("aeeeef", Some((2, "aeeee"))),
                 ],
             },
+            Row {
+                rules: vec![
+                    Rule::seq(vec![
+                        Rule::string("a"),
+                        Rule::choice(vec![
+                            Rule::string("b"),
+                            Rule::string("c"),
+                        ]),
+                        Rule::string("d"),
+                    ])
+                ],
+                separators: vec![],
+                examples: vec![
+                    ("abd", Some((0, "abd"))),
+                    ("acd", Some((0, "acd"))),
+                    ("abc", None),
+                    ("ad", None),
+                    ("d", None),
+                    ("a", None),
+                ]
+            },
+            // nested choices within sequences: digits with an optional exponent part
+            Row {
+                rules: vec![
+                    Rule::seq(vec![
+                        Rule::pattern("[0-9]+"),
+                        Rule::choice(vec![
+                            Rule::Blank,
+                            Rule::choice(vec![
+                                Rule::seq(vec![
+                                    Rule::choice(vec![
+                                        Rule::string("e"),
+                                        Rule::string("E")
+                                    ]),
+                                    Rule::choice(vec![
+                                        Rule::Blank,
+                                        Rule::choice(vec![
+                                            Rule::string("+"),
+                                            Rule::string("-"),
+                                        ])
+                                    ]),
+                                    Rule::pattern("[0-9]+"),
+                                ])
+                            ])
+                        ]),
+                    ]),
+                ],
+                separators: vec![],
+                examples: vec![
+                    ("12", Some((0, "12"))),
+                    ("12e", Some((0, "12"))),
+                    ("12g", Some((0, "12"))),
+                    ("12e3", Some((0, "12e3"))),
+                    ("12e+", Some((0, "12"))),
+                    ("12E+34 +", Some((0, "12E+34"))),
+                    ("12e34", Some((0, "12e34"))),
+                ],
+            },
         ];
 
         for Row {