From c0f48dff6f3128d94855826e63588847dfcabb61 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Jan 2019 11:52:57 -0800 Subject: [PATCH] Fix incorrect NFA generation for string rules --- src/build_tables/build_lex_table.rs | 6 +-- src/prepare_grammar/expand_tokens.rs | 63 +++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index 66a4fe43..6cd9a1ce 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -148,8 +148,8 @@ impl<'a> LexTableBuilder<'a> { completion = Some((id, prec)); } - info!("raw successors: {:?}", self.cursor.successors().collect::<Vec<_>>()); let successors = self.cursor.grouped_successors(); + info!("populate state: {}, successors: {:?}", state_id, successors); // If EOF is a valid lookahead token, add a transition predicated on the null // character that leads to the empty set of NFA states. @@ -166,10 +166,6 @@ impl<'a> LexTableBuilder<'a> { } for (chars, advance_precedence, next_states, is_sep) in successors { - info!( - "populate state: {}, characters: {:?}, precedence: {:?}", - state_id, chars, advance_precedence - ); if let Some((_, completed_precedence)) = completion { if advance_precedence < completed_precedence { continue; diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index fdf085f6..61b1897c 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -89,7 +89,8 @@ impl NfaBuilder { } Rule::String(s) => { for c in s.chars().rev() { - self.push_advance(CharacterSet::empty().add_char(c), self.nfa.last_state_id()); + self.push_advance(CharacterSet::empty().add_char(c), next_state_id); + next_state_id = self.nfa.last_state_id(); } Ok(s.len() > 0) } @@ -102,6 +103,8 @@ impl NfaBuilder { alternative_state_ids.push(next_state_id); } } + alternative_state_ids.sort_unstable(); + alternative_state_ids.dedup();
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id()); for alternative_state_id in alternative_state_ids { self.push_split(alternative_state_id); @@ -542,6 +545,64 @@ mod tests { ("aeeeef", Some((2, "aeeee"))), ], }, + Row { + rules: vec![ + Rule::seq(vec![ + Rule::string("a"), + Rule::choice(vec![ + Rule::string("b"), + Rule::string("c"), + ]), + Rule::string("d"), + ]) + ], + separators: vec![], + examples: vec![ + ("abd", Some((0, "abd"))), + ("acd", Some((0, "acd"))), + ("abc", None), + ("ad", None), + ("d", None), + ("a", None), + ] + }, + // nested choices within sequences + Row { + rules: vec![ + Rule::seq(vec![ + Rule::pattern("[0-9]+"), + Rule::choice(vec![ + Rule::Blank, + Rule::choice(vec![ + Rule::seq(vec![ + Rule::choice(vec![ + Rule::string("e"), + Rule::string("E") + ]), + Rule::choice(vec![ + Rule::Blank, + Rule::choice(vec![ + Rule::string("+"), + Rule::string("-"), + ]) + ]), + Rule::pattern("[0-9]+"), + ]) + ]) + ]), + ]), + ], + separators: vec![], + examples: vec![ + ("12", Some((0, "12"))), + ("12e", Some((0, "12"))), + ("12g", Some((0, "12"))), + ("12e3", Some((0, "12e3"))), + ("12e+", Some((0, "12"))), + ("12E+34 +", Some((0, "12E+34"))), + ("12e34", Some((0, "12e34"))), + ], + }, ]; for Row {