Fix incorrect NFA generation for string rules

This commit is contained in:
Max Brunsfeld 2019-01-03 11:52:57 -08:00
parent 02ca84fb4a
commit c0f48dff6f
2 changed files with 63 additions and 6 deletions

View file

@@ -148,8 +148,8 @@ impl<'a> LexTableBuilder<'a> {
completion = Some((id, prec));
}
info!("raw successors: {:?}", self.cursor.successors().collect::<Vec<_>>());
let successors = self.cursor.grouped_successors();
info!("populate state: {}, successors: {:?}", state_id, successors);
// If EOF is a valid lookahead token, add a transition predicated on the null
// character that leads to the empty set of NFA states.
@@ -166,10 +166,6 @@ impl<'a> LexTableBuilder<'a> {
}
for (chars, advance_precedence, next_states, is_sep) in successors {
info!(
"populate state: {}, characters: {:?}, precedence: {:?}",
state_id, chars, advance_precedence
);
if let Some((_, completed_precedence)) = completion {
if advance_precedence < completed_precedence {
continue;

View file

@@ -89,7 +89,8 @@ impl NfaBuilder {
}
Rule::String(s) => {
for c in s.chars().rev() {
self.push_advance(CharacterSet::empty().add_char(c), self.nfa.last_state_id());
self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
next_state_id = self.nfa.last_state_id();
}
Ok(s.len() > 0)
}
@@ -102,6 +103,8 @@ impl NfaBuilder {
alternative_state_ids.push(next_state_id);
}
}
alternative_state_ids.sort_unstable();
alternative_state_ids.dedup();
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
for alternative_state_id in alternative_state_ids {
self.push_split(alternative_state_id);
@@ -542,6 +545,64 @@ mod tests {
("aeeeef", Some((2, "aeeee"))),
],
},
Row {
rules: vec![
Rule::seq(vec![
Rule::string("a"),
Rule::choice(vec![
Rule::string("b"),
Rule::string("c"),
]),
Rule::string("d"),
])
],
separators: vec![],
examples: vec![
("abd", Some((0, "abd"))),
("acd", Some((0, "acd"))),
("abc", None),
("ad", None),
("d", None),
("a", None),
]
},
// nested choices within sequences
Row {
rules: vec![
Rule::seq(vec![
Rule::pattern("[0-9]+"),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![
Rule::seq(vec![
Rule::choice(vec![
Rule::string("e"),
Rule::string("E")
]),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![
Rule::string("+"),
Rule::string("-"),
])
]),
Rule::pattern("[0-9]+"),
])
])
]),
]),
],
separators: vec![],
examples: vec![
("12", Some((0, "12"))),
("12e", Some((0, "12"))),
("12g", Some((0, "12"))),
("12e3", Some((0, "12e3"))),
("12e+", Some((0, "12"))),
("12E+34 +", Some((0, "12E+34"))),
("12e34", Some((0, "12e34"))),
],
},
];
for Row {