Represent nfa transitions as structs with named fields, not tuples

This commit is contained in:
Max Brunsfeld 2019-01-04 09:42:06 -08:00
parent cc0fbc0d93
commit d845b81ee9
5 changed files with 211 additions and 116 deletions

View file

@ -1,7 +1,7 @@
use super::item::LookaheadSet;
use super::token_conflicts::TokenConflictMap;
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::nfa::{CharacterSet, NfaCursor};
use crate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::rules::Symbol;
use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable};
use std::collections::hash_map::Entry;
@ -157,8 +157,8 @@ impl<'a> LexTableBuilder<'a> {
completion.map(|(id, prec)| (&self.lexical_grammar.variables[id].name, prec))
);
let successors = self.cursor.grouped_successors();
info!("lex state: {}, successors: {:?}", state_id, successors);
let transitions = self.cursor.transitions();
info!("lex state: {}, transitions: {:?}", state_id, transitions);
// If EOF is a valid lookahead token, add a transition predicated on the null
// character that leads to the empty set of NFA states.
@ -174,20 +174,26 @@ impl<'a> LexTableBuilder<'a> {
));
}
for (chars, advance_precedence, next_states, is_sep) in successors {
for NfaTransition {
characters,
precedence,
states,
is_separator,
} in transitions
{
if let Some((_, completed_precedence)) = completion {
if advance_precedence < completed_precedence
|| (advance_precedence == completed_precedence && is_sep)
if precedence < completed_precedence
|| (precedence == completed_precedence && is_separator)
{
continue;
}
}
let (next_state_id, _) = self.add_state(next_states, eof_valid && is_sep);
let (next_state_id, _) = self.add_state(states, eof_valid && is_separator);
self.table.states[state_id].advance_actions.push((
chars,
characters,
AdvanceAction {
state: next_state_id,
in_main_token: !is_sep,
in_main_token: !is_separator,
},
));
}

View file

@ -239,7 +239,7 @@ fn identify_keywords(
}
fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
cursor.successors().all(|(chars, _, _, is_sep)| {
cursor.transition_chars().all(|(chars, is_sep)| {
if is_sep {
true
} else if let CharacterSet::Include(chars) = chars {

View file

@ -1,6 +1,6 @@
use crate::build_tables::item::LookaheadSet;
use crate::grammars::LexicalGrammar;
use crate::nfa::{CharacterSet, NfaCursor};
use crate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use hashbrown::HashSet;
use std::cmp::Ordering;
use std::fmt;
@ -131,7 +131,7 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<C
for variable in &grammar.variables {
cursor.reset(vec![variable.start_state]);
let mut all_chars = CharacterSet::empty();
for (chars, _, _, _) in cursor.successors() {
for (chars, _) in cursor.transition_chars() {
all_chars = all_chars.add(chars);
}
result.push(all_chars);
@ -215,12 +215,18 @@ fn compute_conflict_status(
}
}
for (chars, advance_precedence, next_states, in_sep) in cursor.grouped_successors() {
for NfaTransition {
characters,
precedence,
states,
is_separator,
} in cursor.transitions()
{
let mut can_advance = true;
if let Some((completed_id, completed_precedence)) = completion {
let mut other_id = None;
let mut successor_contains_completed_id = false;
for variable_id in variable_ids_for_states(&next_states, grammar) {
for variable_id in variable_ids_for_states(&states, grammar) {
if variable_id == completed_id {
successor_contains_completed_id = true;
break;
@ -231,7 +237,7 @@ fn compute_conflict_status(
if let (Some(other_id), false) = (other_id, successor_contains_completed_id) {
let winning_id;
if advance_precedence < completed_precedence {
if precedence < completed_precedence {
winning_id = completed_id;
can_advance = false;
} else {
@ -240,23 +246,23 @@ fn compute_conflict_status(
if winning_id == i {
result.0.does_overlap = true;
if chars.does_intersect(&following_chars[j]) {
if characters.does_intersect(&following_chars[j]) {
result.0.does_match_valid_continuation = true;
}
if in_sep {
if is_separator {
result.0.does_match_separators = true;
}
} else {
result.1.does_overlap = true;
if chars.does_intersect(&following_chars[i]) {
if characters.does_intersect(&following_chars[i]) {
result.1.does_match_valid_continuation = true;
}
}
}
}
if can_advance && visited_state_sets.insert(next_states.clone()) {
state_set_queue.push(next_states);
if can_advance && visited_state_sets.insert(states.clone()) {
state_set_queue.push(states);
}
}
}

View file

@ -30,18 +30,26 @@ pub struct Nfa {
pub states: Vec<NfaState>,
}
impl Default for Nfa {
fn default() -> Self {
Self { states: Vec::new() }
}
}
#[derive(Debug)]
pub struct NfaCursor<'a> {
pub(crate) state_ids: Vec<u32>,
nfa: &'a Nfa,
}
/// A transition out of an `NfaCursor`'s current states, as returned by
/// `NfaCursor::transitions`. Named fields replace the positional tuple that
/// was used previously.
#[derive(Debug, PartialEq, Eq)]
pub struct NfaTransition {
/// The characters on which this transition is taken.
pub characters: CharacterSet,
/// Whether the transition is a separator transition. When
/// `group_transitions` merges overlapping transitions, this is true if
/// any of the merged transitions was a separator.
pub is_separator: bool,
/// Precedence of the transition. When `group_transitions` merges
/// overlapping transitions, this is the maximum of their precedences.
pub precedence: i32,
/// Ids of the NFA states reached by this transition. `group_transitions`
/// inserts ids via binary search, keeping them sorted and free of
/// duplicates.
pub states: Vec<u32>,
}
impl Default for Nfa {
fn default() -> Self {
Self { states: Vec::new() }
}
}
impl CharacterSet {
pub fn empty() -> Self {
CharacterSet::Include(Vec::new())
@ -328,7 +336,15 @@ impl<'a> NfaCursor<'a> {
self.state_ids = states
}
pub fn successors(&self) -> impl Iterator<Item = (&CharacterSet, i32, u32, bool)> {
/// Iterates over the ungrouped transitions out of the cursor's current
/// states, yielding only each transition's character set and its
/// separator flag.
pub fn transition_chars(&self) -> impl Iterator<Item = (&CharacterSet, bool)> {
    self.raw_transitions()
        .map(|(characters, is_separator, _, _)| (characters, is_separator))
}
/// Collects the transitions out of the cursor's current states and
/// passes them through `group_transitions`, returning the grouped result.
pub fn transitions(&self) -> Vec<NfaTransition> {
    let ungrouped = self.raw_transitions();
    Self::group_transitions(ungrouped)
}
fn raw_transitions(&self) -> impl Iterator<Item = (&CharacterSet, bool, i32, u32)> {
self.state_ids.iter().filter_map(move |id| {
if let NfaState::Advance {
chars,
@ -337,52 +353,53 @@ impl<'a> NfaCursor<'a> {
is_sep,
} = &self.nfa.states[*id as usize]
{
Some((chars, *precedence, *state_id, *is_sep))
Some((chars, *is_sep, *precedence, *state_id))
} else {
None
}
})
}
pub fn grouped_successors(&self) -> Vec<(CharacterSet, i32, Vec<u32>, bool)> {
Self::group_successors(self.successors())
}
fn group_successors<'b>(
iter: impl Iterator<Item = (&'b CharacterSet, i32, u32, bool)>,
) -> Vec<(CharacterSet, i32, Vec<u32>, bool)> {
let mut result: Vec<(CharacterSet, i32, Vec<u32>, bool)> = Vec::new();
for (chars, prec, state, is_sep) in iter {
fn group_transitions<'b>(
iter: impl Iterator<Item = (&'b CharacterSet, bool, i32, u32)>,
) -> Vec<NfaTransition> {
let mut result: Vec<NfaTransition> = Vec::new();
for (chars, is_sep, prec, state) in iter {
let mut chars = chars.clone();
let mut i = 0;
while i < result.len() && !chars.is_empty() {
let intersection = result[i].0.remove_intersection(&mut chars);
let intersection = result[i].characters.remove_intersection(&mut chars);
if !intersection.is_empty() {
let mut intersection_states = result[i].2.clone();
let mut intersection_states = result[i].states.clone();
match intersection_states.binary_search(&state) {
Err(j) => intersection_states.insert(j, state),
_ => {}
}
let intersection_entry = (
intersection,
max(result[i].1, prec),
intersection_states,
result[i].3 || is_sep,
);
if result[i].0.is_empty() {
result[i] = intersection_entry;
let intersection_transition = NfaTransition {
characters: intersection,
is_separator: result[i].is_separator || is_sep,
precedence: max(result[i].precedence, prec),
states: intersection_states,
};
if result[i].characters.is_empty() {
result[i] = intersection_transition;
} else {
result.insert(i, intersection_entry);
result.insert(i, intersection_transition);
i += 1;
}
}
i += 1;
}
if !chars.is_empty() {
result.push((chars, prec, vec![state], is_sep));
result.push(NfaTransition {
characters: chars,
precedence: prec,
states: vec![state],
is_separator: is_sep,
});
}
}
result.sort_unstable_by(|a, b| a.0.cmp(&b.0));
result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters));
result
}
@ -435,111 +452,173 @@ mod tests {
use super::*;
#[test]
fn test_group_successors() {
fn test_group_transitions() {
let table = [
// overlapping character classes
(
vec![
(CharacterSet::empty().add_range('a', 'f'), 0, 1, false),
(CharacterSet::empty().add_range('d', 'i'), 1, 2, false),
(CharacterSet::empty().add_range('a', 'f'), false, 0, 1),
(CharacterSet::empty().add_range('d', 'i'), false, 1, 2),
],
vec![
(CharacterSet::empty().add_range('a', 'c'), 0, vec![1], false),
(
CharacterSet::empty().add_range('d', 'f'),
1,
vec![1, 2],
false,
),
(CharacterSet::empty().add_range('g', 'i'), 1, vec![2], false),
NfaTransition {
characters: CharacterSet::empty().add_range('a', 'c'),
is_separator: false,
precedence: 0,
states: vec![1],
},
NfaTransition {
characters: CharacterSet::empty().add_range('d', 'f'),
is_separator: false,
precedence: 1,
states: vec![1, 2],
},
NfaTransition {
characters: CharacterSet::empty().add_range('g', 'i'),
is_separator: false,
precedence: 1,
states: vec![2],
},
],
),
// large character class followed by many individual characters
(
vec![
(CharacterSet::empty().add_range('a', 'z'), 0, 1, false),
(CharacterSet::empty().add_char('d'), 0, 2, false),
(CharacterSet::empty().add_char('i'), 0, 3, false),
(CharacterSet::empty().add_char('f'), 0, 4, false),
(CharacterSet::empty().add_range('a', 'z'), false, 0, 1),
(CharacterSet::empty().add_char('d'), false, 0, 2),
(CharacterSet::empty().add_char('i'), false, 0, 3),
(CharacterSet::empty().add_char('f'), false, 0, 4),
],
vec![
(CharacterSet::empty().add_char('d'), 0, vec![1, 2], false),
(CharacterSet::empty().add_char('f'), 0, vec![1, 4], false),
(CharacterSet::empty().add_char('i'), 0, vec![1, 3], false),
(
CharacterSet::empty()
NfaTransition {
characters: CharacterSet::empty().add_char('d'),
is_separator: false,
precedence: 0,
states: vec![1, 2],
},
NfaTransition {
characters: CharacterSet::empty().add_char('f'),
is_separator: false,
precedence: 0,
states: vec![1, 4],
},
NfaTransition {
characters: CharacterSet::empty().add_char('i'),
is_separator: false,
precedence: 0,
states: vec![1, 3],
},
NfaTransition {
characters: CharacterSet::empty()
.add_range('a', 'c')
.add_char('e')
.add_range('g', 'h')
.add_range('j', 'z'),
0,
vec![1],
false,
),
is_separator: false,
precedence: 0,
states: vec![1],
},
],
),
// negated character class followed by an individual character
(
vec![
(CharacterSet::empty().add_char('0'), 0, 1, false),
(CharacterSet::empty().add_char('b'), 0, 2, false),
(CharacterSet::empty().add_char('0'), false, 0, 1),
(CharacterSet::empty().add_char('b'), false, 0, 2),
(
CharacterSet::empty().add_range('a', 'f').negate(),
false,
0,
3,
false,
),
(CharacterSet::empty().add_char('c'), 0, 4, false),
(CharacterSet::empty().add_char('c'), false, 0, 4),
],
vec![
(CharacterSet::empty().add_char('0'), 0, vec![1, 3], false),
(CharacterSet::empty().add_char('b'), 0, vec![2], false),
(CharacterSet::empty().add_char('c'), 0, vec![4], false),
(
CharacterSet::empty()
NfaTransition {
characters: CharacterSet::empty().add_char('0'),
precedence: 0,
states: vec![1, 3],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::empty().add_char('b'),
precedence: 0,
states: vec![2],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::empty().add_char('c'),
precedence: 0,
states: vec![4],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::empty()
.add_range('a', 'f')
.add_char('0')
.negate(),
0,
vec![3],
false,
),
precedence: 0,
states: vec![3],
is_separator: false,
},
],
),
// multiple negated character classes
(
vec![
(CharacterSet::Include(vec!['a']), 0, 1, false),
(CharacterSet::Exclude(vec!['a', 'b', 'c']), 0, 2, false),
(CharacterSet::Include(vec!['g']), 0, 6, false),
(CharacterSet::Exclude(vec!['d', 'e', 'f']), 0, 3, false),
(CharacterSet::Exclude(vec!['g', 'h', 'i']), 0, 4, false),
(CharacterSet::Include(vec!['g']), 0, 5, false),
(CharacterSet::Include(vec!['a']), false, 0, 1),
(CharacterSet::Exclude(vec!['a', 'b', 'c']), false, 0, 2),
(CharacterSet::Include(vec!['g']), false, 0, 6),
(CharacterSet::Exclude(vec!['d', 'e', 'f']), false, 0, 3),
(CharacterSet::Exclude(vec!['g', 'h', 'i']), false, 0, 4),
(CharacterSet::Include(vec!['g']), false, 0, 5),
],
vec![
(CharacterSet::Include(vec!['a']), 0, vec![1, 3, 4], false),
(CharacterSet::Include(vec!['g']), 0, vec![2, 3, 5, 6], false),
(CharacterSet::Include(vec!['b', 'c']), 0, vec![3, 4], false),
(CharacterSet::Include(vec!['h', 'i']), 0, vec![2, 3], false),
(
CharacterSet::Include(vec!['d', 'e', 'f']),
0,
vec![2, 4],
false,
),
(
CharacterSet::Exclude(vec!['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']),
0,
vec![2, 3, 4],
false,
),
NfaTransition {
characters: CharacterSet::Include(vec!['a']),
precedence: 0,
states: vec![1, 3, 4],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Include(vec!['g']),
precedence: 0,
states: vec![2, 3, 5, 6],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Include(vec!['b', 'c']),
precedence: 0,
states: vec![3, 4],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Include(vec!['h', 'i']),
precedence: 0,
states: vec![2, 3],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Include(vec!['d', 'e', 'f']),
precedence: 0,
states: vec![2, 4],
is_separator: false,
},
NfaTransition {
characters: CharacterSet::Exclude(vec![
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
]),
precedence: 0,
states: vec![2, 3, 4],
is_separator: false,
},
],
),
];
for row in table.iter() {
assert_eq!(
NfaCursor::group_successors(row.0.iter().map(|(c, p, s, sep)| (c, *p, *s, *sep))),
NfaCursor::group_transitions(row.0.iter().map(|(c, sep, p, s)| (c, *sep, *p, *s))),
row.1
);
}

View file

@ -372,7 +372,7 @@ impl NfaBuilder {
mod tests {
use super::*;
use crate::grammars::Variable;
use crate::nfa::NfaCursor;
use crate::nfa::{NfaCursor, NfaTransition};
fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
let start_states = grammar.variables.iter().map(|v| v.start_state).collect();
@ -389,14 +389,18 @@ mod tests {
result_precedence = precedence;
}
}
if let Some((_, _, next_states, in_sep)) = cursor
.grouped_successors()
if let Some(NfaTransition {
states,
is_separator,
..
}) = cursor
.transitions()
.into_iter()
.find(|(chars, prec, _, _)| chars.contains(c) && *prec >= result_precedence)
.find(|t| t.characters.contains(c) && t.precedence >= result_precedence)
{
cursor.reset(next_states);
cursor.reset(states);
end_char += 1;
if in_sep {
if is_separator {
start_char = end_char;
}
} else {