From 5b38ff5f78e6ebb3003860c8f9262d3ee66c51b1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 19 Jun 2019 21:36:01 -0700 Subject: [PATCH] Loosen lex state equality check to catch some spurious duplicates --- .../generate/build_tables/build_lex_table.rs | 20 ++++------ cli/src/generate/render.rs | 15 ++++---- cli/src/generate/tables.rs | 38 ++++++++++++++++++- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index ef4b3e5e..1985badd 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -195,7 +195,7 @@ impl<'a> LexTableBuilder<'a> { self.table.states[state_id].advance_actions.push(( CharacterSet::empty().add_char('\0'), AdvanceAction { - state: Some(next_state_id), + state: next_state_id, in_main_token: true, }, )); @@ -216,15 +216,10 @@ impl<'a> LexTableBuilder<'a> { let (next_state_id, _) = self.add_state(transition.states, eof_valid && transition.is_separator); - let next_state = if next_state_id == state_id { - None - } else { - Some(next_state_id) - }; self.table.states[state_id].advance_actions.push(( transition.characters, AdvanceAction { - state: next_state, + state: next_state_id, in_main_token: !transition.is_separator, }, )); @@ -282,7 +277,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { if state_replacements.contains_key(&j) { continue; } - if state_i == state_j { + if state_i.equals(state_j, i, j) { info!("replace state {} with state {}", i, j); state_replacements.insert(i, j); done = false; @@ -292,9 +287,10 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { } for state in table.states.iter_mut() { for (_, advance_action) in state.advance_actions.iter_mut() { - advance_action.state = advance_action - .state - .map(|s| state_replacements.get(&s).cloned().unwrap_or(s)) + advance_action.state = state_replacements + .get(&advance_action.state) + .cloned() + .unwrap_or(advance_action.state); } } } @@ -320,7 +316,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { for state in table.states.iter_mut() { for (_, advance_action) in state.advance_actions.iter_mut() { - advance_action.state = advance_action.state.map(|s| final_state_replacements[s]); + advance_action.state = final_state_replacements[advance_action.state]; } } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index f286e2a2..fbc29408 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -471,7 +471,7 @@ impl Generator { for (i, state) in lex_table.states.into_iter().enumerate() { add_line!(self, "case {}:", i); indent!(self); - self.add_lex_state(i, state); + self.add_lex_state(state); dedent!(self); } @@ -487,7 +487,7 @@ impl Generator { add_line!(self, ""); } - fn add_lex_state(&mut self, index: usize, state: LexState) { + fn add_lex_state(&mut self, state: LexState) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); } @@ -500,13 +500,13 @@ impl Generator { add!(self, "if ("); if self.add_character_set_condition(&characters, &ruled_out_characters) { add!(self, ") "); - self.add_advance_action(index, &action); + self.add_advance_action(&action); if let CharacterSet::Include(chars) = characters { ruled_out_characters.extend(chars.iter().map(|c| *c as u32)); } } else { self.buffer.truncate(previous_length); - self.add_advance_action(index, &action); + self.add_advance_action(&action); } add!(self, "\n"); } @@ -619,12 +619,11 @@ impl Generator { }) } - fn add_advance_action(&mut self, index: usize, action: &AdvanceAction) { - let state_id = action.state.unwrap_or(index); + fn add_advance_action(&mut self, action: &AdvanceAction) { if action.in_main_token { - add!(self, "ADVANCE({});", state_id); + add!(self, "ADVANCE({});", action.state); } else { - add!(self, "SKIP({})", state_id); + add!(self, "SKIP({})", action.state); } } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 4234dd31..e04c4ae2 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -63,7 +63,7 @@ pub(crate) struct ParseTable { #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct AdvanceAction { - pub state: Option, + pub state: LexStateId, pub in_main_token: bool, } @@ -152,3 +152,39 @@ impl ParseAction { } } } + +impl LexState { + pub fn equals(&self, other: &LexState, left_state: usize, right_state: usize) -> bool { + if self.accept_action != other.accept_action { + return false; + } + + if self.advance_actions.len() != other.advance_actions.len() { + return false; + } + + for (left, right) in self + .advance_actions + .iter() + .zip(other.advance_actions.iter()) + { + if left.0 != right.0 || left.1.in_main_token != right.1.in_main_token { + return false; + } + + let left_successor = left.1.state; + let right_successor = right.1.state; + + // Two states can be equal if they have different successors but the successor + // states are equal. + if left_successor != right_successor + && (left_successor != left_state || right_successor != right_state) + && (left_successor != right_state || right_successor != left_state) + { + return false; + } + } + + true + } +}