From ceff3936ef6e9231e2ea78e1edaaac8370f542f0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 16:10:52 -0800 Subject: [PATCH] Unify logic for handling tokens that match separators into one place --- .../generate/build_tables/build_lex_table.rs | 39 +++------ .../generate/build_tables/token_conflicts.rs | 85 +++++++++++++------ 2 files changed, 71 insertions(+), 53 deletions(-) diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index 15f09f6b..03ec0c7b 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex; use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition}; +use crate::generate::nfa::{CharacterSet, NfaCursor}; use crate::generate::rules::Symbol; use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; use std::collections::hash_map::Entry; @@ -208,42 +208,31 @@ impl<'a> LexTableBuilder<'a> { )); } - for NfaTransition { - characters, - precedence, - states, - is_separator, - } in transitions - { + for transition in transitions { if let Some((completed_id, completed_precedence)) = completion { - if precedence < completed_precedence { + if !TokenConflictMap::prefer_transition( + &self.lexical_grammar, + &transition, + completed_id, + completed_precedence, + has_sep, + ) { continue; } - - if precedence == completed_precedence { - if is_separator { - continue; - } - if has_sep && self.lexical_grammar - .variable_indices_for_nfa_states(&states) - .position(|i| i == completed_id) - .is_none() - { - continue; - } - } } - let (next_state_id, _) = self.add_state(states, eof_valid && is_separator); + + let (next_state_id, _) = + self.add_state(transition.states, eof_valid && transition.is_separator); let next_state = if next_state_id == state_id { None } else { Some(next_state_id) }; self.table.states[state_id].advance_actions.push(( - characters, + transition.characters, AdvanceAction { state: next_state, - in_main_token: !is_separator, + in_main_token: !transition.is_separator, }, )); } diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index df3d4250..13c69c19 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -58,7 +58,9 @@ impl<'a> TokenConflictMap<'a> { pub fn does_conflict(&self, i: usize, j: usize) -> bool { let entry = &self.status_matrix[matrix_index(self.n, i, j)]; - entry.does_match_valid_continuation || entry.does_match_separators || entry.matches_same_string + entry.does_match_valid_continuation + || entry.does_match_separators + || entry.matches_same_string } pub fn does_overlap(&self, i: usize, j: usize) -> bool { @@ -81,6 +83,32 @@ impl<'a> TokenConflictMap<'a> { Ordering::Equal => left.1 < right.1, } } + + pub fn prefer_transition( + grammar: &LexicalGrammar, + t: &NfaTransition, + completed_id: usize, + completed_precedence: i32, + has_separator_transitions: bool, + ) -> bool { + if t.precedence < completed_precedence { + return false; + } + if t.precedence == completed_precedence { + if t.is_separator { + return false; + } + if has_separator_transitions + && grammar + .variable_indices_for_nfa_states(&t.states) + .position(|i| i == completed_id) + .is_none() + { + return false; + } + } + true + } } impl<'a> fmt::Debug for TokenConflictMap<'a> { @@ -97,7 +125,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> { for i in 0..self.n { write!( f, - " {}: {:?},\n", + " {:?}: {:?},\n", self.grammar.variables[i].name, self.following_chars_by_index[i] )?; } @@ -105,11 +133,11 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> { write!(f, " status_matrix: {{\n")?; for i in 0..self.n { - write!(f, " {}: {{\n", self.grammar.variables[i].name)?; + write!(f, " {:?}: {{\n", self.grammar.variables[i].name)?; for j in 0..self.n { write!( f, - " {}: {:?},\n", + " {:?}: {:?},\n", self.grammar.variables[j].name, self.status_matrix[matrix_index(self.n, i, j)] )?; @@ -191,19 +219,19 @@ fn compute_conflict_status( // Prefer tokens with higher precedence. For tokens with equal precedence, // prefer those listed earlier in the grammar. - let winning_id; + let preferred_id; if TokenConflictMap::prefer_token( grammar, (prev_precedence, prev_id), (precedence, id), ) { - winning_id = prev_id; + preferred_id = prev_id; } else { - winning_id = id; + preferred_id = id; completion = Some((id, precedence)); } - if winning_id == i { + if preferred_id == i { result.0.matches_same_string = true; result.0.does_overlap = true; } else { @@ -215,18 +243,14 @@ fn compute_conflict_status( } } - for NfaTransition { - characters, - precedence, - states, - is_separator, - } in cursor.transitions() - { + let has_sep = cursor.transition_chars().any(|(_, sep)| sep); + + for transition in cursor.transitions() { let mut can_advance = true; if let Some((completed_id, completed_precedence)) = completion { let mut other_id = None; let mut successor_contains_completed_id = false; - for variable_id in grammar.variable_indices_for_nfa_states(&states) { + for variable_id in grammar.variable_indices_for_nfa_states(&transition.states) { if variable_id == completed_id { successor_contains_completed_id = true; break; @@ -236,33 +260,38 @@ fn compute_conflict_status( } if let (Some(other_id), false) = (other_id, successor_contains_completed_id) { - let winning_id; - if precedence < completed_precedence { - winning_id = completed_id; - can_advance = false; + let preferred_id = if TokenConflictMap::prefer_transition( + grammar, + &transition, + completed_id, + completed_precedence, + has_sep, + ) { + can_advance = true; + other_id } else { - winning_id = other_id; - } + completed_id + }; - if winning_id == i { + if preferred_id == i { result.0.does_overlap = true; - if characters.does_intersect(&following_chars[j]) { + if transition.characters.does_intersect(&following_chars[j]) { result.0.does_match_valid_continuation = true; } - if is_separator { + if transition.is_separator || has_sep { result.0.does_match_separators = true; } } else { result.1.does_overlap = true; - if characters.does_intersect(&following_chars[i]) { + if transition.characters.does_intersect(&following_chars[i]) { result.1.does_match_valid_continuation = true; } } } } - if can_advance && visited_state_sets.insert(states.clone()) { - state_set_queue.push(states); + if can_advance && visited_state_sets.insert(transition.states.clone()) { + state_set_queue.push(transition.states); } } }