Fix bugs in handling tokens that overlap with separators

2019-01-15 12:13:42 -08:00 · 2019-01-15 12:13:42 -08:00 · d8ab36b2a5
commit d8ab36b2a5
parent b799b46f79
6 changed files with 98 additions and 56 deletions
--- a/cli/src/generate/build_tables/build_lex_table.rs
+++ b/cli/src/generate/build_tables/build_lex_table.rs
@@ -191,6 +191,7 @@ impl<'a> LexTableBuilder<'a> {
        );

        let transitions = self.cursor.transitions();
+        let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep);
        info!("lex state: {}, transitions: {:?}", state_id, transitions);

        // If EOF is a valid lookahead token, add a transition predicated on the null
@@ -214,12 +215,23 @@ impl<'a> LexTableBuilder<'a> {
            is_separator,
        } in transitions
        {
-            if let Some((_, completed_precedence)) = completion {
-                if precedence < completed_precedence
-                    || (precedence == completed_precedence && is_separator)
-                {
+            if let Some((completed_id, completed_precedence)) = completion {
+                if precedence < completed_precedence {
                    continue;
                }
+
+                if precedence == completed_precedence {
+                    if is_separator {
+                        continue;
+                    }
+                    if has_sep && self.lexical_grammar
+                        .variable_indices_for_nfa_states(&states)
+                        .position(|i| i == completed_id)
+                        .is_none()
+                    {
+                        continue;
+                    }
+                }
            }
            let (next_state_id, _) = self.add_state(states, eof_valid && is_separator);
            let next_state = if next_state_id == state_id {
--- a/cli/src/generate/build_tables/token_conflicts.rs
+++ b/cli/src/generate/build_tables/token_conflicts.rs
@@ -58,7 +58,7 @@ impl<'a> TokenConflictMap<'a> {

    pub fn does_conflict(&self, i: usize, j: usize) -> bool {
        let entry = &self.status_matrix[matrix_index(self.n, i, j)];
-        entry.does_match_valid_continuation || entry.does_match_separators
+        entry.does_match_valid_continuation || entry.does_match_separators || entry.matches_same_string
    }

    pub fn does_overlap(&self, i: usize, j: usize) -> bool {
@@ -176,7 +176,7 @@ fn compute_conflict_status(

    while let Some(state_set) = state_set_queue.pop() {
        // Don't pursue states where there's no potential for conflict.
-        if variable_ids_for_states(&state_set, grammar).count() > 1 {
+        if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 {
            cursor.reset(state_set);
        } else {
            continue;
@@ -226,7 +226,7 @@ fn compute_conflict_status(
            if let Some((completed_id, completed_precedence)) = completion {
                let mut other_id = None;
                let mut successor_contains_completed_id = false;
-                for variable_id in variable_ids_for_states(&states, grammar) {
+                for variable_id in grammar.variable_indices_for_nfa_states(&states) {
                    if variable_id == completed_id {
                        successor_contains_completed_id = true;
                        break;
@@ -269,22 +269,6 @@ fn compute_conflict_status(
    result
 }

-fn variable_ids_for_states<'a>(
-    state_ids: &'a Vec<u32>,
-    grammar: &'a LexicalGrammar,
-) -> impl Iterator<Item = usize> + 'a {
-    let mut prev = None;
-    state_ids.iter().filter_map(move |state_id| {
-        let variable_id = grammar.variable_index_for_nfa_state(*state_id);
-        if prev != Some(variable_id) {
-            prev = Some(variable_id);
-            prev
-        } else {
-            None
-        }
-    })
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/cli/src/generate/grammars.rs
+++ b/cli/src/generate/grammars.rs
@@ -175,8 +175,27 @@ impl Variable {
 }

 impl LexicalGrammar {
+    pub fn variable_indices_for_nfa_states<'a>(
+        &'a self,
+        state_ids: &'a Vec<u32>,
+    ) -> impl Iterator<Item = usize> + 'a {
+        let mut prev = None;
+        state_ids.iter().filter_map(move |state_id| {
+            let variable_id = self.variable_index_for_nfa_state(*state_id);
+            if prev != Some(variable_id) {
+                prev = Some(variable_id);
+                prev
+            } else {
+                None
+            }
+        })
+    }
+
    pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
-        self.variables.iter().position(|v| v.start_state >= state_id).unwrap()
+        self.variables
+            .iter()
+            .position(|v| v.start_state >= state_id)
+            .unwrap()
    }
 }

--- a/cli/src/generate/nfa.rs
+++ b/cli/src/generate/nfa.rs
@@ -374,7 +374,7 @@ impl<'a> NfaCursor<'a> {
                    }
                    let intersection_transition = NfaTransition {
                        characters: intersection,
-                        is_separator: result[i].is_separator || is_sep,
+                        is_separator: result[i].is_separator && is_sep,
                        precedence: max(result[i].precedence, prec),
                        states: intersection_states,
                    };