Merge lex states more liberally

2019-06-20 14:05:15 -07:00 · 2019-06-20 14:05:15 -07:00 · 70dc79b412
commit 70dc79b412
parent fe6a69a626
3 changed files with 57 additions and 25 deletions
--- a/cli/src/generate/build_tables/build_lex_table.rs
+++ b/cli/src/generate/build_tables/build_lex_table.rs
@ -251,11 +251,16 @@ fn merge_token_set(
        };

        for existing_token in set_without_terminal.terminals() {
-            if token_conflict_map.does_conflict(i, existing_token.index)
-                || !coincident_token_index.contains(symbol, existing_token)
-            {
+            if token_conflict_map.does_conflict(i, existing_token.index) ||
+               token_conflict_map.does_match_prefix(i, existing_token.index) {
                return false;
            }
+            if !coincident_token_index.contains(symbol, existing_token) {
+                if token_conflict_map.does_overlap(existing_token.index, i) ||
+                   token_conflict_map.does_overlap(i, existing_token.index) {
+                    return false;
+                }
+            }
        }
    }

--- a/cli/src/generate/build_tables/build_parse_table.rs
+++ b/cli/src/generate/build_tables/build_parse_table.rs
@ -745,6 +745,7 @@ fn populate_following_tokens(
        .iter()
        .flat_map(|v| &v.productions)
        .chain(&inlines.productions);
+    let all_tokens = (0..result.len()).into_iter().map(Symbol::terminal).collect::<TokenSet>();
    for production in productions {
        for i in 1..production.steps.len() {
            let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
@ -756,6 +757,14 @@ fn populate_following_tokens(
            }
        }
    }
+    for extra in &grammar.extra_tokens {
+        if extra.is_terminal() {
+            for entry in result.iter_mut() {
+                entry.insert(*extra);
+            }
+            result[extra.index] = all_tokens.clone();
+        }
+    }
 }

 pub(crate) fn build_parse_table(
--- a/cli/src/generate/build_tables/token_conflicts.rs
+++ b/cli/src/generate/build_tables/token_conflicts.rs
@ -7,7 +7,8 @@ use std::fmt;

 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 struct TokenConflictStatus {
-    does_overlap: bool,
+    matches_prefix: bool,
+    does_match_continuation: bool,
    does_match_valid_continuation: bool,
    does_match_separators: bool,
    matches_same_string: bool,
@ -65,6 +66,10 @@ impl<'a> TokenConflictMap<'a> {
            || entry.matches_same_string
    }

+    pub fn does_match_prefix(&self, i: usize, j: usize) -> bool {
+        self.status_matrix[matrix_index(self.n, i, j)].matches_prefix
+    }
+
    pub fn does_match_shorter_or_longer(&self, i: usize, j: usize) -> bool {
        let entry = &self.status_matrix[matrix_index(self.n, i, j)];
        let reverse_entry = &self.status_matrix[matrix_index(self.n, j, i)];
@ -73,7 +78,11 @@ impl<'a> TokenConflictMap<'a> {
    }

    pub fn does_overlap(&self, i: usize, j: usize) -> bool {
-        self.status_matrix[matrix_index(self.n, i, j)].does_overlap
+        let status = &self.status_matrix[matrix_index(self.n, i, j)];
+        status.does_match_separators ||
+            status.matches_prefix ||
+            status.matches_same_string ||
+            status.does_match_continuation
    }

    pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool {
@ -238,6 +247,7 @@ fn compute_conflict_status(

        let has_sep = cursor.transition_chars().any(|(_, sep)| sep);

+        // Examine each possible completed token in this state.
        let mut completion = None;
        for (id, precedence) in cursor.completions() {
            if has_sep {
@ -248,13 +258,14 @@ fn compute_conflict_status(
                }
            }

+            // If the other token has already completed, then this is
+            // a same-string conflict.
            if let Some((prev_id, prev_precedence)) = completion {
                if id == prev_id {
                    continue;
                }

-                // Prefer tokens with higher precedence. For tokens with equal precedence,
-                // prefer those listed earlier in the grammar.
+                // Determine which of the two tokens is preferred.
                let preferred_id;
                if TokenConflictMap::prefer_token(
                    grammar,
@ -269,32 +280,37 @@ fn compute_conflict_status(

                if preferred_id == i {
                    result.0.matches_same_string = true;
-                    result.0.does_overlap = true;
                } else {
                    result.1.matches_same_string = true;
-                    result.1.does_overlap = true;
                }
            } else {
                completion = Some((id, precedence));
            }
        }

+        // Examine each possible transition from this state to detect substring conflicts.
        for transition in cursor.transitions() {
            let mut can_advance = true;
+
+            // If there is already a completed token in this state, then determine
+            // if the next state can also match the completed token. If so, then
+            // this is *not* a conflict.
            if let Some((completed_id, completed_precedence)) = completion {
-                let mut other_id = None;
+                let mut advanced_id = None;
                let mut successor_contains_completed_id = false;
                for variable_id in grammar.variable_indices_for_nfa_states(&transition.states) {
                    if variable_id == completed_id {
                        successor_contains_completed_id = true;
                        break;
                    } else {
-                        other_id = Some(variable_id);
+                        advanced_id = Some(variable_id);
                    }
                }

-                if let (Some(other_id), false) = (other_id, successor_contains_completed_id) {
-                    let preferred_id = if TokenConflictMap::prefer_transition(
+                // Determine which action is preferred: matching the already complete
+                // token, or continuing on to try and match the other longer token.
+                if let (Some(advanced_id), false) = (advanced_id, successor_contains_completed_id) {
+                    if TokenConflictMap::prefer_transition(
                        grammar,
                        &transition,
                        completed_id,
@ -302,20 +318,22 @@ fn compute_conflict_status(
                        has_sep,
                    ) {
                        can_advance = true;
-                        other_id
-                    } else {
-                        completed_id
-                    };
-
-                    if preferred_id == i {
-                        result.0.does_overlap = true;
-                        if transition.characters.does_intersect(&following_chars[j]) {
-                            result.0.does_match_valid_continuation = true;
+                        if advanced_id == i {
+                            result.0.does_match_continuation = true;
+                            if transition.characters.does_intersect(&following_chars[j]) {
+                                result.0.does_match_valid_continuation = true;
+                            }
+                        } else {
+                            result.1.does_match_continuation = true;
+                            if transition.characters.does_intersect(&following_chars[i]) {
+                                result.1.does_match_valid_continuation = true;
+                            }
                        }
                    } else {
-                        result.1.does_overlap = true;
-                        if transition.characters.does_intersect(&following_chars[i]) {
-                            result.1.does_match_valid_continuation = true;
+                        if completed_id == i {
+                            result.0.matches_prefix = true;
+                        } else {
+                            result.1.matches_prefix = true;
                        }
                    }
                }