Loosen criteria for identifying conflict-free tokens for error recovery

Max Brunsfeld 2019-01-20 16:58:31 -08:00
parent 9e610bf88e
commit f6cdd5e3d4
2 changed files with 74 additions and 12 deletions
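
Broadly, the change means a token is excluded from the "conflict-free" set only when another, non-coincident token can match a strictly shorter or longer string than it does; tokens that merely match exactly the same string no longer count as conflicts. A minimal, self-contained illustration of that idea, using plain strings in place of tree-sitter's lexical NFAs (the helper `extends_past` is hypothetical, not part of the codebase):

```rust
// Hedged sketch: approximate the "matches a longer string" half of the
// relation for plain string tokens. Token `b` disqualifies token `a` when
// `b` can keep matching past the point where `a` has already completed.
fn extends_past(a: &str, b: &str) -> bool {
    b.starts_with(a) && b.len() > a.len()
}

fn main() {
    // "instanceof" keeps matching after "in" has completed, so "in" is not a
    // safe token to recognize blindly during error recovery.
    assert!(extends_past("in", "instanceof"));
    // Two tokens that match exactly the same string no longer disqualify
    // each other under the loosened criterion.
    assert!(!extends_past("null", "null"));
    println!("ok");
}
```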

@@ -86,14 +86,14 @@ fn populate_error_state(
let n = lexical_grammar.variables.len();
// First identify the *conflict-free tokens*: tokens that do not overlap with
-// any other token in any way.
+// any other token in any way, besides matching exactly the same string.
let conflict_free_tokens: TokenSet = (0..n)
.into_iter()
.filter_map(|i| {
let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
j != i
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
-&& token_conflict_map.does_conflict(i, j)
+&& token_conflict_map.does_match_shorter_or_longer(i, j)
});
if conflicts_with_other_tokens {
None
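
Recast outside the diff, the loosened filter above roughly reduces to the predicate sketched below. This is an approximation, not the actual tree-sitter API: `is_conflict_free`, `coincident`, and `conflicts` are hypothetical stand-ins for the real `coincident_token_index.contains` and `token_conflict_map.does_match_shorter_or_longer` calls.

```rust
// Hedged sketch: token `i` stays "conflict-free" unless some other token `j`
// that does not already occur in the same parse states can match a strictly
// shorter or longer string than `i`.
fn is_conflict_free(
    i: usize,
    n: usize,
    coincident: &impl Fn(usize, usize) -> bool, // stand-in for coincident_token_index.contains
    conflicts: &impl Fn(usize, usize) -> bool,  // stand-in for does_match_shorter_or_longer
) -> bool {
    !(0..n).any(|j| j != i && !coincident(i, j) && conflicts(i, j))
}

fn main() {
    // Toy setup: no tokens are coincident, and only token 2 can match a
    // shorter or longer string than token 0.
    let coincident = |_: usize, _: usize| false;
    let conflicts = |i: usize, j: usize| (i, j) == (0, 2);
    assert!(!is_conflict_free(0, 3, &coincident, &conflicts));
    assert!(is_conflict_free(1, 3, &coincident, &conflicts));
    println!("ok");
}
```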

@@ -65,6 +65,13 @@ impl<'a> TokenConflictMap<'a> {
|| entry.matches_same_string
}
+pub fn does_match_shorter_or_longer(&self, i: usize, j: usize) -> bool {
+    let entry = &self.status_matrix[matrix_index(self.n, i, j)];
+    let reverse_entry = &self.status_matrix[matrix_index(self.n, j, i)];
+    (entry.does_match_valid_continuation || entry.does_match_separators)
+        && !reverse_entry.does_match_separators
+}
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].does_overlap
}
@@ -135,8 +142,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> {
write!(
f,
" {:?}: {:?},\n",
-self.grammar.variables[i].name,
-self.starting_chars_by_index[i]
+self.grammar.variables[i].name, self.starting_chars_by_index[i]
)?;
}
write!(f, " }},\n")?;
@@ -230,8 +236,18 @@ fn compute_conflict_status(
continue;
}
+let has_sep = cursor.transition_chars().any(|(_, sep)| sep);
let mut completion = None;
for (id, precedence) in cursor.completions() {
+if has_sep {
+    if id == i {
+        result.0.does_match_separators = true;
+    } else {
+        result.1.does_match_separators = true;
+    }
+}
if let Some((prev_id, prev_precedence)) = completion {
if id == prev_id {
continue;
@@ -263,8 +279,6 @@ fn compute_conflict_status(
}
}
-let has_sep = cursor.transition_chars().any(|(_, sep)| sep);
for transition in cursor.transitions() {
let mut can_advance = true;
if let Some((completed_id, completed_precedence)) = completion {
@@ -298,17 +312,11 @@ fn compute_conflict_status(
if transition.characters.does_intersect(&following_chars[j]) {
result.0.does_match_valid_continuation = true;
}
-if transition.is_separator || has_sep {
-    result.0.does_match_separators = true;
-}
} else {
result.1.does_overlap = true;
if transition.characters.does_intersect(&following_chars[i]) {
result.1.does_match_valid_continuation = true;
}
-if transition.is_separator || has_sep {
-    result.1.does_match_separators = true;
-}
}
}
}
@@ -414,6 +422,60 @@ mod tests {
assert!(token_map.does_conflict(var("instanceof"), var("in")));
}
+#[test]
+fn test_token_conflicts_with_separators() {
+    let grammar = expand_tokens(ExtractedLexicalGrammar {
+        separators: vec![Rule::pattern("\\s")],
+        variables: vec![
+            Variable {
+                name: "x".to_string(),
+                kind: VariableType::Named,
+                rule: Rule::string("x"),
+            },
+            Variable {
+                name: "newline".to_string(),
+                kind: VariableType::Named,
+                rule: Rule::string("\n"),
+            },
+        ],
+    })
+    .unwrap();
+    let var = |name| index_of_var(&grammar, name);
+    let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
+    assert!(token_map.does_conflict(var("newline"), var("x")));
+    assert!(!token_map.does_conflict(var("x"), var("newline")));
+}
+
+#[test]
+fn test_token_conflicts_with_open_ended_tokens() {
+    let grammar = expand_tokens(ExtractedLexicalGrammar {
+        separators: vec![Rule::pattern("\\s")],
+        variables: vec![
+            Variable {
+                name: "x".to_string(),
+                kind: VariableType::Named,
+                rule: Rule::string("x"),
+            },
+            Variable {
+                name: "anything".to_string(),
+                kind: VariableType::Named,
+                rule: Rule::prec(-1, Rule::pattern(".*")),
+            },
+        ],
+    })
+    .unwrap();
+    let var = |name| index_of_var(&grammar, name);
+    let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
+    assert!(token_map.does_match_shorter_or_longer(var("anything"), var("x")));
+    assert!(!token_map.does_match_shorter_or_longer(var("x"), var("anything")));
+}
fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize {
grammar
.variables