Loosen criteria for identifying conflict-free tokens for error recovery

Max Brunsfeld 2019-01-20 16:58:31 -08:00
parent 9e610bf88e
commit f6cdd5e3d4
2 changed files with 74 additions and 12 deletions
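
Broadly, the change means a token is excluded from the "conflict-free" set only when another, non-coincident token can match a strictly shorter or longer string than it does; tokens that merely match exactly the same string no longer count as conflicts. A minimal, self-contained illustration of that idea, using plain strings in place of tree-sitter's lexical NFAs (the helper `extends_past` is hypothetical, not part of the codebase):

```rust
// Hedged sketch: approximate the "matches a longer string" half of the
// relation for plain string tokens. Token `b` disqualifies token `a` when
// `b` can keep matching past the point where `a` has already completed.
fn extends_past(a: &str, b: &str) -> bool {
    b.starts_with(a) && b.len() > a.len()
}

fn main() {
    // "instanceof" keeps matching after "in" has completed, so "in" is not a
    // safe token to recognize blindly during error recovery.
    assert!(extends_past("in", "instanceof"));
    // Two tokens that match exactly the same string no longer disqualify
    // each other under the loosened criterion.
    assert!(!extends_past("null", "null"));
    println!("ok");
}
```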

@@ -86,14 +86,14 @@ fn populate_error_state(
let n = lexical_grammar.variables.len();
// First identify the *conflict-free tokens*: tokens that do not overlap with
-// any other token in any way.
+// any other token in any way, besides matching exactly the same string.
let conflict_free_tokens: TokenSet = (0..n)
.into_iter()
.filter_map(|i| {
let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
j != i
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
-&& token_conflict_map.does_conflict(i, j)
+&& token_conflict_map.does_match_shorter_or_longer(i, j)
});
if conflicts_with_other_tokens {
None
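
Recast outside the diff, the loosened filter above roughly reduces to the predicate sketched below. This is an approximation, not the actual tree-sitter API: `is_conflict_free`, `coincident`, and `conflicts` are hypothetical stand-ins for the real `coincident_token_index.contains` and `token_conflict_map.does_match_shorter_or_longer` calls.

```rust
// Hedged sketch: token `i` stays "conflict-free" unless some other token `j`
// that does not already occur in the same parse states can match a strictly
// shorter or longer string than `i`.
fn is_conflict_free(
    i: usize,
    n: usize,
    coincident: &impl Fn(usize, usize) -> bool, // stand-in for coincident_token_index.contains
    conflicts: &impl Fn(usize, usize) -> bool,  // stand-in for does_match_shorter_or_longer
) -> bool {
    !(0..n).any(|j| j != i && !coincident(i, j) && conflicts(i, j))
}

fn main() {
    // Toy setup: no tokens are coincident, and only token 2 can match a
    // shorter or longer string than token 0.
    let coincident = |_: usize, _: usize| false;
    let conflicts = |i: usize, j: usize| (i, j) == (0, 2);
    assert!(!is_conflict_free(0, 3, &coincident, &conflicts));
    assert!(is_conflict_free(1, 3, &coincident, &conflicts));
    println!("ok");
}
```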

@@ -65,6 +65,13 @@ impl<'a> TokenConflictMap<'a> {
|| entry.matches_same_string
}
+pub fn does_match_shorter_or_longer(&self, i: usize, j: usize) -> bool {
+    let entry = &self.status_matrix[matrix_index(self.n, i, j)];
+    let reverse_entry = &self.status_matrix[matrix_index(self.n, j, i)];
+    (entry.does_match_valid_continuation || entry.does_match_separators)
+        && !reverse_entry.does_match_separators
+}
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].does_overlap
}
@@ -135,8 +142,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> {
write!(
f,
" {:?}: {:?},\n",
-self.grammar.variables[i].name,
-self.starting_chars_by_index[i]
+self.grammar.variables[i].name, self.starting_chars_by_index[i]
)?;
}
write!(f, " }},\n")?;
@@ -230,8 +236,18 @@ fn compute_conflict_status(
continue;
}
+let has_sep = cursor.transition_chars().any(|(_, sep)| sep);
let mut completion = None;
for (id, precedence) in cursor.completions() {
+if has_sep {
+    if id == i {
+        result.0.does_match_separators = true;
+    } else {
+        result.1.does_match_separators = true;
+    }
+}
if let Some((prev_id, prev_precedence)) = completion {
if id == prev_id {
continue;
@@ -263,8 +279,6 @@ fn compute_conflict_status(
}
}
-let has_sep = cursor.transition_chars().any(|(_, sep)| sep);
for transition in cursor.transitions() {
let mut can_advance = true;
if let Some((completed_id, completed_precedence)) = completion {
@@ -298,17 +312,11 @@ fn compute_conflict_status(
if transition.characters.does_intersect(&following_chars[j]) {
result.0.does_match_valid_continuation = true;
}
-if transition.is_separator || has_sep {
-    result.0.does_match_separators = true;
-}
} else {
result.1.does_overlap = true;
if transition.characters.does_intersect(&following_chars[i]) {
result.1.does_match_valid_continuation = true;
}
-if transition.is_separator || has_sep {
-    result.1.does_match_separators = true;
-}
}
}
}
@@ -414,6 +422,60 @@ mod tests {
assert!(token_map.does_conflict(var("instanceof"), var("in")));
}
+#[test]
+fn test_token_conflicts_with_separators() {
+    let grammar = expand_tokens(ExtractedLexicalGrammar {
+        separators: vec![Rule::pattern("\\s")],
+        variables: vec![
+            Variable {
+                name: "x".to_string(),
+                kind: VariableType::Named,
+                rule: Rule::string("x"),
+            },
+            Variable {
+                name: "newline".to_string(),
+                kind: VariableType::Named,
+                rule: Rule::string("\n"),
+            },
+        ],
+    })
+    .unwrap();
+    let var = |name| index_of_var(&grammar, name);
+    let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
+    assert!(token_map.does_conflict(var("newline"), var("x")));
+    assert!(!token_map.does_conflict(var("x"), var("newline")));
+}
+
+#[test]
+fn test_token_conflicts_with_open_ended_tokens() {
+    let grammar = expand_tokens(ExtractedLexicalGrammar {
+        separators: vec![Rule::pattern("\\s")],
+        variables: vec![
+            Variable {
+                name: "x".to_string(),
+                kind: VariableType::Named,
+                rule: Rule::string("x"),
+            },
+            Variable {
+                name: "anything".to_string(),
+                kind: VariableType::Named,
+                rule: Rule::prec(-1, Rule::pattern(".*")),
+            },
+        ],
+    })
+    .unwrap();
+    let var = |name| index_of_var(&grammar, name);
+    let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
+    assert!(token_map.does_match_shorter_or_longer(var("anything"), var("x")));
+    assert!(!token_map.does_match_shorter_or_longer(var("x"), var("anything")));
+}
fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize {
grammar
.variables