Fix bugs in handling tokens that overlap with separators

This commit is contained in:
Max Brunsfeld 2019-01-15 12:13:42 -08:00
parent b799b46f79
commit d8ab36b2a5
6 changed files with 98 additions and 56 deletions

View file

@ -191,6 +191,7 @@ impl<'a> LexTableBuilder<'a> {
);
let transitions = self.cursor.transitions();
let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep);
info!("lex state: {}, transitions: {:?}", state_id, transitions);
// If EOF is a valid lookahead token, add a transition predicated on the null
@ -214,12 +215,23 @@ impl<'a> LexTableBuilder<'a> {
is_separator,
} in transitions
{
if let Some((_, completed_precedence)) = completion {
if precedence < completed_precedence
|| (precedence == completed_precedence && is_separator)
{
if let Some((completed_id, completed_precedence)) = completion {
if precedence < completed_precedence {
continue;
}
if precedence == completed_precedence {
if is_separator {
continue;
}
if has_sep && self.lexical_grammar
.variable_indices_for_nfa_states(&states)
.position(|i| i == completed_id)
.is_none()
{
continue;
}
}
}
let (next_state_id, _) = self.add_state(states, eof_valid && is_separator);
let next_state = if next_state_id == state_id {

View file

@ -58,7 +58,7 @@ impl<'a> TokenConflictMap<'a> {
/// Reports whether tokens `i` and `j` are considered to conflict.
///
/// Reads the status entry for the pair `(i, j)` from `status_matrix` (located
/// with `matrix_index(self.n, i, j)`) and combines its boolean flags with `||`.
pub fn does_conflict(&self, i: usize, j: usize) -> bool {
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
// NOTE(review): the two consecutive expressions below look like the "before"
// and "after" sides of one diff hunk; they differ only in that the second
// also consults `matches_same_string`. Confirm which one the real file keeps.
entry.does_match_valid_continuation || entry.does_match_separators
entry.does_match_valid_continuation || entry.does_match_separators || entry.matches_same_string
}
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
@ -176,7 +176,7 @@ fn compute_conflict_status(
while let Some(state_set) = state_set_queue.pop() {
// Don't pursue states where there's no potential for conflict.
if variable_ids_for_states(&state_set, grammar).count() > 1 {
if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 {
cursor.reset(state_set);
} else {
continue;
@ -226,7 +226,7 @@ fn compute_conflict_status(
if let Some((completed_id, completed_precedence)) = completion {
let mut other_id = None;
let mut successor_contains_completed_id = false;
for variable_id in variable_ids_for_states(&states, grammar) {
for variable_id in grammar.variable_indices_for_nfa_states(&states) {
if variable_id == completed_id {
successor_contains_completed_id = true;
break;
@ -269,22 +269,6 @@ fn compute_conflict_status(
result
}
/// Maps each NFA state id in `state_ids` to the index of its lexical variable
/// (via `grammar.variable_index_for_nfa_state`) and yields those indices
/// lazily, dropping an index when it equals the one yielded immediately
/// before it.
///
/// Only *consecutive* repeats are collapsed; the same index can be yielded
/// again if a different index appears in between.
/// NOTE(review): presumably callers pass state ids ordered so that states of
/// one variable are adjacent, which would make the output duplicate-free —
/// confirm against the call sites.
///
/// `state_ids` is taken as a slice rather than `&Vec<u32>`; existing callers
/// that pass `&vec` keep compiling through deref coercion.
fn variable_ids_for_states<'a>(
    state_ids: &'a [u32],
    grammar: &'a LexicalGrammar,
) -> impl Iterator<Item = usize> + 'a {
    // Index produced for the previous state; `None` until the first state is seen.
    let mut last_yielded = None;
    state_ids.iter().filter_map(move |&state_id| {
        let variable_id = grammar.variable_index_for_nfa_state(state_id);
        if last_yielded == Some(variable_id) {
            // Same variable as the preceding state: suppress the repeat.
            return None;
        }
        last_yielded = Some(variable_id);
        last_yielded
    })
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -175,8 +175,27 @@ impl Variable {
}
impl LexicalGrammar {
/// Yields, for each NFA state id in `state_ids`, the index of the variable
/// returned by `variable_index_for_nfa_state`, skipping an index when it is
/// equal to the one produced for the immediately preceding state.
///
/// The iterator is lazy and borrows both `self` and `state_ids` for `'a`.
/// Only *consecutive* repeats are collapsed; an index may appear again after
/// a different index has been yielded.
/// NOTE(review): presumably `state_ids` is ordered so that states belonging
/// to one variable are adjacent — confirm against the call sites.
///
/// `state_ids` is accepted as a slice rather than `&Vec<u32>`; callers that
/// pass `&vec` continue to compile through deref coercion.
pub fn variable_indices_for_nfa_states<'a>(
    &'a self,
    state_ids: &'a [u32],
) -> impl Iterator<Item = usize> + 'a {
    // Index produced for the previous state; `None` before the first state.
    let mut prev = None;
    state_ids.iter().filter_map(move |&state_id| {
        let variable_id = self.variable_index_for_nfa_state(state_id);
        if prev == Some(variable_id) {
            // Same variable as the preceding state: do not yield it again.
            None
        } else {
            prev = Some(variable_id);
            prev
        }
    })
}
/// Returns the index of the first entry in `self.variables` whose
/// `start_state` is greater than or equal to `state_id`.
///
/// # Panics
///
/// Panics (through `unwrap`) if no variable satisfies that condition.
pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
// NOTE(review): the one-line expression and the multi-line chain below are the
// same lookup written twice; they look like the before/after sides of a
// formatting-only diff hunk. Confirm which one the real file keeps.
self.variables.iter().position(|v| v.start_state >= state_id).unwrap()
self.variables
.iter()
.position(|v| v.start_state >= state_id)
.unwrap()
}
}

View file

@ -374,7 +374,7 @@ impl<'a> NfaCursor<'a> {
}
let intersection_transition = NfaTransition {
characters: intersection,
is_separator: result[i].is_separator || is_sep,
is_separator: result[i].is_separator && is_sep,
precedence: max(result[i].precedence, prec),
states: intersection_states,
};