Fix bugs in handling tokens that overlap with separators

This commit is contained in:
Max Brunsfeld 2019-01-15 12:13:42 -08:00
parent b799b46f79
commit d8ab36b2a5
6 changed files with 98 additions and 56 deletions

View file

@ -191,6 +191,7 @@ impl<'a> LexTableBuilder<'a> {
);
let transitions = self.cursor.transitions();
let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep);
info!("lex state: {}, transitions: {:?}", state_id, transitions);
// If EOF is a valid lookahead token, add a transition predicated on the null
@ -214,12 +215,23 @@ impl<'a> LexTableBuilder<'a> {
is_separator,
} in transitions
{
if let Some((_, completed_precedence)) = completion {
if precedence < completed_precedence
|| (precedence == completed_precedence && is_separator)
{
if let Some((completed_id, completed_precedence)) = completion {
if precedence < completed_precedence {
continue;
}
if precedence == completed_precedence {
if is_separator {
continue;
}
if has_sep && self.lexical_grammar
.variable_indices_for_nfa_states(&states)
.position(|i| i == completed_id)
.is_none()
{
continue;
}
}
}
let (next_state_id, _) = self.add_state(states, eof_valid && is_separator);
let next_state = if next_state_id == state_id {

View file

@ -58,7 +58,7 @@ impl<'a> TokenConflictMap<'a> {
pub fn does_conflict(&self, i: usize, j: usize) -> bool {
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
entry.does_match_valid_continuation || entry.does_match_separators
entry.does_match_valid_continuation || entry.does_match_separators || entry.matches_same_string
}
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
@ -176,7 +176,7 @@ fn compute_conflict_status(
while let Some(state_set) = state_set_queue.pop() {
// Don't pursue states where there's no potential for conflict.
if variable_ids_for_states(&state_set, grammar).count() > 1 {
if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 {
cursor.reset(state_set);
} else {
continue;
@ -226,7 +226,7 @@ fn compute_conflict_status(
if let Some((completed_id, completed_precedence)) = completion {
let mut other_id = None;
let mut successor_contains_completed_id = false;
for variable_id in variable_ids_for_states(&states, grammar) {
for variable_id in grammar.variable_indices_for_nfa_states(&states) {
if variable_id == completed_id {
successor_contains_completed_id = true;
break;
@ -269,22 +269,6 @@ fn compute_conflict_status(
result
}
/// Maps each NFA state id in `state_ids` to the index of the lexical
/// variable that owns it, collapsing consecutive runs of the same index
/// into a single emitted value.
fn variable_ids_for_states<'a>(
    state_ids: &'a Vec<u32>,
    grammar: &'a LexicalGrammar,
) -> impl Iterator<Item = usize> + 'a {
    let mut last_emitted = None;
    state_ids.iter().filter_map(move |id| {
        let index = grammar.variable_index_for_nfa_state(*id);
        // Skip this state if it belongs to the same variable as the
        // previously emitted one.
        if last_emitted == Some(index) {
            None
        } else {
            last_emitted = Some(index);
            last_emitted
        }
    })
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -175,8 +175,27 @@ impl Variable {
}
impl LexicalGrammar {
/// Returns an iterator over the indices of the lexical variables that own
/// the given NFA states, with consecutive duplicate indices deduplicated.
pub fn variable_indices_for_nfa_states<'a>(
    &'a self,
    state_ids: &'a Vec<u32>,
) -> impl Iterator<Item = usize> + 'a {
    let mut previous_index = None;
    state_ids.iter().filter_map(move |state_id| {
        let index = self.variable_index_for_nfa_state(*state_id);
        match previous_index {
            // Same variable as the last state we yielded — drop it.
            Some(prev) if prev == index => None,
            _ => {
                previous_index = Some(index);
                Some(index)
            }
        }
    })
}
pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
self.variables.iter().position(|v| v.start_state >= state_id).unwrap()
self.variables
.iter()
.position(|v| v.start_state >= state_id)
.unwrap()
}
}

View file

@ -374,7 +374,7 @@ impl<'a> NfaCursor<'a> {
}
let intersection_transition = NfaTransition {
characters: intersection,
is_separator: result[i].is_separator || is_sep,
is_separator: result[i].is_separator && is_sep,
precedence: max(result[i].precedence, prec),
states: intersection_states,
};

View file

@ -75,30 +75,10 @@ pub fn run_tests_at_path(
println!("{} failures:", failures.len())
}
println!(
"\n{} / {}",
Colour::Green.paint("expected"),
Colour::Red.paint("actual")
);
print_diff_key();
for (i, (name, actual, expected)) in failures.iter().enumerate() {
println!("\n {}. {}:", i + 1, name);
let changeset = Changeset::new(actual, expected, " ");
print!(" ");
for diff in &changeset.diffs {
match diff {
Difference::Same(part) => {
print!("{}{}", part, changeset.split);
}
Difference::Add(part) => {
print!("{}{}", Colour::Green.paint(part), changeset.split);
}
Difference::Rem(part) => {
print!("{}{}", Colour::Red.paint(part), changeset.split);
}
}
}
println!("");
print_diff(actual, expected);
}
}
@ -106,6 +86,33 @@ pub fn run_tests_at_path(
Ok(())
}
/// Prints the color legend for diff output: green marks expected text,
/// red marks actual text.
pub fn print_diff_key() {
    let expected = Colour::Green.paint("expected");
    let actual = Colour::Red.paint("actual");
    println!("\n{} / {}", expected, actual);
}
/// Prints a word-level diff between `actual` and `expected`, coloring
/// inserted words green and removed words red.
///
/// Takes `&str` rather than `&String` so any string-like borrow works;
/// existing `&String` call sites still compile via deref coercion.
pub fn print_diff(actual: &str, expected: &str) {
    let changeset = Changeset::new(actual, expected, " ");
    print!(" ");
    for diff in &changeset.diffs {
        match diff {
            Difference::Same(part) => {
                print!("{}{}", part, changeset.split);
            }
            Difference::Add(part) => {
                print!("{}{}", Colour::Green.paint(part), changeset.split);
            }
            Difference::Rem(part) => {
                print!("{}{}", Colour::Red.paint(part), changeset.split);
            }
        }
    }
    // Terminate the diff line.
    println!();
}
fn run_tests(
parser: &mut Parser,
test_entry: TestEntry,

View file

@ -1,6 +1,6 @@
use super::fixtures::{get_language, get_test_language, fixtures_dir};
use crate::generate;
use crate::test::{parse_tests, TestEntry};
use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry};
use crate::util;
use std::fs;
use tree_sitter::{LogType, Parser};
@ -13,6 +13,7 @@ const LANGUAGES: &'static [&'static str] = &[
"go",
"html",
"javascript",
"python",
];
lazy_static! {
@ -42,9 +43,10 @@ fn test_real_language_corpus_files() {
log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap());
}
let mut did_fail = false;
for language_name in LANGUAGES.iter().cloned() {
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
if !language_name.contains(filter.as_str()) {
if language_name != filter.as_str() {
continue;
}
}
@ -55,11 +57,15 @@ fn test_real_language_corpus_files() {
let corpus_dir = grammars_dir.join(language_name).join("corpus");
let test = parse_tests(&corpus_dir).unwrap();
parser.set_language(language).unwrap();
run_mutation_tests(&mut parser, test);
did_fail |= run_mutation_tests(&mut parser, test);
}
drop(parser);
drop(log_session);
if did_fail {
panic!("Corpus tests failed");
}
}
#[test]
@ -80,6 +86,7 @@ fn test_feature_corpus_files() {
log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap());
}
let mut did_fail = false;
for entry in fs::read_dir(&test_grammars_dir).unwrap() {
let entry = entry.unwrap();
if !entry.metadata().unwrap().is_dir() {
@ -89,7 +96,7 @@ fn test_feature_corpus_files() {
let language_name = language_name.to_str().unwrap();
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
if !language_name.contains(filter.as_str()) {
if language_name != filter.as_str() {
continue;
}
}
@ -123,15 +130,19 @@ fn test_feature_corpus_files() {
let language = get_test_language(language_name, c_code, &test_path);
let test = parse_tests(&corpus_path).unwrap();
parser.set_language(language).unwrap();
run_mutation_tests(&mut parser, test);
did_fail |= run_mutation_tests(&mut parser, test);
}
}
drop(parser);
drop(log_session);
if did_fail {
panic!("Corpus tests failed");
}
}
fn run_mutation_tests(parser: &mut Parser, test: TestEntry) {
fn run_mutation_tests(parser: &mut Parser, test: TestEntry) -> bool {
match test {
TestEntry::Example {
name,
@ -140,7 +151,7 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) {
} => {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
if !name.contains(filter.as_str()) {
return;
return false;
}
}
@ -150,12 +161,21 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) {
.parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None)
.unwrap();
let actual = tree.root_node().to_sexp();
assert_eq!(actual, output);
if actual != output {
print_diff_key();
print_diff(&actual, &output);
println!("");
true
} else {
false
}
}
TestEntry::Group { children, .. } => {
let mut result = false;
for child in children {
run_mutation_tests(parser, child);
result |= run_mutation_tests(parser, child);
}
result
}
}
}