Fix bugs in handling tokens that overlap with separators

This commit is contained in:
Max Brunsfeld 2019-01-15 12:13:42 -08:00
parent b799b46f79
commit d8ab36b2a5
6 changed files with 98 additions and 56 deletions

View file

@ -191,6 +191,7 @@ impl<'a> LexTableBuilder<'a> {
);
let transitions = self.cursor.transitions();
let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep);
info!("lex state: {}, transitions: {:?}", state_id, transitions);
// If EOF is a valid lookahead token, add a transition predicated on the null
@ -214,12 +215,23 @@ impl<'a> LexTableBuilder<'a> {
is_separator,
} in transitions
{
if let Some((_, completed_precedence)) = completion {
if precedence < completed_precedence
|| (precedence == completed_precedence && is_separator)
{
if let Some((completed_id, completed_precedence)) = completion {
if precedence < completed_precedence {
continue;
}
if precedence == completed_precedence {
if is_separator {
continue;
}
if has_sep && self.lexical_grammar
.variable_indices_for_nfa_states(&states)
.position(|i| i == completed_id)
.is_none()
{
continue;
}
}
}
let (next_state_id, _) = self.add_state(states, eof_valid && is_separator);
let next_state = if next_state_id == state_id {

View file

@ -58,7 +58,7 @@ impl<'a> TokenConflictMap<'a> {
pub fn does_conflict(&self, i: usize, j: usize) -> bool {
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
entry.does_match_valid_continuation || entry.does_match_separators
entry.does_match_valid_continuation || entry.does_match_separators || entry.matches_same_string
}
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
@ -176,7 +176,7 @@ fn compute_conflict_status(
while let Some(state_set) = state_set_queue.pop() {
// Don't pursue states where there's no potential for conflict.
if variable_ids_for_states(&state_set, grammar).count() > 1 {
if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 {
cursor.reset(state_set);
} else {
continue;
@ -226,7 +226,7 @@ fn compute_conflict_status(
if let Some((completed_id, completed_precedence)) = completion {
let mut other_id = None;
let mut successor_contains_completed_id = false;
for variable_id in variable_ids_for_states(&states, grammar) {
for variable_id in grammar.variable_indices_for_nfa_states(&states) {
if variable_id == completed_id {
successor_contains_completed_id = true;
break;
@ -269,22 +269,6 @@ fn compute_conflict_status(
result
}
/// Maps each NFA state id in `state_ids` to the index of the lexical
/// variable that owns it, collapsing consecutive runs of the same index
/// into a single emitted value.
fn variable_ids_for_states<'a>(
    state_ids: &'a Vec<u32>,
    grammar: &'a LexicalGrammar,
) -> impl Iterator<Item = usize> + 'a {
    let mut last_emitted = None;
    state_ids.iter().filter_map(move |id| {
        let index = grammar.variable_index_for_nfa_state(*id);
        // Skip this state if it belongs to the same variable as the
        // previously emitted one.
        if last_emitted == Some(index) {
            None
        } else {
            last_emitted = Some(index);
            last_emitted
        }
    })
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -175,8 +175,27 @@ impl Variable {
}
impl LexicalGrammar {
/// Returns an iterator over the indices of the lexical variables that own
/// the given NFA states, with consecutive duplicate indices deduplicated.
pub fn variable_indices_for_nfa_states<'a>(
    &'a self,
    state_ids: &'a Vec<u32>,
) -> impl Iterator<Item = usize> + 'a {
    let mut previous_index = None;
    state_ids.iter().filter_map(move |state_id| {
        let index = self.variable_index_for_nfa_state(*state_id);
        match previous_index {
            // Same variable as the last state we yielded — drop it.
            Some(prev) if prev == index => None,
            _ => {
                previous_index = Some(index);
                Some(index)
            }
        }
    })
}
pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
self.variables.iter().position(|v| v.start_state >= state_id).unwrap()
self.variables
.iter()
.position(|v| v.start_state >= state_id)
.unwrap()
}
}

View file

@ -374,7 +374,7 @@ impl<'a> NfaCursor<'a> {
}
let intersection_transition = NfaTransition {
characters: intersection,
is_separator: result[i].is_separator || is_sep,
is_separator: result[i].is_separator && is_sep,
precedence: max(result[i].precedence, prec),
states: intersection_states,
};

View file

@ -75,30 +75,10 @@ pub fn run_tests_at_path(
println!("{} failures:", failures.len())
}
println!(
"\n{} / {}",
Colour::Green.paint("expected"),
Colour::Red.paint("actual")
);
print_diff_key();
for (i, (name, actual, expected)) in failures.iter().enumerate() {
println!("\n {}. {}:", i + 1, name);
let changeset = Changeset::new(actual, expected, " ");
print!(" ");
for diff in &changeset.diffs {
match diff {
Difference::Same(part) => {
print!("{}{}", part, changeset.split);
}
Difference::Add(part) => {
print!("{}{}", Colour::Green.paint(part), changeset.split);
}
Difference::Rem(part) => {
print!("{}{}", Colour::Red.paint(part), changeset.split);
}
}
}
println!("");
print_diff(actual, expected);
}
}
@ -106,6 +86,33 @@ pub fn run_tests_at_path(
Ok(())
}
/// Prints the color legend for diff output: green marks expected text,
/// red marks actual text.
pub fn print_diff_key() {
    let expected = Colour::Green.paint("expected");
    let actual = Colour::Red.paint("actual");
    println!("\n{} / {}", expected, actual);
}
/// Prints a word-level diff between `actual` and `expected`, coloring
/// inserted words green and removed words red.
///
/// Takes `&str` rather than `&String` so any string-like borrow works;
/// existing `&String` call sites still compile via deref coercion.
pub fn print_diff(actual: &str, expected: &str) {
    let changeset = Changeset::new(actual, expected, " ");
    print!(" ");
    for diff in &changeset.diffs {
        match diff {
            Difference::Same(part) => {
                print!("{}{}", part, changeset.split);
            }
            Difference::Add(part) => {
                print!("{}{}", Colour::Green.paint(part), changeset.split);
            }
            Difference::Rem(part) => {
                print!("{}{}", Colour::Red.paint(part), changeset.split);
            }
        }
    }
    // Terminate the diff line.
    println!();
}
fn run_tests(
parser: &mut Parser,
test_entry: TestEntry,

View file

@ -1,6 +1,6 @@
use super::fixtures::{get_language, get_test_language, fixtures_dir};
use crate::generate;
use crate::test::{parse_tests, TestEntry};
use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry};
use crate::util;
use std::fs;
use tree_sitter::{LogType, Parser};
@ -13,6 +13,7 @@ const LANGUAGES: &'static [&'static str] = &[
"go",
"html",
"javascript",
"python",
];
lazy_static! {
@ -42,9 +43,10 @@ fn test_real_language_corpus_files() {
log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap());
}
let mut did_fail = false;
for language_name in LANGUAGES.iter().cloned() {
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
if !language_name.contains(filter.as_str()) {
if language_name != filter.as_str() {
continue;
}
}
@ -55,11 +57,15 @@ fn test_real_language_corpus_files() {
let corpus_dir = grammars_dir.join(language_name).join("corpus");
let test = parse_tests(&corpus_dir).unwrap();
parser.set_language(language).unwrap();
run_mutation_tests(&mut parser, test);
did_fail |= run_mutation_tests(&mut parser, test);
}
drop(parser);
drop(log_session);
if did_fail {
panic!("Corpus tests failed");
}
}
#[test]
@ -80,6 +86,7 @@ fn test_feature_corpus_files() {
log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap());
}
let mut did_fail = false;
for entry in fs::read_dir(&test_grammars_dir).unwrap() {
let entry = entry.unwrap();
if !entry.metadata().unwrap().is_dir() {
@ -89,7 +96,7 @@ fn test_feature_corpus_files() {
let language_name = language_name.to_str().unwrap();
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
if !language_name.contains(filter.as_str()) {
if language_name != filter.as_str() {
continue;
}
}
@ -123,15 +130,19 @@ fn test_feature_corpus_files() {
let language = get_test_language(language_name, c_code, &test_path);
let test = parse_tests(&corpus_path).unwrap();
parser.set_language(language).unwrap();
run_mutation_tests(&mut parser, test);
did_fail |= run_mutation_tests(&mut parser, test);
}
}
drop(parser);
drop(log_session);
if did_fail {
panic!("Corpus tests failed");
}
}
fn run_mutation_tests(parser: &mut Parser, test: TestEntry) {
fn run_mutation_tests(parser: &mut Parser, test: TestEntry) -> bool {
match test {
TestEntry::Example {
name,
@ -140,7 +151,7 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) {
} => {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
if !name.contains(filter.as_str()) {
return;
return false;
}
}
@ -150,12 +161,21 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) {
.parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None)
.unwrap();
let actual = tree.root_node().to_sexp();
assert_eq!(actual, output);
if actual != output {
print_diff_key();
print_diff(&actual, &output);
println!("");
true
} else {
false
}
}
TestEntry::Group { children, .. } => {
let mut result = false;
for child in children {
run_mutation_tests(parser, child);
result |= run_mutation_tests(parser, child);
}
result
}
}
}