Fix parser generation error messages

This commit is contained in:
Max Brunsfeld 2019-01-11 17:26:45 -08:00
parent 0236de7963
commit 6592fdd24c
16 changed files with 252 additions and 164 deletions

View file

@ -461,18 +461,20 @@ impl<'a> ParseTableBuilder<'a> {
)
.unwrap();
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
for (i, item) in conflicting_items.iter().enumerate() {
write!(&mut msg, " {}:", i + 1).unwrap();
let interpretions = conflicting_items.iter().enumerate().map(|(i, item)| {
let mut line = String::new();
write!(&mut line, " {}:", i + 1).unwrap();
for preceding_symbol in preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index as usize)
{
write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap();
write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
}
write!(
&mut msg,
&mut line,
" ({}",
&self.syntax_grammar.variables[item.variable_index as usize].name
)
@ -480,17 +482,17 @@ impl<'a> ParseTableBuilder<'a> {
for (j, step) in item.production.steps.iter().enumerate() {
if j as u32 == item.step_index {
write!(&mut msg, "").unwrap();
write!(&mut line, "").unwrap();
}
write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap();
write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
}
write!(&mut msg, ")").unwrap();
write!(&mut line, ")").unwrap();
if item.is_done() {
write!(
&mut msg,
" • {}",
&mut line,
" • {}",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
@ -498,16 +500,33 @@ impl<'a> ParseTableBuilder<'a> {
let precedence = item.precedence();
let associativity = item.associativity();
if precedence != 0 || associativity.is_some() {
write!(
&mut msg,
let prec_line = if let Some(associativity) = associativity {
Some(format!(
"(precedence: {}, associativity: {:?})",
precedence, associativity
)
.unwrap();
}
))
} else if precedence > 0 {
Some(format!("(precedence: {})", precedence))
} else {
None
};
write!(&mut msg, "\n").unwrap();
(line, prec_line)
}).collect::<Vec<_>>();
let max_interpretation_length = interpretions.iter().map(|i| i.0.chars().count()).max().unwrap();
for (line, prec_suffix) in interpretions {
msg += &line;
if let Some(prec_suffix) = prec_suffix {
for _ in line.chars().count()..max_interpretation_length {
msg.push(' ');
}
msg += " ";
msg += &prec_suffix;
}
msg.push('\n');
}
let mut resolution_count = 0;
@ -517,26 +536,41 @@ impl<'a> ParseTableBuilder<'a> {
.filter(|i| !i.is_done())
.cloned()
.collect::<Vec<_>>();
if shift_items.len() > 0 {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in",
resolution_count
)
.unwrap();
for (i, item) in shift_items.iter().enumerate() {
if i > 0 {
write!(&mut msg, " and").unwrap();
}
if actual_conflict.len() > 1 {
if shift_items.len() > 0 {
resolution_count += 1;
write!(
&mut msg,
" `{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
" {}: Specify a higher precedence in",
resolution_count
)
.unwrap();
for (i, item) in shift_items.iter().enumerate() {
if i > 0 {
write!(&mut msg, " and").unwrap();
}
write!(
&mut msg,
" `{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
write!(&mut msg, " than in the other rules.\n").unwrap();
}
for item in &conflicting_items {
if item.is_done() {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
resolution_count,
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
}
write!(&mut msg, " than in the other rules.\n").unwrap();
}
if considered_associativity {
@ -553,7 +587,7 @@ impl<'a> ParseTableBuilder<'a> {
}
write!(
&mut msg,
"{}",
"`{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
@ -561,19 +595,6 @@ impl<'a> ParseTableBuilder<'a> {
write!(&mut msg, "\n").unwrap();
}
for item in &conflicting_items {
if item.is_done() {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
resolution_count,
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
}
resolution_count += 1;
write!(
&mut msg,
@ -585,7 +606,7 @@ impl<'a> ParseTableBuilder<'a> {
if i > 0 {
write!(&mut msg, ", ").unwrap();
}
write!(&mut msg, "{}", self.symbol_name(symbol)).unwrap();
write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
}
write!(&mut msg, "\n").unwrap();

View file

@ -19,10 +19,14 @@ impl<'a> CoincidentTokenIndex<'a> {
};
for (i, state) in table.states.iter().enumerate() {
for symbol in state.terminal_entries.keys() {
for other_symbol in state.terminal_entries.keys() {
let index = result.index(symbol.index, other_symbol.index);
if result.entries[index].last().cloned() != Some(i) {
result.entries[index].push(i);
if symbol.is_terminal() {
for other_symbol in state.terminal_entries.keys() {
if other_symbol.is_terminal() {
let index = result.index(symbol.index, other_symbol.index);
if result.entries[index].last().cloned() != Some(i) {
result.entries[index].push(i);
}
}
}
}
}

View file

@ -402,11 +402,11 @@ impl<'a> PartialEq for ParseItem<'a> {
impl<'a> Ord for ParseItem<'a> {
fn cmp(&self, other: &Self) -> Ordering {
let o = self.variable_index.cmp(&other.variable_index);
let o = self.step_index.cmp(&other.step_index);
if o != Ordering::Equal {
return o;
}
let o = self.step_index.cmp(&other.step_index);
let o = self.variable_index.cmp(&other.variable_index);
if o != Ordering::Equal {
return o;
}

View file

@ -312,11 +312,13 @@ fn mark_fragile_tokens(
}
}
for (token, entry) in state.terminal_entries.iter_mut() {
for i in 0..n {
if token_conflict_map.does_overlap(i, token.index) {
if valid_tokens_mask[i] {
entry.reusable = false;
break;
if token.is_terminal() {
for i in 0..n {
if token_conflict_map.does_overlap(i, token.index) {
if valid_tokens_mask[i] {
entry.reusable = false;
break;
}
}
}
}

View file

@ -1,6 +1,9 @@
use super::ExtractedSyntaxGrammar;
use crate::error::Result;
use crate::generate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable};
use crate::error::{Error, Result};
use crate::generate::rules::Symbol;
use crate::generate::grammars::{
Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable,
};
use crate::generate::rules::{Alias, Associativity, Rule};
struct RuleFlattener {
@ -145,11 +148,38 @@ fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
})
}
/// Returns true if `symbol` appears as a step in any production of any
/// variable in the flattened grammar.
///
/// Used by `flatten_grammar` to decide whether an empty-matching rule is
/// actually referenced (the start rule is allowed to match the empty string
/// only when nothing else refers to it).
///
/// Takes `&[SyntaxVariable]` rather than `&Vec<SyntaxVariable>` — the more
/// general slice type; existing `&variables` call sites still work via
/// deref coercion.
fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
    variables.iter().any(|variable| {
        variable
            .productions
            .iter()
            .any(|production| production.steps.iter().any(|step| step.symbol == symbol))
    })
}
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
let mut variables = Vec::new();
for variable in grammar.variables {
variables.push(flatten_variable(variable)?);
}
for (i, variable) in variables.iter().enumerate() {
for production in &variable.productions {
if production.steps.is_empty() && symbol_is_used(&variables, Symbol::non_terminal(i)) {
return Err(Error(format!(
"The rule `{}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
",
variable.name
)));
}
}
}
Ok(SyntaxGrammar {
extra_tokens: grammar.extra_tokens,
expected_conflicts: grammar.expected_conflicts,
@ -228,48 +258,55 @@ mod tests {
#[test]
fn test_flatten_grammar_with_maximum_dynamic_precedence() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_dynamic(101, Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_dynamic(102, Rule::seq(vec![
Rule::non_terminal(3),
Rule::non_terminal(4)
])),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
])),
Rule::non_terminal(7),
])
}).unwrap();
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_dynamic(
101,
Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_dynamic(
102,
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
]),
),
Rule::non_terminal(7),
]),
})
.unwrap();
assert_eq!(result.productions, vec![
Production {
dynamic_precedence: 102,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(3)),
ProductionStep::new(Symbol::non_terminal(4)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
Production {
dynamic_precedence: 101,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(5)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
]);
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 102,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(3)),
ProductionStep::new(Symbol::non_terminal(4)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
Production {
dynamic_precedence: 101,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(5)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
]
);
}
#[test]
@ -277,37 +314,40 @@ mod tests {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(101, Rule::seq(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
])),
}).unwrap();
rule: Rule::prec_left(
101,
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
),
})
.unwrap();
assert_eq!(result.productions, vec![
Production {
assert_eq!(
result.productions,
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(2)).with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(1))
.with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(101, Some(Associativity::Left)),
]
}
]);
}]
);
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(101, Rule::seq(vec![
Rule::non_terminal(1),
])),
}).unwrap();
rule: Rule::prec_left(101, Rule::seq(vec![Rule::non_terminal(1)])),
})
.unwrap();
assert_eq!(result.productions, vec![
Production {
assert_eq!(
result.productions,
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)),
]
}
]);
steps: vec![ProductionStep::new(Symbol::non_terminal(1))
.with_prec(101, Some(Associativity::Left)),]
}]
);
}
}

View file

@ -8,7 +8,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
return Err(Error(
"Grammar's start rule must be visible".to_string(),
"A grammar's start rule must be visible.".to_string(),
));
}

View file

@ -191,13 +191,11 @@ impl Generator {
"#define EXTERNAL_TOKEN_COUNT {}",
self.syntax_grammar.external_tokens.len()
);
if self.parse_table.max_aliased_production_length > 0 {
add_line!(
self,
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
self.parse_table.max_aliased_production_length
);
}
add_line!(
self,
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
self.parse_table.max_aliased_production_length
);
add_line!(self, "");
}

View file

@ -3,7 +3,7 @@ use crate::generate;
use crate::loader::Loader;
use crate::test::{parse_tests, TestEntry};
use std::fs;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use tree_sitter::{Language, Parser};
lazy_static! {
@ -19,6 +19,7 @@ lazy_static! {
static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
static ref SCRATCH_DIR: PathBuf = ROOT_DIR.join("target").join("scratch");
static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
static ref EXEC_PATH: PathBuf = std::env::current_exe().unwrap();
}
#[test]
@ -38,27 +39,42 @@ fn test_real_language_corpus_files() {
fn test_feature_corpus_files() {
fs::create_dir_all(SCRATCH_DIR.as_path()).unwrap();
let filter = std::env::var("TREE_SITTER_TEST_FILTER").ok();
let mut loader = Loader::new(SCRATCH_DIR.clone());
let mut parser = Parser::new();
let test_grammars_dir = FIXTURES_DIR.join("test_grammars");
for entry in fs::read_dir(&test_grammars_dir).unwrap() {
let entry = entry.unwrap();
if !entry.metadata().unwrap().is_dir() {
continue;
}
let test_name = entry.file_name();
let test_name = test_name.to_str().unwrap();
eprintln!("test name: {}", test_name);
if let Some(filter) = filter.as_ref() {
if !test_name.contains(filter.as_str()) {
continue;
}
}
eprintln!("test: {:?}", test_name);
let test_path = entry.path();
let grammar_path = test_path.join("grammar.json");
let corpus_path = test_path.join("corpus.txt");
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = fs::read_to_string(grammar_path).unwrap();
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
if error_message_path.exists() {
continue;
let expected_message = fs::read_to_string(&error_message_path).unwrap();
if let Err(e) = generate_result {
assert_eq!(e.0, fs::read_to_string(&error_message_path).unwrap());
if e.0 != expected_message {
panic!(
"Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n",
expected_message, e.0
);
}
} else {
panic!(
"Expected error message but got none for test grammar '{}'",
@ -66,9 +82,15 @@ fn test_feature_corpus_files() {
);
}
} else {
let corpus_path = test_path.join("corpus.txt");
let c_code = generate_result.unwrap();
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", test_name));
fs::write(&parser_c_path, c_code).unwrap();
if !fs::read_to_string(&parser_c_path)
.map(|content| content == c_code)
.unwrap_or(false)
{
fs::write(&parser_c_path, c_code).unwrap();
}
let scanner_path = test_path.join("scanner.c");
let scanner_path = if scanner_path.exists() {
Some(scanner_path)
@ -78,6 +100,7 @@ fn test_feature_corpus_files() {
let language = loader
.load_language_from_sources(test_name, &HEADER_DIR, &parser_c_path, &scanner_path)
.unwrap();
let test = parse_tests(&corpus_path).unwrap();
}
}