Fix parser generation error messages

This commit is contained in:
Max Brunsfeld 2019-01-11 17:26:45 -08:00
parent 0236de7963
commit 6592fdd24c
16 changed files with 252 additions and 164 deletions

View file

@ -461,18 +461,20 @@ impl<'a> ParseTableBuilder<'a> {
)
.unwrap();
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
for (i, item) in conflicting_items.iter().enumerate() {
write!(&mut msg, " {}:", i + 1).unwrap();
let interpretions = conflicting_items.iter().enumerate().map(|(i, item)| {
let mut line = String::new();
write!(&mut line, " {}:", i + 1).unwrap();
for preceding_symbol in preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index as usize)
{
write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap();
write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
}
write!(
&mut msg,
&mut line,
" ({}",
&self.syntax_grammar.variables[item.variable_index as usize].name
)
@ -480,17 +482,17 @@ impl<'a> ParseTableBuilder<'a> {
for (j, step) in item.production.steps.iter().enumerate() {
if j as u32 == item.step_index {
write!(&mut msg, "").unwrap();
write!(&mut line, "").unwrap();
}
write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap();
write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
}
write!(&mut msg, ")").unwrap();
write!(&mut line, ")").unwrap();
if item.is_done() {
write!(
&mut msg,
" • {}",
&mut line,
" • {}",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
@ -498,16 +500,33 @@ impl<'a> ParseTableBuilder<'a> {
let precedence = item.precedence();
let associativity = item.associativity();
if precedence != 0 || associativity.is_some() {
write!(
&mut msg,
let prec_line = if let Some(associativity) = associativity {
Some(format!(
"(precedence: {}, associativity: {:?})",
precedence, associativity
)
.unwrap();
}
))
} else if precedence > 0 {
Some(format!("(precedence: {})", precedence))
} else {
None
};
write!(&mut msg, "\n").unwrap();
(line, prec_line)
}).collect::<Vec<_>>();
let max_interpretation_length = interpretions.iter().map(|i| i.0.chars().count()).max().unwrap();
for (line, prec_suffix) in interpretions {
msg += &line;
if let Some(prec_suffix) = prec_suffix {
for _ in line.chars().count()..max_interpretation_length {
msg.push(' ');
}
msg += " ";
msg += &prec_suffix;
}
msg.push('\n');
}
let mut resolution_count = 0;
@ -517,26 +536,41 @@ impl<'a> ParseTableBuilder<'a> {
.filter(|i| !i.is_done())
.cloned()
.collect::<Vec<_>>();
if shift_items.len() > 0 {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in",
resolution_count
)
.unwrap();
for (i, item) in shift_items.iter().enumerate() {
if i > 0 {
write!(&mut msg, " and").unwrap();
}
if actual_conflict.len() > 1 {
if shift_items.len() > 0 {
resolution_count += 1;
write!(
&mut msg,
" `{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
" {}: Specify a higher precedence in",
resolution_count
)
.unwrap();
for (i, item) in shift_items.iter().enumerate() {
if i > 0 {
write!(&mut msg, " and").unwrap();
}
write!(
&mut msg,
" `{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
write!(&mut msg, " than in the other rules.\n").unwrap();
}
for item in &conflicting_items {
if item.is_done() {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
resolution_count,
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
}
write!(&mut msg, " than in the other rules.\n").unwrap();
}
if considered_associativity {
@ -553,7 +587,7 @@ impl<'a> ParseTableBuilder<'a> {
}
write!(
&mut msg,
"{}",
"`{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
@ -561,19 +595,6 @@ impl<'a> ParseTableBuilder<'a> {
write!(&mut msg, "\n").unwrap();
}
for item in &conflicting_items {
if item.is_done() {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
resolution_count,
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
}
resolution_count += 1;
write!(
&mut msg,
@ -585,7 +606,7 @@ impl<'a> ParseTableBuilder<'a> {
if i > 0 {
write!(&mut msg, ", ").unwrap();
}
write!(&mut msg, "{}", self.symbol_name(symbol)).unwrap();
write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
}
write!(&mut msg, "\n").unwrap();

View file

@ -19,10 +19,14 @@ impl<'a> CoincidentTokenIndex<'a> {
};
for (i, state) in table.states.iter().enumerate() {
for symbol in state.terminal_entries.keys() {
for other_symbol in state.terminal_entries.keys() {
let index = result.index(symbol.index, other_symbol.index);
if result.entries[index].last().cloned() != Some(i) {
result.entries[index].push(i);
if symbol.is_terminal() {
for other_symbol in state.terminal_entries.keys() {
if other_symbol.is_terminal() {
let index = result.index(symbol.index, other_symbol.index);
if result.entries[index].last().cloned() != Some(i) {
result.entries[index].push(i);
}
}
}
}
}

View file

@ -402,11 +402,11 @@ impl<'a> PartialEq for ParseItem<'a> {
impl<'a> Ord for ParseItem<'a> {
fn cmp(&self, other: &Self) -> Ordering {
let o = self.variable_index.cmp(&other.variable_index);
let o = self.step_index.cmp(&other.step_index);
if o != Ordering::Equal {
return o;
}
let o = self.step_index.cmp(&other.step_index);
let o = self.variable_index.cmp(&other.variable_index);
if o != Ordering::Equal {
return o;
}

View file

@ -312,11 +312,13 @@ fn mark_fragile_tokens(
}
}
for (token, entry) in state.terminal_entries.iter_mut() {
for i in 0..n {
if token_conflict_map.does_overlap(i, token.index) {
if valid_tokens_mask[i] {
entry.reusable = false;
break;
if token.is_terminal() {
for i in 0..n {
if token_conflict_map.does_overlap(i, token.index) {
if valid_tokens_mask[i] {
entry.reusable = false;
break;
}
}
}
}

View file

@ -1,6 +1,9 @@
use super::ExtractedSyntaxGrammar;
use crate::error::Result;
use crate::generate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable};
use crate::error::{Error, Result};
use crate::generate::rules::Symbol;
use crate::generate::grammars::{
Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable,
};
use crate::generate::rules::{Alias, Associativity, Rule};
struct RuleFlattener {
@ -145,11 +148,38 @@ fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
})
}
/// Returns true if `symbol` appears as a step in any production of any
/// variable in the flattened grammar.
///
/// Used by `flatten_grammar` to decide whether an empty-matching rule is
/// actually referenced (the start rule is allowed to match the empty string
/// only when nothing else refers to it).
///
/// Takes `&[SyntaxVariable]` rather than `&Vec<SyntaxVariable>` — the more
/// general slice type; existing `&variables` call sites still work via
/// deref coercion.
fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
    variables.iter().any(|variable| {
        variable
            .productions
            .iter()
            .any(|production| production.steps.iter().any(|step| step.symbol == symbol))
    })
}
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
let mut variables = Vec::new();
for variable in grammar.variables {
variables.push(flatten_variable(variable)?);
}
for (i, variable) in variables.iter().enumerate() {
for production in &variable.productions {
if production.steps.is_empty() && symbol_is_used(&variables, Symbol::non_terminal(i)) {
return Err(Error(format!(
"The rule `{}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
",
variable.name
)));
}
}
}
Ok(SyntaxGrammar {
extra_tokens: grammar.extra_tokens,
expected_conflicts: grammar.expected_conflicts,
@ -228,48 +258,55 @@ mod tests {
#[test]
fn test_flatten_grammar_with_maximum_dynamic_precedence() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_dynamic(101, Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_dynamic(102, Rule::seq(vec![
Rule::non_terminal(3),
Rule::non_terminal(4)
])),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
])),
Rule::non_terminal(7),
])
}).unwrap();
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_dynamic(
101,
Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_dynamic(
102,
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
]),
),
Rule::non_terminal(7),
]),
})
.unwrap();
assert_eq!(result.productions, vec![
Production {
dynamic_precedence: 102,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(3)),
ProductionStep::new(Symbol::non_terminal(4)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
Production {
dynamic_precedence: 101,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(5)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
]);
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 102,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(3)),
ProductionStep::new(Symbol::non_terminal(4)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
Production {
dynamic_precedence: 101,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(5)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
]
);
}
#[test]
@ -277,37 +314,40 @@ mod tests {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(101, Rule::seq(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
])),
}).unwrap();
rule: Rule::prec_left(
101,
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
),
})
.unwrap();
assert_eq!(result.productions, vec![
Production {
assert_eq!(
result.productions,
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(2)).with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(1))
.with_prec(101, Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(101, Some(Associativity::Left)),
]
}
]);
}]
);
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(101, Rule::seq(vec![
Rule::non_terminal(1),
])),
}).unwrap();
rule: Rule::prec_left(101, Rule::seq(vec![Rule::non_terminal(1)])),
})
.unwrap();
assert_eq!(result.productions, vec![
Production {
assert_eq!(
result.productions,
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)),
]
}
]);
steps: vec![ProductionStep::new(Symbol::non_terminal(1))
.with_prec(101, Some(Associativity::Left)),]
}]
);
}
}

View file

@ -8,7 +8,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
return Err(Error(
"Grammar's start rule must be visible".to_string(),
"A grammar's start rule must be visible.".to_string(),
));
}

View file

@ -191,13 +191,11 @@ impl Generator {
"#define EXTERNAL_TOKEN_COUNT {}",
self.syntax_grammar.external_tokens.len()
);
if self.parse_table.max_aliased_production_length > 0 {
add_line!(
self,
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
self.parse_table.max_aliased_production_length
);
}
add_line!(
self,
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
self.parse_table.max_aliased_production_length
);
add_line!(self, "");
}

View file

@ -3,7 +3,7 @@ use crate::generate;
use crate::loader::Loader;
use crate::test::{parse_tests, TestEntry};
use std::fs;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use tree_sitter::{Language, Parser};
lazy_static! {
@ -19,6 +19,7 @@ lazy_static! {
static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
static ref SCRATCH_DIR: PathBuf = ROOT_DIR.join("target").join("scratch");
static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
static ref EXEC_PATH: PathBuf = std::env::current_exe().unwrap();
}
#[test]
@ -38,27 +39,42 @@ fn test_real_language_corpus_files() {
fn test_feature_corpus_files() {
fs::create_dir_all(SCRATCH_DIR.as_path()).unwrap();
let filter = std::env::var("TREE_SITTER_TEST_FILTER").ok();
let mut loader = Loader::new(SCRATCH_DIR.clone());
let mut parser = Parser::new();
let test_grammars_dir = FIXTURES_DIR.join("test_grammars");
for entry in fs::read_dir(&test_grammars_dir).unwrap() {
let entry = entry.unwrap();
if !entry.metadata().unwrap().is_dir() {
continue;
}
let test_name = entry.file_name();
let test_name = test_name.to_str().unwrap();
eprintln!("test name: {}", test_name);
if let Some(filter) = filter.as_ref() {
if !test_name.contains(filter.as_str()) {
continue;
}
}
eprintln!("test: {:?}", test_name);
let test_path = entry.path();
let grammar_path = test_path.join("grammar.json");
let corpus_path = test_path.join("corpus.txt");
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = fs::read_to_string(grammar_path).unwrap();
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
if error_message_path.exists() {
continue;
let expected_message = fs::read_to_string(&error_message_path).unwrap();
if let Err(e) = generate_result {
assert_eq!(e.0, fs::read_to_string(&error_message_path).unwrap());
if e.0 != expected_message {
panic!(
"Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n",
expected_message, e.0
);
}
} else {
panic!(
"Expected error message but got none for test grammar '{}'",
@ -66,9 +82,15 @@ fn test_feature_corpus_files() {
);
}
} else {
let corpus_path = test_path.join("corpus.txt");
let c_code = generate_result.unwrap();
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", test_name));
fs::write(&parser_c_path, c_code).unwrap();
if !fs::read_to_string(&parser_c_path)
.map(|content| content == c_code)
.unwrap_or(false)
{
fs::write(&parser_c_path, c_code).unwrap();
}
let scanner_path = test_path.join("scanner.c");
let scanner_path = if scanner_path.exists() {
Some(scanner_path)
@ -78,6 +100,7 @@ fn test_feature_corpus_files() {
let language = loader
.load_language_from_sources(test_name, &HEADER_DIR, &parser_c_path, &scanner_path)
.unwrap();
let test = parse_tests(&corpus_path).unwrap();
}
}