Fix parser generation error messages
This commit is contained in:
parent
0236de7963
commit
6592fdd24c
16 changed files with 252 additions and 164 deletions
|
|
@ -461,18 +461,20 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
)
|
||||
.unwrap();
|
||||
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
|
||||
for (i, item) in conflicting_items.iter().enumerate() {
|
||||
write!(&mut msg, " {}:", i + 1).unwrap();
|
||||
|
||||
let interpretions = conflicting_items.iter().enumerate().map(|(i, item)| {
|
||||
let mut line = String::new();
|
||||
write!(&mut line, " {}:", i + 1).unwrap();
|
||||
|
||||
for preceding_symbol in preceding_symbols
|
||||
.iter()
|
||||
.take(preceding_symbols.len() - item.step_index as usize)
|
||||
{
|
||||
write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap();
|
||||
write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
|
||||
}
|
||||
|
||||
write!(
|
||||
&mut msg,
|
||||
&mut line,
|
||||
" ({}",
|
||||
&self.syntax_grammar.variables[item.variable_index as usize].name
|
||||
)
|
||||
|
|
@ -480,17 +482,17 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
for (j, step) in item.production.steps.iter().enumerate() {
|
||||
if j as u32 == item.step_index {
|
||||
write!(&mut msg, " •").unwrap();
|
||||
write!(&mut line, " •").unwrap();
|
||||
}
|
||||
write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap();
|
||||
write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
|
||||
}
|
||||
|
||||
write!(&mut msg, ")").unwrap();
|
||||
write!(&mut line, ")").unwrap();
|
||||
|
||||
if item.is_done() {
|
||||
write!(
|
||||
&mut msg,
|
||||
" • {}",
|
||||
&mut line,
|
||||
" • {} …",
|
||||
self.symbol_name(&conflicting_lookahead)
|
||||
)
|
||||
.unwrap();
|
||||
|
|
@ -498,16 +500,33 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
let precedence = item.precedence();
|
||||
let associativity = item.associativity();
|
||||
if precedence != 0 || associativity.is_some() {
|
||||
write!(
|
||||
&mut msg,
|
||||
|
||||
let prec_line = if let Some(associativity) = associativity {
|
||||
Some(format!(
|
||||
"(precedence: {}, associativity: {:?})",
|
||||
precedence, associativity
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
))
|
||||
} else if precedence > 0 {
|
||||
Some(format!("(precedence: {})", precedence))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
write!(&mut msg, "\n").unwrap();
|
||||
(line, prec_line)
|
||||
}).collect::<Vec<_>>();
|
||||
|
||||
let max_interpretation_length = interpretions.iter().map(|i| i.0.chars().count()).max().unwrap();
|
||||
|
||||
for (line, prec_suffix) in interpretions {
|
||||
msg += &line;
|
||||
if let Some(prec_suffix) = prec_suffix {
|
||||
for _ in line.chars().count()..max_interpretation_length {
|
||||
msg.push(' ');
|
||||
}
|
||||
msg += " ";
|
||||
msg += &prec_suffix;
|
||||
}
|
||||
msg.push('\n');
|
||||
}
|
||||
|
||||
let mut resolution_count = 0;
|
||||
|
|
@ -517,26 +536,41 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.filter(|i| !i.is_done())
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
if shift_items.len() > 0 {
|
||||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
" {}: Specify a higher precedence in",
|
||||
resolution_count
|
||||
)
|
||||
.unwrap();
|
||||
for (i, item) in shift_items.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(&mut msg, " and").unwrap();
|
||||
}
|
||||
if actual_conflict.len() > 1 {
|
||||
if shift_items.len() > 0 {
|
||||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
" `{}`",
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
" {}: Specify a higher precedence in",
|
||||
resolution_count
|
||||
)
|
||||
.unwrap();
|
||||
for (i, item) in shift_items.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(&mut msg, " and").unwrap();
|
||||
}
|
||||
write!(
|
||||
&mut msg,
|
||||
" `{}`",
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
write!(&mut msg, " than in the other rules.\n").unwrap();
|
||||
}
|
||||
|
||||
for item in &conflicting_items {
|
||||
if item.is_done() {
|
||||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
|
||||
resolution_count,
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
write!(&mut msg, " than in the other rules.\n").unwrap();
|
||||
}
|
||||
|
||||
if considered_associativity {
|
||||
|
|
@ -553,7 +587,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
write!(
|
||||
&mut msg,
|
||||
"{}",
|
||||
"`{}`",
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
)
|
||||
.unwrap();
|
||||
|
|
@ -561,19 +595,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
write!(&mut msg, "\n").unwrap();
|
||||
}
|
||||
|
||||
for item in &conflicting_items {
|
||||
if item.is_done() {
|
||||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
|
||||
resolution_count,
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
|
|
@ -585,7 +606,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
if i > 0 {
|
||||
write!(&mut msg, ", ").unwrap();
|
||||
}
|
||||
write!(&mut msg, "{}", self.symbol_name(symbol)).unwrap();
|
||||
write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
|
||||
}
|
||||
write!(&mut msg, "\n").unwrap();
|
||||
|
||||
|
|
|
|||
|
|
@ -19,10 +19,14 @@ impl<'a> CoincidentTokenIndex<'a> {
|
|||
};
|
||||
for (i, state) in table.states.iter().enumerate() {
|
||||
for symbol in state.terminal_entries.keys() {
|
||||
for other_symbol in state.terminal_entries.keys() {
|
||||
let index = result.index(symbol.index, other_symbol.index);
|
||||
if result.entries[index].last().cloned() != Some(i) {
|
||||
result.entries[index].push(i);
|
||||
if symbol.is_terminal() {
|
||||
for other_symbol in state.terminal_entries.keys() {
|
||||
if other_symbol.is_terminal() {
|
||||
let index = result.index(symbol.index, other_symbol.index);
|
||||
if result.entries[index].last().cloned() != Some(i) {
|
||||
result.entries[index].push(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -402,11 +402,11 @@ impl<'a> PartialEq for ParseItem<'a> {
|
|||
|
||||
impl<'a> Ord for ParseItem<'a> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
let o = self.variable_index.cmp(&other.variable_index);
|
||||
let o = self.step_index.cmp(&other.step_index);
|
||||
if o != Ordering::Equal {
|
||||
return o;
|
||||
}
|
||||
let o = self.step_index.cmp(&other.step_index);
|
||||
let o = self.variable_index.cmp(&other.variable_index);
|
||||
if o != Ordering::Equal {
|
||||
return o;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -312,11 +312,13 @@ fn mark_fragile_tokens(
|
|||
}
|
||||
}
|
||||
for (token, entry) in state.terminal_entries.iter_mut() {
|
||||
for i in 0..n {
|
||||
if token_conflict_map.does_overlap(i, token.index) {
|
||||
if valid_tokens_mask[i] {
|
||||
entry.reusable = false;
|
||||
break;
|
||||
if token.is_terminal() {
|
||||
for i in 0..n {
|
||||
if token_conflict_map.does_overlap(i, token.index) {
|
||||
if valid_tokens_mask[i] {
|
||||
entry.reusable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
use super::ExtractedSyntaxGrammar;
|
||||
use crate::error::Result;
|
||||
use crate::generate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::generate::rules::Symbol;
|
||||
use crate::generate::grammars::{
|
||||
Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable,
|
||||
};
|
||||
use crate::generate::rules::{Alias, Associativity, Rule};
|
||||
|
||||
struct RuleFlattener {
|
||||
|
|
@ -145,11 +148,38 @@ fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
|
|||
})
|
||||
}
|
||||
|
||||
/// Reports whether `symbol` appears as a step in any production of any of
/// the given `variables`.
///
/// Walks every variable, every production of that variable, and every step
/// of that production, comparing each step's `symbol` field against
/// `symbol` with `==`. Returns `true` at the first match and `false` when
/// no step matches (including when `variables` is empty).
fn symbol_is_used(variables: &Vec<SyntaxVariable>, symbol: Symbol) -> bool {
|
||||
for variable in variables {
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if step.symbol == symbol {
|
||||
// First reference found; no need to scan the remaining steps.
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Every step of every production was checked without a match.
false
|
||||
}
|
||||
|
||||
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
|
||||
let mut variables = Vec::new();
|
||||
for variable in grammar.variables {
|
||||
variables.push(flatten_variable(variable)?);
|
||||
}
|
||||
for (i, variable) in variables.iter().enumerate() {
|
||||
for production in &variable.productions {
|
||||
if production.steps.is_empty() && symbol_is_used(&variables, Symbol::non_terminal(i)) {
|
||||
return Err(Error(format!(
|
||||
"The rule `{}` matches the empty string.
|
||||
|
||||
Tree-sitter does not support syntactic rules that match the empty string
|
||||
unless they are used only as the grammar's start rule.
|
||||
",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(SyntaxGrammar {
|
||||
extra_tokens: grammar.extra_tokens,
|
||||
expected_conflicts: grammar.expected_conflicts,
|
||||
|
|
@ -228,48 +258,55 @@ mod tests {
|
|||
#[test]
|
||||
fn test_flatten_grammar_with_maximum_dynamic_precedence() {
|
||||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::prec_dynamic(101, Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::prec_dynamic(102, Rule::seq(vec![
|
||||
Rule::non_terminal(3),
|
||||
Rule::non_terminal(4)
|
||||
])),
|
||||
Rule::non_terminal(5),
|
||||
]),
|
||||
Rule::non_terminal(6),
|
||||
])),
|
||||
Rule::non_terminal(7),
|
||||
])
|
||||
}).unwrap();
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::prec_dynamic(
|
||||
101,
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::prec_dynamic(
|
||||
102,
|
||||
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
|
||||
),
|
||||
Rule::non_terminal(5),
|
||||
]),
|
||||
Rule::non_terminal(6),
|
||||
]),
|
||||
),
|
||||
Rule::non_terminal(7),
|
||||
]),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.productions, vec![
|
||||
Production {
|
||||
dynamic_precedence: 102,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
ProductionStep::new(Symbol::non_terminal(2)),
|
||||
ProductionStep::new(Symbol::non_terminal(3)),
|
||||
ProductionStep::new(Symbol::non_terminal(4)),
|
||||
ProductionStep::new(Symbol::non_terminal(6)),
|
||||
ProductionStep::new(Symbol::non_terminal(7)),
|
||||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 101,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
ProductionStep::new(Symbol::non_terminal(2)),
|
||||
ProductionStep::new(Symbol::non_terminal(5)),
|
||||
ProductionStep::new(Symbol::non_terminal(6)),
|
||||
ProductionStep::new(Symbol::non_terminal(7)),
|
||||
],
|
||||
},
|
||||
]);
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
vec![
|
||||
Production {
|
||||
dynamic_precedence: 102,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
ProductionStep::new(Symbol::non_terminal(2)),
|
||||
ProductionStep::new(Symbol::non_terminal(3)),
|
||||
ProductionStep::new(Symbol::non_terminal(4)),
|
||||
ProductionStep::new(Symbol::non_terminal(6)),
|
||||
ProductionStep::new(Symbol::non_terminal(7)),
|
||||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 101,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
ProductionStep::new(Symbol::non_terminal(2)),
|
||||
ProductionStep::new(Symbol::non_terminal(5)),
|
||||
ProductionStep::new(Symbol::non_terminal(6)),
|
||||
ProductionStep::new(Symbol::non_terminal(7)),
|
||||
],
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -277,37 +314,40 @@ mod tests {
|
|||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::prec_left(101, Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
])),
|
||||
}).unwrap();
|
||||
rule: Rule::prec_left(
|
||||
101,
|
||||
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
|
||||
),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.productions, vec![
|
||||
Production {
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)),
|
||||
ProductionStep::new(Symbol::non_terminal(2)).with_prec(101, Some(Associativity::Left)),
|
||||
ProductionStep::new(Symbol::non_terminal(1))
|
||||
.with_prec(101, Some(Associativity::Left)),
|
||||
ProductionStep::new(Symbol::non_terminal(2))
|
||||
.with_prec(101, Some(Associativity::Left)),
|
||||
]
|
||||
}
|
||||
]);
|
||||
}]
|
||||
);
|
||||
|
||||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::prec_left(101, Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
])),
|
||||
}).unwrap();
|
||||
rule: Rule::prec_left(101, Rule::seq(vec![Rule::non_terminal(1)])),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.productions, vec![
|
||||
Production {
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)),
|
||||
]
|
||||
}
|
||||
]);
|
||||
steps: vec![ProductionStep::new(Symbol::non_terminal(1))
|
||||
.with_prec(101, Some(Associativity::Left)),]
|
||||
}]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
|
||||
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
|
||||
return Err(Error(
|
||||
"Grammar's start rule must be visible".to_string(),
|
||||
"A grammar's start rule must be visible.".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -191,13 +191,11 @@ impl Generator {
|
|||
"#define EXTERNAL_TOKEN_COUNT {}",
|
||||
self.syntax_grammar.external_tokens.len()
|
||||
);
|
||||
if self.parse_table.max_aliased_production_length > 0 {
|
||||
add_line!(
|
||||
self,
|
||||
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
|
||||
self.parse_table.max_aliased_production_length
|
||||
);
|
||||
}
|
||||
add_line!(
|
||||
self,
|
||||
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
|
||||
self.parse_table.max_aliased_production_length
|
||||
);
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use crate::generate;
|
|||
use crate::loader::Loader;
|
||||
use crate::test::{parse_tests, TestEntry};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::PathBuf;
|
||||
use tree_sitter::{Language, Parser};
|
||||
|
||||
lazy_static! {
|
||||
|
|
@ -19,6 +19,7 @@ lazy_static! {
|
|||
static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
|
||||
static ref SCRATCH_DIR: PathBuf = ROOT_DIR.join("target").join("scratch");
|
||||
static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
|
||||
static ref EXEC_PATH: PathBuf = std::env::current_exe().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -38,27 +39,42 @@ fn test_real_language_corpus_files() {
|
|||
fn test_feature_corpus_files() {
|
||||
fs::create_dir_all(SCRATCH_DIR.as_path()).unwrap();
|
||||
|
||||
let filter = std::env::var("TREE_SITTER_TEST_FILTER").ok();
|
||||
let mut loader = Loader::new(SCRATCH_DIR.clone());
|
||||
let mut parser = Parser::new();
|
||||
let test_grammars_dir = FIXTURES_DIR.join("test_grammars");
|
||||
|
||||
for entry in fs::read_dir(&test_grammars_dir).unwrap() {
|
||||
let entry = entry.unwrap();
|
||||
if !entry.metadata().unwrap().is_dir() {
|
||||
continue;
|
||||
}
|
||||
let test_name = entry.file_name();
|
||||
let test_name = test_name.to_str().unwrap();
|
||||
|
||||
eprintln!("test name: {}", test_name);
|
||||
if let Some(filter) = filter.as_ref() {
|
||||
if !test_name.contains(filter.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("test: {:?}", test_name);
|
||||
|
||||
let test_path = entry.path();
|
||||
let grammar_path = test_path.join("grammar.json");
|
||||
let corpus_path = test_path.join("corpus.txt");
|
||||
let error_message_path = test_path.join("expected_error.txt");
|
||||
|
||||
let grammar_json = fs::read_to_string(grammar_path).unwrap();
|
||||
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
|
||||
|
||||
if error_message_path.exists() {
|
||||
continue;
|
||||
let expected_message = fs::read_to_string(&error_message_path).unwrap();
|
||||
if let Err(e) = generate_result {
|
||||
assert_eq!(e.0, fs::read_to_string(&error_message_path).unwrap());
|
||||
if e.0 != expected_message {
|
||||
panic!(
|
||||
"Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n",
|
||||
expected_message, e.0
|
||||
);
|
||||
}
|
||||
} else {
|
||||
panic!(
|
||||
"Expected error message but got none for test grammar '{}'",
|
||||
|
|
@ -66,9 +82,15 @@ fn test_feature_corpus_files() {
|
|||
);
|
||||
}
|
||||
} else {
|
||||
let corpus_path = test_path.join("corpus.txt");
|
||||
let c_code = generate_result.unwrap();
|
||||
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", test_name));
|
||||
fs::write(&parser_c_path, c_code).unwrap();
|
||||
if !fs::read_to_string(&parser_c_path)
|
||||
.map(|content| content == c_code)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
fs::write(&parser_c_path, c_code).unwrap();
|
||||
}
|
||||
let scanner_path = test_path.join("scanner.c");
|
||||
let scanner_path = if scanner_path.exists() {
|
||||
Some(scanner_path)
|
||||
|
|
@ -78,6 +100,7 @@ fn test_feature_corpus_files() {
|
|||
let language = loader
|
||||
.load_language_from_sources(test_name, &HEADER_DIR, &parser_c_path, &scanner_path)
|
||||
.unwrap();
|
||||
let test = parse_tests(&corpus_path).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue