fix(generate): return error when single state transitions have
indirectly recursive cycles.
Such cycles can cause infinite loops in the parser near EOF.
Co-authored-by: Amaan Qureshi <amaanq12@gmail.com>
(cherry picked from commit 310c0b86a7)
This commit is contained in:
parent
8e2b5ad2a4
commit
14c4d2f8ca
3 changed files with 114 additions and 1 deletions
|
|
@ -8,7 +8,7 @@ mod process_inlines;
|
|||
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::{hash_map, HashMap, HashSet},
|
||||
collections::{hash_map, BTreeSet, HashMap, HashSet},
|
||||
mem,
|
||||
};
|
||||
|
||||
|
|
@ -16,6 +16,7 @@ use anyhow::Result;
|
|||
pub use expand_tokens::ExpandTokensError;
|
||||
pub use extract_tokens::ExtractTokensError;
|
||||
pub use flatten_grammar::FlattenGrammarError;
|
||||
use indexmap::IndexMap;
|
||||
pub use intern_symbols::InternSymbolsError;
|
||||
pub use process_inlines::ProcessInlinesError;
|
||||
use serde::Serialize;
|
||||
|
|
@ -80,6 +81,7 @@ pub type PrepareGrammarResult<T> = Result<T, PrepareGrammarError>;
|
|||
#[error(transparent)]
|
||||
pub enum PrepareGrammarError {
|
||||
ValidatePrecedences(#[from] ValidatePrecedenceError),
|
||||
ValidateIndirectRecursion(#[from] IndirectRecursionError),
|
||||
InternSymbols(#[from] InternSymbolsError),
|
||||
ExtractTokens(#[from] ExtractTokensError),
|
||||
FlattenGrammar(#[from] FlattenGrammarError),
|
||||
|
|
@ -96,6 +98,22 @@ pub enum ValidatePrecedenceError {
|
|||
Ordering(#[from] ConflictingPrecedenceOrderingError),
|
||||
}
|
||||
|
||||
#[derive(Debug, Error, Serialize)]
|
||||
pub struct IndirectRecursionError(pub Vec<String>);
|
||||
|
||||
impl std::fmt::Display for IndirectRecursionError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Grammar contains an indirectly recursive rule: ")?;
|
||||
for (i, symbol) in self.0.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(f, " -> ")?;
|
||||
}
|
||||
write!(f, "{symbol}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error, Serialize)]
|
||||
pub struct UndeclaredPrecedenceError {
|
||||
pub precedence: String,
|
||||
|
|
@ -141,6 +159,7 @@ pub fn prepare_grammar(
|
|||
AliasMap,
|
||||
)> {
|
||||
validate_precedences(input_grammar)?;
|
||||
validate_indirect_recursion(input_grammar)?;
|
||||
|
||||
let interned_grammar = intern_symbols(input_grammar)?;
|
||||
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
|
||||
|
|
@ -152,6 +171,83 @@ pub fn prepare_grammar(
|
|||
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
|
||||
}
|
||||
|
||||
/// Check for indirect recursion cycles in the grammar that can cause infinite loops while
|
||||
/// parsing. An indirect recursion cycle occurs when a non-terminal can derive itself through
|
||||
/// a chain of single-symbol productions (e.g., A -> B, B -> A).
|
||||
fn validate_indirect_recursion(grammar: &InputGrammar) -> Result<(), IndirectRecursionError> {
|
||||
let mut epsilon_transitions: IndexMap<&str, BTreeSet<String>> = IndexMap::new();
|
||||
|
||||
for variable in &grammar.variables {
|
||||
let productions = get_single_symbol_productions(&variable.rule);
|
||||
// Filter out rules that *directly* reference themselves, as this doesn't
|
||||
// cause a parsing loop.
|
||||
let filtered: BTreeSet<String> = productions
|
||||
.into_iter()
|
||||
.filter(|s| s != &variable.name)
|
||||
.collect();
|
||||
epsilon_transitions.insert(variable.name.as_str(), filtered);
|
||||
}
|
||||
|
||||
for start_symbol in epsilon_transitions.keys() {
|
||||
let mut visited = BTreeSet::new();
|
||||
let mut path = Vec::new();
|
||||
if let Some((start_idx, end_idx)) =
|
||||
get_cycle(start_symbol, &epsilon_transitions, &mut visited, &mut path)
|
||||
{
|
||||
let cycle_symbols = path[start_idx..=end_idx]
|
||||
.iter()
|
||||
.map(|s| (*s).to_string())
|
||||
.collect();
|
||||
return Err(IndirectRecursionError(cycle_symbols));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_single_symbol_productions(rule: &Rule) -> BTreeSet<String> {
|
||||
match rule {
|
||||
Rule::NamedSymbol(name) => BTreeSet::from([name.clone()]),
|
||||
Rule::Choice(choices) => choices
|
||||
.iter()
|
||||
.flat_map(get_single_symbol_productions)
|
||||
.collect(),
|
||||
Rule::Metadata { rule, .. } => get_single_symbol_productions(rule),
|
||||
_ => BTreeSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform a depth-first search to detect cycles in single state transitions.
|
||||
fn get_cycle<'a>(
|
||||
current: &'a str,
|
||||
transitions: &'a IndexMap<&'a str, BTreeSet<String>>,
|
||||
visited: &mut BTreeSet<&'a str>,
|
||||
path: &mut Vec<&'a str>,
|
||||
) -> Option<(usize, usize)> {
|
||||
if let Some(first_idx) = path.iter().position(|s| *s == current) {
|
||||
path.push(current);
|
||||
return Some((first_idx, path.len() - 1));
|
||||
}
|
||||
|
||||
if visited.contains(current) {
|
||||
return None;
|
||||
}
|
||||
|
||||
path.push(current);
|
||||
visited.insert(current);
|
||||
|
||||
if let Some(next_symbols) = transitions.get(current) {
|
||||
for next in next_symbols {
|
||||
if let Some(cycle) = get_cycle(next, transitions, visited, path) {
|
||||
return Some(cycle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
path.pop();
|
||||
None
|
||||
}
|
||||
|
||||
/// Check that all of the named precedences used in the grammar are declared
|
||||
/// within the `precedences` lists, and also that there are no conflicting
|
||||
/// precedence orderings declared in those lists.
|
||||
|
|
|
|||
1
test/fixtures/test_grammars/indirect_recursion_in_transitions/expected_error.txt
vendored
Normal file
1
test/fixtures/test_grammars/indirect_recursion_in_transitions/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
Grammar contains an indirectly recursive rule: type_expression -> _expression -> identifier_expression -> type_expression
|
||||
16
test/fixtures/test_grammars/indirect_recursion_in_transitions/grammar.js
vendored
Normal file
16
test/fixtures/test_grammars/indirect_recursion_in_transitions/grammar.js
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
module.exports = grammar({
|
||||
name: 'indirect_recursive_in_single_symbol_transitions',
|
||||
rules: {
|
||||
source_file: $ => repeat($._statement),
|
||||
|
||||
_statement: $ => seq($.initialization_part, $.type_expression),
|
||||
|
||||
type_expression: $ => choice('int', $._expression),
|
||||
|
||||
initialization_part: $ => seq('=', $._expression),
|
||||
|
||||
_expression: $ => choice($.identifier_expression, $.type_expression),
|
||||
|
||||
identifier_expression: $ => choice(/[a-zA-Z_][a-zA-Z0-9_]*/, $.type_expression),
|
||||
}
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue