Implement flatten_grammar
This commit is contained in:
parent
d482894c7d
commit
7acfb2b74e
8 changed files with 773 additions and 390 deletions
|
|
@ -52,7 +52,6 @@ pub(crate) struct ProductionStep {
|
|||
pub precedence: i32,
|
||||
pub associativity: Option<Associativity>,
|
||||
pub alias: Option<Alias>,
|
||||
pub is_excluded: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
|
|
@ -65,6 +64,7 @@ pub(crate) struct Production {
|
|||
pub(crate) struct SyntaxVariable {
|
||||
pub name: String,
|
||||
pub kind: VariableType,
|
||||
pub productions: Vec<Production>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
|
|
@ -81,7 +81,22 @@ pub(crate) struct SyntaxGrammar {
|
|||
pub expected_conflicts: Vec<Vec<Symbol>>,
|
||||
pub external_tokens: Vec<ExternalToken>,
|
||||
pub variables_to_inline: Vec<Symbol>,
|
||||
pub word_token: Symbol,
|
||||
pub word_token: Option<Symbol>,
|
||||
}
|
||||
|
||||
impl ProductionStep {
|
||||
pub(crate) fn new(symbol: Symbol) -> Self {
|
||||
Self { symbol, precedence: 0, associativity: None, alias: None }
|
||||
}
|
||||
|
||||
pub(crate) fn with_prec(self, precedence: i32, associativity: Option<Associativity>) -> Self {
|
||||
Self {
|
||||
symbol: self.symbol,
|
||||
precedence,
|
||||
associativity,
|
||||
alias: self.alias,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Variable {
|
||||
|
|
|
|||
|
|
@ -1,16 +1,15 @@
|
|||
use crate::rules::{Rule, Symbol};
|
||||
use super::ExtractedSyntaxGrammar;
|
||||
use crate::grammars::{Variable, VariableType};
|
||||
use crate::rules::{Rule, Symbol};
|
||||
use std::collections::HashMap;
|
||||
use std::mem;
|
||||
use std::rc::Rc;
|
||||
use super::ExtractedSyntaxGrammar;
|
||||
|
||||
struct Expander {
|
||||
variable_name: String,
|
||||
repeat_count_in_variable: usize,
|
||||
preceding_symbol_count: usize,
|
||||
auxiliary_variables: Vec<Variable>,
|
||||
existing_repeats: HashMap<Rule, Symbol>
|
||||
existing_repeats: HashMap<Rule, Symbol>,
|
||||
}
|
||||
|
||||
impl Expander {
|
||||
|
|
@ -25,11 +24,19 @@ impl Expander {
|
|||
|
||||
fn expand_rule(&mut self, rule: &Rule) -> Rule {
|
||||
match rule {
|
||||
Rule::Choice(elements) =>
|
||||
Rule::Choice(elements.iter().map(|element| self.expand_rule(element)).collect()),
|
||||
Rule::Choice(elements) => Rule::Choice(
|
||||
elements
|
||||
.iter()
|
||||
.map(|element| self.expand_rule(element))
|
||||
.collect(),
|
||||
),
|
||||
|
||||
Rule::Seq(elements) =>
|
||||
Rule::Seq(elements.iter().map(|element| self.expand_rule(element)).collect()),
|
||||
Rule::Seq(elements) => Rule::Seq(
|
||||
elements
|
||||
.iter()
|
||||
.map(|element| self.expand_rule(element))
|
||||
.collect(),
|
||||
),
|
||||
|
||||
Rule::Repeat(content) => {
|
||||
let inner_rule = self.expand_rule(content);
|
||||
|
|
@ -39,9 +46,15 @@ impl Expander {
|
|||
}
|
||||
|
||||
self.repeat_count_in_variable += 1;
|
||||
let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable);
|
||||
let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len());
|
||||
self.existing_repeats.insert(inner_rule.clone(), repeat_symbol);
|
||||
let rule_name = format!(
|
||||
"{}_repeat{}",
|
||||
self.variable_name, self.repeat_count_in_variable
|
||||
);
|
||||
let repeat_symbol = Symbol::non_terminal(
|
||||
self.preceding_symbol_count + self.auxiliary_variables.len(),
|
||||
);
|
||||
self.existing_repeats
|
||||
.insert(inner_rule.clone(), repeat_symbol);
|
||||
self.auxiliary_variables.push(Variable {
|
||||
name: rule_name,
|
||||
kind: VariableType::Auxiliary,
|
||||
|
|
@ -50,7 +63,7 @@ impl Expander {
|
|||
Rule::Symbol(repeat_symbol),
|
||||
Rule::Symbol(repeat_symbol),
|
||||
]),
|
||||
inner_rule
|
||||
inner_rule,
|
||||
]),
|
||||
});
|
||||
|
||||
|
|
@ -59,10 +72,10 @@ impl Expander {
|
|||
|
||||
Rule::Metadata { rule, params } => Rule::Metadata {
|
||||
rule: Box::new(self.expand_rule(rule)),
|
||||
params: params.clone()
|
||||
params: params.clone(),
|
||||
},
|
||||
|
||||
_ => rule.clone()
|
||||
_ => rule.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -80,7 +93,9 @@ pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSy
|
|||
expander.expand_variable(&mut variable);
|
||||
}
|
||||
|
||||
grammar.variables.extend(expander.auxiliary_variables.into_iter());
|
||||
grammar
|
||||
.variables
|
||||
.extend(expander.auxiliary_variables.into_iter());
|
||||
grammar
|
||||
}
|
||||
|
||||
|
|
@ -91,112 +106,126 @@ mod tests {
|
|||
#[test]
|
||||
fn test_basic_repeat_expansion() {
|
||||
// Repeats nested inside of sequences and choices are expanded.
|
||||
let grammar = expand_repeats(build_grammar(vec![
|
||||
Variable::named("rule0", Rule::seq(vec![
|
||||
let grammar = expand_repeats(build_grammar(vec![Variable::named(
|
||||
"rule0",
|
||||
Rule::seq(vec![
|
||||
Rule::terminal(10),
|
||||
Rule::choice(vec![
|
||||
Rule::repeat(Rule::terminal(11)),
|
||||
Rule::repeat(Rule::terminal(12)),
|
||||
]),
|
||||
Rule::terminal(13),
|
||||
])),
|
||||
]));
|
||||
]),
|
||||
)]));
|
||||
|
||||
assert_eq!(grammar.variables, vec![
|
||||
Variable::named("rule0", Rule::seq(vec![
|
||||
Rule::terminal(10),
|
||||
Rule::choice(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
]),
|
||||
Rule::terminal(13),
|
||||
])),
|
||||
Variable::auxiliary("rule0_repeat1", Rule::choice(vec![
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(1),
|
||||
]),
|
||||
Rule::terminal(11),
|
||||
])),
|
||||
Variable::auxiliary("rule0_repeat2", Rule::choice(vec![
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::non_terminal(2),
|
||||
]),
|
||||
Rule::terminal(12),
|
||||
])),
|
||||
]);
|
||||
assert_eq!(
|
||||
grammar.variables,
|
||||
vec![
|
||||
Variable::named(
|
||||
"rule0",
|
||||
Rule::seq(vec![
|
||||
Rule::terminal(10),
|
||||
Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]),
|
||||
Rule::terminal(13),
|
||||
])
|
||||
),
|
||||
Variable::auxiliary(
|
||||
"rule0_repeat1",
|
||||
Rule::choice(vec![
|
||||
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
|
||||
Rule::terminal(11),
|
||||
])
|
||||
),
|
||||
Variable::auxiliary(
|
||||
"rule0_repeat2",
|
||||
Rule::choice(vec![
|
||||
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
|
||||
Rule::terminal(12),
|
||||
])
|
||||
),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_repeat_deduplication() {
|
||||
// Terminal 4 appears inside of a repeat in three different places.
|
||||
let grammar = expand_repeats(build_grammar(vec![
|
||||
Variable::named("rule0", Rule::choice(vec![
|
||||
Rule::seq(vec![ Rule::terminal(1), Rule::repeat(Rule::terminal(4)) ]),
|
||||
Rule::seq(vec![ Rule::terminal(2), Rule::repeat(Rule::terminal(4)) ]),
|
||||
])),
|
||||
Variable::named("rule1", Rule::seq(vec![
|
||||
Rule::terminal(3),
|
||||
Rule::repeat(Rule::terminal(4)),
|
||||
])),
|
||||
Variable::named(
|
||||
"rule0",
|
||||
Rule::choice(vec![
|
||||
Rule::seq(vec![Rule::terminal(1), Rule::repeat(Rule::terminal(4))]),
|
||||
Rule::seq(vec![Rule::terminal(2), Rule::repeat(Rule::terminal(4))]),
|
||||
]),
|
||||
),
|
||||
Variable::named(
|
||||
"rule1",
|
||||
Rule::seq(vec![Rule::terminal(3), Rule::repeat(Rule::terminal(4))]),
|
||||
),
|
||||
]));
|
||||
|
||||
// Only one auxiliary rule is created for repeating terminal 4.
|
||||
assert_eq!(grammar.variables, vec![
|
||||
Variable::named("rule0", Rule::choice(vec![
|
||||
Rule::seq(vec![ Rule::terminal(1), Rule::non_terminal(2) ]),
|
||||
Rule::seq(vec![ Rule::terminal(2), Rule::non_terminal(2) ]),
|
||||
])),
|
||||
Variable::named("rule1", Rule::seq(vec![
|
||||
Rule::terminal(3),
|
||||
Rule::non_terminal(2),
|
||||
])),
|
||||
Variable::auxiliary("rule0_repeat1", Rule::choice(vec![
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::non_terminal(2),
|
||||
]),
|
||||
Rule::terminal(4),
|
||||
]))
|
||||
]);
|
||||
assert_eq!(
|
||||
grammar.variables,
|
||||
vec![
|
||||
Variable::named(
|
||||
"rule0",
|
||||
Rule::choice(vec![
|
||||
Rule::seq(vec![Rule::terminal(1), Rule::non_terminal(2)]),
|
||||
Rule::seq(vec![Rule::terminal(2), Rule::non_terminal(2)]),
|
||||
])
|
||||
),
|
||||
Variable::named(
|
||||
"rule1",
|
||||
Rule::seq(vec![Rule::terminal(3), Rule::non_terminal(2),])
|
||||
),
|
||||
Variable::auxiliary(
|
||||
"rule0_repeat1",
|
||||
Rule::choice(vec![
|
||||
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
|
||||
Rule::terminal(4),
|
||||
])
|
||||
)
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expansion_of_nested_repeats() {
|
||||
let grammar = expand_repeats(build_grammar(vec![
|
||||
Variable::named("rule0", Rule::seq(vec![
|
||||
let grammar = expand_repeats(build_grammar(vec![Variable::named(
|
||||
"rule0",
|
||||
Rule::seq(vec![
|
||||
Rule::terminal(10),
|
||||
Rule::repeat(Rule::seq(vec![
|
||||
Rule::terminal(11),
|
||||
Rule::repeat(Rule::terminal(12))
|
||||
Rule::repeat(Rule::terminal(12)),
|
||||
])),
|
||||
])),
|
||||
]));
|
||||
]),
|
||||
)]));
|
||||
|
||||
assert_eq!(grammar.variables, vec![
|
||||
Variable::named("rule0", Rule::seq(vec![
|
||||
Rule::terminal(10),
|
||||
Rule::non_terminal(2),
|
||||
])),
|
||||
Variable::auxiliary("rule0_repeat1", Rule::choice(vec![
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(1),
|
||||
]),
|
||||
Rule::terminal(12),
|
||||
])),
|
||||
Variable::auxiliary("rule0_repeat2", Rule::choice(vec![
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::non_terminal(2),
|
||||
]),
|
||||
Rule::seq(vec![
|
||||
Rule::terminal(11),
|
||||
Rule::non_terminal(1),
|
||||
]),
|
||||
])),
|
||||
]);
|
||||
assert_eq!(
|
||||
grammar.variables,
|
||||
vec![
|
||||
Variable::named(
|
||||
"rule0",
|
||||
Rule::seq(vec![Rule::terminal(10), Rule::non_terminal(2),])
|
||||
),
|
||||
Variable::auxiliary(
|
||||
"rule0_repeat1",
|
||||
Rule::choice(vec![
|
||||
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
|
||||
Rule::terminal(12),
|
||||
])
|
||||
),
|
||||
Variable::auxiliary(
|
||||
"rule0_repeat2",
|
||||
Rule::choice(vec![
|
||||
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
|
||||
Rule::seq(vec![Rule::terminal(11), Rule::non_terminal(1),]),
|
||||
])
|
||||
),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
|
||||
|
|
|
|||
|
|
@ -1,14 +1,13 @@
|
|||
use super::ExtractedLexicalGrammar;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::rules::Rule;
|
||||
use crate::grammars::{LexicalGrammar, LexicalVariable};
|
||||
use crate::nfa::{Nfa, NfaState, CharacterSet};
|
||||
use super::{ExtractedLexicalGrammar};
|
||||
use crate::nfa::{CharacterSet, Nfa, NfaState};
|
||||
use crate::rules::Rule;
|
||||
use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind};
|
||||
|
||||
fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet {
|
||||
match item {
|
||||
ClassPerlKind::Digit => CharacterSet::empty()
|
||||
.add_range('0', '9'),
|
||||
ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'),
|
||||
ClassPerlKind::Space => CharacterSet::empty()
|
||||
.add_char(' ')
|
||||
.add_char('\t')
|
||||
|
|
@ -18,7 +17,7 @@ fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet {
|
|||
.add_char('_')
|
||||
.add_range('A', 'Z')
|
||||
.add_range('a', 'z')
|
||||
.add_range('0', '9')
|
||||
.add_range('0', '9'),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -26,7 +25,9 @@ fn expand_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
|
|||
match item {
|
||||
ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
|
||||
ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
|
||||
ClassSetItem::Range(range) => Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)),
|
||||
ClassSetItem::Range(range) => {
|
||||
Ok(CharacterSet::empty().add_range(range.start.c, range.end.c))
|
||||
}
|
||||
ClassSetItem::Union(union) => {
|
||||
let mut result = CharacterSet::empty();
|
||||
for item in &union.items {
|
||||
|
|
@ -43,58 +44,64 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
Ast::Empty(_) => Ok(()),
|
||||
Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
|
||||
Ast::Literal(literal) => {
|
||||
nfa.states.push(NfaState::Advance(CharacterSet::Include(vec![literal.c]), next_state_index));
|
||||
nfa.states.push(NfaState::Advance(
|
||||
CharacterSet::Include(vec![literal.c]),
|
||||
next_state_index,
|
||||
));
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
Ast::Dot(_) => {
|
||||
nfa.states.push(NfaState::Advance(CharacterSet::Exclude(vec!['\n']), next_state_index));
|
||||
nfa.states.push(NfaState::Advance(
|
||||
CharacterSet::Exclude(vec!['\n']),
|
||||
next_state_index,
|
||||
));
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")),
|
||||
Ast::Class(class) => match class {
|
||||
Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")),
|
||||
Class::Perl(class) => {
|
||||
nfa.states.push(NfaState::Advance(expand_perl_character_class(&class.kind), next_state_index));
|
||||
nfa.states.push(NfaState::Advance(
|
||||
expand_perl_character_class(&class.kind),
|
||||
next_state_index,
|
||||
));
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
Class::Bracketed(class) => match &class.kind {
|
||||
ClassSet::Item(item) => {
|
||||
let character_set = expand_character_class(&item)?;
|
||||
nfa.states.push(NfaState::Advance(character_set, next_state_index));
|
||||
nfa.states
|
||||
.push(NfaState::Advance(character_set, next_state_index));
|
||||
Ok(())
|
||||
},
|
||||
ClassSet::BinaryOp(_) => {
|
||||
Err(Error::regex("Binary operators in character classes aren't supported"))
|
||||
}
|
||||
}
|
||||
ClassSet::BinaryOp(_) => Err(Error::regex(
|
||||
"Binary operators in character classes aren't supported",
|
||||
)),
|
||||
},
|
||||
},
|
||||
Ast::Repetition(repetition) => match repetition.op.kind {
|
||||
RepetitionKind::ZeroOrOne => {
|
||||
expand_regex(&repetition.ast, nfa, next_state_index)?;
|
||||
nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index));
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
RepetitionKind::OneOrMore => {
|
||||
nfa.states.push(NfaState::Accept); // Placeholder for split
|
||||
let split_index = nfa.start_index();
|
||||
expand_regex(&repetition.ast, nfa, split_index)?;
|
||||
nfa.states[split_index as usize] = NfaState::Split(
|
||||
nfa.start_index(),
|
||||
next_state_index
|
||||
);
|
||||
nfa.states[split_index as usize] =
|
||||
NfaState::Split(nfa.start_index(), next_state_index);
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
RepetitionKind::ZeroOrMore => {
|
||||
nfa.states.push(NfaState::Accept); // Placeholder for split
|
||||
let split_index = nfa.start_index();
|
||||
expand_regex(&repetition.ast, nfa, split_index)?;
|
||||
nfa.states[split_index as usize] = NfaState::Split(
|
||||
nfa.start_index(),
|
||||
next_state_index
|
||||
);
|
||||
nfa.states[split_index as usize] =
|
||||
NfaState::Split(nfa.start_index(), next_state_index);
|
||||
nfa.prepend(|start_index| NfaState::Split(start_index, next_state_index));
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
RepetitionKind::Range(_) => unimplemented!(),
|
||||
},
|
||||
Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.start_index()),
|
||||
|
|
@ -109,7 +116,7 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index));
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
Ast::Concat(concat) => {
|
||||
for ast in concat.asts.iter().rev() {
|
||||
expand_regex(&ast, nfa, next_state_index)?;
|
||||
|
|
@ -123,16 +130,20 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
|
||||
match rule {
|
||||
Rule::Pattern(s) => {
|
||||
let ast = parse::Parser::new().parse(&s).map_err(|e| Error::GrammarError(e.to_string()))?;
|
||||
let ast = parse::Parser::new()
|
||||
.parse(&s)
|
||||
.map_err(|e| Error::GrammarError(e.to_string()))?;
|
||||
expand_regex(&ast, nfa, next_state_index)?;
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
Rule::String(s) => {
|
||||
for c in s.chars().rev() {
|
||||
nfa.prepend(|start_index| NfaState::Advance(CharacterSet::empty().add_char(c), start_index));
|
||||
nfa.prepend(|start_index| {
|
||||
NfaState::Advance(CharacterSet::empty().add_char(c), start_index)
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
Rule::Choice(elements) => {
|
||||
let mut alternative_start_indices = Vec::new();
|
||||
for element in elements {
|
||||
|
|
@ -144,24 +155,21 @@ fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index));
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
Rule::Seq(elements) => {
|
||||
for element in elements.into_iter().rev() {
|
||||
expand_rule(element, nfa, next_state_index)?;
|
||||
next_state_index = nfa.start_index();
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
Rule::Repeat(rule) => {
|
||||
nfa.states.push(NfaState::Accept); // Placeholder for split
|
||||
let split_index = nfa.start_index();
|
||||
expand_rule(*rule, nfa, split_index)?;
|
||||
nfa.states[split_index as usize] = NfaState::Split(
|
||||
nfa.start_index(),
|
||||
next_state_index
|
||||
);
|
||||
nfa.states[split_index as usize] = NfaState::Split(nfa.start_index(), next_state_index);
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
_ => Err(Error::grammar("Unexpected rule type")),
|
||||
}
|
||||
}
|
||||
|
|
@ -184,7 +192,10 @@ pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result<LexicalG
|
|||
separators.push(nfa);
|
||||
}
|
||||
|
||||
Ok(LexicalGrammar { variables, separators })
|
||||
Ok(LexicalGrammar {
|
||||
variables,
|
||||
separators,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -237,10 +248,7 @@ mod tests {
|
|||
},
|
||||
Row {
|
||||
rule: Rule::pattern("a*"),
|
||||
examples: vec![
|
||||
("aaa1", Some("aaa")),
|
||||
("b", Some("")),
|
||||
],
|
||||
examples: vec![("aaa1", Some("aaa")), ("b", Some(""))],
|
||||
},
|
||||
Row {
|
||||
rule: Rule::pattern("a((bc)+|(de)*)f"),
|
||||
|
|
@ -253,22 +261,15 @@ mod tests {
|
|||
},
|
||||
Row {
|
||||
rule: Rule::pattern("[a-fA-F0-9]+"),
|
||||
examples: vec![
|
||||
("A1ff0", Some("A1ff")),
|
||||
],
|
||||
examples: vec![("A1ff0", Some("A1ff"))],
|
||||
},
|
||||
Row {
|
||||
rule: Rule::pattern("\\w\\d\\s"),
|
||||
examples: vec![
|
||||
("_0 ", Some("_0 ")),
|
||||
],
|
||||
examples: vec![("_0 ", Some("_0 "))],
|
||||
},
|
||||
Row {
|
||||
rule: Rule::string("abc"),
|
||||
examples: vec![
|
||||
("abcd", Some("abc")),
|
||||
("ab", None),
|
||||
],
|
||||
examples: vec![("abcd", Some("abc")), ("ab", None)],
|
||||
},
|
||||
Row {
|
||||
rule: Rule::repeat(Rule::seq(vec![
|
||||
|
|
@ -276,11 +277,7 @@ mod tests {
|
|||
Rule::pattern("[a-f]+"),
|
||||
Rule::string("}"),
|
||||
])),
|
||||
examples: vec![
|
||||
("{a}{", Some("{a}")),
|
||||
("{a}{d", Some("{a}")),
|
||||
("ab", None),
|
||||
],
|
||||
examples: vec![("{a}{", Some("{a}")), ("{a}{d", Some("{a}")), ("ab", None)],
|
||||
},
|
||||
];
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
use std::collections::HashMap;
|
||||
use std::rc::Rc;
|
||||
use std::mem;
|
||||
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::rules::{Rule, MetadataParams, Symbol, SymbolType};
|
||||
use crate::grammars::{Variable, ExternalToken};
|
||||
use super::{InternedGrammar, ExtractedSyntaxGrammar, ExtractedLexicalGrammar};
|
||||
use crate::grammars::{ExternalToken, Variable};
|
||||
use crate::rules::{MetadataParams, Rule, Symbol, SymbolType};
|
||||
use std::collections::HashMap;
|
||||
use std::mem;
|
||||
|
||||
pub(super) fn extract_tokens(
|
||||
mut grammar: InternedGrammar
|
||||
mut grammar: InternedGrammar,
|
||||
) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
|
||||
let mut extractor = TokenExtractor {
|
||||
current_variable_name: String::new(),
|
||||
|
|
@ -40,9 +39,15 @@ pub(super) fn extract_tokens(
|
|||
// variable in the lexical grammar. Symbols that pointed to later variables
|
||||
// will need to have their indices decremented.
|
||||
let mut variables = Vec::new();
|
||||
let mut symbol_replacer = SymbolReplacer { replacements: HashMap::new() };
|
||||
let mut symbol_replacer = SymbolReplacer {
|
||||
replacements: HashMap::new(),
|
||||
};
|
||||
for (i, variable) in grammar.variables.into_iter().enumerate() {
|
||||
if let Rule::Symbol(Symbol { kind: SymbolType::Terminal, index }) = variable.rule {
|
||||
if let Rule::Symbol(Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
}) = variable.rule
|
||||
{
|
||||
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
|
||||
let mut lexical_variable = &mut lexical_variables[index];
|
||||
lexical_variable.kind = variable.kind;
|
||||
|
|
@ -58,16 +63,19 @@ pub(super) fn extract_tokens(
|
|||
variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule);
|
||||
}
|
||||
|
||||
let expected_conflicts = grammar.expected_conflicts
|
||||
let expected_conflicts = grammar
|
||||
.expected_conflicts
|
||||
.into_iter()
|
||||
.map(|conflict|
|
||||
.map(|conflict| {
|
||||
conflict
|
||||
.iter()
|
||||
.map(|symbol| symbol_replacer.replace_symbol(*symbol))
|
||||
.collect()
|
||||
).collect();
|
||||
})
|
||||
.collect();
|
||||
|
||||
let variables_to_inline = grammar.variables_to_inline
|
||||
let variables_to_inline = grammar
|
||||
.variables_to_inline
|
||||
.into_iter()
|
||||
.map(|symbol| symbol_replacer.replace_symbol(symbol))
|
||||
.collect();
|
||||
|
|
@ -149,7 +157,7 @@ pub(super) fn extract_tokens(
|
|||
ExtractedLexicalGrammar {
|
||||
variables: lexical_variables,
|
||||
separators,
|
||||
}
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
|
|
@ -161,7 +169,7 @@ struct TokenExtractor {
|
|||
}
|
||||
|
||||
struct SymbolReplacer {
|
||||
replacements: HashMap<usize, usize>
|
||||
replacements: HashMap<usize, usize>,
|
||||
}
|
||||
|
||||
impl TokenExtractor {
|
||||
|
|
@ -198,20 +206,24 @@ impl TokenExtractor {
|
|||
} else {
|
||||
Rule::Metadata {
|
||||
params: params.clone(),
|
||||
rule: Box::new(self.extract_tokens_in_rule((&rule).clone()))
|
||||
rule: Box::new(self.extract_tokens_in_rule((&rule).clone())),
|
||||
}
|
||||
}
|
||||
},
|
||||
Rule::Repeat(content) => Rule::Repeat(
|
||||
Box::new(self.extract_tokens_in_rule(content))
|
||||
),
|
||||
}
|
||||
Rule::Repeat(content) => Rule::Repeat(Box::new(self.extract_tokens_in_rule(content))),
|
||||
Rule::Seq(elements) => Rule::Seq(
|
||||
elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
|
||||
elements
|
||||
.iter()
|
||||
.map(|e| self.extract_tokens_in_rule(e))
|
||||
.collect(),
|
||||
),
|
||||
Rule::Choice(elements) => Rule::Choice(
|
||||
elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
|
||||
elements
|
||||
.iter()
|
||||
.map(|e| self.extract_tokens_in_rule(e))
|
||||
.collect(),
|
||||
),
|
||||
_ => input.clone()
|
||||
_ => input.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -219,7 +231,7 @@ impl TokenExtractor {
|
|||
for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
|
||||
if variable.rule == *rule {
|
||||
self.extracted_usage_counts[i] += 1;
|
||||
return Symbol::terminal(i)
|
||||
return Symbol::terminal(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -231,10 +243,9 @@ impl TokenExtractor {
|
|||
Variable::auxiliary(
|
||||
&format!(
|
||||
"{}_token{}",
|
||||
&self.current_variable_name,
|
||||
self.current_variable_token_count
|
||||
&self.current_variable_name, self.current_variable_token_count
|
||||
),
|
||||
rule.clone()
|
||||
rule.clone(),
|
||||
)
|
||||
};
|
||||
|
||||
|
|
@ -249,25 +260,29 @@ impl SymbolReplacer {
|
|||
match rule {
|
||||
Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
|
||||
Rule::Choice(elements) => Rule::Choice(
|
||||
elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
|
||||
elements
|
||||
.iter()
|
||||
.map(|e| self.replace_symbols_in_rule(e))
|
||||
.collect(),
|
||||
),
|
||||
Rule::Seq(elements) => Rule::Seq(
|
||||
elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
|
||||
),
|
||||
Rule::Repeat(content) => Rule::Repeat(
|
||||
Box::new(self.replace_symbols_in_rule(content))
|
||||
elements
|
||||
.iter()
|
||||
.map(|e| self.replace_symbols_in_rule(e))
|
||||
.collect(),
|
||||
),
|
||||
Rule::Repeat(content) => Rule::Repeat(Box::new(self.replace_symbols_in_rule(content))),
|
||||
Rule::Metadata { rule, params } => Rule::Metadata {
|
||||
params: params.clone(),
|
||||
rule: Box::new(self.replace_symbols_in_rule(rule)),
|
||||
},
|
||||
_ => rule.clone()
|
||||
_ => rule.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn replace_symbol(&self, symbol: Symbol) -> Symbol {
|
||||
if !symbol.is_non_terminal() {
|
||||
return symbol
|
||||
return symbol;
|
||||
}
|
||||
|
||||
if let Some(replacement) = self.replacements.get(&symbol.index) {
|
||||
|
|
@ -293,81 +308,95 @@ mod test {
|
|||
#[test]
|
||||
fn test_extraction() {
|
||||
let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
|
||||
Variable::named("rule_0", Rule::repeat(Rule::seq(vec![
|
||||
Rule::string("a"),
|
||||
Rule::pattern("b"),
|
||||
Rule::choice(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
Rule::token(Rule::repeat(Rule::choice(vec![
|
||||
Rule::string("c"),
|
||||
Rule::string("d"),
|
||||
])))
|
||||
])
|
||||
]))),
|
||||
Variable::named(
|
||||
"rule_0",
|
||||
Rule::repeat(Rule::seq(vec![
|
||||
Rule::string("a"),
|
||||
Rule::pattern("b"),
|
||||
Rule::choice(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
Rule::token(Rule::repeat(Rule::choice(vec![
|
||||
Rule::string("c"),
|
||||
Rule::string("d"),
|
||||
]))),
|
||||
]),
|
||||
])),
|
||||
),
|
||||
Variable::named("rule_1", Rule::pattern("e")),
|
||||
Variable::named("rule_2", Rule::pattern("b")),
|
||||
Variable::named("rule_3", Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::Blank,
|
||||
])),
|
||||
])).unwrap();
|
||||
Variable::named(
|
||||
"rule_3",
|
||||
Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
|
||||
),
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(syntax_grammar.variables, vec![
|
||||
Variable::named("rule_0", Rule::repeat(Rule::seq(vec![
|
||||
// The string "a" was replaced by a symbol referencing the lexical grammar
|
||||
Rule::terminal(0),
|
||||
assert_eq!(
|
||||
syntax_grammar.variables,
|
||||
vec![
|
||||
Variable::named(
|
||||
"rule_0",
|
||||
Rule::repeat(Rule::seq(vec![
|
||||
// The string "a" was replaced by a symbol referencing the lexical grammar
|
||||
Rule::terminal(0),
|
||||
// The pattern "b" was replaced by a symbol referencing the lexical grammar
|
||||
Rule::terminal(1),
|
||||
Rule::choice(vec![
|
||||
// The symbol referencing `rule_1` was replaced by a symbol referencing
|
||||
// the lexical grammar.
|
||||
Rule::terminal(3),
|
||||
// The symbol referencing `rule_2` had its index decremented because
|
||||
// `rule_1` was moved to the lexical grammar.
|
||||
Rule::non_terminal(1),
|
||||
// The rule wrapped in `token` was replaced by a symbol referencing
|
||||
// the lexical grammar.
|
||||
Rule::terminal(2),
|
||||
])
|
||||
]))
|
||||
),
|
||||
// The pattern "e" was only used in one place: as the definition of `rule_1`,
|
||||
// so that rule was moved to the lexical grammar. The pattern "b" appeared in
|
||||
// two places, so it was not moved into the lexical grammar.
|
||||
Variable::named("rule_2", Rule::terminal(1)),
|
||||
Variable::named(
|
||||
"rule_3",
|
||||
Rule::seq(vec![Rule::non_terminal(1), Rule::Blank,])
|
||||
),
|
||||
]
|
||||
);
|
||||
|
||||
// The pattern "b" was replaced by a symbol referencing the lexical grammar
|
||||
Rule::terminal(1),
|
||||
Rule::choice(vec![
|
||||
// The symbol referencing `rule_1` was replaced by a symbol referencing
|
||||
// the lexical grammar.
|
||||
Rule::terminal(3),
|
||||
|
||||
// The symbol referencing `rule_2` had its index decremented because
|
||||
// `rule_1` was moved to the lexical grammar.
|
||||
Rule::non_terminal(1),
|
||||
|
||||
// The rule wrapped in `token` was replaced by a symbol referencing
|
||||
// the lexical grammar.
|
||||
Rule::terminal(2),
|
||||
])
|
||||
]))),
|
||||
|
||||
// The pattern "e" was only used in one place: as the definition of `rule_1`,
|
||||
// so that rule was moved to the lexical grammar. The pattern "b" appeared in
|
||||
// two places, so it was not moved into the lexical grammar.
|
||||
Variable::named("rule_2", Rule::terminal(1)),
|
||||
Variable::named("rule_3", Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::Blank,
|
||||
])),
|
||||
]);
|
||||
|
||||
assert_eq!(lexical_grammar.variables, vec![
|
||||
Variable::anonymous("a", Rule::string("a")),
|
||||
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
|
||||
Variable::auxiliary("rule_0_token2", Rule::repeat(Rule::choice(vec![
|
||||
Rule::string("c"),
|
||||
Rule::string("d"),
|
||||
]))),
|
||||
Variable::named("rule_1", Rule::pattern("e")),
|
||||
]);
|
||||
assert_eq!(
|
||||
lexical_grammar.variables,
|
||||
vec![
|
||||
Variable::anonymous("a", Rule::string("a")),
|
||||
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
|
||||
Variable::auxiliary(
|
||||
"rule_0_token2",
|
||||
Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
|
||||
),
|
||||
Variable::named("rule_1", Rule::pattern("e")),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_start_rule_is_token() {
|
||||
let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
|
||||
Variable::named("rule_0", Rule::string("hello")),
|
||||
])).unwrap();
|
||||
let (syntax_grammar, lexical_grammar) =
|
||||
extract_tokens(build_grammar(vec![Variable::named(
|
||||
"rule_0",
|
||||
Rule::string("hello"),
|
||||
)]))
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(syntax_grammar.variables, vec![
|
||||
Variable::named("rule_0", Rule::terminal(0)),
|
||||
]);
|
||||
assert_eq!(lexical_grammar.variables, vec![
|
||||
Variable::anonymous("hello", Rule::string("hello")),
|
||||
])
|
||||
assert_eq!(
|
||||
syntax_grammar.variables,
|
||||
vec![Variable::named("rule_0", Rule::terminal(0)),]
|
||||
);
|
||||
assert_eq!(
|
||||
lexical_grammar.variables,
|
||||
vec![Variable::anonymous("hello", Rule::string("hello")),]
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -376,29 +405,25 @@ mod test {
|
|||
Variable::named("rule_0", Rule::string("x")),
|
||||
Variable::named("comment", Rule::pattern("//.*")),
|
||||
]);
|
||||
grammar.extra_tokens = vec![
|
||||
Rule::string(" "),
|
||||
Rule::non_terminal(1),
|
||||
];
|
||||
grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)];
|
||||
|
||||
let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
|
||||
assert_eq!(syntax_grammar.extra_tokens, vec![
|
||||
Symbol::terminal(1),
|
||||
]);
|
||||
assert_eq!(lexical_grammar.separators, vec![
|
||||
Rule::string(" "),
|
||||
]);
|
||||
assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]);
|
||||
assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_externals() {
|
||||
let mut grammar = build_grammar(vec![
|
||||
Variable::named("rule_0", Rule::seq(vec![
|
||||
Rule::external(0),
|
||||
Rule::string("a"),
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
])),
|
||||
Variable::named(
|
||||
"rule_0",
|
||||
Rule::seq(vec![
|
||||
Rule::external(0),
|
||||
Rule::string("a"),
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
]),
|
||||
),
|
||||
Variable::named("rule_1", Rule::string("b")),
|
||||
Variable::named("rule_2", Rule::string("c")),
|
||||
]);
|
||||
|
|
@ -410,23 +435,26 @@ mod test {
|
|||
|
||||
let (syntax_grammar, _) = extract_tokens(grammar).unwrap();
|
||||
|
||||
assert_eq!(syntax_grammar.external_tokens, vec![
|
||||
ExternalToken {
|
||||
name: "external_0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
corresponding_internal_token: None,
|
||||
},
|
||||
ExternalToken {
|
||||
name: "a".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
corresponding_internal_token: Some(Symbol::terminal(0)),
|
||||
},
|
||||
ExternalToken {
|
||||
name: "rule_2".to_string(),
|
||||
kind: VariableType::Named,
|
||||
corresponding_internal_token: Some(Symbol::terminal(2)),
|
||||
},
|
||||
]);
|
||||
assert_eq!(
|
||||
syntax_grammar.external_tokens,
|
||||
vec![
|
||||
ExternalToken {
|
||||
name: "external_0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
corresponding_internal_token: None,
|
||||
},
|
||||
ExternalToken {
|
||||
name: "a".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
corresponding_internal_token: Some(Symbol::terminal(0)),
|
||||
},
|
||||
ExternalToken {
|
||||
name: "rule_2".to_string(),
|
||||
kind: VariableType::Named,
|
||||
corresponding_internal_token: Some(Symbol::terminal(2)),
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -436,14 +464,15 @@ mod test {
|
|||
Variable::named("rule_1", Rule::non_terminal(2)),
|
||||
Variable::named("rule_2", Rule::string("x")),
|
||||
]);
|
||||
grammar.extra_tokens = vec![
|
||||
Rule::non_terminal(1),
|
||||
];
|
||||
grammar.extra_tokens = vec![Rule::non_terminal(1)];
|
||||
|
||||
match extract_tokens(grammar) {
|
||||
Err(Error::GrammarError(s)) => {
|
||||
assert_eq!(s, "Non-token symbol 'rule_1' cannot be used as an extra token");
|
||||
},
|
||||
assert_eq!(
|
||||
s,
|
||||
"Non-token symbol 'rule_1' cannot be used as an extra token"
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected an error but got no error");
|
||||
}
|
||||
|
|
@ -453,24 +482,22 @@ mod test {
|
|||
#[test]
|
||||
fn test_error_on_external_with_same_name_as_non_terminal() {
|
||||
let mut grammar = build_grammar(vec![
|
||||
Variable::named("rule_0", Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
])),
|
||||
Variable::named("rule_1", Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::non_terminal(2),
|
||||
])),
|
||||
Variable::named(
|
||||
"rule_0",
|
||||
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
|
||||
),
|
||||
Variable::named(
|
||||
"rule_1",
|
||||
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2)]),
|
||||
),
|
||||
Variable::named("rule_2", Rule::string("a")),
|
||||
]);
|
||||
grammar.external_tokens = vec![
|
||||
Variable::named("rule_1", Rule::non_terminal(1)),
|
||||
];
|
||||
grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))];
|
||||
|
||||
match extract_tokens(grammar) {
|
||||
Err(Error::GrammarError(s)) => {
|
||||
assert_eq!(s, "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
|
||||
},
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected an error but got no error");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,313 @@
|
|||
use crate::error::Result;
|
||||
use crate::grammars::SyntaxGrammar;
|
||||
use super::ExtractedSyntaxGrammar;
|
||||
use crate::error::Result;
|
||||
use crate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable};
|
||||
use crate::rules::{Alias, Associativity, Rule};
|
||||
|
||||
struct RuleFlattener {
|
||||
production: Production,
|
||||
precedence_stack: Vec<i32>,
|
||||
associativity_stack: Vec<Associativity>,
|
||||
alias_stack: Vec<Alias>,
|
||||
}
|
||||
|
||||
impl RuleFlattener {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
production: Production {
|
||||
steps: Vec::new(),
|
||||
dynamic_precedence: 0,
|
||||
},
|
||||
precedence_stack: Vec::new(),
|
||||
associativity_stack: Vec::new(),
|
||||
alias_stack: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn flatten(mut self, rule: Rule) -> Production {
|
||||
self.apply(rule, true);
|
||||
self.production
|
||||
}
|
||||
|
||||
fn apply(&mut self, rule: Rule, at_end: bool) {
|
||||
match rule {
|
||||
Rule::Seq(members) => {
|
||||
let last_index = members.len() - 1;
|
||||
for (i, member) in members.into_iter().enumerate() {
|
||||
self.apply(member, i == last_index && at_end);
|
||||
}
|
||||
}
|
||||
Rule::Metadata { rule, params } => {
|
||||
let mut has_precedence = false;
|
||||
if let Some(precedence) = params.precedence {
|
||||
has_precedence = true;
|
||||
self.precedence_stack.push(precedence);
|
||||
}
|
||||
|
||||
let mut has_associativity = false;
|
||||
if let Some(associativity) = params.associativity {
|
||||
has_associativity = true;
|
||||
self.associativity_stack.push(associativity);
|
||||
}
|
||||
|
||||
let mut has_alias = false;
|
||||
if let Some(alias) = params.alias {
|
||||
has_alias = true;
|
||||
self.alias_stack.push(alias);
|
||||
}
|
||||
|
||||
if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
|
||||
self.production.dynamic_precedence = params.dynamic_precedence;
|
||||
}
|
||||
|
||||
self.apply(*rule, at_end);
|
||||
|
||||
if has_precedence {
|
||||
self.precedence_stack.pop();
|
||||
if !at_end {
|
||||
self.production.steps.last_mut().unwrap().precedence =
|
||||
self.precedence_stack.last().cloned().unwrap_or(0);
|
||||
}
|
||||
}
|
||||
|
||||
if has_associativity {
|
||||
self.associativity_stack.pop();
|
||||
if !at_end {
|
||||
self.production.steps.last_mut().unwrap().associativity =
|
||||
self.associativity_stack.last().cloned();
|
||||
}
|
||||
}
|
||||
|
||||
if has_alias {
|
||||
self.alias_stack.pop();
|
||||
}
|
||||
}
|
||||
Rule::Symbol(symbol) => {
|
||||
self.production.steps.push(ProductionStep {
|
||||
symbol,
|
||||
precedence: self.precedence_stack.last().cloned().unwrap_or(0),
|
||||
associativity: self.associativity_stack.last().cloned(),
|
||||
alias: self.alias_stack.last().cloned(),
|
||||
});
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_choices(rule: Rule) -> Vec<Rule> {
|
||||
match rule {
|
||||
Rule::Seq(elements) => {
|
||||
let mut result = vec![Rule::Blank];
|
||||
for element in elements {
|
||||
let extraction = extract_choices(element);
|
||||
let mut next_result = Vec::new();
|
||||
for entry in result {
|
||||
for extraction_entry in extraction.iter() {
|
||||
next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()]));
|
||||
}
|
||||
}
|
||||
result = next_result;
|
||||
}
|
||||
result
|
||||
}
|
||||
Rule::Choice(elements) => {
|
||||
let mut result = Vec::new();
|
||||
for element in elements {
|
||||
for rule in extract_choices(element) {
|
||||
result.push(rule);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
Rule::Metadata { rule, params } => extract_choices(*rule)
|
||||
.into_iter()
|
||||
.map(|rule| Rule::Metadata {
|
||||
rule: Box::new(rule),
|
||||
params: params.clone(),
|
||||
})
|
||||
.collect(),
|
||||
_ => vec![rule],
|
||||
}
|
||||
}
|
||||
|
||||
fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
|
||||
let mut productions = Vec::new();
|
||||
for rule in extract_choices(variable.rule) {
|
||||
let production = RuleFlattener::new().flatten(rule);
|
||||
if !productions.contains(&production) {
|
||||
productions.push(production);
|
||||
}
|
||||
}
|
||||
Ok(SyntaxVariable {
|
||||
name: variable.name,
|
||||
kind: variable.kind,
|
||||
productions,
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
|
||||
unimplemented!();
|
||||
let mut variables = Vec::new();
|
||||
for variable in grammar.variables {
|
||||
variables.push(flatten_variable(variable)?);
|
||||
}
|
||||
Ok(SyntaxGrammar {
|
||||
extra_tokens: grammar.extra_tokens,
|
||||
expected_conflicts: grammar.expected_conflicts,
|
||||
variables_to_inline: grammar.variables_to_inline,
|
||||
external_tokens: grammar.external_tokens,
|
||||
word_token: grammar.word_token,
|
||||
variables,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammars::VariableType;
    use crate::rules::Symbol;

    /// Flattens a named variable called "test" whose body is `rule` and
    /// returns its productions.
    fn flatten_test_rule(rule: Rule) -> Vec<Production> {
        flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule,
        })
        .unwrap()
        .productions
    }

    /// Step for non-terminal `index` with default precedence metadata.
    fn step(index: usize) -> ProductionStep {
        ProductionStep::new(Symbol::non_terminal(index))
    }

    /// Step for non-terminal `index` carrying left-associative precedence 101.
    fn left_101(index: usize) -> ProductionStep {
        step(index).with_prec(101, Some(Associativity::Left))
    }

    // Nested precedence: steps inside a `prec` take its values, and the last
    // step of a non-final `prec` falls back to the enclosing scope.
    #[test]
    fn test_flatten_grammar() {
        let productions = flatten_test_rule(Rule::seq(vec![
            Rule::non_terminal(1),
            Rule::prec_left(
                101,
                Rule::seq(vec![
                    Rule::non_terminal(2),
                    Rule::choice(vec![
                        Rule::prec_right(
                            102,
                            Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
                        ),
                        Rule::non_terminal(5),
                    ]),
                    Rule::non_terminal(6),
                ]),
            ),
            Rule::non_terminal(7),
        ]));

        let with_inner_prec = Production {
            dynamic_precedence: 0,
            steps: vec![
                step(1),
                left_101(2),
                step(3).with_prec(102, Some(Associativity::Right)),
                left_101(4),
                step(6),
                step(7),
            ],
        };
        let without_inner_prec = Production {
            dynamic_precedence: 0,
            steps: vec![step(1), left_101(2), left_101(5), step(6), step(7)],
        };

        assert_eq!(productions, vec![with_inner_prec, without_inner_prec]);
    }

    // Each production keeps the largest dynamic precedence found on its path.
    #[test]
    fn test_flatten_grammar_with_maximum_dynamic_precedence() {
        let productions = flatten_test_rule(Rule::seq(vec![
            Rule::non_terminal(1),
            Rule::prec_dynamic(
                101,
                Rule::seq(vec![
                    Rule::non_terminal(2),
                    Rule::choice(vec![
                        Rule::prec_dynamic(
                            102,
                            Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
                        ),
                        Rule::non_terminal(5),
                    ]),
                    Rule::non_terminal(6),
                ]),
            ),
            Rule::non_terminal(7),
        ]));

        assert_eq!(
            productions,
            vec![
                Production {
                    dynamic_precedence: 102,
                    steps: vec![step(1), step(2), step(3), step(4), step(6), step(7)],
                },
                Production {
                    dynamic_precedence: 101,
                    steps: vec![step(1), step(2), step(5), step(6), step(7)],
                },
            ]
        );
    }

    // A `prec` that ends the production leaves its precedence on the last step.
    #[test]
    fn test_flatten_grammar_with_final_precedence() {
        let two_steps = flatten_test_rule(Rule::prec_left(
            101,
            Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
        ));
        assert_eq!(
            two_steps,
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![left_101(1), left_101(2)],
            }]
        );

        let one_step = flatten_test_rule(Rule::prec_left(
            101,
            Rule::seq(vec![Rule::non_terminal(1)]),
        ));
        assert_eq!(
            one_step,
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![left_101(1)],
            }]
        );
    }
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,15 @@
|
|||
use crate::error::{Error, Result};
|
||||
use crate::rules::{Rule, Symbol};
|
||||
use crate::grammars::{InputGrammar, Variable, VariableType};
|
||||
use std::rc::Rc;
|
||||
use super::InternedGrammar;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::grammars::{InputGrammar, Variable, VariableType};
|
||||
use crate::rules::{Rule, Symbol};
|
||||
|
||||
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
|
||||
let interner = Interner { grammar };
|
||||
|
||||
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
|
||||
return Err(Error::GrammarError("Grammar's start rule must be visible".to_string()));
|
||||
return Err(Error::GrammarError(
|
||||
"Grammar's start rule must be visible".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut variables = Vec::with_capacity(grammar.variables.len());
|
||||
|
|
@ -40,9 +41,10 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
for conflict in grammar.expected_conflicts.iter() {
|
||||
let mut interned_conflict = Vec::with_capacity(conflict.len());
|
||||
for name in conflict {
|
||||
interned_conflict.push(interner
|
||||
.intern_name(&name)
|
||||
.ok_or_else(|| symbol_error(name))?
|
||||
interned_conflict.push(
|
||||
interner
|
||||
.intern_name(&name)
|
||||
.ok_or_else(|| symbol_error(name))?,
|
||||
);
|
||||
}
|
||||
expected_conflicts.push(interned_conflict);
|
||||
|
|
@ -57,9 +59,10 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
|
||||
let mut word_token = None;
|
||||
if let Some(name) = grammar.word_token.as_ref() {
|
||||
word_token = Some(interner
|
||||
.intern_name(&name)
|
||||
.ok_or_else(|| symbol_error(&name))?
|
||||
word_token = Some(
|
||||
interner
|
||||
.intern_name(&name)
|
||||
.ok_or_else(|| symbol_error(&name))?,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +77,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
}
|
||||
|
||||
struct Interner<'a> {
|
||||
grammar: &'a InputGrammar
|
||||
grammar: &'a InputGrammar,
|
||||
}
|
||||
|
||||
impl<'a> Interner<'a> {
|
||||
|
|
@ -86,22 +89,19 @@ impl<'a> Interner<'a> {
|
|||
result.push(self.intern_rule(element)?);
|
||||
}
|
||||
Ok(Rule::Choice(result))
|
||||
},
|
||||
}
|
||||
Rule::Seq(elements) => {
|
||||
let mut result = Vec::with_capacity(elements.len());
|
||||
for element in elements {
|
||||
result.push(self.intern_rule(element)?);
|
||||
}
|
||||
Ok(Rule::Seq(result))
|
||||
},
|
||||
Rule::Repeat(content) => Ok(Rule::Repeat(
|
||||
Box::new(self.intern_rule(content)?)
|
||||
)),
|
||||
Rule::Metadata { rule, params } =>
|
||||
Ok(Rule::Metadata {
|
||||
rule: Box::new(self.intern_rule(rule)?),
|
||||
params: params.clone()
|
||||
}),
|
||||
}
|
||||
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))),
|
||||
Rule::Metadata { rule, params } => Ok(Rule::Metadata {
|
||||
rule: Box::new(self.intern_rule(rule)?),
|
||||
params: params.clone(),
|
||||
}),
|
||||
|
||||
Rule::NamedSymbol(name) => {
|
||||
if let Some(symbol) = self.intern_name(&name) {
|
||||
|
|
@ -109,29 +109,28 @@ impl<'a> Interner<'a> {
|
|||
} else {
|
||||
Err(symbol_error(name))
|
||||
}
|
||||
},
|
||||
|
||||
_ => Ok(rule.clone())
|
||||
}
|
||||
|
||||
_ => Ok(rule.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
fn intern_name(&self, symbol: &str) -> Option<Symbol> {
|
||||
for (i, variable) in self.grammar.variables.iter().enumerate() {
|
||||
if variable.name == symbol {
|
||||
return Some(Symbol::non_terminal(i))
|
||||
return Some(Symbol::non_terminal(i));
|
||||
}
|
||||
}
|
||||
|
||||
for (i, external_token) in self.grammar.external_tokens.iter().enumerate() {
|
||||
if let Rule::NamedSymbol(name) = external_token {
|
||||
if name == symbol {
|
||||
return Some(Symbol::external(i))
|
||||
return Some(Symbol::external(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return None
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -154,22 +153,23 @@ mod tests {
|
|||
/// Named symbols are replaced by the index of the variable they refer to,
/// and a variable whose name starts with `_` is expected back as hidden.
#[test]
fn test_basic_repeat_expansion() {
    let grammar = intern_symbols(&build_grammar(vec![
        Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])),
        Variable::named("y", Rule::named("_z")),
        Variable::named("_z", Rule::string("a")),
    ]))
    .unwrap();

    assert_eq!(
        grammar.variables,
        vec![
            Variable::named(
                "x",
                Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),])
            ),
            Variable::named("y", Rule::non_terminal(2)),
            Variable::hidden("_z", Rule::string("a")),
        ]
    );
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -177,45 +177,50 @@ mod tests {
|
|||
// Variable `y` is both an internal and an external token.
|
||||
// Variable `z` is just an external token.
|
||||
let mut input_grammar = build_grammar(vec![
|
||||
Variable::named("w", Rule::choice(vec![
|
||||
Rule::named("x"),
|
||||
Rule::named("y"),
|
||||
Rule::named("z"),
|
||||
])),
|
||||
Variable::named(
|
||||
"w",
|
||||
Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]),
|
||||
),
|
||||
Variable::named("x", Rule::string("a")),
|
||||
Variable::named("y", Rule::string("b")),
|
||||
]);
|
||||
input_grammar.external_tokens.extend(vec![
|
||||
Rule::named("y"),
|
||||
Rule::named("z"),
|
||||
]);
|
||||
input_grammar
|
||||
.external_tokens
|
||||
.extend(vec![Rule::named("y"), Rule::named("z")]);
|
||||
|
||||
let grammar = intern_symbols(&input_grammar).unwrap();
|
||||
|
||||
// Variable `y` is referred to by its internal index.
|
||||
// Variable `z` is referred to by its external index.
|
||||
assert_eq!(grammar.variables, vec![
|
||||
Variable::named("w", Rule::choice(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
Rule::external(1),
|
||||
])),
|
||||
Variable::named("x", Rule::string("a")),
|
||||
Variable::named("y", Rule::string("b")),
|
||||
]);
|
||||
assert_eq!(
|
||||
grammar.variables,
|
||||
vec![
|
||||
Variable::named(
|
||||
"w",
|
||||
Rule::choice(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::non_terminal(2),
|
||||
Rule::external(1),
|
||||
])
|
||||
),
|
||||
Variable::named("x", Rule::string("a")),
|
||||
Variable::named("y", Rule::string("b")),
|
||||
]
|
||||
);
|
||||
|
||||
// The external token for `y` refers back to its internal index.
|
||||
assert_eq!(grammar.external_tokens, vec![
|
||||
Variable::named("y", Rule::non_terminal(2)),
|
||||
Variable::named("z", Rule::external(1)),
|
||||
]);
|
||||
assert_eq!(
|
||||
grammar.external_tokens,
|
||||
vec![
|
||||
Variable::named("y", Rule::non_terminal(2)),
|
||||
Variable::named("z", Rule::external(1)),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_grammar_with_undefined_symbols() {
|
||||
let result = intern_symbols(&build_grammar(vec![
|
||||
Variable::named("x", Rule::named("y")),
|
||||
]));
|
||||
let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))]));
|
||||
|
||||
match result {
|
||||
Err(Error::SymbolError(message)) => assert_eq!(message, "Undefined symbol 'y'"),
|
||||
|
|
|
|||
|
|
@ -1,19 +1,19 @@
|
|||
mod intern_symbols;
|
||||
mod extract_tokens;
|
||||
mod expand_repeats;
|
||||
mod flatten_grammar;
|
||||
mod expand_tokens;
|
||||
mod extract_simple_aliases;
|
||||
mod extract_tokens;
|
||||
mod flatten_grammar;
|
||||
mod intern_symbols;
|
||||
|
||||
use crate::rules::{AliasMap, Rule, Symbol};
|
||||
use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, Variable, ExternalToken};
|
||||
use crate::error::Result;
|
||||
use self::intern_symbols::intern_symbols;
|
||||
use self::extract_tokens::extract_tokens;
|
||||
use self::expand_repeats::expand_repeats;
|
||||
use self::flatten_grammar::flatten_grammar;
|
||||
use self::expand_tokens::expand_tokens;
|
||||
use self::extract_simple_aliases::extract_simple_aliases;
|
||||
use self::extract_tokens::extract_tokens;
|
||||
use self::flatten_grammar::flatten_grammar;
|
||||
use self::intern_symbols::intern_symbols;
|
||||
use crate::error::Result;
|
||||
use crate::grammars::{ExternalToken, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable};
|
||||
use crate::rules::{AliasMap, Rule, Symbol};
|
||||
|
||||
pub(self) struct IntermediateGrammar<T, U> {
|
||||
variables: Vec<Variable>,
|
||||
|
|
@ -35,7 +35,7 @@ pub(self) struct ExtractedLexicalGrammar {
|
|||
}
|
||||
|
||||
pub(crate) fn prepare_grammar(
|
||||
input_grammar: &InputGrammar
|
||||
input_grammar: &InputGrammar,
|
||||
) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> {
|
||||
let interned_grammar = intern_symbols(input_grammar)?;
|
||||
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
use std::rc::Rc;
|
||||
use std::char;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
|
|
@ -92,6 +90,12 @@ impl Rule {
|
|||
})
|
||||
}
|
||||
|
||||
pub fn prec_dynamic(value: i32, content: Rule) -> Self {
|
||||
add_metadata(content, |params| {
|
||||
params.dynamic_precedence = value;
|
||||
})
|
||||
}
|
||||
|
||||
pub fn repeat(rule: Rule) -> Self {
|
||||
Rule::Repeat(Box::new(rule))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue