242 lines
7.6 KiB
Rust
242 lines
7.6 KiB
Rust
use super::InternedGrammar;
|
|
use crate::error::{Error, Result};
|
|
use crate::grammars::{InputGrammar, Variable, VariableType};
|
|
use crate::rules::{Rule, Symbol};
|
|
|
|
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
|
|
let interner = Interner { grammar };
|
|
|
|
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
|
|
return Err(Error::GrammarError(
|
|
"Grammar's start rule must be visible".to_string(),
|
|
));
|
|
}
|
|
|
|
let mut variables = Vec::with_capacity(grammar.variables.len());
|
|
for variable in grammar.variables.iter() {
|
|
variables.push(Variable {
|
|
name: variable.name.clone(),
|
|
kind: variable_type_for_name(&variable.name),
|
|
rule: interner.intern_rule(&variable.rule)?,
|
|
});
|
|
}
|
|
|
|
let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
|
|
for external_token in grammar.external_tokens.iter() {
|
|
let rule = interner.intern_rule(&external_token)?;
|
|
let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
|
|
(name.clone(), variable_type_for_name(&name))
|
|
} else {
|
|
(String::new(), VariableType::Anonymous)
|
|
};
|
|
external_tokens.push(Variable { name, kind, rule });
|
|
}
|
|
|
|
let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len());
|
|
for extra_token in grammar.extra_tokens.iter() {
|
|
extra_tokens.push(interner.intern_rule(extra_token)?);
|
|
}
|
|
|
|
let mut expected_conflicts = Vec::new();
|
|
for conflict in grammar.expected_conflicts.iter() {
|
|
let mut interned_conflict = Vec::with_capacity(conflict.len());
|
|
for name in conflict {
|
|
interned_conflict.push(
|
|
interner
|
|
.intern_name(&name)
|
|
.ok_or_else(|| symbol_error(name))?,
|
|
);
|
|
}
|
|
expected_conflicts.push(interned_conflict);
|
|
}
|
|
|
|
let mut variables_to_inline = Vec::new();
|
|
for name in grammar.variables_to_inline.iter() {
|
|
if let Some(symbol) = interner.intern_name(&name) {
|
|
variables_to_inline.push(symbol);
|
|
}
|
|
}
|
|
|
|
let mut word_token = None;
|
|
if let Some(name) = grammar.word_token.as_ref() {
|
|
word_token = Some(
|
|
interner
|
|
.intern_name(&name)
|
|
.ok_or_else(|| symbol_error(&name))?,
|
|
);
|
|
}
|
|
|
|
Ok(InternedGrammar {
|
|
variables,
|
|
external_tokens,
|
|
extra_tokens,
|
|
expected_conflicts,
|
|
variables_to_inline,
|
|
word_token,
|
|
})
|
|
}
|
|
|
|
struct Interner<'a> {
|
|
grammar: &'a InputGrammar,
|
|
}
|
|
|
|
impl<'a> Interner<'a> {
|
|
fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
|
|
match rule {
|
|
Rule::Choice(elements) => {
|
|
let mut result = Vec::with_capacity(elements.len());
|
|
for element in elements {
|
|
result.push(self.intern_rule(element)?);
|
|
}
|
|
Ok(Rule::Choice(result))
|
|
}
|
|
Rule::Seq(elements) => {
|
|
let mut result = Vec::with_capacity(elements.len());
|
|
for element in elements {
|
|
result.push(self.intern_rule(element)?);
|
|
}
|
|
Ok(Rule::Seq(result))
|
|
}
|
|
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))),
|
|
Rule::Metadata { rule, params } => Ok(Rule::Metadata {
|
|
rule: Box::new(self.intern_rule(rule)?),
|
|
params: params.clone(),
|
|
}),
|
|
|
|
Rule::NamedSymbol(name) => {
|
|
if let Some(symbol) = self.intern_name(&name) {
|
|
Ok(Rule::Symbol(symbol))
|
|
} else {
|
|
Err(symbol_error(name))
|
|
}
|
|
}
|
|
|
|
_ => Ok(rule.clone()),
|
|
}
|
|
}
|
|
|
|
fn intern_name(&self, symbol: &str) -> Option<Symbol> {
|
|
for (i, variable) in self.grammar.variables.iter().enumerate() {
|
|
if variable.name == symbol {
|
|
return Some(Symbol::non_terminal(i));
|
|
}
|
|
}
|
|
|
|
for (i, external_token) in self.grammar.external_tokens.iter().enumerate() {
|
|
if let Rule::NamedSymbol(name) = external_token {
|
|
if name == symbol {
|
|
return Some(Symbol::external(i));
|
|
}
|
|
}
|
|
}
|
|
|
|
return None;
|
|
}
|
|
}
|
|
|
|
fn symbol_error(name: &str) -> Error {
|
|
Error::SymbolError(format!("Undefined symbol '{}'", name))
|
|
}
|
|
|
|
fn variable_type_for_name(name: &str) -> VariableType {
|
|
if name.starts_with("_") {
|
|
VariableType::Hidden
|
|
} else {
|
|
VariableType::Named
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Named references between variables are replaced by non-terminal
    /// symbols indexed by variable position, and an underscore-prefixed
    /// variable comes back as hidden.
    #[test]
    fn test_basic_repeat_expansion() {
        let input_grammar = build_grammar(vec![
            Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])),
            Variable::named("y", Rule::named("_z")),
            Variable::named("_z", Rule::string("a")),
        ]);

        let grammar = intern_symbols(&input_grammar).unwrap();

        let expected_variables = vec![
            Variable::named(
                "x",
                Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
            ),
            Variable::named("y", Rule::non_terminal(2)),
            Variable::hidden("_z", Rule::string("a")),
        ];
        assert_eq!(grammar.variables, expected_variables);
    }

    /// A name that is both a variable and an external token resolves to the
    /// variable; a name that is only an external token resolves to its
    /// external index.
    #[test]
    fn test_interning_external_token_names() {
        // Variable `y` is both an internal and an external token.
        // Variable `z` is just an external token.
        let mut input_grammar = build_grammar(vec![
            Variable::named(
                "w",
                Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]),
            ),
            Variable::named("x", Rule::string("a")),
            Variable::named("y", Rule::string("b")),
        ]);
        input_grammar.external_tokens.push(Rule::named("y"));
        input_grammar.external_tokens.push(Rule::named("z"));

        let grammar = intern_symbols(&input_grammar).unwrap();

        // Variable `y` is referred to by its internal index.
        // Variable `z` is referred to by its external index.
        let expected_variables = vec![
            Variable::named(
                "w",
                Rule::choice(vec![
                    Rule::non_terminal(1),
                    Rule::non_terminal(2),
                    Rule::external(1),
                ]),
            ),
            Variable::named("x", Rule::string("a")),
            Variable::named("y", Rule::string("b")),
        ];
        assert_eq!(grammar.variables, expected_variables);

        // The external token for `y` refers back to its internal index.
        let expected_external_tokens = vec![
            Variable::named("y", Rule::non_terminal(2)),
            Variable::named("z", Rule::external(1)),
        ];
        assert_eq!(grammar.external_tokens, expected_external_tokens);
    }

    /// Referring to a name that is defined nowhere yields a symbol error
    /// naming the missing symbol.
    #[test]
    fn test_grammar_with_undefined_symbols() {
        let input_grammar = build_grammar(vec![Variable::named("x", Rule::named("y"))]);

        if let Err(Error::SymbolError(message)) = intern_symbols(&input_grammar) {
            assert_eq!(message, "Undefined symbol 'y'");
        } else {
            panic!("Expected an error but got none");
        }
    }

    /// Builds an input grammar containing only the given variables; every
    /// other collection is empty and there is no word token.
    fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
        InputGrammar {
            name: "the_language".to_string(),
            variables,
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
        }
    }
}
|