Implement extract_tokens

This commit is contained in:
Max Brunsfeld 2018-12-06 22:11:52 -08:00
parent a4c4b85a16
commit 0688a5edd3
10 changed files with 621 additions and 128 deletions

View file

@ -25,7 +25,7 @@ struct ParseTableBuilder<'a> {
parse_table: ParseTable,
}
pub fn build_tables(
pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap

View file

@ -1,7 +1,7 @@
use crate::rules::{Associativity, Alias, Rule, Symbol};
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VariableType {
pub(crate) enum VariableType {
Hidden,
Auxiliary,
Anonymous,
@ -11,16 +11,16 @@ pub enum VariableType {
// Input grammar
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct InputVariable {
pub(crate) struct Variable {
pub name: String,
pub kind: VariableType,
pub rule: Rule,
}
#[derive(PartialEq, Eq)]
pub struct InputGrammar {
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct InputGrammar {
pub name: String,
pub variables: Vec<InputVariable>,
pub variables: Vec<Variable>,
pub extra_tokens: Vec<Rule>,
pub expected_conflicts: Vec<Vec<String>>,
pub external_tokens: Vec<Rule>,
@ -30,60 +30,53 @@ pub struct InputGrammar {
// Extracted lexical grammar
#[derive(PartialEq, Eq)]
pub struct LexicalVariable {
name: String,
kind: VariableType,
rule: Rule,
is_string: bool,
}
pub struct LexicalGrammar {
variables: Vec<LexicalVariable>,
separators: Vec<Rule>,
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct LexicalGrammar {
pub variables: Vec<Variable>,
pub separators: Vec<Rule>,
}
// Extracted syntax grammar
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProductionStep {
symbol: Symbol,
precedence: i32,
associativity: Option<Associativity>,
alias: Option<Alias>,
is_excluded: bool,
pub(crate) struct ProductionStep {
pub symbol: Symbol,
pub precedence: i32,
pub associativity: Option<Associativity>,
pub alias: Option<Alias>,
pub is_excluded: bool,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Production {
steps: Vec<ProductionStep>,
dynamic_precedence: i32,
pub(crate) struct Production {
pub steps: Vec<ProductionStep>,
pub dynamic_precedence: i32,
}
#[derive(Clone, PartialEq, Eq)]
pub struct SyntaxVariable {
name: String,
kind: VariableType,
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SyntaxVariable {
pub name: String,
pub kind: VariableType,
}
#[derive(Clone, PartialEq, Eq)]
pub struct ExternalToken {
name: String,
kind: VariableType,
corresponding_internal_token: Symbol,
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ExternalToken {
pub name: String,
pub kind: VariableType,
pub corresponding_internal_token: Option<Symbol>,
}
pub struct SyntaxGrammar {
variables: Vec<SyntaxVariable>,
extra_tokens: Vec<Symbol>,
expected_conflicts: Vec<Vec<Symbol>>,
external_tokens: Vec<ExternalToken>,
variables_to_inline: Vec<Symbol>,
word_token: Symbol,
#[derive(Debug)]
pub(crate) struct SyntaxGrammar {
pub variables: Vec<SyntaxVariable>,
pub extra_tokens: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>,
pub external_tokens: Vec<ExternalToken>,
pub variables_to_inline: Vec<Symbol>,
pub word_token: Symbol,
}
#[cfg(test)]
impl InputVariable {
impl Variable {
pub fn named(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Named, rule }
}
@ -95,4 +88,8 @@ impl InputVariable {
pub fn hidden(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Hidden, rule }
}
pub fn anonymous(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Anonymous, rule }
}
}

View file

@ -1,13 +1,13 @@
use serde_json::{Map, Value};
use crate::error::Result;
use crate::grammars::{InputGrammar, InputVariable, VariableType};
use crate::grammars::{InputGrammar, Variable, VariableType};
use crate::rules::Rule;
use std::collections::HashMap;
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
pub enum RuleJSON {
enum RuleJSON {
BLANK,
STRING {
value: String,
@ -58,12 +58,12 @@ struct GrammarJSON {
word: Option<String>,
}
pub fn parse_grammar(input: &str) -> Result<InputGrammar> {
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
let grammar_json: GrammarJSON = serde_json::from_str(&input)?;
let mut variables = Vec::with_capacity(grammar_json.rules.len());
for (name, value) in grammar_json.rules {
variables.push(InputVariable {
variables.push(Variable {
name: name.to_owned(),
kind: VariableType::Named,
rule: parse_rule(serde_json::from_value(value)?),
@ -138,12 +138,12 @@ mod tests {
assert_eq!(grammar.name, "my_lang");
assert_eq!(grammar.variables, vec![
InputVariable {
Variable {
name: "file".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
},
InputVariable {
Variable {
name: "statement".to_string(),
kind: VariableType::Named,
rule: Rule::String("foo".to_string())

View file

@ -1,5 +1,5 @@
use crate::rules::{Rule, Symbol};
use crate::grammars::{InputVariable, VariableType};
use crate::grammars::{Variable, VariableType};
use std::collections::HashMap;
use std::mem;
use std::rc::Rc;
@ -9,12 +9,12 @@ struct Expander {
variable_name: String,
repeat_count_in_variable: usize,
preceding_symbol_count: usize,
auxiliary_variables: Vec<InputVariable>,
auxiliary_variables: Vec<Variable>,
existing_repeats: HashMap<Rule, Symbol>
}
impl Expander {
fn expand_variable(&mut self, variable: &mut InputVariable) {
fn expand_variable(&mut self, variable: &mut Variable) {
self.variable_name.clear();
self.variable_name.push_str(&variable.name);
self.repeat_count_in_variable = 0;
@ -48,7 +48,7 @@ impl Expander {
let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len());
let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol));
self.existing_repeats.insert(inner_rule.clone(), repeat_symbol);
self.auxiliary_variables.push(InputVariable {
self.auxiliary_variables.push(Variable {
name: rule_name,
kind: VariableType::Auxiliary,
rule: Rule::Choice {
@ -100,7 +100,7 @@ mod tests {
fn test_basic_repeat_expansion() {
// Repeats nested inside of sequences and choices are expanded.
let grammar = expand_repeats(build_grammar(vec![
InputVariable::named("rule0", Rule::seq(vec![
Variable::named("rule0", Rule::seq(vec![
Rule::terminal(10),
Rule::choice(vec![
Rule::repeat(Rule::terminal(11)),
@ -111,7 +111,7 @@ mod tests {
]));
assert_eq!(grammar.variables, vec![
InputVariable::named("rule0", Rule::seq(vec![
Variable::named("rule0", Rule::seq(vec![
Rule::terminal(10),
Rule::choice(vec![
Rule::non_terminal(1),
@ -119,14 +119,14 @@ mod tests {
]),
Rule::terminal(13),
])),
InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![
Variable::auxiliary("rule0_repeat1", Rule::choice(vec![
Rule::seq(vec![
Rule::non_terminal(1),
Rule::non_terminal(1),
]),
Rule::terminal(11),
])),
InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![
Variable::auxiliary("rule0_repeat2", Rule::choice(vec![
Rule::seq(vec![
Rule::non_terminal(2),
Rule::non_terminal(2),
@ -140,11 +140,11 @@ mod tests {
fn test_repeat_deduplication() {
// Terminal 4 appears inside of a repeat in three different places.
let grammar = expand_repeats(build_grammar(vec![
InputVariable::named("rule0", Rule::choice(vec![
Variable::named("rule0", Rule::choice(vec![
Rule::seq(vec![ Rule::terminal(1), Rule::repeat(Rule::terminal(4)) ]),
Rule::seq(vec![ Rule::terminal(2), Rule::repeat(Rule::terminal(4)) ]),
])),
InputVariable::named("rule1", Rule::seq(vec![
Variable::named("rule1", Rule::seq(vec![
Rule::terminal(3),
Rule::repeat(Rule::terminal(4)),
])),
@ -152,15 +152,15 @@ mod tests {
// Only one auxiliary rule is created for repeating terminal 4.
assert_eq!(grammar.variables, vec![
InputVariable::named("rule0", Rule::choice(vec![
Variable::named("rule0", Rule::choice(vec![
Rule::seq(vec![ Rule::terminal(1), Rule::non_terminal(2) ]),
Rule::seq(vec![ Rule::terminal(2), Rule::non_terminal(2) ]),
])),
InputVariable::named("rule1", Rule::seq(vec![
Variable::named("rule1", Rule::seq(vec![
Rule::terminal(3),
Rule::non_terminal(2),
])),
InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![
Variable::auxiliary("rule0_repeat1", Rule::choice(vec![
Rule::seq(vec![
Rule::non_terminal(2),
Rule::non_terminal(2),
@ -173,7 +173,7 @@ mod tests {
#[test]
fn test_expansion_of_nested_repeats() {
let grammar = expand_repeats(build_grammar(vec![
InputVariable::named("rule0", Rule::seq(vec![
Variable::named("rule0", Rule::seq(vec![
Rule::terminal(10),
Rule::repeat(Rule::seq(vec![
Rule::terminal(11),
@ -183,18 +183,18 @@ mod tests {
]));
assert_eq!(grammar.variables, vec![
InputVariable::named("rule0", Rule::seq(vec![
Variable::named("rule0", Rule::seq(vec![
Rule::terminal(10),
Rule::non_terminal(2),
])),
InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![
Variable::auxiliary("rule0_repeat1", Rule::choice(vec![
Rule::seq(vec![
Rule::non_terminal(1),
Rule::non_terminal(1),
]),
Rule::terminal(12),
])),
InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![
Variable::auxiliary("rule0_repeat2", Rule::choice(vec![
Rule::seq(vec![
Rule::non_terminal(2),
Rule::non_terminal(2),
@ -207,7 +207,7 @@ mod tests {
]);
}
fn build_grammar(variables: Vec<InputVariable>) -> ExtractedGrammar {
fn build_grammar(variables: Vec<Variable>) -> ExtractedGrammar {
ExtractedGrammar {
variables,
extra_tokens: Vec::new(),

View file

@ -1,7 +1,491 @@
use crate::error::Result;
use crate::grammars::LexicalGrammar;
use std::collections::HashMap;
use std::rc::Rc;
use std::mem;
use crate::error::{Error, Result};
use crate::rules::{Rule, MetadataParams, Symbol, SymbolType};
use crate::grammars::{Variable, VariableType, LexicalGrammar, ExternalToken};
use super::{InternedGrammar, ExtractedGrammar};
pub(super) fn extract_tokens(grammar: InternedGrammar) -> Result<(ExtractedGrammar, LexicalGrammar)> {
unimplemented!();
pub(super) fn extract_tokens(
mut grammar: InternedGrammar
) -> Result<(ExtractedGrammar, LexicalGrammar)> {
let mut extractor = TokenExtractor {
current_variable_name: String::new(),
current_variable_token_count: 0,
extracted_variables: Vec::new(),
extracted_usage_counts: Vec::new(),
};
for mut variable in grammar.variables.iter_mut() {
extractor.extract_tokens_in_variable(&mut variable);
}
for mut variable in grammar.external_tokens.iter_mut() {
extractor.extract_tokens_in_variable(&mut variable);
}
let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len());
for variable in extractor.extracted_variables {
lexical_variables.push(Variable {
name: variable.name,
kind: variable.kind,
rule: variable.rule,
});
}
// If a variable's entire rule was extracted as a token and that token didn't
// appear within any other rule, then remove that variable from the syntax
// grammar, giving its name to the token in the lexical grammar. Any symbols
// that pointed to that variable will need to be updated to point to the
// variable in the lexical grammar. Symbols that pointed to later variables
// will need to have their indices decremented.
let mut variables = Vec::new();
let mut symbol_replacer = SymbolReplacer { replacements: HashMap::new() };
for (i, variable) in grammar.variables.into_iter().enumerate() {
if let Rule::Symbol(Symbol { kind: SymbolType::Terminal, index }) = variable.rule {
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
let mut lexical_variable = &mut lexical_variables[index];
lexical_variable.kind = variable.kind;
lexical_variable.name = variable.name;
symbol_replacer.replacements.insert(i, index);
continue;
}
}
variables.push(variable);
}
for variable in variables.iter_mut() {
variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule);
}
let expected_conflicts = grammar.expected_conflicts
.into_iter()
.map(|conflict|
conflict
.iter()
.map(|symbol| symbol_replacer.replace_symbol(*symbol))
.collect()
).collect();
let variables_to_inline = grammar.variables_to_inline
.into_iter()
.map(|symbol| symbol_replacer.replace_symbol(symbol))
.collect();
let mut separators = Vec::new();
let mut extra_tokens = Vec::new();
for rule in grammar.extra_tokens {
if let Rule::Symbol(symbol) = rule {
let new_symbol = symbol_replacer.replace_symbol(symbol);
if new_symbol.is_non_terminal() {
return Err(Error::GrammarError(format!(
"Non-token symbol '{}' cannot be used as an extra token",
&variables[new_symbol.index].name
)));
} else {
extra_tokens.push(new_symbol);
}
} else {
if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
extra_tokens.push(Symbol::terminal(index));
} else {
separators.push(rule);
}
}
}
let mut external_tokens = Vec::new();
for external_token in grammar.external_tokens {
let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
if let Rule::Symbol(symbol) = rule {
if symbol.is_non_terminal() {
return Err(Error::GrammarError(format!(
"Rule '{}' cannot be used as both an external token and a non-terminal rule",
&variables[symbol.index].name,
)));
}
if symbol.is_external() {
external_tokens.push(ExternalToken {
name: external_token.name,
kind: external_token.kind,
corresponding_internal_token: None,
})
} else {
external_tokens.push(ExternalToken {
name: lexical_variables[symbol.index].name.clone(),
kind: external_token.kind,
corresponding_internal_token: Some(symbol),
})
}
} else {
return Err(Error::GrammarError(format!(
"Non-symbol rules cannot be used as external tokens"
)));
}
}
let mut word_token = None;
if let Some(token) = grammar.word_token {
let token = symbol_replacer.replace_symbol(token);
if token.is_non_terminal() {
return Err(Error::GrammarError(format!(
"Non-terminal symbol '{}' cannot be used as the word token",
&variables[token.index].name
)));
}
word_token = Some(token);
}
Ok((
ExtractedGrammar {
variables,
expected_conflicts,
extra_tokens,
variables_to_inline,
external_tokens,
word_token,
},
LexicalGrammar {
variables: lexical_variables,
separators,
}
))
}
struct TokenExtractor {
current_variable_name: String,
current_variable_token_count: usize,
extracted_variables: Vec<Variable>,
extracted_usage_counts: Vec<usize>,
}
struct SymbolReplacer {
replacements: HashMap<usize, usize>
}
impl TokenExtractor {
fn extract_tokens_in_variable(&mut self, variable: &mut Variable) {
self.current_variable_name.clear();
self.current_variable_name.push_str(&variable.name);
self.current_variable_token_count = 0;
let mut rule = Rule::Blank;
mem::swap(&mut rule, &mut variable.rule);
variable.rule = self.extract_tokens_in_rule(&rule);
}
fn extract_tokens_in_rule(&mut self, input: &Rule) -> Rule {
match input {
Rule::String(name) => self.extract_token(input, Some(name)).into(),
Rule::Pattern(..) => self.extract_token(input, None).into(),
Rule::Metadata { params, rule } => {
if params.is_token {
let mut params = params.clone();
params.is_token = false;
let mut string_value = None;
if let Rule::String(value) = rule.as_ref() {
string_value = Some(value);
}
let rule_to_extract = if params == MetadataParams::default() {
rule.as_ref()
} else {
input
};
self.extract_token(rule_to_extract, string_value).into()
} else {
Rule::Metadata {
params: params.clone(),
rule: Rc::new(self.extract_tokens_in_rule((&rule).clone()))
}
}
},
Rule::Repeat(content) => Rule::Repeat(
Rc::new(self.extract_tokens_in_rule(content))
),
Rule::Seq { left, right } => Rule::Seq {
left: Rc::new(self.extract_tokens_in_rule(left)),
right: Rc::new(self.extract_tokens_in_rule(right)),
},
Rule::Choice { elements } => Rule::Choice {
elements: elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
},
_ => input.clone()
}
}
fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Symbol {
for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
if variable.rule == *rule {
self.extracted_usage_counts[i] += 1;
return Symbol::terminal(i)
}
}
let index = self.extracted_variables.len();
let variable = if let Some(string_value) = string_value {
Variable::anonymous(string_value, rule.clone())
} else {
self.current_variable_token_count += 1;
Variable::auxiliary(
&format!(
"{}_token{}",
&self.current_variable_name,
self.current_variable_token_count
),
rule.clone()
)
};
self.extracted_variables.push(variable);
self.extracted_usage_counts.push(1);
Symbol::terminal(index)
}
}
impl SymbolReplacer {
fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
match rule {
Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
Rule::Choice { elements } => Rule::Choice {
elements: elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
},
Rule::Seq { left, right } => Rule::Seq {
left: Rc::new(self.replace_symbols_in_rule(left)),
right: Rc::new(self.replace_symbols_in_rule(right)),
},
Rule::Repeat(content) => Rule::Repeat(
Rc::new(self.replace_symbols_in_rule(content))
),
Rule::Metadata { rule, params } => Rule::Metadata {
params: params.clone(),
rule: Rc::new(self.replace_symbols_in_rule(rule)),
},
_ => rule.clone()
}
}
fn replace_symbol(&self, symbol: Symbol) -> Symbol {
if !symbol.is_non_terminal() {
return symbol
}
if let Some(replacement) = self.replacements.get(&symbol.index) {
return Symbol::terminal(*replacement);
}
let mut adjusted_index = symbol.index;
for (replaced_index, _) in self.replacements.iter() {
if *replaced_index < symbol.index {
adjusted_index -= 1;
}
}
return Symbol::non_terminal(adjusted_index);
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_extraction() {
let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
Variable::named("rule_0", Rule::repeat(Rule::seq(vec![
Rule::string("a"),
Rule::pattern("b"),
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
Rule::token(Rule::repeat(Rule::choice(vec![
Rule::string("c"),
Rule::string("d"),
])))
])
]))),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_2", Rule::pattern("b")),
Variable::named("rule_3", Rule::seq(vec![
Rule::non_terminal(2),
Rule::Blank,
])),
])).unwrap();
assert_eq!(syntax_grammar.variables, vec![
Variable::named("rule_0", Rule::repeat(Rule::seq(vec![
// The string "a" was replaced by a symbol referencing the lexical grammar
Rule::terminal(0),
// The pattern "b" was replaced by a symbol referencing the lexical grammar
Rule::terminal(1),
Rule::choice(vec![
// The symbol referencing `rule_1` was replaced by a symbol referencing
// the lexical grammar.
Rule::terminal(3),
// The symbol referencing `rule_2` had its index decremented because
// `rule_1` was moved to the lexical grammar.
Rule::non_terminal(1),
// The rule wrapped in `token` was replaced by a symbol referencing
// the lexical grammar.
Rule::terminal(2),
])
]))),
// The pattern "e" was only used in one place: as the definition of `rule_1`,
// so that rule was moved to the lexical grammar. The pattern "b" appeared in
// two places, so it was not moved into the lexical grammar.
Variable::named("rule_2", Rule::terminal(1)),
Variable::named("rule_3", Rule::seq(vec![
Rule::non_terminal(1),
Rule::Blank,
])),
]);
assert_eq!(lexical_grammar.variables, vec![
Variable::anonymous("a", Rule::string("a")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
Variable::auxiliary("rule_0_token2", Rule::repeat(Rule::choice(vec![
Rule::string("c"),
Rule::string("d"),
]))),
Variable::named("rule_1", Rule::pattern("e")),
]);
}
#[test]
fn test_start_rule_is_token() {
let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
Variable::named("rule_0", Rule::string("hello")),
])).unwrap();
assert_eq!(syntax_grammar.variables, vec![
Variable::named("rule_0", Rule::terminal(0)),
]);
assert_eq!(lexical_grammar.variables, vec![
Variable::anonymous("hello", Rule::string("hello")),
])
}
#[test]
fn test_extracting_extra_tokens() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*")),
]);
grammar.extra_tokens = vec![
Rule::string(" "),
Rule::non_terminal(1),
];
let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
assert_eq!(syntax_grammar.extra_tokens, vec![
Symbol::terminal(1),
]);
assert_eq!(lexical_grammar.separators, vec![
Rule::string(" "),
]);
}
#[test]
fn test_extract_externals() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::seq(vec![
Rule::external(0),
Rule::string("a"),
Rule::non_terminal(1),
Rule::non_terminal(2),
])),
Variable::named("rule_1", Rule::string("b")),
Variable::named("rule_2", Rule::string("c")),
]);
grammar.external_tokens = vec![
Variable::named("external_0", Rule::external(0)),
Variable::anonymous("a", Rule::string("a")),
Variable::named("rule_2", Rule::non_terminal(2)),
];
let (syntax_grammar, _) = extract_tokens(grammar).unwrap();
assert_eq!(syntax_grammar.external_tokens, vec![
ExternalToken {
name: "external_0".to_string(),
kind: VariableType::Named,
corresponding_internal_token: None,
},
ExternalToken {
name: "a".to_string(),
kind: VariableType::Anonymous,
corresponding_internal_token: Some(Symbol::terminal(0)),
},
ExternalToken {
name: "rule_2".to_string(),
kind: VariableType::Named,
corresponding_internal_token: Some(Symbol::terminal(2)),
},
]);
}
#[test]
fn test_error_on_non_terminal_symbol_extras() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::non_terminal(1)),
Variable::named("rule_1", Rule::non_terminal(2)),
Variable::named("rule_2", Rule::string("x")),
]);
grammar.extra_tokens = vec![
Rule::non_terminal(1),
];
match extract_tokens(grammar) {
Err(Error::GrammarError(s)) => {
assert_eq!(s, "Non-token symbol 'rule_1' cannot be used as an extra token");
},
_ => {
panic!("Expected an error but got no error");
}
}
}
#[test]
fn test_error_on_external_with_same_name_as_non_terminal() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::seq(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
])),
Variable::named("rule_1", Rule::seq(vec![
Rule::non_terminal(2),
Rule::non_terminal(2),
])),
Variable::named("rule_2", Rule::string("a")),
]);
grammar.external_tokens = vec![
Variable::named("rule_1", Rule::non_terminal(1)),
];
match extract_tokens(grammar) {
Err(Error::GrammarError(s)) => {
assert_eq!(s, "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
},
_ => {
panic!("Expected an error but got no error");
}
}
}
fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
InternedGrammar {
variables,
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
}
}
}

View file

@ -1,6 +1,6 @@
use crate::error::{Error, Result};
use crate::rules::{Rule, Symbol};
use crate::grammars::{InputGrammar, InputVariable, VariableType};
use crate::grammars::{InputGrammar, Variable, VariableType};
use std::rc::Rc;
use super::InternedGrammar;
@ -13,7 +13,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
let mut variables = Vec::with_capacity(grammar.variables.len());
for variable in grammar.variables.iter() {
variables.push(InputVariable {
variables.push(Variable {
name: variable.name.clone(),
kind: variable_type_for_name(&variable.name),
rule: interner.intern_rule(&variable.rule)?,
@ -28,7 +28,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
} else {
(String::new(), VariableType::Anonymous)
};
external_tokens.push(InputVariable { name, kind, rule });
external_tokens.push(Variable { name, kind, rule });
}
let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len());
@ -154,21 +154,21 @@ mod tests {
#[test]
fn test_basic_repeat_expansion() {
let grammar = intern_symbols(&build_grammar(vec![
InputVariable::named("x", Rule::choice(vec![
Variable::named("x", Rule::choice(vec![
Rule::named("y"),
Rule::named("_z"),
])),
InputVariable::named("y", Rule::named("_z")),
InputVariable::named("_z", Rule::string("a")),
Variable::named("y", Rule::named("_z")),
Variable::named("_z", Rule::string("a")),
])).unwrap();
assert_eq!(grammar.variables, vec![
InputVariable::named("x", Rule::choice(vec![
Variable::named("x", Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
])),
InputVariable::named("y", Rule::non_terminal(2)),
InputVariable::hidden("_z", Rule::string("a")),
Variable::named("y", Rule::non_terminal(2)),
Variable::hidden("_z", Rule::string("a")),
]);
}
@ -177,13 +177,13 @@ mod tests {
// Variable `y` is both an internal and an external token.
// Variable `z` is just an external token.
let mut input_grammar = build_grammar(vec![
InputVariable::named("w", Rule::choice(vec![
Variable::named("w", Rule::choice(vec![
Rule::named("x"),
Rule::named("y"),
Rule::named("z"),
])),
InputVariable::named("x", Rule::string("a")),
InputVariable::named("y", Rule::string("b")),
Variable::named("x", Rule::string("a")),
Variable::named("y", Rule::string("b")),
]);
input_grammar.external_tokens.extend(vec![
Rule::named("y"),
@ -195,26 +195,26 @@ mod tests {
// Variable `y` is referred to by its internal index.
// Variable `z` is referred to by its external index.
assert_eq!(grammar.variables, vec![
InputVariable::named("w", Rule::choice(vec![
Variable::named("w", Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
Rule::external(1),
])),
InputVariable::named("x", Rule::string("a")),
InputVariable::named("y", Rule::string("b")),
Variable::named("x", Rule::string("a")),
Variable::named("y", Rule::string("b")),
]);
// The external token for `y` refers back to its internal index.
assert_eq!(grammar.external_tokens, vec![
InputVariable::named("y", Rule::non_terminal(2)),
InputVariable::named("z", Rule::external(1)),
Variable::named("y", Rule::non_terminal(2)),
Variable::named("z", Rule::external(1)),
]);
}
#[test]
fn test_grammar_with_undefined_symbols() {
let result = intern_symbols(&build_grammar(vec![
InputVariable::named("x", Rule::named("y")),
Variable::named("x", Rule::named("y")),
]));
match result {
@ -223,7 +223,7 @@ mod tests {
}
}
fn build_grammar(variables: Vec<InputVariable>) -> InputGrammar {
fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
InputGrammar {
variables,
name: "the_language".to_string(),

View file

@ -6,7 +6,7 @@ mod normalize_rules;
mod extract_simple_aliases;
use crate::rules::{AliasMap, Rule, Symbol};
use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, InputVariable, ExternalToken};
use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, Variable, ExternalToken};
use crate::error::Result;
use self::intern_symbols::intern_symbols;
use self::extract_tokens::extract_tokens;
@ -16,7 +16,7 @@ use self::normalize_rules::normalize_rules;
use self::extract_simple_aliases::extract_simple_aliases;
pub(self) struct IntermediateGrammar<T, U> {
variables: Vec<InputVariable>,
variables: Vec<Variable>,
extra_tokens: Vec<T>,
expected_conflicts: Vec<Vec<Symbol>>,
external_tokens: Vec<U>,
@ -24,10 +24,10 @@ pub(self) struct IntermediateGrammar<T, U> {
word_token: Option<Symbol>,
}
pub(self) type InternedGrammar = IntermediateGrammar<Rule, InputVariable>;
pub(self) type InternedGrammar = IntermediateGrammar<Rule, Variable>;
pub(self) type ExtractedGrammar = IntermediateGrammar<Symbol, ExternalToken>;
pub fn prepare_grammar(
pub(crate) fn prepare_grammar(
input_grammar: &InputGrammar
) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> {
let interned_grammar = intern_symbols(input_grammar)?;

View file

@ -2,7 +2,7 @@ use crate::rules::{Symbol, AliasMap};
use crate::grammars::{SyntaxGrammar, LexicalGrammar};
use crate::tables::{ParseTable, LexTable};
pub fn render_c_code(
pub(crate) fn render_c_code(
name: &str,
parse_table: ParseTable,
main_lex_table: LexTable,

View file

@ -2,47 +2,47 @@ use std::rc::Rc;
use std::collections::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum SymbolType {
pub(crate) enum SymbolType {
External,
Terminal,
NonTerminal,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Associativity {
pub(crate) enum Associativity {
Left,
Right
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Alias {
value: String,
is_named: bool,
pub(crate) struct Alias {
pub value: String,
pub is_named: bool,
}
pub type AliasMap = HashMap<Symbol, Alias>;
pub(crate) type AliasMap = HashMap<Symbol, Alias>;
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct MetadataParams {
precedence: Option<i32>,
dynamic_precedence: i32,
associativity: Option<Associativity>,
is_token: bool,
is_string: bool,
is_active: bool,
is_main_token: bool,
is_excluded: bool,
alias: Option<Alias>,
pub(crate) struct MetadataParams {
pub precedence: Option<i32>,
pub dynamic_precedence: i32,
pub associativity: Option<Associativity>,
pub is_token: bool,
pub is_string: bool,
pub is_active: bool,
pub is_main_token: bool,
pub is_excluded: bool,
pub alias: Option<Alias>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Symbol {
kind: SymbolType,
index: usize,
pub(crate) struct Symbol {
pub kind: SymbolType,
pub index: usize,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Rule {
pub(crate) enum Rule {
Blank,
CharacterSet(Vec<char>),
String(String),
@ -153,9 +153,21 @@ impl Rule {
pub fn string(value: &'static str) -> Self {
Rule::String(value.to_string())
}
pub fn pattern(value: &'static str) -> Self {
Rule::Pattern(value.to_string())
}
}
impl Symbol {
pub fn is_non_terminal(&self) -> bool {
return self.kind == SymbolType::NonTerminal
}
pub fn is_external(&self) -> bool {
return self.kind == SymbolType::External
}
pub fn non_terminal(index: usize) -> Self {
Symbol { kind: SymbolType::NonTerminal, index }
}

View file

@ -2,12 +2,12 @@ use std::collections::HashMap;
use std::ops::Range;
use crate::rules::{Associativity, Symbol, Alias};
pub type AliasSequenceId = usize;
pub type ParseStateId = usize;
pub type LexStateId = usize;
pub(crate) type AliasSequenceId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseActionType {
pub(crate) enum ParseActionType {
Error,
Shift,
Reduce,
@ -16,7 +16,7 @@ pub enum ParseActionType {
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseAction {
pub(crate) enum ParseAction {
Accept,
Error,
Shift(ParseStateId),
@ -34,44 +34,44 @@ pub enum ParseAction {
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseTableEntry {
pub(crate) struct ParseTableEntry {
actions: Vec<ParseAction>,
reusable: bool,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseState {
pub(crate) struct ParseState {
terminal_entries: HashMap<Symbol, ParseTableEntry>,
nonterminal_entries: HashMap<Symbol, ParseStateId>
}
#[derive(Debug, PartialEq, Eq)]
pub struct ParseTable {
pub(crate) struct ParseTable {
states: Vec<ParseState>,
alias_sequences: Vec<Vec<Alias>>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AdvanceAction {
pub(crate) struct AdvanceAction {
state: LexStateId,
precedence: Range<i32>,
in_main_token: bool,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AcceptTokenAction {
pub(crate) struct AcceptTokenAction {
symbol: Symbol,
precedence: i32,
implicit_precedence: i32,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LexState {
pub(crate) struct LexState {
advance_actions: HashMap<Symbol, AdvanceAction>,
accept_action: Option<AcceptTokenAction>,
}
#[derive(Debug, PartialEq, Eq)]
pub struct LexTable {
pub(crate) struct LexTable {
states: Vec<LexState>,
}