From 40d24097ecdcc188f255a9fbb03adca05c5f39fd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Dec 2018 12:37:09 -0800 Subject: [PATCH] Implement extract_simple_aliases --- src/grammars.rs | 9 + src/prepare_grammar/extract_simple_aliases.rs | 191 +++++++++++++++++- src/prepare_grammar/mod.rs | 4 +- 3 files changed, 199 insertions(+), 5 deletions(-) diff --git a/src/grammars.rs b/src/grammars.rs index 3b3d47f7..b76a583e 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -97,6 +97,15 @@ impl ProductionStep { alias: self.alias, } } + + pub(crate) fn with_alias(self, value: &str, is_named: bool) -> Self { + Self { + symbol: self.symbol, + precedence: self.precedence, + associativity: self.associativity, + alias: Some(Alias { value: value.to_string(), is_named }), + } + } } impl Variable { diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs index 2a175242..a10c7982 100644 --- a/src/prepare_grammar/extract_simple_aliases.rs +++ b/src/prepare_grammar/extract_simple_aliases.rs @@ -1,9 +1,194 @@ -use crate::rules::AliasMap; +use crate::rules::{Alias, AliasMap, Symbol, SymbolType}; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +#[derive(Clone, Default)] +struct SymbolStatus { + alias: Option<Alias>, + conflicting: bool, +} + pub(super) fn extract_simple_aliases( syntax_grammar: &mut SyntaxGrammar, - lexical_grammar: &mut LexicalGrammar + lexical_grammar: &LexicalGrammar ) -> AliasMap { - unimplemented!(); + // Determine which symbols in the grammars are *always* aliased to a single name.
+ let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; + let mut non_terminal_status_list = vec![SymbolStatus::default(); syntax_grammar.variables.len()]; + let mut external_status_list = vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; + for variable in syntax_grammar.variables.iter() { + for production in variable.productions.iter() { + for step in production.steps.iter() { + let mut status = match step.symbol { + Symbol { kind: SymbolType::External, index} => &mut external_status_list[index], + Symbol { kind: SymbolType::NonTerminal, index} => &mut non_terminal_status_list[index], + Symbol { kind: SymbolType::Terminal, index} => &mut terminal_status_list[index], + }; + + if step.alias.is_none() { + status.alias = None; + status.conflicting = true; + } + + if !status.conflicting { + if status.alias.is_none() { + status.alias = step.alias.clone(); + } else if status.alias != step.alias { + status.alias = None; + status.conflicting = true; + } + } + } + } + } + + // Remove the aliases for those symbols. + for variable in syntax_grammar.variables.iter_mut() { + for production in variable.productions.iter_mut() { + for step in production.steps.iter_mut() { + let status = match step.symbol { + Symbol { kind: SymbolType::External, index} => &external_status_list[index], + Symbol { kind: SymbolType::NonTerminal, index} => &non_terminal_status_list[index], + Symbol { kind: SymbolType::Terminal, index} => &terminal_status_list[index], + }; + + if status.alias.is_some() { + step.alias = None; + } + } + } + } + + // Populate a map of the symbols to their aliases. 
+ let mut result = AliasMap::new(); + for (i, status) in terminal_status_list.into_iter().enumerate() { + if let Some(alias) = status.alias { + result.insert(Symbol::terminal(i), alias); + } + } + for (i, status) in non_terminal_status_list.into_iter().enumerate() { + if let Some(alias) = status.alias { + result.insert(Symbol::non_terminal(i), alias); + } + } + for (i, status) in external_status_list.into_iter().enumerate() { + if let Some(alias) = status.alias { + result.insert(Symbol::external(i), alias); + } + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep}; + use crate::nfa::Nfa; + + #[test] + fn test_extract_simple_aliases() { + let mut syntax_grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ], + }, + ], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + // Token 0 is always aliased as "a1". + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + + // Token 1 is aliased above, but not here. + ProductionStep::new(Symbol::terminal(1)), + + // Token 2 is aliased differently than above. 
+ ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), + ], + }, + ], + }, + ], + extra_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let lexical_grammar = LexicalGrammar { + variables: vec![ + LexicalVariable { + name: "t1".to_string(), + kind: VariableType::Anonymous, + nfa: Nfa::new(), + }, + LexicalVariable { + name: "t2".to_string(), + kind: VariableType::Anonymous, + nfa: Nfa::new(), + }, + LexicalVariable { + name: "t3".to_string(), + kind: VariableType::Anonymous, + nfa: Nfa::new(), + } + ], + separators: Vec::new(), + }; + + let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); + assert_eq!(simple_aliases.len(), 1); + assert_eq!(simple_aliases[&Symbol::terminal(0)], Alias { + value: "a1".to_string(), + is_named: true, + }); + + assert_eq!(syntax_grammar.variables, vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + // 'Simple' alias removed + ProductionStep::new(Symbol::terminal(0)), + + // Other aliases unchanged + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ], + }, + ], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)), + ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), + ], + }, + ], + }, + ]); + } } diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs index 08233c53..22435fca 100644 --- a/src/prepare_grammar/mod.rs +++ b/src/prepare_grammar/mod.rs @@ -41,7 +41,7 @@ pub(crate) fn prepare_grammar( let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; let syntax_grammar = 
expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; - let mut lexical_grammar = expand_tokens(lexical_grammar)?; - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar); + let lexical_grammar = expand_tokens(lexical_grammar)?; + let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); Ok((syntax_grammar, lexical_grammar, simple_aliases)) }