diff --git a/src/build_tables/inline_variables.rs b/src/build_tables/inline_variables.rs deleted file mode 100644 index affbe163..00000000 --- a/src/build_tables/inline_variables.rs +++ /dev/null @@ -1,441 +0,0 @@ -use super::item::ParseItem; -use crate::grammars::{Production, SyntaxGrammar}; -use std::collections::HashMap; - -pub(crate) struct InlinedProductionMap { - pub inlined_productions: Vec, - item_map: HashMap>, -} - -impl InlinedProductionMap { - pub fn new(grammar: &SyntaxGrammar) -> Self { - let mut result = Self { - inlined_productions: Vec::new(), - item_map: HashMap::new(), - }; - - let mut items_to_process = Vec::new(); - for (variable_index, variable) in grammar.variables.iter().enumerate() { - for production_index in 0..variable.productions.len() { - items_to_process.push(ParseItem::Normal { - variable_index: variable_index as u32, - production_index: production_index as u32, - step_index: 0, - }); - while !items_to_process.is_empty() { - let mut i = 0; - while i < items_to_process.len() { - let item = &items_to_process[i]; - if let Some(step) = item.step(grammar, &result) { - if grammar.variables_to_inline.contains(&step.symbol) { - let inlined_items = result - .inline(*item, grammar) - .into_iter() - .map(|production_index| ParseItem::Inlined { - variable_index: item.variable_index(), - production_index: *production_index, - step_index: item.step_index() as u32, - }) - .collect::>(); - items_to_process.splice(i..i + 1, inlined_items); - } else { - items_to_process[i] = item.successor(); - i += 1; - } - } else { - items_to_process.remove(i); - } - } - } - } - } - - result - } - - pub fn inlined_items<'a>( - &'a self, - item: ParseItem, - ) -> Option + 'a> { - self.item_map.get(&item).map(|production_indices| { - production_indices - .iter() - .cloned() - .map(move |production_index| ParseItem::Inlined { - variable_index: item.variable_index(), - production_index, - step_index: item.step_index() as u32, - }) - }) - } - - fn inline(&mut self, item: ParseItem, grammar: &SyntaxGrammar) -> &Vec { - let step_index = item.step_index(); - let mut productions_to_add = grammar.variables - [item.step(grammar, self).unwrap().symbol.index] - .productions - .clone(); - - let mut i = 0; - while i < productions_to_add.len() { - if let Some(first_symbol) = productions_to_add[i].first_symbol() { - if grammar.variables_to_inline.contains(&first_symbol) { - // Remove the production from the vector, replacing it with a placeholder. - let production = productions_to_add - .splice(i..i + 1, [Production::default()].iter().cloned()) - .next() - .unwrap(); - - // Replace the placeholder with the inlined productions. - productions_to_add.splice( - i..i + 1, - grammar.variables[first_symbol.index] - .productions - .iter() - .map(|p| { - let mut p = p.clone(); - p.steps.extend(production.steps[1..].iter().cloned()); - p - }), - ); - continue; - } - } - i += 1; - } - - let result = productions_to_add - .into_iter() - .map(|production_to_add| { - let mut inlined_production = item.production(grammar, &self).clone(); - let removed_step = inlined_production - .steps - .splice( - step_index..step_index + 1, - production_to_add.steps.iter().cloned(), - ) - .next() - .unwrap(); - let inserted_steps = &mut inlined_production.steps - [step_index..step_index + production_to_add.steps.len()]; - if let Some(alias) = removed_step.alias { - for inserted_step in inserted_steps.iter_mut() { - inserted_step.alias = Some(alias.clone()); - } - } - if let Some(last_inserted_step) = inserted_steps.last_mut() { - last_inserted_step.precedence = removed_step.precedence; - last_inserted_step.associativity = removed_step.associativity; - } - self.inlined_productions - .iter() - .position(|p| *p == inlined_production) - .unwrap_or({ - self.inlined_productions.push(inlined_production); - self.inlined_productions.len() - 1 - }) as u32 - }) - .collect(); - - self.item_map.entry(item).or_insert(result) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::grammars::{LexicalGrammar, ProductionStep, SyntaxVariable, VariableType}; - use crate::rules::{Alias, Associativity, Symbol}; - use std::borrow::Borrow; - - #[test] - fn test_basic_inlining() { - let grammar = SyntaxGrammar { - expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - variables_to_inline: vec![Symbol::non_terminal(1)], - variables: vec![ - SyntaxVariable { - name: "non-terminal-0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(10)), - ProductionStep::new(Symbol::non_terminal(1)), // inlined - ProductionStep::new(Symbol::terminal(11)), - ], - }], - }, - SyntaxVariable { - name: "non-terminal-1".to_string(), - kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(12)), - ProductionStep::new(Symbol::terminal(13)), - ], - }, - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(14))], - }, - ], - }, - ], - }; - - let inline_map = InlinedProductionMap::new(&grammar); - - // Nothing to inline at step 0. - assert!(inline_map - .inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 0 - }) - .is_none()); - - // Inlining variable 1 yields two productions. - assert_eq!( - display_items( - inline_map - .inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 1 - }) - .unwrap(), - &grammar, - &inline_map - ), - vec![ - "non-terminal-0 → terminal-10 • terminal-12 terminal-13 terminal-11" - .to_string(), - "non-terminal-0 → terminal-10 • terminal-14 terminal-11".to_string(), - ] - ); - } - - #[test] - fn test_nested_inlining() { - let grammar = SyntaxGrammar { - variables: vec![ - SyntaxVariable { - name: "non-terminal-0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(10)), - ProductionStep::new(Symbol::non_terminal(1)), // inlined - ProductionStep::new(Symbol::terminal(11)), - ProductionStep::new(Symbol::non_terminal(2)), // inlined - ProductionStep::new(Symbol::terminal(12)), - ], - }], - }, - SyntaxVariable { - name: "non-terminal-1".to_string(), - kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(13))], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::non_terminal(3)), // inlined - ProductionStep::new(Symbol::terminal(14)), - ], - }, - ], - }, - SyntaxVariable { - name: "non-terminal-2".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(15))], - }], - }, - SyntaxVariable { - name: "non-terminal-3".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(16))], - }], - }, - ], - variables_to_inline: vec![ - Symbol::non_terminal(1), - Symbol::non_terminal(2), - Symbol::non_terminal(3), - ], - expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let inline_map = InlinedProductionMap::new(&grammar); - - let items = inline_map - .inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 1, - }) - .unwrap() - .collect::>(); - - assert_eq!( - display_items(&items, &grammar, &inline_map), - vec![ - "non-terminal-0 → terminal-10 • terminal-13 terminal-11 non-terminal-2 terminal-12".to_string(), - "non-terminal-0 → terminal-10 • terminal-16 terminal-14 terminal-11 non-terminal-2 terminal-12".to_string() - ] - ); - - let item = items[0].successor().successor(); - assert_eq!( - display_items(&[item], &grammar, &inline_map), - vec![ - "non-terminal-0 → terminal-10 terminal-13 terminal-11 • non-terminal-2 terminal-12".to_string(), - ] - ); - - assert_eq!( - display_items(inline_map.inlined_items(item).unwrap(), &grammar, &inline_map), - vec![ - "non-terminal-0 → terminal-10 terminal-13 terminal-11 • terminal-15 terminal-12".to_string(), - ] - ); - } - - #[test] - fn test_inlining_with_precedence_and_alias() { - let grammar = SyntaxGrammar { - variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)], - variables: vec![ - SyntaxVariable { - name: "non-terminal-0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::non_terminal(1)) // inlined - .with_prec(1, Some(Associativity::Left)), - ProductionStep::new(Symbol::terminal(10)), - ProductionStep::new(Symbol::non_terminal(2)), // inlined - ], - }], - }, - SyntaxVariable { - name: "non-terminal-1".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(11)) - .with_prec(2, None) - .with_alias("inner_alias", true), - ProductionStep::new(Symbol::terminal(12)).with_prec(3, None), - ], - }], - }, - SyntaxVariable { - name: "non-terminal-2".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(13)) - .with_alias("outer_alias", true)], - }], - }, - ], - expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let inline_map = InlinedProductionMap::new(&grammar); - - let items = inline_map - .inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 0, - }) - .unwrap() - .collect::>(); - assert_eq!( - display_items(&items, &grammar, &inline_map)[0], - "non-terminal-0 → • terminal-11 terminal-12 terminal-10 non-terminal-2".to_string(), - ); - - // The first step in the inlined production retains its precedence and alias. - let item = items[0].successor(); - assert_eq!( - display_items(&[item], &grammar, &inline_map)[0], - "non-terminal-0 → terminal-11 • terminal-12 terminal-10 non-terminal-2".to_string(), - ); - assert_eq!(item.precedence(&grammar, &inline_map), 2); - assert_eq!( - items[0].step(&grammar, &inline_map).unwrap().alias, - Some(Alias { - value: "inner_alias".to_string(), - is_named: true, - }) - ); - - // The final terminal of the inlined production inherits the precedence of - // the inlined step. - let item = item.successor(); - assert_eq!( - display_items(&[item], &grammar, &inline_map)[0], - "non-terminal-0 → terminal-11 terminal-12 • terminal-10 non-terminal-2".to_string(), - ); - assert_eq!(item.precedence(&grammar, &inline_map), 1); - - let item = item.successor(); - assert_eq!( - display_items(&[item], &grammar, &inline_map)[0], - "non-terminal-0 → terminal-11 terminal-12 terminal-10 • non-terminal-2".to_string(), - ); - - // All steps of the inlined production inherit their alias from the - // inlined step. - let items = inline_map.inlined_items(item).unwrap().collect::>(); - assert_eq!( - display_items(&items, &grammar, &inline_map)[0], - "non-terminal-0 → terminal-11 terminal-12 terminal-10 • terminal-13".to_string(), - ); - assert_eq!( - items[0].step(&grammar, &inline_map).unwrap().alias, - Some(Alias { - value: "outer_alias".to_string(), - is_named: true, - }) - ) - } - - fn display_items( - items: impl IntoIterator>, - grammar: &SyntaxGrammar, - inline_map: &InlinedProductionMap, - ) -> Vec { - let lex = LexicalGrammar::default(); - items - .into_iter() - .map(|item| format!("{}", item.borrow().display_with(grammar, &lex, inline_map))) - .collect() - } -} diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index 9208f602..49ab4f27 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -1,10 +1,12 @@ -use super::inline_variables::InlinedProductionMap; use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; -use crate::rules::{Associativity, Symbol, SymbolType}; +use crate::rules::Associativity; +use crate::rules::{Symbol, SymbolType}; use smallbitvec::SmallBitVec; use std::collections::{HashMap, BTreeMap}; use std::fmt; use std::hash::{Hash, Hasher}; +use std::u32; +use std::cmp::Ordering; lazy_static! { static ref START_PRODUCTION: Production = Production { @@ -28,49 +30,26 @@ pub(crate) struct LookaheadSet { eof: bool, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) enum ParseItem { - Start { - step_index: u32, - }, - Normal { - variable_index: u32, - production_index: u32, - step_index: u32, - }, - Inlined { - variable_index: u32, - production_index: u32, - step_index: u32, - }, +#[derive(Clone, Copy, Debug)] +pub(crate) struct ParseItem<'a> { + pub variable_index: u32, + pub step_index: u32, + pub production: &'a Production, } #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ParseItemSet { - pub entries: BTreeMap, +pub(crate) struct ParseItemSet<'a> { + pub entries: BTreeMap, LookaheadSet>, } -pub(crate) struct ParseItemDisplay<'a>( - &'a ParseItem, - &'a SyntaxGrammar, - &'a LexicalGrammar, - &'a InlinedProductionMap, -); - +pub(crate) struct ParseItemDisplay<'a>(&'a ParseItem<'a>, &'a SyntaxGrammar, &'a LexicalGrammar); pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar); - pub(crate) struct ParseItemSetDisplay<'a>( - &'a ParseItemSet, + &'a ParseItemSet<'a>, &'a SyntaxGrammar, &'a LexicalGrammar, - &'a InlinedProductionMap, ); -struct ParseItemSetMapEntry(ParseItemSet, u64); -pub(crate) struct ParseItemSetMap { - map: HashMap -} - impl LookaheadSet { pub fn new() -> Self { Self { @@ -173,152 +152,79 @@ impl LookaheadSet { } } -impl ParseItem { +impl<'a> ParseItem<'a> { pub fn start() -> Self { - ParseItem::Start { step_index: 0 } - } - - pub fn is_kernel(&self) -> bool { - match self { - ParseItem::Start { .. } => true, - ParseItem::Normal { step_index, .. } | ParseItem::Inlined { step_index, .. } => { - *step_index > 0 - } + ParseItem { + variable_index: u32::MAX, + production: &START_PRODUCTION, + step_index: 0, } } - pub fn production<'a>( - &self, - grammar: &'a SyntaxGrammar, - inlined_productions: &'a InlinedProductionMap, - ) -> &'a Production { - match self { - ParseItem::Start { .. } => &START_PRODUCTION, - ParseItem::Normal { - variable_index, - production_index, - .. - } => { - &grammar.variables[*variable_index as usize].productions[*production_index as usize] - } - ParseItem::Inlined { - production_index, .. - } => &inlined_productions.inlined_productions[*production_index as usize], + pub fn step(&self) -> Option<&'a ProductionStep> { + self.production.steps.get(self.step_index as usize) + } + + pub fn symbol(&self) -> Option { + self.step().map(|step| step.symbol) + } + + pub fn associativity(&self) -> Option { + self.prev_step().and_then(|step| step.associativity) + } + + pub fn precedence(&self) -> i32 { + self.prev_step().map_or(0, |step| step.precedence) + } + + pub fn prev_step(&self) -> Option<&'a ProductionStep> { + self.production.steps.get(self.step_index as usize - 1) + } + + pub fn is_done(&self) -> bool { + self.step_index as usize == self.production.steps.len() + } + + pub fn is_augmented(&self) -> bool { + self.variable_index == u32::MAX + } + + pub fn successor(&self) -> ParseItem<'a> { + ParseItem { + variable_index: self.variable_index, + production: self.production, + step_index: self.step_index + 1, } } - pub fn symbol( - &self, - grammar: &SyntaxGrammar, - inlined_productions: &InlinedProductionMap, - ) -> Option { - self.step(grammar, inlined_productions).map(|s| s.symbol) - } - - pub fn step<'a>( - &self, - grammar: &'a SyntaxGrammar, - inlined_productions: &'a InlinedProductionMap, - ) -> Option<&'a ProductionStep> { - self.production(grammar, inlined_productions) - .steps - .get(self.step_index()) - } - - pub fn precedence<'a>( - &self, - grammar: &'a SyntaxGrammar, - inlines: &'a InlinedProductionMap, - ) -> i32 { - self.production(grammar, inlines) - .steps - .get(self.step_index() - 1) - .map(|s| s.precedence) - .unwrap_or(0) - } - - pub fn associativity<'a>( - &self, - grammar: &'a SyntaxGrammar, - inlines: &'a InlinedProductionMap, - ) -> Option { - let production = self.production(grammar, inlines); - let step_index = self.step_index(); - if step_index == production.steps.len() { - production.steps.last().and_then(|s| s.associativity) - } else { - None - } - } - - pub fn variable_index(&self) -> u32 { - match self { - ParseItem::Start { .. } => panic!("Start item doesn't have a variable index"), - ParseItem::Normal { variable_index, .. } - | ParseItem::Inlined { variable_index, .. } => *variable_index, - } - } - - pub fn step_index(&self) -> usize { - match self { - ParseItem::Start { step_index } - | ParseItem::Normal { step_index, .. } - | ParseItem::Inlined { step_index, .. } => *step_index as usize, - } - } - - pub fn is_final(&self) -> bool { - if let ParseItem::Start { step_index: 1 } = self { - true - } else { - false - } - } - - fn step_index_mut(&mut self) -> &mut u32 { - match self { - ParseItem::Start { step_index } - | ParseItem::Normal { step_index, .. } - | ParseItem::Inlined { step_index, .. } => step_index, - } - } - - pub fn display_with<'a>( + pub fn display_with( &'a self, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, - inlines: &'a InlinedProductionMap, ) -> ParseItemDisplay<'a> { - ParseItemDisplay(self, syntax_grammar, lexical_grammar, inlines) - } - - pub fn successor(&self) -> ParseItem { - let mut result = self.clone(); - *result.step_index_mut() += 1; - result + ParseItemDisplay(self, syntax_grammar, lexical_grammar) } } -impl ParseItemSet { - pub fn with<'a>(elements: impl IntoIterator) -> Self { +impl<'a> ParseItemSet<'a> { + pub fn with(elements: impl IntoIterator, LookaheadSet)>) -> Self { let mut result = Self::default(); for (item, lookaheads) in elements { - result.entries.insert(*item, lookaheads.clone()); + result.entries.insert(item, lookaheads); } result } - pub fn display_with<'a>( + pub fn display_with( &'a self, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, - inlines: &'a InlinedProductionMap, ) -> ParseItemSetDisplay<'a> { - ParseItemSetDisplay(self, syntax_grammar, lexical_grammar, inlines) + ParseItemSetDisplay(self, syntax_grammar, lexical_grammar) } } -impl Default for ParseItemSet { +impl<'a> Default for ParseItemSet<'a> { fn default() -> Self { Self { entries: BTreeMap::new(), @@ -328,20 +234,18 @@ impl Default for ParseItemSet { impl<'a> fmt::Display for ParseItemDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - if let ParseItem::Start { .. } = &self.0 { + if self.0.is_augmented() { write!(f, "START →")?; } else { write!( f, "{} →", - &self.1.variables[self.0.variable_index() as usize].name + &self.1.variables[self.0.variable_index as usize].name )?; } - let step_index = self.0.step_index(); - let production = self.0.production(self.1, self.3); - for (i, step) in production.steps.iter().enumerate() { - if i == step_index { + for (i, step) in self.0.production.steps.iter().enumerate() { + if i == self.0.step_index as usize { write!(f, " •")?; } @@ -359,7 +263,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { } } - if production.steps.len() == step_index { + if self.0.is_done() { write!(f, " •")?; } @@ -398,7 +302,7 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> { writeln!( f, "{}\t{}", - item.display_with(self.1, self.2, self.3), + item.display_with(self.1, self.2), lookaheads.display_with(self.1, self.2) )?; } @@ -406,7 +310,94 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> { } } -impl Hash for ParseItemSet { +impl<'a> Hash for ParseItem<'a> { + fn hash(&self, hasher: &mut H) { + hasher.write_u32(self.variable_index); + hasher.write_u32(self.step_index); + hasher.write_i32(self.production.dynamic_precedence); + hasher.write_usize(self.production.steps.len()); + hasher.write_i32(self.precedence()); + self.associativity().hash(hasher); + for step in &self.production.steps[0..self.step_index as usize] { + step.alias.hash(hasher); + } + for step in &self.production.steps[self.step_index as usize..] { + step.hash(hasher); + } + } +} + +impl<'a> PartialEq for ParseItem<'a> { + fn eq(&self, other: &Self) -> bool { + if self.variable_index != other.variable_index + || self.step_index != other.step_index + || self.production.dynamic_precedence != other.production.dynamic_precedence + || self.production.steps.len() != other.production.steps.len() + || self.precedence() != other.precedence() + || self.associativity() != other.associativity() + { + return false; + } + + for (i, step) in self.production.steps.iter().enumerate() { + if i < self.step_index as usize { + if step.alias != other.production.steps[i].alias { + return false; + } + } else { + if *step != other.production.steps[i] { + return false; + } + } + } + + return true; + } +} + +impl<'a> PartialOrd for ParseItem<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + if let Some(o) = self.variable_index.partial_cmp(&other.variable_index) { + return Some(o); + } + if let Some(o) = self.step_index.partial_cmp(&other.step_index) { + return Some(o); + } + if let Some(o) = self.production.dynamic_precedence.partial_cmp(&other.production.dynamic_precedence) { + return Some(o); + } + if let Some(o) = self.production.steps.len().partial_cmp(&other.production.steps.len()) { + return Some(o); + } + if let Some(o) = self.precedence().partial_cmp(&other.precedence()) { + return Some(o); + } + if let Some(o) = self.associativity().partial_cmp(&other.associativity()) { + return Some(o); + } + for (i, step) in self.production.steps.iter().enumerate() { + let cmp = if i < self.step_index as usize { + step.alias.partial_cmp(&other.production.steps[i].alias) + } else { + step.partial_cmp(&other.production.steps[i]) + }; + if let Some(o) = cmp { + return Some(o); + } + } + return None; + } +} + +impl<'a> Ord for ParseItem<'a> { + fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} + +impl<'a> Eq for ParseItem<'a> {} + +impl<'a> Hash for ParseItemSet<'a> { fn hash(&self, hasher: &mut H) { hasher.write_usize(self.entries.len()); for (item, lookaheads) in self.entries.iter() { diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs index 530c1f25..52ee0a45 100644 --- a/src/build_tables/item_set_builder.rs +++ b/src/build_tables/item_set_builder.rs @@ -1,12 +1,11 @@ -use super::inline_variables::InlinedProductionMap; use super::item::{LookaheadSet, ParseItem, ParseItemSet}; -use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use crate::rules::Symbol; use std::collections::{HashMap, HashSet}; #[derive(Clone, Debug, PartialEq, Eq)] -struct TransitiveClosureAddition { - item: ParseItem, +struct TransitiveClosureAddition<'a> { + item: ParseItem<'a>, info: FollowSetInfo, } @@ -16,11 +15,10 @@ struct FollowSetInfo { propagates_lookaheads: bool, } -pub(crate) struct ParseItemSetBuilder { +pub(crate) struct ParseItemSetBuilder<'a> { first_sets: HashMap, last_sets: HashMap, - transitive_closure_additions: Vec>, - pub inlines: InlinedProductionMap, + transitive_closure_additions: Vec>>, } fn find_or_push(vector: &mut Vec, value: T) { @@ -29,13 +27,16 @@ fn find_or_push(vector: &mut Vec, value: T) { } } -impl ParseItemSetBuilder { - pub fn new(syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar) -> Self { +impl<'a> ParseItemSetBuilder<'a> { + pub fn new( + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + inlines: &'a InlinedProductionMap, + ) -> Self { let mut result = Self { first_sets: HashMap::new(), last_sets: HashMap::new(), transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()], - inlines: InlinedProductionMap::new(syntax_grammar), }; // For each grammar symbol, populate the FIRST and LAST sets: the set of @@ -193,22 +194,28 @@ impl ParseItemSetBuilder { for (variable_index, follow_set_info) in follow_set_info_by_non_terminal { let variable = &syntax_grammar.variables[variable_index]; let non_terminal = Symbol::non_terminal(variable_index); + let variable_index = variable_index as u32; if syntax_grammar.variables_to_inline.contains(&non_terminal) { continue; } - for production_index in 0..variable.productions.len() { - let item = ParseItem::Normal { - variable_index: variable_index as u32, - production_index: production_index as u32, + for (production_index, production) in variable.productions.iter().enumerate() { + let item = ParseItem { + variable_index, + production, step_index: 0, }; - if let Some(inlined_items) = result.inlines.inlined_items(item) { - for inlined_item in inlined_items { + // let step_id = item.as_step_id(syntax_grammar, inlines); + if let Some(inlined_productions) = inlines.inlined_productions(item.production, item.step_index) { + for production in inlined_productions { find_or_push( additions_for_non_terminal, TransitiveClosureAddition { - item: inlined_item, + item: ParseItem { + variable_index, + production, + step_index: item.step_index, + }, info: follow_set_info.clone(), }, ); @@ -231,14 +238,19 @@ impl ParseItemSetBuilder { pub(crate) fn transitive_closure( &mut self, - item_set: &ParseItemSet, - grammar: &SyntaxGrammar, - ) -> ParseItemSet { + item_set: &ParseItemSet<'a>, + grammar: &'a SyntaxGrammar, + inlines: &'a InlinedProductionMap, + ) -> ParseItemSet<'a> { let mut result = ParseItemSet::default(); for (item, lookaheads) in &item_set.entries { - if let Some(items) = self.inlines.inlined_items(*item) { - for item in items { - self.add_item(&mut result, item, lookaheads, grammar); + if let Some(productions) = inlines.inlined_productions(item.production, item.step_index) { + for production in productions { + self.add_item(&mut result, ParseItem { + variable_index: item.variable_index, + production, + step_index: item.step_index, + }, lookaheads, grammar); } } else { self.add_item(&mut result, *item, lookaheads, grammar); @@ -253,14 +265,14 @@ impl ParseItemSetBuilder { fn add_item( &self, - set: &mut ParseItemSet, - item: ParseItem, + set: &mut ParseItemSet<'a>, + item: ParseItem<'a>, lookaheads: &LookaheadSet, grammar: &SyntaxGrammar, ) { - if let Some(step) = item.step(grammar, &self.inlines) { + if let Some(step) = item.step() { if step.symbol.is_non_terminal() { - let next_step = item.successor().step(grammar, &self.inlines); + let next_step = item.successor().step(); // Determine which tokens can follow this non-terminal. let following_tokens = if let Some(next_step) = next_step { diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 091c5486..27951453 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -1,14 +1,14 @@ -mod inline_variables; mod item; mod item_set_builder; use self::item::{LookaheadSet, ParseItem, ParseItemSet}; use self::item_set_builder::ParseItemSetBuilder; use crate::error::{Error, Result}; -use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; use crate::rules::{AliasMap, Associativity, Symbol, SymbolType}; -use crate::tables::ParseTableEntry; -use crate::tables::{AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable}; +use crate::tables::{ + AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, +}; use core::ops::Range; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet, VecDeque}; @@ -30,12 +30,13 @@ struct ParseStateQueueEntry { } struct ParseTableBuilder<'a> { - item_set_builder: ParseItemSetBuilder, + item_set_builder: ParseItemSetBuilder<'a>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, + inlines: &'a InlinedProductionMap, simple_aliases: &'a AliasMap, - state_ids_by_item_set: HashMap, - item_sets_by_state_id: Vec, + state_ids_by_item_set: HashMap, ParseStateId>, + item_sets_by_state_id: Vec>, parse_state_queue: VecDeque, parse_table: ParseTable, } @@ -46,16 +47,17 @@ impl<'a> ParseTableBuilder<'a> { self.parse_table.alias_sequences.push(Vec::new()); // Ensure that the error state has index 0. - let error_state_id = self.add_parse_state( - &Vec::new(), - &Vec::new(), - ParseItemSet::default(), - ); + let error_state_id = + self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); self.add_parse_state( &Vec::new(), &Vec::new(), - ParseItemSet::with(&[(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]), + ParseItemSet::with( + [(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))] + .iter() + .cloned(), + ), ); self.process_part_state_queue()?; @@ -68,7 +70,7 @@ impl<'a> ParseTableBuilder<'a> { &mut self, preceding_symbols: &SymbolSequence, preceding_auxiliary_symbols: &AuxiliarySymbolSequence, - item_set: ParseItemSet, + item_set: ParseItemSet<'a>, ) -> ParseStateId { match self.state_ids_by_item_set.entry(item_set) { Entry::Occupied(o) => { @@ -99,16 +101,14 @@ impl<'a> ParseTableBuilder<'a> { println!( "ITEM SET {}:\n{}", entry.state_id, - self.item_sets_by_state_id[entry.state_id].display_with( - &self.syntax_grammar, - &self.lexical_grammar, - &self.item_set_builder.inlines - ) + self.item_sets_by_state_id[entry.state_id] + .display_with(&self.syntax_grammar, &self.lexical_grammar,) ); let item_set = self.item_set_builder.transitive_closure( &self.item_sets_by_state_id[entry.state_id], self.syntax_grammar, + self.inlines, ); // println!("TRANSITIVE CLOSURE:"); @@ -131,7 +131,7 @@ impl<'a> ParseTableBuilder<'a> { &mut self, mut preceding_symbols: SymbolSequence, mut preceding_auxiliary_symbols: Vec, - item_set: ParseItemSet, + item_set: ParseItemSet<'a>, state_id: ParseStateId, ) -> Result<()> { let mut terminal_successors = HashMap::new(); @@ -139,9 +139,7 @@ impl<'a> ParseTableBuilder<'a> { let mut lookaheads_with_conflicts = HashSet::new(); for (item, lookaheads) in &item_set.entries { - if let Some(next_symbol) = - item.symbol(self.syntax_grammar, &self.item_set_builder.inlines) - { + if let Some(next_symbol) = item.symbol() { let successor = item.successor(); if next_symbol.is_non_terminal() { // Keep track of where auxiliary non-terminals (repeat symbols) are @@ -169,17 +167,15 @@ impl<'a> ParseTableBuilder<'a> { .insert_all(lookaheads); } } else { - let action = if item.is_final() { + let action = if item.is_augmented() { ParseAction::Accept } else { - let production = - item.production(&self.syntax_grammar, &self.item_set_builder.inlines); ParseAction::Reduce { - symbol: Symbol::non_terminal(item.variable_index() as usize), - child_count: item.step_index(), - precedence: production.last_precedence(), - associativity: production.last_associativity(), - dynamic_precedence: production.dynamic_precedence, + symbol: Symbol::non_terminal(item.variable_index as usize), + child_count: item.step_index as usize, + precedence: item.precedence(), + associativity: item.associativity(), + dynamic_precedence: item.production.dynamic_precedence, alias_sequence_id: self.get_alias_sequence_id(item), } }; @@ -280,17 +276,15 @@ impl<'a> ParseTableBuilder<'a> { let mut shift_precedence: Option> = None; let mut conflicting_items = HashSet::new(); for (item, lookaheads) in &item_set.entries { - let production = item.production(&self.syntax_grammar, &self.item_set_builder.inlines); - let step_index = item.step_index(); - if let Some(step) = production.steps.get(step_index) { - if step_index > 0 { + if let Some(step) = item.step() { + if item.step_index > 0 { if self .item_set_builder .first_set(&step.symbol) .contains(&conflicting_lookahead) { conflicting_items.insert(item); - let precedence = production.steps[step_index - 1].precedence; + let precedence = item.precedence(); if let Some(range) = &mut shift_precedence { if precedence < range.start { range.start = precedence; @@ -316,11 +310,11 @@ impl<'a> ParseTableBuilder<'a> { // by leaving it in the parse table, but marking the SHIFT action with // an `is_repetition` flag. let conflicting_variable_index = - conflicting_items.iter().next().unwrap().variable_index(); + conflicting_items.iter().next().unwrap().variable_index; if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() { if conflicting_items .iter() - .all(|item| item.variable_index() == conflicting_variable_index) + .all(|item| item.variable_index == conflicting_variable_index) { *is_repetition = true; return Ok(()); @@ -340,10 +334,7 @@ impl<'a> ParseTableBuilder<'a> { && shift_precedence.start < reduce_precedence) { entry.actions.pop(); - conflicting_items.retain(|item| { - item.step(&self.syntax_grammar, &self.item_set_builder.inlines) - .is_none() - }); + conflicting_items.retain(|item| item.is_done()); } // If the SHIFT and REDUCE actions have the same predence, consider // the REDUCE actions' associativity. @@ -367,10 +358,7 @@ impl<'a> ParseTableBuilder<'a> { match (has_left, has_non, has_right) { (true, false, false) => { entry.actions.pop(); - conflicting_items.retain(|item| { - item.step(&self.syntax_grammar, &self.item_set_builder.inlines) - .is_none() - }); + conflicting_items.retain(|item| item.is_done()); } (false, false, true) => { entry.actions.drain(0..entry.actions.len() - 1); @@ -392,7 +380,7 @@ impl<'a> ParseTableBuilder<'a> { // Determine the set of parent symbols involved in this conflict. let mut actual_conflict = Vec::new(); for item in &conflicting_items { - let symbol = Symbol::non_terminal(item.variable_index() as usize); + let symbol = Symbol::non_terminal(item.variable_index as usize); if self.syntax_grammar.variables[symbol.index].is_auxiliary() { actual_conflict.extend( preceding_auxiliary_symbols @@ -441,7 +429,7 @@ impl<'a> ParseTableBuilder<'a> { for preceding_symbol in preceding_symbols .iter() - .take(preceding_symbols.len() - item.step_index()) + .take(preceding_symbols.len() - item.step_index as usize) { write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap(); } @@ -449,17 +437,12 @@ impl<'a> ParseTableBuilder<'a> { write!( &mut msg, " ({}", - &self.syntax_grammar.variables[item.variable_index() as usize].name + &self.syntax_grammar.variables[item.variable_index as usize].name ) .unwrap(); - for (j, step) in item - .production(&self.syntax_grammar, &self.item_set_builder.inlines) - .steps - .iter() - .enumerate() - { - if j == item.step_index() { + for (j, step) in item.production.steps.iter().enumerate() { + if j as u32 == item.step_index { write!(&mut msg, " •").unwrap(); } write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap(); @@ -467,10 +450,7 @@ impl<'a> ParseTableBuilder<'a> { write!(&mut msg, ")").unwrap(); - if item - .step(&self.syntax_grammar, &self.item_set_builder.inlines) - .is_none() - { + if item.is_done() { write!( &mut msg, " • {}", @@ -479,9 +459,8 @@ impl<'a> ParseTableBuilder<'a> { .unwrap(); } - let precedence = item.precedence(&self.syntax_grammar, &self.item_set_builder.inlines); - let associativity = - item.associativity(&self.syntax_grammar, &self.item_set_builder.inlines); + let precedence = item.precedence(); + let associativity = item.associativity(); if precedence != 0 || associativity.is_some() { write!( &mut msg, @@ -506,8 +485,7 @@ impl<'a> ParseTableBuilder<'a> { .entries .keys() .filter_map(|item| { - if item.symbol(&self.syntax_grammar, &self.item_set_builder.inlines) == Some(symbol) - { + if item.symbol() == Some(symbol) { None } else { None @@ -554,8 +532,12 @@ impl<'a> ParseTableBuilder<'a> { } fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId { - let production = item.production(&self.syntax_grammar, &self.item_set_builder.inlines); - let alias_sequence = production.steps.iter().map(|s| s.alias.clone()).collect(); + let alias_sequence = item + .production + .steps + .iter() + .map(|s| s.alias.clone()) + .collect(); if let Some(index) = self .parse_table .alias_sequences @@ -592,12 +574,14 @@ pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, + inlines: &InlinedProductionMap, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { ParseTableBuilder { syntax_grammar, lexical_grammar, simple_aliases, - item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar), + inlines, + item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines), state_ids_by_item_set: HashMap::new(), item_sets_by_state_id: Vec::new(), parse_state_queue: VecDeque::new(), diff --git a/src/generate.rs b/src/generate.rs index dc3d5176..cdbbea4f 100644 --- a/src/generate.rs +++ b/src/generate.rs @@ -6,11 +6,12 @@ use crate::render::render_c_code; pub fn generate_parser_for_grammar(input: &str) -> Result { let input_grammar = parse_grammar(input)?; - let (syntax_grammar, lexical_grammar, simple_aliases) = prepare_grammar(&input_grammar)?; + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( &syntax_grammar, &lexical_grammar, - &simple_aliases + &simple_aliases, + &inlines )?; let c_code = render_c_code( &input_grammar.name, diff --git a/src/grammars.rs b/src/grammars.rs index 7512ec03..b751e4e4 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -1,12 +1,13 @@ -use crate::rules::{Associativity, Alias, Rule, Symbol}; use crate::nfa::Nfa; +use crate::rules::{Alias, Associativity, Rule, Symbol}; +use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(crate) enum VariableType { Hidden, Auxiliary, Anonymous, - Named + Named, } // Input grammar @@ -46,12 +47,12 @@ pub(crate) struct LexicalGrammar { // Extracted syntax grammar -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct ProductionStep { - pub symbol: Symbol, - pub precedence: i32, - pub associativity: Option, - pub alias: Option, + pub symbol: Symbol, + pub precedence: i32, + pub associativity: Option, + pub alias: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -60,6 +61,11 @@ pub(crate) struct Production { pub dynamic_precedence: i32, } +pub(crate) struct InlinedProductionMap { + pub productions: Vec, + pub production_map: HashMap<(*const Production, u32), Vec>, +} + #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct SyntaxVariable { pub name: String, @@ -86,7 +92,12 @@ pub(crate) struct SyntaxGrammar { impl ProductionStep { pub(crate) fn new(symbol: Symbol) -> Self { - Self { symbol, precedence: 0, associativity: None, alias: None } + Self { + symbol, + precedence: 0, + associativity: None, + alias: None, + } } pub(crate) fn with_prec(self, precedence: i32, associativity: Option) -> Self { @@ -103,7 +114,10 @@ impl ProductionStep { symbol: self.symbol, precedence: self.precedence, associativity: self.associativity, - alias: Some(Alias { value: value.to_string(), is_named }), + alias: Some(Alias { + value: value.to_string(), + is_named, + }), } } } @@ -124,25 +138,44 @@ impl Production { impl Default for Production { fn default() -> Self { - Production { dynamic_precedence: 0, steps: Vec::new() } + Production { + dynamic_precedence: 0, + steps: Vec::new(), + } } } impl Variable { pub fn named(name: &str, rule: Rule) -> Self { - Self { name: name.to_string(), kind: VariableType::Named, rule } + Self { + name: name.to_string(), + kind: VariableType::Named, + rule, + } } pub fn auxiliary(name: &str, rule: Rule) -> Self { - Self { name: name.to_string(), kind: VariableType::Auxiliary, rule } + Self { + name: name.to_string(), + kind: VariableType::Auxiliary, + rule, + } } pub fn hidden(name: &str, rule: Rule) -> Self { - Self { name: name.to_string(), kind: VariableType::Hidden, rule } + Self { + name: name.to_string(), + kind: VariableType::Hidden, + rule, + } } pub fn anonymous(name: &str, rule: Rule) -> Self { - Self { name: name.to_string(), kind: VariableType::Anonymous, rule } + Self { + name: name.to_string(), + kind: VariableType::Anonymous, + rule, + } } } @@ -151,3 +184,20 @@ impl SyntaxVariable { self.kind == VariableType::Auxiliary } } + +impl InlinedProductionMap { + pub fn inlined_productions<'a>( + &'a self, + production: &Production, + step_index: u32, + ) -> Option + 'a> { + self.production_map + .get(&(production as *const Production, step_index)) + .map(|production_indices| { + production_indices + .iter() + .cloned() + .map(move |index| &self.productions[index]) + }) + } +} diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs index 22435fca..f325383b 100644 --- a/src/prepare_grammar/mod.rs +++ b/src/prepare_grammar/mod.rs @@ -4,6 +4,7 @@ mod extract_simple_aliases; mod extract_tokens; mod flatten_grammar; mod intern_symbols; +mod process_inlines; use self::expand_repeats::expand_repeats; use self::expand_tokens::expand_tokens; @@ -11,8 +12,11 @@ use self::extract_simple_aliases::extract_simple_aliases; use self::extract_tokens::extract_tokens; use self::flatten_grammar::flatten_grammar; use self::intern_symbols::intern_symbols; +use self::process_inlines::process_inlines; use crate::error::Result; -use crate::grammars::{ExternalToken, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable}; +use crate::grammars::{ + ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable, +}; use crate::rules::{AliasMap, Rule, Symbol}; pub(self) struct IntermediateGrammar { @@ -36,12 +40,18 @@ pub(self) struct ExtractedLexicalGrammar { pub(crate) fn prepare_grammar( input_grammar: &InputGrammar, -) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> { +) -> Result<( + SyntaxGrammar, + LexicalGrammar, + InlinedProductionMap, + AliasMap, +)> { let interned_grammar = intern_symbols(input_grammar)?; let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; let syntax_grammar = expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; let lexical_grammar = expand_tokens(lexical_grammar)?; let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); - Ok((syntax_grammar, lexical_grammar, simple_aliases)) + let inlines = process_inlines(&syntax_grammar); + Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases)) } diff --git a/src/prepare_grammar/process_inlines.rs b/src/prepare_grammar/process_inlines.rs new file mode 100644 index 00000000..0d7f6827 --- /dev/null +++ b/src/prepare_grammar/process_inlines.rs @@ -0,0 +1,477 @@ +use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar}; +use std::collections::HashMap; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct ProductionStepId { + variable_index: Option, + production_index: usize, + step_index: usize, +} + +struct InlinedProductionMapBuilder { + production_indices_by_step_id: HashMap>, + productions: Vec, +} + +impl ProductionStepId { + pub fn successor(&self) -> Self { + Self { + variable_index: self.variable_index, + production_index: self.production_index, + step_index: self.step_index + 1, + } + } +} + +fn production_for_id<'a>( + map: &'a InlinedProductionMapBuilder, + id: ProductionStepId, + grammar: &'a SyntaxGrammar, +) -> &'a Production { + if let Some(variable_index) = id.variable_index { + &grammar.variables[variable_index].productions[id.production_index] + } else { + &map.productions[id.production_index] + } +} + +fn production_step_for_id<'a>( + map: &'a InlinedProductionMapBuilder, + id: ProductionStepId, + grammar: &'a SyntaxGrammar, +) -> Option<&'a ProductionStep> { + production_for_id(map, id, grammar).steps.get(id.step_index) +} + +fn inline<'a>( + map: &'a mut InlinedProductionMapBuilder, + step_id: ProductionStepId, + grammar: &'a SyntaxGrammar, +) -> &'a Vec { + let step = production_step_for_id(map, step_id, grammar).unwrap(); + let mut productions_to_add = grammar.variables[step.symbol.index].productions.clone(); + + let mut i = 0; + while i < productions_to_add.len() { + if let Some(first_symbol) = productions_to_add[i].first_symbol() { + if grammar.variables_to_inline.contains(&first_symbol) { + // Remove the production from the vector, replacing it with a placeholder. + let production = productions_to_add + .splice(i..i + 1, [Production::default()].iter().cloned()) + .next() + .unwrap(); + + // Replace the placeholder with the inlined productions. + productions_to_add.splice( + i..i + 1, + grammar.variables[first_symbol.index] + .productions + .iter() + .map(|p| { + let mut p = p.clone(); + p.steps.extend(production.steps[1..].iter().cloned()); + p + }), + ); + continue; + } + } + i += 1; + } + + let result = productions_to_add + .into_iter() + .map(|production_to_add| { + let mut inlined_production = production_for_id(&map, step_id, grammar).clone(); + let removed_step = inlined_production + .steps + .splice( + step_id.step_index..step_id.step_index + 1, + production_to_add.steps.iter().cloned(), + ) + .next() + .unwrap(); + let inserted_steps = &mut inlined_production.steps + [step_id.step_index..step_id.step_index + production_to_add.steps.len()]; + if let Some(alias) = removed_step.alias { + for inserted_step in inserted_steps.iter_mut() { + inserted_step.alias = Some(alias.clone()); + } + } + if let Some(last_inserted_step) = inserted_steps.last_mut() { + last_inserted_step.precedence = removed_step.precedence; + last_inserted_step.associativity = removed_step.associativity; + } + map.productions + .iter() + .position(|p| *p == inlined_production) + .unwrap_or({ + map.productions.push(inlined_production); + map.productions.len() - 1 + }) + }) + .collect(); + + map.production_indices_by_step_id + .entry(step_id) + .or_insert(result) +} + +pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap { + let mut result = InlinedProductionMapBuilder { + productions: Vec::new(), + production_indices_by_step_id: HashMap::new(), + }; + + let mut step_ids_to_process = Vec::new(); + for (variable_index, variable) in grammar.variables.iter().enumerate() { + for production_index in 0..variable.productions.len() { + step_ids_to_process.push(ProductionStepId { + variable_index: Some(variable_index), + production_index, + step_index: 0, + }); + while !step_ids_to_process.is_empty() { + let mut i = 0; + while i < step_ids_to_process.len() { + let step_id = step_ids_to_process[i]; + if let Some(step) = production_step_for_id(&result, step_id, grammar) { + if grammar.variables_to_inline.contains(&step.symbol) { + let inlined_step_ids = inline(&mut result, step_id, grammar) + .into_iter() + .cloned() + .map(|production_index| ProductionStepId { + variable_index: None, + production_index, + step_index: step_id.step_index, + }) + .collect::>(); + step_ids_to_process.splice(i..i + 1, inlined_step_ids); + } else { + step_ids_to_process[i] = step_id.successor(); + i += 1; + } + } else { + step_ids_to_process.remove(i); + } + } + } + } + } + + // result + let productions = result.productions; + let production_indices_by_step_id = result.production_indices_by_step_id; + + let production_map = production_indices_by_step_id + .into_iter() + .map(|(step_id, production_indices)| { + let production = if let Some(variable_index) = step_id.variable_index { + &grammar.variables[variable_index].productions[step_id.production_index] + } else { + &productions[step_id.production_index] + } as *const Production; + ((production, step_id.step_index as u32), production_indices) + }) + .collect(); + + InlinedProductionMap { productions, production_map } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammars::{ProductionStep, SyntaxVariable, VariableType}; + use crate::rules::{Associativity, Symbol}; + + #[test] + fn test_basic_inlining() { + let grammar = SyntaxGrammar { + expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + variables_to_inline: vec![Symbol::non_terminal(1)], + variables: vec![ + SyntaxVariable { + name: "non-terminal-0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(1)), // inlined + ProductionStep::new(Symbol::terminal(11)), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-1".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(12)), + ProductionStep::new(Symbol::terminal(13)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(14))], + }, + ], + }, + ], + }; + let inline_map = process_inlines(&grammar); + + // Nothing to inline at step 0. + assert!(inline_map + .inlined_productions(&grammar.variables[0].productions[0], 0) + .is_none()); + + // Inlining variable 1 yields two productions. + assert_eq!( + inline_map + .inlined_productions(&grammar.variables[0].productions[0], 1) + .unwrap() + .cloned() + .collect::>(), + vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(12)), + ProductionStep::new(Symbol::terminal(13)), + ProductionStep::new(Symbol::terminal(11)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(14)), + ProductionStep::new(Symbol::terminal(11)), + ], + }, + ] + ); + } + + #[test] + fn test_nested_inlining() { + let grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "non-terminal-0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(1)), // inlined + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::non_terminal(2)), // inlined + ProductionStep::new(Symbol::terminal(12)), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-1".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(13))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(3)), // inlined + ProductionStep::new(Symbol::terminal(14)), + ], + }, + ], + }, + SyntaxVariable { + name: "non-terminal-2".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(15))], + }], + }, + SyntaxVariable { + name: "non-terminal-3".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(16))], + }], + }, + ], + variables_to_inline: vec![ + Symbol::non_terminal(1), + Symbol::non_terminal(2), + Symbol::non_terminal(3), + ], + expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + let inline_map = process_inlines(&grammar); + + let productions: Vec<&Production> = inline_map + .inlined_productions(&grammar.variables[0].productions[0], 1) + .unwrap() + .collect(); + + assert_eq!( + productions.iter().cloned().cloned().collect::>(), + vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(13)), + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::non_terminal(2)), + ProductionStep::new(Symbol::terminal(12)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(16)), + ProductionStep::new(Symbol::terminal(14)), + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::non_terminal(2)), + ProductionStep::new(Symbol::terminal(12)), + ], + }, + ] + ); + + assert_eq!( + inline_map + .inlined_productions(productions[0], 3) + .unwrap() + .cloned() + .collect::>(), + vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(13)), + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::terminal(15)), + ProductionStep::new(Symbol::terminal(12)), + ], + },] + ); + } + + #[test] + fn test_inlining_with_precedence_and_alias() { + let grammar = SyntaxGrammar { + variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)], + variables: vec![ + SyntaxVariable { + name: "non-terminal-0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + // inlined + ProductionStep::new(Symbol::non_terminal(1)) + .with_prec(1, Some(Associativity::Left)), + ProductionStep::new(Symbol::terminal(10)), + // inlined + ProductionStep::new(Symbol::non_terminal(2)) + .with_alias("outer_alias", true), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-1".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(11)) + .with_prec(2, None) + .with_alias("inner_alias", true), + ProductionStep::new(Symbol::terminal(12)).with_prec(3, None), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-2".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(13))], + }], + }, + ], + expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let inline_map = process_inlines(&grammar); + + let productions: Vec<_> = inline_map + .inlined_productions(&grammar.variables[0].productions[0], 0) + .unwrap() + .collect(); + + assert_eq!( + productions.iter().cloned().cloned().collect::>(), + vec![Production { + dynamic_precedence: 0, + steps: vec![ + // The first step in the inlined production retains its precedence + // and alias. + ProductionStep::new(Symbol::terminal(11)) + .with_prec(2, None) + .with_alias("inner_alias", true), + // The final step of the inlined production inherits the precedence of + // the inlined step. + ProductionStep::new(Symbol::terminal(12)) + .with_prec(1, Some(Associativity::Left)), + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(2)) + .with_alias("outer_alias", true), + ] + }], + ); + + assert_eq!( + inline_map + .inlined_productions(productions[0], 3) + .unwrap() + .cloned() + .collect::>(), + vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(11)) + .with_prec(2, None) + .with_alias("inner_alias", true), + ProductionStep::new(Symbol::terminal(12)) + .with_prec(1, Some(Associativity::Left)), + ProductionStep::new(Symbol::terminal(10)), + // All steps of the inlined production inherit their alias from the + // inlined step. + ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true), + ] + }], + ); + } +} diff --git a/src/rules.rs b/src/rules.rs index 34f4c8b9..3bfd5181 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -8,13 +8,13 @@ pub(crate) enum SymbolType { End, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum Associativity { Left, Right, } -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct Alias { pub value: String, pub is_named: bool,