Represent ParseItem with reference to Production

Implement comparisons in a way that disregards past steps.
This commit is contained in:
Max Brunsfeld 2018-12-21 15:02:48 -08:00
parent a3dcfa0a52
commit 261a7fd073
9 changed files with 803 additions and 719 deletions

View file

@ -1,441 +0,0 @@
use super::item::ParseItem;
use crate::grammars::{Production, SyntaxGrammar};
use std::collections::HashMap;
/// Records, for every parse item whose next step refers to an inlinable
/// variable, the productions that result from performing that inlining.
pub(crate) struct InlinedProductionMap {
    /// All productions created by inlining, indexed by the ids stored in
    /// `item_map`.
    pub inlined_productions: Vec<Production>,
    // For each parse item where inlining occurred, the indices (into
    // `inlined_productions`) of the resulting productions.
    item_map: HashMap<ParseItem, Vec<u32>>,
}
impl InlinedProductionMap {
    /// Build the inline map for `grammar`: walk every production of every
    /// variable, and whenever a step's symbol appears in
    /// `grammar.variables_to_inline`, record the productions obtained by
    /// splicing that variable's productions into place.
    pub fn new(grammar: &SyntaxGrammar) -> Self {
        let mut result = Self {
            inlined_productions: Vec::new(),
            item_map: HashMap::new(),
        };

        // Worklist of items whose remaining steps still need to be scanned
        // for inlinable symbols.
        let mut items_to_process = Vec::new();
        for (variable_index, variable) in grammar.variables.iter().enumerate() {
            for production_index in 0..variable.productions.len() {
                items_to_process.push(ParseItem::Normal {
                    variable_index: variable_index as u32,
                    production_index: production_index as u32,
                    step_index: 0,
                });

                while !items_to_process.is_empty() {
                    let mut i = 0;
                    while i < items_to_process.len() {
                        let item = &items_to_process[i];
                        if let Some(step) = item.step(grammar, &result) {
                            if grammar.variables_to_inline.contains(&step.symbol) {
                                // Replace this item with one item per inlined
                                // production, so nested inlinable symbols in
                                // the spliced-in steps are also processed.
                                let inlined_items = result
                                    .inline(*item, grammar)
                                    .into_iter()
                                    .map(|production_index| ParseItem::Inlined {
                                        variable_index: item.variable_index(),
                                        production_index: *production_index,
                                        step_index: item.step_index() as u32,
                                    })
                                    .collect::<Vec<_>>();
                                items_to_process.splice(i..i + 1, inlined_items);
                            } else {
                                // Nothing to inline here; advance the dot.
                                items_to_process[i] = item.successor();
                                i += 1;
                            }
                        } else {
                            // Dot is at the end of the production; done.
                            items_to_process.remove(i);
                        }
                    }
                }
            }
        }
        result
    }

    /// For an item whose next step was inlined, return the corresponding
    /// inlined items (same variable and dot position, but an inlined
    /// production id). Returns `None` if no inlining occurred at this item.
    pub fn inlined_items<'a>(
        &'a self,
        item: ParseItem,
    ) -> Option<impl Iterator<Item = ParseItem> + 'a> {
        self.item_map.get(&item).map(|production_indices| {
            production_indices
                .iter()
                .cloned()
                .map(move |production_index| ParseItem::Inlined {
                    variable_index: item.variable_index(),
                    production_index,
                    step_index: item.step_index() as u32,
                })
        })
    }

    /// Perform the inlining for `item`'s current step and cache the ids of
    /// the resulting productions in `item_map`.
    fn inline(&mut self, item: ParseItem, grammar: &SyntaxGrammar) -> &Vec<u32> {
        let step_index = item.step_index();

        // Start from the productions of the variable being inlined.
        let mut productions_to_add = grammar.variables
            [item.step(grammar, self).unwrap().symbol.index]
            .productions
            .clone();

        // If any of those productions themselves *begin* with an inlinable
        // symbol, expand them in place before splicing, so the first step of
        // every production to add is a concrete symbol.
        let mut i = 0;
        while i < productions_to_add.len() {
            if let Some(first_symbol) = productions_to_add[i].first_symbol() {
                if grammar.variables_to_inline.contains(&first_symbol) {
                    // Remove the production from the vector, replacing it with a placeholder.
                    let production = productions_to_add
                        .splice(i..i + 1, [Production::default()].iter().cloned())
                        .next()
                        .unwrap();

                    // Replace the placeholder with the inlined productions.
                    productions_to_add.splice(
                        i..i + 1,
                        grammar.variables[first_symbol.index]
                            .productions
                            .iter()
                            .map(|p| {
                                let mut p = p.clone();
                                p.steps.extend(production.steps[1..].iter().cloned());
                                p
                            }),
                    );
                    // Re-examine index `i`: the spliced-in production may
                    // itself start with an inlinable symbol.
                    continue;
                }
            }
            i += 1;
        }

        let result = productions_to_add
            .into_iter()
            .map(|production_to_add| {
                let mut inlined_production = item.production(grammar, &self).clone();

                // Splice the inlined steps in place of the inlined symbol,
                // keeping the step that was removed.
                let removed_step = inlined_production
                    .steps
                    .splice(
                        step_index..step_index + 1,
                        production_to_add.steps.iter().cloned(),
                    )
                    .next()
                    .unwrap();
                let inserted_steps = &mut inlined_production.steps
                    [step_index..step_index + production_to_add.steps.len()];

                // Every inserted step inherits the removed step's alias.
                if let Some(alias) = removed_step.alias {
                    for inserted_step in inserted_steps.iter_mut() {
                        inserted_step.alias = Some(alias.clone());
                    }
                }

                // The last inserted step inherits the removed step's
                // precedence and associativity.
                if let Some(last_inserted_step) = inserted_steps.last_mut() {
                    last_inserted_step.precedence = removed_step.precedence;
                    last_inserted_step.associativity = removed_step.associativity;
                }

                // Deduplicate: reuse the id of an identical existing inlined
                // production. The lazy `unwrap_or_else` is essential — the
                // previous eager `unwrap_or({ .. })` ran the push block even
                // when `position` found a match, appending a duplicate
                // production on every call.
                self.inlined_productions
                    .iter()
                    .position(|p| *p == inlined_production)
                    .unwrap_or_else(|| {
                        self.inlined_productions.push(inlined_production);
                        self.inlined_productions.len() - 1
                    }) as u32
            })
            .collect();
        self.item_map.entry(item).or_insert(result)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammars::{LexicalGrammar, ProductionStep, SyntaxVariable, VariableType};
    use crate::rules::{Alias, Associativity, Symbol};
    use std::borrow::Borrow;

    // A single inlinable variable with two productions: inlining its step
    // should yield one item per production.
    #[test]
    fn test_basic_inlining() {
        let grammar = SyntaxGrammar {
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
            variables_to_inline: vec![Symbol::non_terminal(1)],
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(10)),
                            ProductionStep::new(Symbol::non_terminal(1)), // inlined
                            ProductionStep::new(Symbol::terminal(11)),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::terminal(12)),
                                ProductionStep::new(Symbol::terminal(13)),
                            ],
                        },
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![ProductionStep::new(Symbol::terminal(14))],
                        },
                    ],
                },
            ],
        };

        let inline_map = InlinedProductionMap::new(&grammar);

        // Nothing to inline at step 0.
        assert!(inline_map
            .inlined_items(ParseItem::Normal {
                variable_index: 0,
                production_index: 0,
                step_index: 0
            })
            .is_none());

        // Inlining variable 1 yields two productions.
        assert_eq!(
            display_items(
                inline_map
                    .inlined_items(ParseItem::Normal {
                        variable_index: 0,
                        production_index: 0,
                        step_index: 1
                    })
                    .unwrap(),
                &grammar,
                &inline_map
            ),
            vec![
                "non-terminal-0 → terminal-10 • terminal-12 terminal-13 terminal-11"
                    .to_string(),
                "non-terminal-0 → terminal-10 • terminal-14 terminal-11".to_string(),
            ]
        );
    }

    // Inlinable variables whose productions themselves contain (or begin
    // with) other inlinable variables must be expanded recursively.
    #[test]
    fn test_nested_inlining() {
        let grammar = SyntaxGrammar {
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(10)),
                            ProductionStep::new(Symbol::non_terminal(1)), // inlined
                            ProductionStep::new(Symbol::terminal(11)),
                            ProductionStep::new(Symbol::non_terminal(2)), // inlined
                            ProductionStep::new(Symbol::terminal(12)),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![ProductionStep::new(Symbol::terminal(13))],
                        },
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::non_terminal(3)), // inlined
                                ProductionStep::new(Symbol::terminal(14)),
                            ],
                        },
                    ],
                },
                SyntaxVariable {
                    name: "non-terminal-2".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(15))],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-3".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(16))],
                    }],
                },
            ],
            variables_to_inline: vec![
                Symbol::non_terminal(1),
                Symbol::non_terminal(2),
                Symbol::non_terminal(3),
            ],
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        let inline_map = InlinedProductionMap::new(&grammar);

        // Inlining variable 1 expands its second production's leading
        // non-terminal-3 step as well.
        let items = inline_map
            .inlined_items(ParseItem::Normal {
                variable_index: 0,
                production_index: 0,
                step_index: 1,
            })
            .unwrap()
            .collect::<Vec<_>>();

        assert_eq!(
            display_items(&items, &grammar, &inline_map),
            vec![
                "non-terminal-0 → terminal-10 • terminal-13 terminal-11 non-terminal-2 terminal-12".to_string(),
                "non-terminal-0 → terminal-10 • terminal-16 terminal-14 terminal-11 non-terminal-2 terminal-12".to_string()
            ]
        );

        // Advancing past the inlined steps lands on the later inlinable step.
        let item = items[0].successor().successor();
        assert_eq!(
            display_items(&[item], &grammar, &inline_map),
            vec![
                "non-terminal-0 → terminal-10 terminal-13 terminal-11 • non-terminal-2 terminal-12".to_string(),
            ]
        );

        // Inlining an already-inlined item's step works too.
        assert_eq!(
            display_items(inline_map.inlined_items(item).unwrap(), &grammar, &inline_map),
            vec![
                "non-terminal-0 → terminal-10 terminal-13 terminal-11 • terminal-15 terminal-12".to_string(),
            ]
        );
    }

    // Inlined steps must inherit alias/precedence/associativity from the
    // step that was replaced, without losing those of the inner steps.
    #[test]
    fn test_inlining_with_precedence_and_alias() {
        let grammar = SyntaxGrammar {
            variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)],
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::non_terminal(1)) // inlined
                                .with_prec(1, Some(Associativity::Left)),
                            ProductionStep::new(Symbol::terminal(10)),
                            ProductionStep::new(Symbol::non_terminal(2)), // inlined
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(11))
                                .with_prec(2, None)
                                .with_alias("inner_alias", true),
                            ProductionStep::new(Symbol::terminal(12)).with_prec(3, None),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-2".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(13))
                            .with_alias("outer_alias", true)],
                    }],
                },
            ],
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        let inline_map = InlinedProductionMap::new(&grammar);

        let items = inline_map
            .inlined_items(ParseItem::Normal {
                variable_index: 0,
                production_index: 0,
                step_index: 0,
            })
            .unwrap()
            .collect::<Vec<_>>();

        assert_eq!(
            display_items(&items, &grammar, &inline_map)[0],
            "non-terminal-0 → • terminal-11 terminal-12 terminal-10 non-terminal-2".to_string(),
        );

        // The first step in the inlined production retains its precedence and alias.
        let item = items[0].successor();
        assert_eq!(
            display_items(&[item], &grammar, &inline_map)[0],
            "non-terminal-0 → terminal-11 • terminal-12 terminal-10 non-terminal-2".to_string(),
        );
        assert_eq!(item.precedence(&grammar, &inline_map), 2);
        assert_eq!(
            items[0].step(&grammar, &inline_map).unwrap().alias,
            Some(Alias {
                value: "inner_alias".to_string(),
                is_named: true,
            })
        );

        // The final terminal of the inlined production inherits the precedence of
        // the inlined step.
        let item = item.successor();
        assert_eq!(
            display_items(&[item], &grammar, &inline_map)[0],
            "non-terminal-0 → terminal-11 terminal-12 • terminal-10 non-terminal-2".to_string(),
        );
        assert_eq!(item.precedence(&grammar, &inline_map), 1);

        let item = item.successor();
        assert_eq!(
            display_items(&[item], &grammar, &inline_map)[0],
            "non-terminal-0 → terminal-11 terminal-12 terminal-10 • non-terminal-2".to_string(),
        );

        // All steps of the inlined production inherit their alias from the
        // inlined step.
        let items = inline_map.inlined_items(item).unwrap().collect::<Vec<_>>();
        assert_eq!(
            display_items(&items, &grammar, &inline_map)[0],
            "non-terminal-0 → terminal-11 terminal-12 terminal-10 • terminal-13".to_string(),
        );
        assert_eq!(
            items[0].step(&grammar, &inline_map).unwrap().alias,
            Some(Alias {
                value: "outer_alias".to_string(),
                is_named: true,
            })
        )
    }

    // Renders each item via its `Display` impl for easy comparison.
    fn display_items(
        items: impl IntoIterator<Item = impl Borrow<ParseItem>>,
        grammar: &SyntaxGrammar,
        inline_map: &InlinedProductionMap,
    ) -> Vec<String> {
        let lex = LexicalGrammar::default();
        items
            .into_iter()
            .map(|item| format!("{}", item.borrow().display_with(grammar, &lex, inline_map)))
            .collect()
    }
}

View file

@ -1,10 +1,12 @@
use super::inline_variables::InlinedProductionMap;
use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::rules::{Associativity, Symbol, SymbolType};
use crate::rules::Associativity;
use crate::rules::{Symbol, SymbolType};
use smallbitvec::SmallBitVec;
use std::collections::{HashMap, BTreeMap};
use std::fmt;
use std::hash::{Hash, Hasher};
use std::u32;
use std::cmp::Ordering;
lazy_static! {
static ref START_PRODUCTION: Production = Production {
@ -28,49 +30,26 @@ pub(crate) struct LookaheadSet {
eof: bool,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum ParseItem {
Start {
step_index: u32,
},
Normal {
variable_index: u32,
production_index: u32,
step_index: u32,
},
Inlined {
variable_index: u32,
production_index: u32,
step_index: u32,
},
#[derive(Clone, Copy, Debug)]
pub(crate) struct ParseItem<'a> {
    /// Index of the variable (non-terminal) this production belongs to.
    /// `u32::MAX` marks the augmented start item (see `is_augmented`).
    pub variable_index: u32,
    /// Position of the dot within the production's steps.
    pub step_index: u32,
    /// The production itself is borrowed, so items are cheap `Copy` handles.
    pub production: &'a Production,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSet {
pub entries: BTreeMap<ParseItem, LookaheadSet>,
pub(crate) struct ParseItemSet<'a> {
    /// Items with their lookahead sets. A `BTreeMap` gives the set a
    /// canonical iteration order, used when hashing/comparing item sets.
    pub entries: BTreeMap<ParseItem<'a>, LookaheadSet>,
}
pub(crate) struct ParseItemDisplay<'a>(
&'a ParseItem,
&'a SyntaxGrammar,
&'a LexicalGrammar,
&'a InlinedProductionMap,
);
/// Adapter that formats a single `ParseItem` via `fmt::Display`.
pub(crate) struct ParseItemDisplay<'a>(&'a ParseItem<'a>, &'a SyntaxGrammar, &'a LexicalGrammar);
/// Adapter that formats a `LookaheadSet` via `fmt::Display`.
pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar);
pub(crate) struct ParseItemSetDisplay<'a>(
&'a ParseItemSet,
&'a ParseItemSet<'a>,
&'a SyntaxGrammar,
&'a LexicalGrammar,
&'a InlinedProductionMap,
);
// A parse item set paired with a cached 64-bit hash value.
struct ParseItemSetMapEntry(ParseItemSet, u64);
// Map keyed by parse item sets (with precomputed hashes) to values of type T.
pub(crate) struct ParseItemSetMap<T> {
    map: HashMap<ParseItemSetMapEntry, T>
}
impl LookaheadSet {
pub fn new() -> Self {
Self {
@ -173,152 +152,79 @@ impl LookaheadSet {
}
}
impl ParseItem {
impl<'a> ParseItem<'a> {
pub fn start() -> Self {
ParseItem::Start { step_index: 0 }
}
pub fn is_kernel(&self) -> bool {
match self {
ParseItem::Start { .. } => true,
ParseItem::Normal { step_index, .. } | ParseItem::Inlined { step_index, .. } => {
*step_index > 0
}
ParseItem {
variable_index: u32::MAX,
production: &START_PRODUCTION,
step_index: 0,
}
}
pub fn production<'a>(
&self,
grammar: &'a SyntaxGrammar,
inlined_productions: &'a InlinedProductionMap,
) -> &'a Production {
match self {
ParseItem::Start { .. } => &START_PRODUCTION,
ParseItem::Normal {
variable_index,
production_index,
..
} => {
&grammar.variables[*variable_index as usize].productions[*production_index as usize]
}
ParseItem::Inlined {
production_index, ..
} => &inlined_productions.inlined_productions[*production_index as usize],
/// The step at the dot, or `None` if the dot is at the end of the production.
pub fn step(&self) -> Option<&'a ProductionStep> {
    let index = self.step_index as usize;
    self.production.steps.get(index)
}
/// The symbol immediately following the dot, if any.
pub fn symbol(&self) -> Option<Symbol> {
    Some(self.step()?.symbol)
}
/// Associativity attached to the most recently consumed step, if any.
pub fn associativity(&self) -> Option<Associativity> {
    match self.prev_step() {
        Some(step) => step.associativity,
        None => None,
    }
}
/// Precedence of the most recently consumed step; 0 when the dot is at the
/// start of the production.
pub fn precedence(&self) -> i32 {
    match self.prev_step() {
        Some(step) => step.precedence,
        None => 0,
    }
}
/// The step immediately before the dot, or `None` when the dot is at the
/// start of the production.
///
/// Uses `checked_sub` so that `step_index == 0` yields `None` cleanly:
/// the previous `self.step_index as usize - 1` underflows and panics in
/// debug builds (and only happened to return `None` in release builds via
/// `get(usize::MAX)`).
pub fn prev_step(&self) -> Option<&'a ProductionStep> {
    let prev_index = self.step_index.checked_sub(1)?;
    self.production.steps.get(prev_index as usize)
}
/// True when the dot has advanced past every step of the production.
pub fn is_done(&self) -> bool {
    self.production.steps.len() == self.step_index as usize
}
/// True for the synthetic augmented start item, which uses `u32::MAX` as a
/// sentinel variable index.
pub fn is_augmented(&self) -> bool {
    self.variable_index == u32::MAX
}
/// The item obtained by advancing the dot one step; the variable and
/// production are unchanged.
pub fn successor(&self) -> ParseItem<'a> {
    ParseItem {
        step_index: self.step_index + 1,
        ..*self
    }
}
pub fn symbol(
&self,
grammar: &SyntaxGrammar,
inlined_productions: &InlinedProductionMap,
) -> Option<Symbol> {
self.step(grammar, inlined_productions).map(|s| s.symbol)
}
pub fn step<'a>(
&self,
grammar: &'a SyntaxGrammar,
inlined_productions: &'a InlinedProductionMap,
) -> Option<&'a ProductionStep> {
self.production(grammar, inlined_productions)
.steps
.get(self.step_index())
}
pub fn precedence<'a>(
&self,
grammar: &'a SyntaxGrammar,
inlines: &'a InlinedProductionMap,
) -> i32 {
self.production(grammar, inlines)
.steps
.get(self.step_index() - 1)
.map(|s| s.precedence)
.unwrap_or(0)
}
pub fn associativity<'a>(
&self,
grammar: &'a SyntaxGrammar,
inlines: &'a InlinedProductionMap,
) -> Option<Associativity> {
let production = self.production(grammar, inlines);
let step_index = self.step_index();
if step_index == production.steps.len() {
production.steps.last().and_then(|s| s.associativity)
} else {
None
}
}
pub fn variable_index(&self) -> u32 {
match self {
ParseItem::Start { .. } => panic!("Start item doesn't have a variable index"),
ParseItem::Normal { variable_index, .. }
| ParseItem::Inlined { variable_index, .. } => *variable_index,
}
}
pub fn step_index(&self) -> usize {
match self {
ParseItem::Start { step_index }
| ParseItem::Normal { step_index, .. }
| ParseItem::Inlined { step_index, .. } => *step_index as usize,
}
}
pub fn is_final(&self) -> bool {
if let ParseItem::Start { step_index: 1 } = self {
true
} else {
false
}
}
fn step_index_mut(&mut self) -> &mut u32 {
match self {
ParseItem::Start { step_index }
| ParseItem::Normal { step_index, .. }
| ParseItem::Inlined { step_index, .. } => step_index,
}
}
pub fn display_with<'a>(
pub fn display_with(
&'a self,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
) -> ParseItemDisplay<'a> {
ParseItemDisplay(self, syntax_grammar, lexical_grammar, inlines)
}
pub fn successor(&self) -> ParseItem {
let mut result = self.clone();
*result.step_index_mut() += 1;
result
ParseItemDisplay(self, syntax_grammar, lexical_grammar)
}
}
impl ParseItemSet {
pub fn with<'a>(elements: impl IntoIterator<Item = &'a (ParseItem, LookaheadSet)>) -> Self {
impl<'a> ParseItemSet<'a> {
pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, LookaheadSet)>) -> Self {
let mut result = Self::default();
for (item, lookaheads) in elements {
result.entries.insert(*item, lookaheads.clone());
result.entries.insert(item, lookaheads);
}
result
}
pub fn display_with<'a>(
pub fn display_with(
&'a self,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
) -> ParseItemSetDisplay<'a> {
ParseItemSetDisplay(self, syntax_grammar, lexical_grammar, inlines)
ParseItemSetDisplay(self, syntax_grammar, lexical_grammar)
}
}
impl Default for ParseItemSet {
impl<'a> Default for ParseItemSet<'a> {
fn default() -> Self {
Self {
entries: BTreeMap::new(),
@ -328,20 +234,18 @@ impl Default for ParseItemSet {
impl<'a> fmt::Display for ParseItemDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
if let ParseItem::Start { .. } = &self.0 {
if self.0.is_augmented() {
write!(f, "START →")?;
} else {
write!(
f,
"{} →",
&self.1.variables[self.0.variable_index() as usize].name
&self.1.variables[self.0.variable_index as usize].name
)?;
}
let step_index = self.0.step_index();
let production = self.0.production(self.1, self.3);
for (i, step) in production.steps.iter().enumerate() {
if i == step_index {
for (i, step) in self.0.production.steps.iter().enumerate() {
if i == self.0.step_index as usize {
write!(f, "")?;
}
@ -359,7 +263,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
}
}
if production.steps.len() == step_index {
if self.0.is_done() {
write!(f, "")?;
}
@ -398,7 +302,7 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
writeln!(
f,
"{}\t{}",
item.display_with(self.1, self.2, self.3),
item.display_with(self.1, self.2),
lookaheads.display_with(self.1, self.2)
)?;
}
@ -406,7 +310,94 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
}
}
impl Hash for ParseItemSet {
impl<'a> Hash for ParseItem<'a> {
    // Must agree with `PartialEq`: steps already consumed contribute only
    // their alias, while the remaining steps contribute in full.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_u32(self.variable_index);
        hasher.write_u32(self.step_index);
        hasher.write_i32(self.production.dynamic_precedence);
        hasher.write_usize(self.production.steps.len());
        hasher.write_i32(self.precedence());
        self.associativity().hash(hasher);
        let (consumed, remaining) = self.production.steps.split_at(self.step_index as usize);
        for step in consumed {
            step.alias.hash(hasher);
        }
        for step in remaining {
            step.hash(hasher);
        }
    }
}
impl<'a> PartialEq for ParseItem<'a> {
    // Equality deliberately disregards the details of already-consumed
    // steps: only their aliases are compared, while upcoming steps are
    // compared in full.
    fn eq(&self, other: &Self) -> bool {
        self.variable_index == other.variable_index
            && self.step_index == other.step_index
            && self.production.dynamic_precedence == other.production.dynamic_precedence
            && self.production.steps.len() == other.production.steps.len()
            && self.precedence() == other.precedence()
            && self.associativity() == other.associativity()
            && self
                .production
                .steps
                .iter()
                .zip(other.production.steps.iter())
                .enumerate()
                .all(|(i, (own_step, other_step))| {
                    if i < self.step_index as usize {
                        own_step.alias == other_step.alias
                    } else {
                        own_step == other_step
                    }
                })
    }
}
impl<'a> PartialOrd for ParseItem<'a> {
    /// Ordering consistent with `PartialEq`/`Hash`: compare by variable,
    /// dot position, dynamic precedence, production length, last-consumed
    /// precedence/associativity, then step-by-step (consumed steps by alias
    /// only, remaining steps in full).
    ///
    /// Each comparison short-circuits only when it is *not* `Equal`. The
    /// previous version returned on the first `Some(_)` result — and since
    /// `u32::partial_cmp` always returns `Some`, it declared any two items
    /// with the same `variable_index` equal, inconsistent with `eq` and
    /// corrupting `BTreeMap`s keyed by `ParseItem`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = self
            .variable_index
            .cmp(&other.variable_index)
            .then_with(|| self.step_index.cmp(&other.step_index))
            .then_with(|| {
                self.production
                    .dynamic_precedence
                    .cmp(&other.production.dynamic_precedence)
            })
            .then_with(|| {
                self.production
                    .steps
                    .len()
                    .cmp(&other.production.steps.len())
            })
            .then_with(|| self.precedence().cmp(&other.precedence()));
        if ordering != Ordering::Equal {
            return Some(ordering);
        }
        match self.associativity().partial_cmp(&other.associativity()) {
            Some(Ordering::Equal) => {}
            non_equal => return non_equal,
        }
        // Lengths are equal at this point, so indexing `other` is safe.
        for (i, step) in self.production.steps.iter().enumerate() {
            let step_ordering = if i < self.step_index as usize {
                step.alias.partial_cmp(&other.production.steps[i].alias)
            } else {
                step.partial_cmp(&other.production.steps[i])
            };
            match step_ordering {
                Some(Ordering::Equal) => {}
                non_equal => return non_equal,
            }
        }
        Some(Ordering::Equal)
    }
}
impl<'a> Ord for ParseItem<'a> {
    fn cmp(&self, other: &Self) -> Ordering {
        // Treat an undecided partial comparison as equality so `cmp` is total.
        match self.partial_cmp(other) {
            Some(ordering) => ordering,
            None => Ordering::Equal,
        }
    }
}
// Marker impl: the custom `PartialEq` above is reflexive over the compared
// fields, so equality is a full equivalence relation.
impl<'a> Eq for ParseItem<'a> {}
impl<'a> Hash for ParseItemSet<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_usize(self.entries.len());
for (item, lookaheads) in self.entries.iter() {

View file

@ -1,12 +1,11 @@
use super::inline_variables::InlinedProductionMap;
use super::item::{LookaheadSet, ParseItem, ParseItemSet};
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::rules::Symbol;
use std::collections::{HashMap, HashSet};
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition {
item: ParseItem,
struct TransitiveClosureAddition<'a> {
item: ParseItem<'a>,
info: FollowSetInfo,
}
@ -16,11 +15,10 @@ struct FollowSetInfo {
propagates_lookaheads: bool,
}
pub(crate) struct ParseItemSetBuilder {
pub(crate) struct ParseItemSetBuilder<'a> {
first_sets: HashMap<Symbol, LookaheadSet>,
last_sets: HashMap<Symbol, LookaheadSet>,
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition>>,
pub inlines: InlinedProductionMap,
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
}
fn find_or_push<T: Eq>(vector: &mut Vec<T>, value: T) {
@ -29,13 +27,16 @@ fn find_or_push<T: Eq>(vector: &mut Vec<T>, value: T) {
}
}
impl ParseItemSetBuilder {
pub fn new(syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar) -> Self {
impl<'a> ParseItemSetBuilder<'a> {
pub fn new(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
) -> Self {
let mut result = Self {
first_sets: HashMap::new(),
last_sets: HashMap::new(),
transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
inlines: InlinedProductionMap::new(syntax_grammar),
};
// For each grammar symbol, populate the FIRST and LAST sets: the set of
@ -193,22 +194,28 @@ impl ParseItemSetBuilder {
for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
let variable = &syntax_grammar.variables[variable_index];
let non_terminal = Symbol::non_terminal(variable_index);
let variable_index = variable_index as u32;
if syntax_grammar.variables_to_inline.contains(&non_terminal) {
continue;
}
for production_index in 0..variable.productions.len() {
let item = ParseItem::Normal {
variable_index: variable_index as u32,
production_index: production_index as u32,
for (production_index, production) in variable.productions.iter().enumerate() {
let item = ParseItem {
variable_index,
production,
step_index: 0,
};
if let Some(inlined_items) = result.inlines.inlined_items(item) {
for inlined_item in inlined_items {
// let step_id = item.as_step_id(syntax_grammar, inlines);
if let Some(inlined_productions) = inlines.inlined_productions(item.production, item.step_index) {
for production in inlined_productions {
find_or_push(
additions_for_non_terminal,
TransitiveClosureAddition {
item: inlined_item,
item: ParseItem {
variable_index,
production,
step_index: item.step_index,
},
info: follow_set_info.clone(),
},
);
@ -231,14 +238,19 @@ impl ParseItemSetBuilder {
pub(crate) fn transitive_closure(
&mut self,
item_set: &ParseItemSet,
grammar: &SyntaxGrammar,
) -> ParseItemSet {
item_set: &ParseItemSet<'a>,
grammar: &'a SyntaxGrammar,
inlines: &'a InlinedProductionMap,
) -> ParseItemSet<'a> {
let mut result = ParseItemSet::default();
for (item, lookaheads) in &item_set.entries {
if let Some(items) = self.inlines.inlined_items(*item) {
for item in items {
self.add_item(&mut result, item, lookaheads, grammar);
if let Some(productions) = inlines.inlined_productions(item.production, item.step_index) {
for production in productions {
self.add_item(&mut result, ParseItem {
variable_index: item.variable_index,
production,
step_index: item.step_index,
}, lookaheads, grammar);
}
} else {
self.add_item(&mut result, *item, lookaheads, grammar);
@ -253,14 +265,14 @@ impl ParseItemSetBuilder {
fn add_item(
&self,
set: &mut ParseItemSet,
item: ParseItem,
set: &mut ParseItemSet<'a>,
item: ParseItem<'a>,
lookaheads: &LookaheadSet,
grammar: &SyntaxGrammar,
) {
if let Some(step) = item.step(grammar, &self.inlines) {
if let Some(step) = item.step() {
if step.symbol.is_non_terminal() {
let next_step = item.successor().step(grammar, &self.inlines);
let next_step = item.successor().step();
// Determine which tokens can follow this non-terminal.
let following_tokens = if let Some(next_step) = next_step {

View file

@ -1,14 +1,14 @@
mod inline_variables;
mod item;
mod item_set_builder;
use self::item::{LookaheadSet, ParseItem, ParseItemSet};
use self::item_set_builder::ParseItemSetBuilder;
use crate::error::{Error, Result};
use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::rules::{AliasMap, Associativity, Symbol, SymbolType};
use crate::tables::ParseTableEntry;
use crate::tables::{AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable};
use crate::tables::{
AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
};
use core::ops::Range;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet, VecDeque};
@ -30,12 +30,13 @@ struct ParseStateQueueEntry {
}
struct ParseTableBuilder<'a> {
item_set_builder: ParseItemSetBuilder,
item_set_builder: ParseItemSetBuilder<'a>,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
simple_aliases: &'a AliasMap,
state_ids_by_item_set: HashMap<ParseItemSet, ParseStateId>,
item_sets_by_state_id: Vec<ParseItemSet>,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
parse_table: ParseTable,
}
@ -46,16 +47,17 @@ impl<'a> ParseTableBuilder<'a> {
self.parse_table.alias_sequences.push(Vec::new());
// Ensure that the error state has index 0.
let error_state_id = self.add_parse_state(
&Vec::new(),
&Vec::new(),
ParseItemSet::default(),
);
let error_state_id =
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
self.add_parse_state(
&Vec::new(),
&Vec::new(),
ParseItemSet::with(&[(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]),
ParseItemSet::with(
[(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]
.iter()
.cloned(),
),
);
self.process_part_state_queue()?;
@ -68,7 +70,7 @@ impl<'a> ParseTableBuilder<'a> {
&mut self,
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
item_set: ParseItemSet,
item_set: ParseItemSet<'a>,
) -> ParseStateId {
match self.state_ids_by_item_set.entry(item_set) {
Entry::Occupied(o) => {
@ -99,16 +101,14 @@ impl<'a> ParseTableBuilder<'a> {
println!(
"ITEM SET {}:\n{}",
entry.state_id,
self.item_sets_by_state_id[entry.state_id].display_with(
&self.syntax_grammar,
&self.lexical_grammar,
&self.item_set_builder.inlines
)
self.item_sets_by_state_id[entry.state_id]
.display_with(&self.syntax_grammar, &self.lexical_grammar,)
);
let item_set = self.item_set_builder.transitive_closure(
&self.item_sets_by_state_id[entry.state_id],
self.syntax_grammar,
self.inlines,
);
// println!("TRANSITIVE CLOSURE:");
@ -131,7 +131,7 @@ impl<'a> ParseTableBuilder<'a> {
&mut self,
mut preceding_symbols: SymbolSequence,
mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
item_set: ParseItemSet,
item_set: ParseItemSet<'a>,
state_id: ParseStateId,
) -> Result<()> {
let mut terminal_successors = HashMap::new();
@ -139,9 +139,7 @@ impl<'a> ParseTableBuilder<'a> {
let mut lookaheads_with_conflicts = HashSet::new();
for (item, lookaheads) in &item_set.entries {
if let Some(next_symbol) =
item.symbol(self.syntax_grammar, &self.item_set_builder.inlines)
{
if let Some(next_symbol) = item.symbol() {
let successor = item.successor();
if next_symbol.is_non_terminal() {
// Keep track of where auxiliary non-terminals (repeat symbols) are
@ -169,17 +167,15 @@ impl<'a> ParseTableBuilder<'a> {
.insert_all(lookaheads);
}
} else {
let action = if item.is_final() {
let action = if item.is_augmented() {
ParseAction::Accept
} else {
let production =
item.production(&self.syntax_grammar, &self.item_set_builder.inlines);
ParseAction::Reduce {
symbol: Symbol::non_terminal(item.variable_index() as usize),
child_count: item.step_index(),
precedence: production.last_precedence(),
associativity: production.last_associativity(),
dynamic_precedence: production.dynamic_precedence,
symbol: Symbol::non_terminal(item.variable_index as usize),
child_count: item.step_index as usize,
precedence: item.precedence(),
associativity: item.associativity(),
dynamic_precedence: item.production.dynamic_precedence,
alias_sequence_id: self.get_alias_sequence_id(item),
}
};
@ -280,17 +276,15 @@ impl<'a> ParseTableBuilder<'a> {
let mut shift_precedence: Option<Range<i32>> = None;
let mut conflicting_items = HashSet::new();
for (item, lookaheads) in &item_set.entries {
let production = item.production(&self.syntax_grammar, &self.item_set_builder.inlines);
let step_index = item.step_index();
if let Some(step) = production.steps.get(step_index) {
if step_index > 0 {
if let Some(step) = item.step() {
if item.step_index > 0 {
if self
.item_set_builder
.first_set(&step.symbol)
.contains(&conflicting_lookahead)
{
conflicting_items.insert(item);
let precedence = production.steps[step_index - 1].precedence;
let precedence = item.precedence();
if let Some(range) = &mut shift_precedence {
if precedence < range.start {
range.start = precedence;
@ -316,11 +310,11 @@ impl<'a> ParseTableBuilder<'a> {
// by leaving it in the parse table, but marking the SHIFT action with
// an `is_repetition` flag.
let conflicting_variable_index =
conflicting_items.iter().next().unwrap().variable_index();
conflicting_items.iter().next().unwrap().variable_index;
if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() {
if conflicting_items
.iter()
.all(|item| item.variable_index() == conflicting_variable_index)
.all(|item| item.variable_index == conflicting_variable_index)
{
*is_repetition = true;
return Ok(());
@ -340,10 +334,7 @@ impl<'a> ParseTableBuilder<'a> {
&& shift_precedence.start < reduce_precedence)
{
entry.actions.pop();
conflicting_items.retain(|item| {
item.step(&self.syntax_grammar, &self.item_set_builder.inlines)
.is_none()
});
conflicting_items.retain(|item| item.is_done());
}
// If the SHIFT and REDUCE actions have the same predence, consider
// the REDUCE actions' associativity.
@ -367,10 +358,7 @@ impl<'a> ParseTableBuilder<'a> {
match (has_left, has_non, has_right) {
(true, false, false) => {
entry.actions.pop();
conflicting_items.retain(|item| {
item.step(&self.syntax_grammar, &self.item_set_builder.inlines)
.is_none()
});
conflicting_items.retain(|item| item.is_done());
}
(false, false, true) => {
entry.actions.drain(0..entry.actions.len() - 1);
@ -392,7 +380,7 @@ impl<'a> ParseTableBuilder<'a> {
// Determine the set of parent symbols involved in this conflict.
let mut actual_conflict = Vec::new();
for item in &conflicting_items {
let symbol = Symbol::non_terminal(item.variable_index() as usize);
let symbol = Symbol::non_terminal(item.variable_index as usize);
if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
actual_conflict.extend(
preceding_auxiliary_symbols
@ -441,7 +429,7 @@ impl<'a> ParseTableBuilder<'a> {
for preceding_symbol in preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index())
.take(preceding_symbols.len() - item.step_index as usize)
{
write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap();
}
@ -449,17 +437,12 @@ impl<'a> ParseTableBuilder<'a> {
write!(
&mut msg,
" ({}",
&self.syntax_grammar.variables[item.variable_index() as usize].name
&self.syntax_grammar.variables[item.variable_index as usize].name
)
.unwrap();
for (j, step) in item
.production(&self.syntax_grammar, &self.item_set_builder.inlines)
.steps
.iter()
.enumerate()
{
if j == item.step_index() {
for (j, step) in item.production.steps.iter().enumerate() {
if j as u32 == item.step_index {
write!(&mut msg, "").unwrap();
}
write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap();
@ -467,10 +450,7 @@ impl<'a> ParseTableBuilder<'a> {
write!(&mut msg, ")").unwrap();
if item
.step(&self.syntax_grammar, &self.item_set_builder.inlines)
.is_none()
{
if item.is_done() {
write!(
&mut msg,
" • {}",
@ -479,9 +459,8 @@ impl<'a> ParseTableBuilder<'a> {
.unwrap();
}
let precedence = item.precedence(&self.syntax_grammar, &self.item_set_builder.inlines);
let associativity =
item.associativity(&self.syntax_grammar, &self.item_set_builder.inlines);
let precedence = item.precedence();
let associativity = item.associativity();
if precedence != 0 || associativity.is_some() {
write!(
&mut msg,
@ -506,8 +485,7 @@ impl<'a> ParseTableBuilder<'a> {
.entries
.keys()
.filter_map(|item| {
if item.symbol(&self.syntax_grammar, &self.item_set_builder.inlines) == Some(symbol)
{
if item.symbol() == Some(symbol) {
None
} else {
None
@ -554,8 +532,12 @@ impl<'a> ParseTableBuilder<'a> {
}
fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
let production = item.production(&self.syntax_grammar, &self.item_set_builder.inlines);
let alias_sequence = production.steps.iter().map(|s| s.alias.clone()).collect();
let alias_sequence = item
.production
.steps
.iter()
.map(|s| s.alias.clone())
.collect();
if let Some(index) = self
.parse_table
.alias_sequences
@ -592,12 +574,14 @@ pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
inlines: &InlinedProductionMap,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
ParseTableBuilder {
syntax_grammar,
lexical_grammar,
simple_aliases,
item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar),
inlines,
item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines),
state_ids_by_item_set: HashMap::new(),
item_sets_by_state_id: Vec::new(),
parse_state_queue: VecDeque::new(),

View file

@ -6,11 +6,12 @@ use crate::render::render_c_code;
pub fn generate_parser_for_grammar(input: &str) -> Result<String> {
let input_grammar = parse_grammar(input)?;
let (syntax_grammar, lexical_grammar, simple_aliases) = prepare_grammar(&input_grammar)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?;
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
&syntax_grammar,
&lexical_grammar,
&simple_aliases
&simple_aliases,
&inlines
)?;
let c_code = render_c_code(
&input_grammar.name,

View file

@ -1,12 +1,13 @@
use crate::rules::{Associativity, Alias, Rule, Symbol};
use crate::nfa::Nfa;
use crate::rules::{Alias, Associativity, Rule, Symbol};
use std::collections::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VariableType {
Hidden,
Auxiliary,
Anonymous,
Named
Named,
}
// Input grammar
@ -46,12 +47,12 @@ pub(crate) struct LexicalGrammar {
// Extracted syntax grammar
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct ProductionStep {
pub symbol: Symbol,
pub precedence: i32,
pub associativity: Option<Associativity>,
pub alias: Option<Alias>,
pub symbol: Symbol,
pub precedence: i32,
pub associativity: Option<Associativity>,
pub alias: Option<Alias>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
@ -60,6 +61,11 @@ pub(crate) struct Production {
pub dynamic_precedence: i32,
}
// Records, for each (production, step) pair whose symbol was inlined, the
// productions that result from performing the inlining.
pub(crate) struct InlinedProductionMap {
    // Productions synthesized during inlining; referenced by index from `production_map`.
    pub productions: Vec<Production>,
    // Keyed by the *address* of the original production plus the index of the
    // inlined step; values are indices into `productions`. Address-based keys
    // mean lookups are by production identity, not by structural equality.
    pub production_map: HashMap<(*const Production, u32), Vec<usize>>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SyntaxVariable {
pub name: String,
@ -86,7 +92,12 @@ pub(crate) struct SyntaxGrammar {
impl ProductionStep {
pub(crate) fn new(symbol: Symbol) -> Self {
Self { symbol, precedence: 0, associativity: None, alias: None }
Self {
symbol,
precedence: 0,
associativity: None,
alias: None,
}
}
pub(crate) fn with_prec(self, precedence: i32, associativity: Option<Associativity>) -> Self {
@ -103,7 +114,10 @@ impl ProductionStep {
symbol: self.symbol,
precedence: self.precedence,
associativity: self.associativity,
alias: Some(Alias { value: value.to_string(), is_named }),
alias: Some(Alias {
value: value.to_string(),
is_named,
}),
}
}
}
@ -124,25 +138,44 @@ impl Production {
impl Default for Production {
fn default() -> Self {
Production { dynamic_precedence: 0, steps: Vec::new() }
Production {
dynamic_precedence: 0,
steps: Vec::new(),
}
}
}
impl Variable {
pub fn named(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Named, rule }
Self {
name: name.to_string(),
kind: VariableType::Named,
rule,
}
}
pub fn auxiliary(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Auxiliary, rule }
Self {
name: name.to_string(),
kind: VariableType::Auxiliary,
rule,
}
}
pub fn hidden(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Hidden, rule }
Self {
name: name.to_string(),
kind: VariableType::Hidden,
rule,
}
}
pub fn anonymous(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Anonymous, rule }
Self {
name: name.to_string(),
kind: VariableType::Anonymous,
rule,
}
}
}
@ -151,3 +184,20 @@ impl SyntaxVariable {
self.kind == VariableType::Auxiliary
}
}
impl InlinedProductionMap {
    /// Returns the productions produced by inlining the symbol at `step_index`
    /// of `production`, or `None` if that step was not inlined.
    pub fn inlined_productions<'a>(
        &'a self,
        production: &Production,
        step_index: u32,
    ) -> Option<impl Iterator<Item = &'a Production> + 'a> {
        // The map is keyed by the production's address, so this lookup is by
        // identity rather than by structural equality.
        let key = (production as *const Production, step_index);
        self.production_map
            .get(&key)
            .map(move |indices| indices.iter().map(move |&index| &self.productions[index]))
    }
}

View file

@ -4,6 +4,7 @@ mod extract_simple_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;
mod process_inlines;
use self::expand_repeats::expand_repeats;
use self::expand_tokens::expand_tokens;
@ -11,8 +12,11 @@ use self::extract_simple_aliases::extract_simple_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
use self::process_inlines::process_inlines;
use crate::error::Result;
use crate::grammars::{ExternalToken, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable};
use crate::grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable,
};
use crate::rules::{AliasMap, Rule, Symbol};
pub(self) struct IntermediateGrammar<T, U> {
@ -36,12 +40,18 @@ pub(self) struct ExtractedLexicalGrammar {
pub(crate) fn prepare_grammar(
input_grammar: &InputGrammar,
) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> {
) -> Result<(
SyntaxGrammar,
LexicalGrammar,
InlinedProductionMap,
AliasMap,
)> {
let interned_grammar = intern_symbols(input_grammar)?;
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
let syntax_grammar = expand_repeats(syntax_grammar);
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
let lexical_grammar = expand_tokens(lexical_grammar)?;
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
Ok((syntax_grammar, lexical_grammar, simple_aliases))
let inlines = process_inlines(&syntax_grammar);
Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
}

View file

@ -0,0 +1,477 @@
use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
use std::collections::HashMap;
// Identifies one step within one production. The production either belongs to
// a grammar variable (`variable_index` is `Some`) or is a production that was
// itself synthesized by inlining (`variable_index` is `None`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {
    // `None` means the production lives in `InlinedProductionMapBuilder::productions`.
    variable_index: Option<usize>,
    production_index: usize,
    step_index: usize,
}
// Accumulates the productions synthesized by inlining and, for each
// (production, step) id that required inlining, the indices of the
// resulting productions.
struct InlinedProductionMapBuilder {
    production_indices_by_step_id: HashMap<ProductionStepId, Vec<usize>>,
    productions: Vec<Production>,
}
impl ProductionStepId {
    /// The id of the step immediately following this one within the same production.
    pub fn successor(&self) -> Self {
        // `ProductionStepId` is `Copy`, so struct-update syntax copies the
        // remaining fields unchanged.
        Self {
            step_index: self.step_index + 1,
            ..*self
        }
    }
}
// Resolve a step id to the production it refers to: either one of the
// grammar's own productions, or one synthesized earlier during inlining.
fn production_for_id<'a>(
    map: &'a InlinedProductionMapBuilder,
    id: ProductionStepId,
    grammar: &'a SyntaxGrammar,
) -> &'a Production {
    match id.variable_index {
        Some(variable_index) => &grammar.variables[variable_index].productions[id.production_index],
        None => &map.productions[id.production_index],
    }
}
// Resolve a step id to the actual production step, or `None` when the id's
// step index is past the end of its production (i.e. the item is complete).
fn production_step_for_id<'a>(
    map: &'a InlinedProductionMapBuilder,
    id: ProductionStepId,
    grammar: &'a SyntaxGrammar,
) -> Option<&'a ProductionStep> {
    let production = production_for_id(map, id, grammar);
    production.steps.get(id.step_index)
}
// Compute the productions that result from inlining the non-terminal at the
// step identified by `step_id`, registering each new production in `map`.
//
// Returns the indices (into `map.productions`) of the resulting productions.
// Because a production of the inlined variable may itself *begin* with an
// inlinable symbol, those first symbols are expanded recursively before the
// substitution is performed.
fn inline<'a>(
    map: &'a mut InlinedProductionMapBuilder,
    step_id: ProductionStepId,
    grammar: &'a SyntaxGrammar,
) -> &'a Vec<usize> {
    let step = production_step_for_id(map, step_id, grammar).unwrap();

    // Start from all of the inlined variable's productions, then expand any
    // whose first symbol must itself be inlined.
    let mut productions_to_add = grammar.variables[step.symbol.index].productions.clone();
    let mut i = 0;
    while i < productions_to_add.len() {
        if let Some(first_symbol) = productions_to_add[i].first_symbol() {
            if grammar.variables_to_inline.contains(&first_symbol) {
                // Take the production out of the vector, leaving a placeholder.
                let production =
                    std::mem::replace(&mut productions_to_add[i], Production::default());

                // Replace the placeholder with one production per production of
                // the nested inlined variable, appending the remainder of the
                // removed production's steps to each.
                productions_to_add.splice(
                    i..i + 1,
                    grammar.variables[first_symbol.index]
                        .productions
                        .iter()
                        .map(|p| {
                            let mut p = p.clone();
                            p.steps.extend(production.steps[1..].iter().cloned());
                            p
                        }),
                );

                // Re-examine index `i`: the replacement may also start with an
                // inlinable symbol.
                continue;
            }
        }
        i += 1;
    }

    let result = productions_to_add
        .into_iter()
        .map(|production_to_add| {
            // Substitute the inlined production's steps in place of the
            // inlined step within a copy of the original production.
            let mut inlined_production = production_for_id(&map, step_id, grammar).clone();
            let removed_step = inlined_production
                .steps
                .splice(
                    step_id.step_index..step_id.step_index + 1,
                    production_to_add.steps.iter().cloned(),
                )
                .next()
                .unwrap();
            let inserted_steps = &mut inlined_production.steps
                [step_id.step_index..step_id.step_index + production_to_add.steps.len()];

            // An alias on the inlined step applies to *every* inserted step.
            if let Some(alias) = removed_step.alias {
                for inserted_step in inserted_steps.iter_mut() {
                    inserted_step.alias = Some(alias.clone());
                }
            }

            // Precedence and associativity of the inlined step carry over to
            // the *last* inserted step only.
            if let Some(last_inserted_step) = inserted_steps.last_mut() {
                last_inserted_step.precedence = removed_step.precedence;
                last_inserted_step.associativity = removed_step.associativity;
            }

            // Deduplicate: reuse an existing identical production if present.
            // NOTE: this must be the lazy `unwrap_or_else`, not `unwrap_or` —
            // the eager form evaluates its argument unconditionally and would
            // push a duplicate production even when a match was found.
            map.productions
                .iter()
                .position(|p| *p == inlined_production)
                .unwrap_or_else(|| {
                    map.productions.push(inlined_production);
                    map.productions.len() - 1
                })
        })
        .collect();

    map.production_indices_by_step_id
        .entry(step_id)
        .or_insert(result)
}
// Build the `InlinedProductionMap` for a grammar: for every step of every
// production whose symbol is listed in `variables_to_inline`, precompute the
// productions that result from performing the inlining.
//
// Uses a work list of `ProductionStepId`s. Each id is advanced step-by-step
// through its production; when a step's symbol must be inlined, the id is
// replaced by one id per resulting inlined production (so the *new*
// productions are themselves scanned for further inlining). An id whose step
// index runs past the end of its production is dropped.
pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap {
    let mut result = InlinedProductionMapBuilder {
        productions: Vec::new(),
        production_indices_by_step_id: HashMap::new(),
    };

    let mut step_ids_to_process = Vec::new();
    for (variable_index, variable) in grammar.variables.iter().enumerate() {
        for production_index in 0..variable.productions.len() {
            // Seed the work list with the first step of this production.
            step_ids_to_process.push(ProductionStepId {
                variable_index: Some(variable_index),
                production_index,
                step_index: 0,
            });
            // Drain the work list before moving on to the next production.
            while !step_ids_to_process.is_empty() {
                let mut i = 0;
                while i < step_ids_to_process.len() {
                    let step_id = step_ids_to_process[i];
                    if let Some(step) = production_step_for_id(&result, step_id, grammar) {
                        if grammar.variables_to_inline.contains(&step.symbol) {
                            // Replace this id with one id per inlined production,
                            // positioned at the same step index so the newly
                            // substituted steps are examined too.
                            let inlined_step_ids = inline(&mut result, step_id, grammar)
                                .into_iter()
                                .cloned()
                                .map(|production_index| ProductionStepId {
                                    variable_index: None,
                                    production_index,
                                    step_index: step_id.step_index,
                                })
                                .collect::<Vec<_>>();
                            step_ids_to_process.splice(i..i + 1, inlined_step_ids);
                        } else {
                            // Nothing to inline here; advance to the next step.
                            step_ids_to_process[i] = step_id.successor();
                            i += 1;
                        }
                    } else {
                        // Past the end of the production; this id is finished.
                        step_ids_to_process.remove(i);
                    }
                }
            }
        }
    }

    // Convert the builder's id-keyed map into the final pointer-keyed map:
    // resolve each step id back to the address of its production.
    let productions = result.productions;
    let production_indices_by_step_id = result.production_indices_by_step_id;
    let production_map = production_indices_by_step_id
        .into_iter()
        .map(|(step_id, production_indices)| {
            let production = if let Some(variable_index) = step_id.variable_index {
                &grammar.variables[variable_index].productions[step_id.production_index]
            } else {
                &productions[step_id.production_index]
            } as *const Production;
            ((production, step_id.step_index as u32), production_indices)
        })
        .collect();

    InlinedProductionMap {
        productions,
        production_map,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammars::{ProductionStep, SyntaxVariable, VariableType};
    use crate::rules::{Associativity, Symbol};

    // A single inlined variable with two productions expands one production
    // into two.
    #[test]
    fn test_basic_inlining() {
        let grammar = SyntaxGrammar {
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
            variables_to_inline: vec![Symbol::non_terminal(1)],
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(10)),
                            ProductionStep::new(Symbol::non_terminal(1)), // inlined
                            ProductionStep::new(Symbol::terminal(11)),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::terminal(12)),
                                ProductionStep::new(Symbol::terminal(13)),
                            ],
                        },
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![ProductionStep::new(Symbol::terminal(14))],
                        },
                    ],
                },
            ],
        };

        let inline_map = process_inlines(&grammar);

        // Nothing to inline at step 0.
        assert!(inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 0)
            .is_none());

        // Inlining variable 1 yields two productions.
        assert_eq!(
            inline_map
                .inlined_productions(&grammar.variables[0].productions[0], 1)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(12)),
                        ProductionStep::new(Symbol::terminal(13)),
                        ProductionStep::new(Symbol::terminal(11)),
                    ],
                },
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(14)),
                        ProductionStep::new(Symbol::terminal(11)),
                    ],
                },
            ]
        );
    }

    // Inlined variables whose productions begin with further inlined
    // variables are expanded recursively, and productions created by an
    // earlier inlining can themselves be looked up for later steps.
    #[test]
    fn test_nested_inlining() {
        let grammar = SyntaxGrammar {
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(10)),
                            ProductionStep::new(Symbol::non_terminal(1)), // inlined
                            ProductionStep::new(Symbol::terminal(11)),
                            ProductionStep::new(Symbol::non_terminal(2)), // inlined
                            ProductionStep::new(Symbol::terminal(12)),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![ProductionStep::new(Symbol::terminal(13))],
                        },
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::non_terminal(3)), // inlined
                                ProductionStep::new(Symbol::terminal(14)),
                            ],
                        },
                    ],
                },
                SyntaxVariable {
                    name: "non-terminal-2".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(15))],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-3".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(16))],
                    }],
                },
            ],
            variables_to_inline: vec![
                Symbol::non_terminal(1),
                Symbol::non_terminal(2),
                Symbol::non_terminal(3),
            ],
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        let inline_map = process_inlines(&grammar);

        let productions: Vec<&Production> = inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 1)
            .unwrap()
            .collect();

        assert_eq!(
            productions.iter().cloned().cloned().collect::<Vec<_>>(),
            vec![
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(13)),
                        ProductionStep::new(Symbol::terminal(11)),
                        ProductionStep::new(Symbol::non_terminal(2)),
                        ProductionStep::new(Symbol::terminal(12)),
                    ],
                },
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(16)),
                        ProductionStep::new(Symbol::terminal(14)),
                        ProductionStep::new(Symbol::terminal(11)),
                        ProductionStep::new(Symbol::non_terminal(2)),
                        ProductionStep::new(Symbol::terminal(12)),
                    ],
                },
            ]
        );

        // The production created above can itself be queried, at the step
        // where `non-terminal-2` appears.
        assert_eq!(
            inline_map
                .inlined_productions(productions[0], 3)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![
                    ProductionStep::new(Symbol::terminal(10)),
                    ProductionStep::new(Symbol::terminal(13)),
                    ProductionStep::new(Symbol::terminal(11)),
                    ProductionStep::new(Symbol::terminal(15)),
                    ProductionStep::new(Symbol::terminal(12)),
                ],
            },]
        );
    }

    // Aliases on an inlined step are copied to every substituted step, while
    // precedence/associativity transfer only to the last substituted step.
    #[test]
    fn test_inlining_with_precedence_and_alias() {
        let grammar = SyntaxGrammar {
            variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)],
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            // inlined
                            ProductionStep::new(Symbol::non_terminal(1))
                                .with_prec(1, Some(Associativity::Left)),
                            ProductionStep::new(Symbol::terminal(10)),
                            // inlined
                            ProductionStep::new(Symbol::non_terminal(2))
                                .with_alias("outer_alias", true),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(11))
                                .with_prec(2, None)
                                .with_alias("inner_alias", true),
                            ProductionStep::new(Symbol::terminal(12)).with_prec(3, None),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-2".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(13))],
                    }],
                },
            ],
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        let inline_map = process_inlines(&grammar);

        let productions: Vec<_> = inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 0)
            .unwrap()
            .collect();

        assert_eq!(
            productions.iter().cloned().cloned().collect::<Vec<_>>(),
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![
                    // The first step in the inlined production retains its precedence
                    // and alias.
                    ProductionStep::new(Symbol::terminal(11))
                        .with_prec(2, None)
                        .with_alias("inner_alias", true),
                    // The final step of the inlined production inherits the precedence of
                    // the inlined step.
                    ProductionStep::new(Symbol::terminal(12))
                        .with_prec(1, Some(Associativity::Left)),
                    ProductionStep::new(Symbol::terminal(10)),
                    ProductionStep::new(Symbol::non_terminal(2))
                        .with_alias("outer_alias", true),
                ]
            }],
        );

        assert_eq!(
            inline_map
                .inlined_productions(productions[0], 3)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![
                    ProductionStep::new(Symbol::terminal(11))
                        .with_prec(2, None)
                        .with_alias("inner_alias", true),
                    ProductionStep::new(Symbol::terminal(12))
                        .with_prec(1, Some(Associativity::Left)),
                    ProductionStep::new(Symbol::terminal(10)),
                    // All steps of the inlined production inherit their alias from the
                    // inlined step.
                    ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true),
                ]
            }],
        );
    }
}

View file

@ -8,13 +8,13 @@ pub(crate) enum SymbolType {
End,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum Associativity {
Left,
Right,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Alias {
pub value: String,
pub is_named: bool,