Implement variable inlining

This commit is contained in:
Max Brunsfeld 2018-12-18 16:05:36 -08:00
parent 5fa586f7c9
commit 889f232b4c
9 changed files with 567 additions and 31 deletions

View file

@ -0,0 +1,318 @@
use super::item::ParseItem;
use crate::grammars::{Production, SyntaxGrammar};
use std::collections::HashMap;
/// Lookup table built by `InlinedProductionMap::new` that records, for parse
/// items whose next symbol is listed in `variables_to_inline`, the productions
/// obtained by substituting that symbol's own productions in place.
pub(crate) struct InlinedProductionMap {
/// Every production synthesized by inlining. The `production_index` of a
/// `ParseItem::Inlined` is an index into this vector.
pub inlined_productions: Vec<Production>,
// Maps an item (positioned just before an inlined symbol) to the indices in
// `inlined_productions` of the productions that replace it.
item_map: HashMap<ParseItem, Vec<u32>>,
}
impl InlinedProductionMap {
    /// Builds the inlining table for `grammar`.
    ///
    /// Starting from step 0 of every production of every variable, each item
    /// is advanced one step at a time. Whenever the symbol at the item's
    /// current step is listed in `grammar.variables_to_inline`, the item is
    /// replaced by one `ParseItem::Inlined` per production produced by
    /// `inline`, and those replacement items are then walked in turn, so that
    /// inlined symbols appearing later in the expanded productions are
    /// processed as well.
    pub fn new(grammar: &SyntaxGrammar) -> Self {
        let mut result = Self {
            inlined_productions: Vec::new(),
            item_map: HashMap::new(),
        };

        let mut items_to_process = Vec::new();
        for (variable_index, variable) in grammar.variables.iter().enumerate() {
            for production_index in 0..variable.productions.len() {
                items_to_process.push(ParseItem::Normal {
                    variable_index: variable_index as u32,
                    production_index: production_index as u32,
                    step_index: 0,
                });

                // Drain the work list completely before moving on to the next
                // production: every pass either advances, expands or drops
                // each pending item.
                while !items_to_process.is_empty() {
                    let mut i = 0;
                    while i < items_to_process.len() {
                        let item = &items_to_process[i];
                        if let Some(step) = item.step(grammar, &result) {
                            if grammar.variables_to_inline.contains(&step.symbol) {
                                // Swap the item for its inlined equivalents,
                                // keeping the same step position. `i` is not
                                // advanced so the replacements are examined
                                // on the next iteration.
                                let inlined_items = result
                                    .inline(*item, grammar)
                                    .into_iter()
                                    .map(|production_index| ParseItem::Inlined {
                                        variable_index: item.variable_index(),
                                        production_index: *production_index,
                                        step_index: item.step_index() as u32,
                                    })
                                    .collect::<Vec<_>>();
                                items_to_process.splice(i..i + 1, inlined_items);
                            } else {
                                items_to_process[i] = item.successor();
                                i += 1;
                            }
                        } else {
                            // The item has moved past its last step.
                            items_to_process.remove(i);
                        }
                    }
                }
            }
        }

        result
    }

    /// Returns the inlined items recorded for `item`, or `None` if nothing
    /// was recorded for it (i.e. `item` was never passed to `inline`).
    ///
    /// Each returned item is a `ParseItem::Inlined` that keeps `item`'s
    /// variable index and step index and points at one of the synthesized
    /// productions.
    pub fn inlined_items<'a>(
        &'a self,
        item: ParseItem,
    ) -> Option<impl Iterator<Item = ParseItem> + 'a> {
        self.item_map.get(&item).map(|production_indices| {
            production_indices
                .iter()
                .cloned()
                .map(move |production_index| ParseItem::Inlined {
                    variable_index: item.variable_index(),
                    production_index,
                    step_index: item.step_index() as u32,
                })
        })
    }

    /// Expands the symbol at `item`'s current step and records the result.
    ///
    /// The productions of the inlined variable are first flattened: while any
    /// of them *begins* with another inlined variable, that leading symbol is
    /// replaced by each of its productions. Every flattened production is
    /// then spliced into a copy of `item`'s production in place of the
    /// inlined step. Resulting productions are stored in
    /// `inlined_productions` (reusing an existing equal production when there
    /// is one) and their indices are stored in `item_map` under `item`.
    ///
    /// Panics if `item` has no step at its current position.
    fn inline(&mut self, item: ParseItem, grammar: &SyntaxGrammar) -> &Vec<u32> {
        let step_index = item.step_index();
        let mut productions_to_add = grammar.variables
            [item.step(grammar, self).unwrap().symbol.index]
            .productions
            .clone();

        let mut i = 0;
        while i < productions_to_add.len() {
            if let Some(first_symbol) = productions_to_add[i].first_symbol() {
                if grammar.variables_to_inline.contains(&first_symbol) {
                    // Take the production out of the vector, leaving an empty
                    // placeholder in its slot.
                    let production =
                        std::mem::replace(&mut productions_to_add[i], Production::default());

                    // Replace the placeholder with one production per
                    // alternative of the leading inlined variable, each
                    // followed by the remainder of the original production.
                    productions_to_add.splice(
                        i..i + 1,
                        grammar.variables[first_symbol.index]
                            .productions
                            .iter()
                            .map(|p| {
                                let mut p = p.clone();
                                p.steps.extend(production.steps[1..].iter().cloned());
                                p
                            }),
                    );

                    // Re-examine index `i`: the new production there may
                    // itself start with an inlined variable.
                    continue;
                }
            }
            i += 1;
        }

        let result = productions_to_add
            .into_iter()
            .map(|production_to_add| {
                let mut inlined_production = item.production(grammar, &self).clone();
                inlined_production.steps.splice(
                    step_index..step_index + 1,
                    production_to_add.steps.iter().cloned(),
                );

                // Only append the production when no equal one is stored yet.
                // (`unwrap_or` would evaluate the push unconditionally and
                // leave a duplicate behind on every hit.)
                let existing = self
                    .inlined_productions
                    .iter()
                    .position(|p| *p == inlined_production);
                let index = match existing {
                    Some(index) => index,
                    None => {
                        self.inlined_productions.push(inlined_production);
                        self.inlined_productions.len() - 1
                    }
                };
                index as u32
            })
            .collect();

        self.item_map.entry(item).or_insert(result)
    }
}
// Unit tests for `InlinedProductionMap`. Items are rendered through
// `ParseItem::with(..)` and compared as strings, where "•" marks the item's
// current step.
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::{ProductionStep, SyntaxVariable, VariableType};
use crate::rules::Symbol;
// Grammar under test:
//   var0 -> terminal-10 var1 terminal-11
//   var1 -> terminal-12 terminal-13 | terminal-14      (var1 is inlined)
#[test]
fn test_basic_inlining() {
let grammar = SyntaxGrammar {
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
variables_to_inline: vec![Symbol::non_terminal(1)],
variables: vec![
SyntaxVariable {
name: "var0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
],
}],
},
SyntaxVariable {
name: "var1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(12)),
ProductionStep::new(Symbol::terminal(13)),
],
},
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(14))],
},
],
},
],
};
let inline_map = InlinedProductionMap::new(&grammar);
// Nothing to inline at step 0.
assert_eq!(
display_items(
inline_map.inlined_items(ParseItem::Normal {
variable_index: 0,
production_index: 0,
step_index: 0
}),
&grammar,
&inline_map
),
None
);
// Inlining variable 1 yields two productions.
assert_eq!(
display_items(
inline_map.inlined_items(ParseItem::Normal {
variable_index: 0,
production_index: 0,
step_index: 1
}),
&grammar,
&inline_map
),
Some(vec![
"terminal-10 • terminal-12 terminal-13 terminal-11".to_string(),
"terminal-10 • terminal-14 terminal-11".to_string(),
])
);
}
// Grammar under test (var1, var2 and var3 are all inlined):
//   var0 -> terminal-10 var1 terminal-11 var2 terminal-12
//   var1 -> terminal-13 | var3 terminal-14
//   var2 -> terminal-15
//   var3 -> terminal-16
#[test]
fn test_nested_inlining() {
let grammar = SyntaxGrammar {
variables: vec![
SyntaxVariable {
name: "var0".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)), // inlined
ProductionStep::new(Symbol::terminal(12)),
],
},
],
},
SyntaxVariable {
name: "var1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(13))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(3)), // inlined
ProductionStep::new(Symbol::terminal(14)),
],
},
],
},
SyntaxVariable {
name: "var2".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(15))],
}],
},
SyntaxVariable {
name: "var3".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(16))],
}],
},
],
variables_to_inline: vec![
Symbol::non_terminal(1),
Symbol::non_terminal(2),
Symbol::non_terminal(3),
],
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
};
let inline_map = InlinedProductionMap::new(&grammar);
// Inlining var1 at step 1 also expands the var3 that leads var1's second
// production; var2 further along is left untouched at this point.
let items = inline_map.inlined_items(ParseItem::Normal {
variable_index: 0,
production_index: 0,
step_index: 1
}).unwrap().collect::<Vec<_>>();
assert_eq!(
display_items(Some(items.iter().cloned()), &grammar, &inline_map),
Some(vec![
"terminal-10 • terminal-13 terminal-11 non-terminal-2 terminal-12".to_string(),
"terminal-10 • terminal-16 terminal-14 terminal-11 non-terminal-2 terminal-12".to_string()
])
);
// Advance the first inlined item two steps so that it sits just before var2.
let item = items[0].successor().successor();
assert_eq!(
display_items(Some([item].iter().cloned()), &grammar, &inline_map),
Some(vec![
"terminal-10 terminal-13 terminal-11 • non-terminal-2 terminal-12".to_string(),
])
);
// The already-inlined item can itself be looked up to inline var2.
assert_eq!(
display_items(inline_map.inlined_items(item), &grammar, &inline_map),
Some(vec![
"terminal-10 terminal-13 terminal-11 • terminal-15 terminal-12".to_string(),
])
);
}
// Renders each item with its `Display` form so expectations can be written as
// plain strings; a `None` input is passed through as `None`.
fn display_items(
items: Option<impl Iterator<Item = ParseItem>>,
grammar: &SyntaxGrammar,
inline_map: &InlinedProductionMap,
) -> Option<Vec<String>> {
items.map(|items| {
items
.map(|item| format!("{}", item.with(grammar, inline_map)))
.collect()
})
}
}

View file

@ -1,22 +1,209 @@
use crate::grammars::Production;
use super::inline_variables::InlinedProductionMap;
use crate::grammars::{Production, ProductionStep, SyntaxGrammar};
use crate::rules::{Symbol, SymbolType};
use smallbitvec::SmallBitVec;
use std::collections::HashMap;
use bitvec::BitVec;
use std::hash::{Hash, Hasher};
use std::fmt;
#[derive(Debug, PartialEq, Eq)]
pub(super) struct LookaheadSet {
terminal_bits: BitVec,
external_bits: BitVec,
// Lazily-initialized production returned by `ParseItem::production` for
// `ParseItem::Start` items, which have no production in the grammar itself.
lazy_static! {
// A single step referring to non-terminal 0, with precedence 0 and no
// associativity or alias.
static ref START_PRODUCTION: Production = Production {
dynamic_precedence: 0,
steps: vec![ProductionStep {
symbol: Symbol {
index: 0,
kind: SymbolType::NonTerminal,
},
precedence: 0,
associativity: None,
alias: None,
}],
};
}
/// A set of lookahead symbols, stored as one bit per symbol index.
/// Non-terminals cannot be stored (`insert` panics on them).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct LookaheadSet {
// Bit `i` is set when the `SymbolType::Terminal` symbol with index `i` is present.
terminal_bits: SmallBitVec,
// Bit `i` is set when the `SymbolType::External` symbol with index `i` is present.
external_bits: SmallBitVec,
// Separate flag merged by `insert_all`.
// NOTE(review): presumably marks end-of-input as a lookahead — confirm.
eof: bool,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(super) struct ParseItem {
variable_index: u32,
production_index: u32,
step_index: u32,
/// A position (`step_index`) within a production. The variant determines
/// where `ParseItem::production` looks the production up.
pub(crate) enum ParseItem {
/// Item over the built-in `START_PRODUCTION`; it has no variable index.
Start {
step_index: u32,
},
/// Item over `grammar.variables[variable_index].productions[production_index]`.
Normal {
variable_index: u32,
production_index: u32,
step_index: u32,
},
/// Item over a synthesized production: `production_index` indexes
/// `InlinedProductionMap::inlined_productions`, not the grammar.
Inlined {
variable_index: u32,
production_index: u32,
step_index: u32,
},
}
#[derive(Debug, PartialEq, Eq)]
pub(super) struct ParseItemSet {
entries: HashMap<ParseItem, LookaheadSet>
/// A collection of parse items, each paired with its own lookahead set.
/// `Hash` is implemented by hand further down rather than derived.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSet {
/// The lookahead set associated with each item in the set.
pub entries: HashMap<ParseItem, LookaheadSet>,
}
impl LookaheadSet {
    /// Creates an empty set: no terminal or external bits, `eof` unset.
    pub fn new() -> Self {
        Self {
            terminal_bits: SmallBitVec::new(),
            external_bits: SmallBitVec::new(),
            eof: false,
        }
    }

    /// Adds the terminal or external symbol `other` to the set.
    ///
    /// The backing bit vector is grown on demand so that any symbol index can
    /// be inserted, including into a freshly created (zero-length) set;
    /// `SmallBitVec::set` on its own panics for an out-of-range index.
    ///
    /// # Panics
    ///
    /// Panics if `other` is a non-terminal.
    pub fn insert(&mut self, other: Symbol) {
        let bits = match other.kind {
            SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
            SymbolType::Terminal => &mut self.terminal_bits,
            SymbolType::External => &mut self.external_bits,
        };
        if other.index >= bits.len() {
            bits.resize(other.index + 1, false);
        }
        bits.set(other.index, true);
    }

    /// Adds every member of `other` to this set.
    ///
    /// Returns `true` if this set changed, i.e. if `other` contained a
    /// terminal, an external symbol or the `eof` flag that was not already
    /// present here.
    pub fn insert_all(&mut self, other: &LookaheadSet) -> bool {
        let mut result = false;

        // Grow to at least the size of `other` so that every index written
        // below is in range.
        if other.terminal_bits.len() > self.terminal_bits.len() {
            self.terminal_bits.resize(other.terminal_bits.len(), false);
        }
        if other.external_bits.len() > self.external_bits.len() {
            self.external_bits.resize(other.external_bits.len(), false);
        }

        for (i, element) in other.terminal_bits.iter().enumerate() {
            if element {
                result |= !self.terminal_bits[i];
                self.terminal_bits.set(i, element);
            }
        }
        for (i, element) in other.external_bits.iter().enumerate() {
            if element {
                result |= !self.external_bits[i];
                self.external_bits.set(i, element);
            }
        }

        if other.eof {
            result |= !self.eof;
            self.eof = true;
        }

        result
    }
}
impl ParseItem {
    /// Reports whether this is a kernel item: a `Start` item always is, any
    /// other item only once it has advanced past step 0.
    pub fn is_kernel(&self) -> bool {
        if let ParseItem::Start { .. } = self {
            true
        } else {
            self.step_index() > 0
        }
    }

    /// Resolves the production this item walks over: the built-in start
    /// production, a production from `grammar`, or a synthesized production
    /// from `inlined_productions`, depending on the variant.
    pub fn production<'a>(
        &'a self,
        grammar: &'a SyntaxGrammar,
        inlined_productions: &'a InlinedProductionMap,
    ) -> &'a Production {
        match *self {
            ParseItem::Start { .. } => &START_PRODUCTION,
            ParseItem::Normal {
                variable_index,
                production_index,
                ..
            } => {
                let variable = &grammar.variables[variable_index as usize];
                &variable.productions[production_index as usize]
            }
            ParseItem::Inlined {
                production_index, ..
            } => &inlined_productions.inlined_productions[production_index as usize],
        }
    }

    /// Returns the production step at the item's current position, or `None`
    /// when the position is past the production's last step.
    pub fn step<'a>(
        &'a self,
        grammar: &'a SyntaxGrammar,
        inlined_productions: &'a InlinedProductionMap,
    ) -> Option<&'a ProductionStep> {
        let production = self.production(grammar, inlined_productions);
        production.steps.get(self.step_index())
    }

    /// Returns the index of the variable this item belongs to.
    ///
    /// Panics for `Start` items, which carry no variable index.
    pub fn variable_index(&self) -> u32 {
        match *self {
            ParseItem::Normal { variable_index, .. }
            | ParseItem::Inlined { variable_index, .. } => variable_index,
            ParseItem::Start { .. } => panic!("Start item doesn't have a variable index"),
        }
    }

    /// Returns the item's position within its production.
    pub fn step_index(&self) -> usize {
        let position = match *self {
            ParseItem::Start { step_index }
            | ParseItem::Normal { step_index, .. }
            | ParseItem::Inlined { step_index, .. } => step_index,
        };
        position as usize
    }

    /// Mutable access to the position field shared by every variant.
    fn step_index_mut(&mut self) -> &mut u32 {
        match self {
            ParseItem::Start { step_index }
            | ParseItem::Normal { step_index, .. }
            | ParseItem::Inlined { step_index, .. } => step_index,
        }
    }

    /// Bundles the item with the context needed to format it via `Display`.
    pub fn with<'a>(
        &'a self,
        grammar: &'a SyntaxGrammar,
        inlines: &'a InlinedProductionMap,
    ) -> ParseItemDisplay<'a> {
        ParseItemDisplay(self, grammar, inlines)
    }

    /// Returns a copy of this item advanced by one step.
    pub fn successor(&self) -> ParseItem {
        let mut next = *self;
        *next.step_index_mut() += 1;
        next
    }
}
/// Display adapter created by `ParseItem::with`: the item plus the grammar and
/// inlined-production map needed to resolve its production for formatting.
pub struct ParseItemDisplay<'a>(&'a ParseItem, &'a SyntaxGrammar, &'a InlinedProductionMap);
impl<'a> fmt::Display for ParseItemDisplay<'a> {
    /// Writes the item's production as space-separated `<kind>-<index>`
    /// symbols (`terminal`, `external` or `non-terminal`), with a "• " marker
    /// immediately before the symbol at the item's current step, e.g.
    /// `terminal-10 • terminal-14 terminal-11`.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        let step_index = self.0.step_index();
        let production = self.0.production(self.1, self.2);
        for (i, step) in production.steps.iter().enumerate() {
            if i > 0 {
                write!(f, " ")?;
            }

            // Mark the current position. Writing an empty string here would
            // make every position of the same production render identically.
            if i == step_index {
                write!(f, "• ")?;
            }

            let name = if step.symbol.is_terminal() {
                "terminal"
            } else if step.symbol.is_external() {
                "external"
            } else {
                "non-terminal"
            };
            write!(f, "{}-{}", name, step.symbol.index)?;
        }
        Ok(())
    }
}
impl Hash for ParseItemSet {
    /// Hashes the set independently of the order in which `entries` happens
    /// to be iterated.
    ///
    /// `HashMap` iteration order is unspecified, while the derived
    /// `PartialEq` on `ParseItemSet` compares the maps irrespective of order.
    /// Feeding entries to `hasher` sequentially could therefore give two
    /// equal sets different hashes, which breaks the `Hash`/`Eq` contract.
    /// Instead, each entry is hashed on its own with a fixed-key hasher and
    /// the per-entry hashes are combined with a commutative operation.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_usize(self.entries.len());

        let mut combined: u64 = 0;
        for (item, lookaheads) in self.entries.iter() {
            // `DefaultHasher::new()` always starts from the same state, so an
            // entry's hash does not depend on which map it is stored in.
            let mut entry_hasher = std::collections::hash_map::DefaultHasher::new();
            item.hash(&mut entry_hasher);
            lookaheads.hash(&mut entry_hasher);
            combined = combined.wrapping_add(entry_hasher.finish());
        }
        hasher.write_u64(combined);
    }
}

View file

@ -1,4 +1,5 @@
mod item;
mod inline_variables;
use std::collections::{HashMap, VecDeque};
use crate::grammars::{SyntaxGrammar, LexicalGrammar};

View file

@ -108,6 +108,18 @@ impl ProductionStep {
}
}
impl Production {
    /// Returns the symbol of the production's first step, or `None` when the
    /// production has no steps.
    pub fn first_symbol(&self) -> Option<Symbol> {
        let leading_step = self.steps.first()?;
        Some(leading_step.symbol.clone())
    }
}
impl Default for Production {
fn default() -> Self {
Production { dynamic_precedence: 0, steps: Vec::new() }
}
}
impl Variable {
pub fn named(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Named, rule }

View file

@ -2,6 +2,7 @@ use clap::{App, Arg, SubCommand};
#[macro_use] extern crate serde_derive;
#[macro_use] extern crate serde_json;
#[macro_use] extern crate lazy_static;
mod build_tables;
mod error;

View file

@ -2,7 +2,6 @@ use serde_json::{Map, Value};
use crate::error::Result;
use crate::grammars::{InputGrammar, Variable, VariableType};
use crate::rules::Rule;
use std::collections::HashMap;
#[derive(Deserialize)]
#[serde(tag = "type")]

View file

@ -10,7 +10,7 @@ pub(crate) enum SymbolType {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Associativity {
Left,
Right
Right,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
@ -137,24 +137,37 @@ impl Rule {
}
impl Symbol {
pub fn is_terminal(&self) -> bool {
self.kind == SymbolType::Terminal
}
pub fn is_non_terminal(&self) -> bool {
return self.kind == SymbolType::NonTerminal
self.kind == SymbolType::NonTerminal
}
pub fn is_external(&self) -> bool {
return self.kind == SymbolType::External
self.kind == SymbolType::External
}
pub fn non_terminal(index: usize) -> Self {
Symbol { kind: SymbolType::NonTerminal, index }
Symbol {
kind: SymbolType::NonTerminal,
index,
}
}
pub fn terminal(index: usize) -> Self {
Symbol { kind: SymbolType::Terminal, index }
Symbol {
kind: SymbolType::Terminal,
index,
}
}
pub fn external(index: usize) -> Self {
Symbol { kind: SymbolType::External, index }
Symbol {
kind: SymbolType::External,
index,
}
}
}
@ -169,11 +182,14 @@ fn add_metadata<T: Fn(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
Rule::Metadata { rule, mut params } => {
f(&mut params);
Rule::Metadata { rule, params }
},
}
_ => {
let mut params = MetadataParams::default();
f(&mut params);
Rule::Metadata { rule: Box::new(input), params }
Rule::Metadata {
rule: Box::new(input),
params,
}
}
}
}
@ -184,7 +200,7 @@ fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
for element in elements {
choice_helper(result, element);
}
},
}
_ => {
if !result.contains(&rule) {
result.push(rule);