Fix recursive processing of rule inlining

This commit is contained in:
Max Brunsfeld 2019-01-03 16:35:16 -08:00
parent 5d3d161c05
commit bf9556dadc
5 changed files with 230 additions and 209 deletions

View file

@ -23,7 +23,7 @@ pub(crate) fn build_lex_table(
}
let mut builder = LexTableBuilder::new(lexical_grammar);
for (i, state) in parse_table.states.iter_mut().enumerate() {
for state in parse_table.states.iter_mut() {
let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| {
if token.is_terminal() {
if keywords.contains(&token) {
@ -37,7 +37,6 @@ pub(crate) fn build_lex_table(
None
}
}));
info!("populate lex state for parse state {}", i);
state.lex_state_id = builder.add_state_for_tokens(&tokens);
}
@ -199,16 +198,17 @@ fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
continue;
}
for (j, state_j) in table.states.iter().enumerate() {
if state_replacements.contains_key(&j) {
continue;
}
if j == i {
break;
}
if state_replacements.contains_key(&j) {
continue;
}
if state_i == state_j {
info!("replace state {} with state {}", i, j);
state_replacements.insert(i, j);
done = false;
break;
}
}
}

View file

@ -63,7 +63,28 @@ impl<'a> ParseTableBuilder<'a> {
),
);
self.process_part_state_queue()?;
while let Some(entry) = self.parse_state_queue.pop_front() {
// info!(
// "state: {}, item set: {}",
// entry.state_id,
// ParseItemSetDisplay(
// &self.item_sets_by_state_id[entry.state_id],
// self.syntax_grammar,
// self.lexical_grammar,
// )
// );
let item_set = self
.item_set_builder
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
self.add_actions(
entry.preceding_symbols,
entry.preceding_auxiliary_symbols,
entry.state_id,
item_set,
)?;
}
self.populate_used_symbols();
self.remove_precedences();
@ -116,27 +137,12 @@ impl<'a> ParseTableBuilder<'a> {
}
}
fn process_part_state_queue(&mut self) -> Result<()> {
while let Some(entry) = self.parse_state_queue.pop_front() {
let item_set = self
.item_set_builder
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
self.add_actions(
entry.preceding_symbols,
entry.preceding_auxiliary_symbols,
item_set,
entry.state_id,
)?;
}
Ok(())
}
fn add_actions(
&mut self,
mut preceding_symbols: SymbolSequence,
mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
item_set: ParseItemSet<'a>,
state_id: ParseStateId,
item_set: ParseItemSet<'a>,
) -> Result<()> {
let mut terminal_successors = HashMap::new();
let mut non_terminal_successors = HashMap::new();

View file

@ -42,12 +42,19 @@ pub(crate) struct ParseItemSet<'a> {
pub entries: BTreeMap<ParseItem<'a>, LookaheadSet>,
}
pub(crate) struct ParseItemDisplay<'a>(&'a ParseItem<'a>, &'a SyntaxGrammar, &'a LexicalGrammar);
pub(crate) struct ParseItemDisplay<'a>(
pub &'a ParseItem<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar
);
pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar);
#[allow(dead_code)]
pub(crate) struct ParseItemSetDisplay<'a>(
&'a ParseItemSet<'a>,
&'a SyntaxGrammar,
&'a LexicalGrammar,
pub &'a ParseItemSet<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
impl LookaheadSet {
@ -144,14 +151,6 @@ impl LookaheadSet {
}
result
}
pub fn display_with<'a>(
&'a self,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
) -> LookaheadSetDisplay<'a> {
LookaheadSetDisplay(self, syntax_grammar, lexical_grammar)
}
}
impl<'a> ParseItem<'a> {
@ -202,14 +201,6 @@ impl<'a> ParseItem<'a> {
step_index: self.step_index + 1,
}
}
pub fn display_with(
&'a self,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
) -> ParseItemDisplay<'a> {
ParseItemDisplay(self, syntax_grammar, lexical_grammar)
}
}
impl<'a> ParseItemSet<'a> {
@ -235,14 +226,6 @@ impl<'a> ParseItemSet<'a> {
}
}
}
pub fn display_with(
&'a self,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
) -> ParseItemSetDisplay<'a> {
ParseItemSetDisplay(self, syntax_grammar, lexical_grammar)
}
}
impl<'a> Default for ParseItemSet<'a> {
@ -253,6 +236,7 @@ impl<'a> Default for ParseItemSet<'a> {
}
}
#[allow(dead_code)]
impl<'a> fmt::Display for ParseItemDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
if self.0.is_augmented() {
@ -282,6 +266,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
} else {
write!(f, "{}", &self.1.variables[step.symbol.index].name)?;
}
if let Some(alias) = &step.alias {
write!(f, " (alias {})", alias.value)?;
}
}
if self.0.is_done() {
@ -323,8 +311,8 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
writeln!(
f,
"{}\t{}",
item.display_with(self.1, self.2),
lookaheads.display_with(self.1, self.2)
ParseItemDisplay(item, self.1, self.2),
LookaheadSetDisplay(lookaheads, self.1, self.2)
)?;
}
Ok(())

View file

@ -1,7 +1,8 @@
use super::item::{LookaheadSet, ParseItem, ParseItemSet};
use super::item::{LookaheadSet, ParseItem, ParseItemDisplay, ParseItemSet};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::rules::Symbol;
use hashbrown::{HashMap, HashSet};
use std::fmt;
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition<'a> {
@ -16,6 +17,8 @@ struct FollowSetInfo {
}
pub(crate) struct ParseItemSetBuilder<'a> {
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
first_sets: HashMap<Symbol, LookaheadSet>,
last_sets: HashMap<Symbol, LookaheadSet>,
inlines: &'a InlinedProductionMap,
@ -35,6 +38,8 @@ impl<'a> ParseItemSetBuilder<'a> {
inlines: &'a InlinedProductionMap,
) -> Self {
let mut result = Self {
syntax_grammar,
lexical_grammar,
first_sets: HashMap::new(),
last_sets: HashMap::new(),
inlines,
@ -300,3 +305,26 @@ impl<'a> ParseItemSetBuilder<'a> {
set.entries.insert(item, lookaheads.clone());
}
}
impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ParseItemSetBuilder {{\n")?;
write!(f, " additions: {{\n")?;
for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
write!(f, " {}: {{\n", variable.name)?;
for addition in &self.transitive_closure_additions[i] {
write!(
f,
" {}\n",
ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar)
)?;
}
write!(f, " }},\n")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}

View file

@ -3,6 +3,9 @@ use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {
// A `None` value here means that the production itself was produced via inlining,
// and is stored in the the builder's `productions` vector, as opposed to being
// stored in one of the grammar's variables.
variable_index: Option<usize>,
production_index: usize,
step_index: usize,
@ -13,169 +16,166 @@ struct InlinedProductionMapBuilder {
productions: Vec<Production>,
}
impl ProductionStepId {
pub fn successor(&self) -> Self {
Self {
variable_index: self.variable_index,
production_index: self.production_index,
step_index: self.step_index + 1,
}
}
}
fn production_for_id<'a>(
map: &'a InlinedProductionMapBuilder,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Production {
if let Some(variable_index) = id.variable_index {
&grammar.variables[variable_index].productions[id.production_index]
} else {
&map.productions[id.production_index]
}
}
fn production_step_for_id<'a>(
map: &'a InlinedProductionMapBuilder,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> Option<&'a ProductionStep> {
production_for_id(map, id, grammar).steps.get(id.step_index)
}
fn inline<'a>(
map: &'a mut InlinedProductionMapBuilder,
step_id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Vec<usize> {
let step = production_step_for_id(map, step_id, grammar).unwrap();
let mut productions_to_add = grammar.variables[step.symbol.index].productions.clone();
let mut i = 0;
while i < productions_to_add.len() {
if let Some(first_symbol) = productions_to_add[i].first_symbol() {
if grammar.variables_to_inline.contains(&first_symbol) {
// Remove the production from the vector, replacing it with a placeholder.
let production = productions_to_add
.splice(i..i + 1, [Production::default()].iter().cloned())
.next()
.unwrap();
// Replace the placeholder with the inlined productions.
productions_to_add.splice(
i..i + 1,
grammar.variables[first_symbol.index]
.productions
.iter()
.map(|p| {
let mut p = p.clone();
p.steps.extend(production.steps[1..].iter().cloned());
p
}),
);
continue;
}
}
i += 1;
}
let result = productions_to_add
.into_iter()
.map(|production_to_add| {
let mut inlined_production = production_for_id(&map, step_id, grammar).clone();
let removed_step = inlined_production
.steps
.splice(
step_id.step_index..step_id.step_index + 1,
production_to_add.steps.iter().cloned(),
)
.next()
.unwrap();
let inserted_steps = &mut inlined_production.steps
[step_id.step_index..step_id.step_index + production_to_add.steps.len()];
if let Some(alias) = removed_step.alias {
for inserted_step in inserted_steps.iter_mut() {
inserted_step.alias = Some(alias.clone());
}
}
if let Some(last_inserted_step) = inserted_steps.last_mut() {
last_inserted_step.precedence = removed_step.precedence;
last_inserted_step.associativity = removed_step.associativity;
}
map.productions
.iter()
.position(|p| *p == inlined_production)
.unwrap_or({
map.productions.push(inlined_production);
map.productions.len() - 1
})
})
.collect();
map.production_indices_by_step_id
.entry(step_id)
.or_insert(result)
}
pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap {
let mut result = InlinedProductionMapBuilder {
productions: Vec::new(),
production_indices_by_step_id: HashMap::new(),
};
let mut step_ids_to_process = Vec::new();
for (variable_index, variable) in grammar.variables.iter().enumerate() {
for production_index in 0..variable.productions.len() {
step_ids_to_process.push(ProductionStepId {
variable_index: Some(variable_index),
production_index,
step_index: 0,
});
while !step_ids_to_process.is_empty() {
let mut i = 0;
while i < step_ids_to_process.len() {
let step_id = step_ids_to_process[i];
if let Some(step) = production_step_for_id(&result, step_id, grammar) {
if grammar.variables_to_inline.contains(&step.symbol) {
let inlined_step_ids = inline(&mut result, step_id, grammar)
.into_iter()
.cloned()
.map(|production_index| ProductionStepId {
variable_index: None,
production_index,
step_index: step_id.step_index,
})
.collect::<Vec<_>>();
step_ids_to_process.splice(i..i + 1, inlined_step_ids);
impl InlinedProductionMapBuilder {
fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap {
let mut step_ids_to_process = Vec::new();
for (variable_index, variable) in grammar.variables.iter().enumerate() {
for production_index in 0..variable.productions.len() {
step_ids_to_process.push(ProductionStepId {
variable_index: Some(variable_index),
production_index,
step_index: 0,
});
while !step_ids_to_process.is_empty() {
let mut i = 0;
while i < step_ids_to_process.len() {
let step_id = step_ids_to_process[i];
if let Some(step) = self.production_step_for_id(step_id, grammar) {
if grammar.variables_to_inline.contains(&step.symbol) {
let inlined_step_ids = self
.inline_production_at_step(step_id, grammar)
.into_iter()
.cloned()
.map(|production_index| ProductionStepId {
variable_index: None,
production_index,
step_index: step_id.step_index,
});
step_ids_to_process.splice(i..i + 1, inlined_step_ids);
} else {
step_ids_to_process[i] = ProductionStepId {
variable_index: step_id.variable_index,
production_index: step_id.production_index,
step_index: step_id.step_index + 1,
};
i += 1;
}
} else {
step_ids_to_process[i] = step_id.successor();
i += 1;
step_ids_to_process.remove(i);
}
} else {
step_ids_to_process.remove(i);
}
}
}
}
let productions = self.productions;
let production_indices_by_step_id = self.production_indices_by_step_id;
let production_map = production_indices_by_step_id
.into_iter()
.map(|(step_id, production_indices)| {
let production = if let Some(variable_index) = step_id.variable_index {
&grammar.variables[variable_index].productions[step_id.production_index]
} else {
&productions[step_id.production_index]
} as *const Production;
((production, step_id.step_index as u32), production_indices)
})
.collect();
InlinedProductionMap {
productions,
production_map,
}
}
// result
let productions = result.productions;
let production_indices_by_step_id = result.production_indices_by_step_id;
fn inline_production_at_step<'a>(
&'a mut self,
step_id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Vec<usize> {
// Build a list of productions produced by inlining rules.
let mut i = 0;
let step_index = step_id.step_index;
let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()];
while i < productions_to_add.len() {
if let Some(step) = productions_to_add[i].steps.get(step_index) {
let symbol = step.symbol.clone();
let production_map = production_indices_by_step_id
.into_iter()
.map(|(step_id, production_indices)| {
let production = if let Some(variable_index) = step_id.variable_index {
&grammar.variables[variable_index].productions[step_id.production_index]
} else {
&productions[step_id.production_index]
} as *const Production;
((production, step_id.step_index as u32), production_indices)
})
.collect();
if grammar.variables_to_inline.contains(&symbol) {
// Remove the production from the vector, replacing it with a placeholder.
let production = productions_to_add
.splice(i..i + 1, [Production::default()].iter().cloned())
.next()
.unwrap();
InlinedProductionMap { productions, production_map }
// Replace the placeholder with the inlined productions.
productions_to_add.splice(
i..i + 1,
grammar.variables[symbol.index].productions.iter().map(|p| {
let mut production = production.clone();
let removed_step = production
.steps
.splice(step_index..(step_index + 1), p.steps.iter().cloned())
.next()
.unwrap();
let inserted_steps =
&mut production.steps[step_index..(step_index + p.steps.len())];
if let Some(alias) = removed_step.alias {
for inserted_step in inserted_steps.iter_mut() {
inserted_step.alias = Some(alias.clone());
}
}
if let Some(last_inserted_step) = inserted_steps.last_mut() {
last_inserted_step.precedence = removed_step.precedence;
last_inserted_step.associativity = removed_step.associativity;
}
production
}),
);
continue;
}
}
i += 1;
}
// Store all the computed productions.
let result = productions_to_add
.into_iter()
.map(|production| {
self.productions
.iter()
.position(|p| *p == production)
.unwrap_or({
self.productions.push(production);
self.productions.len() - 1
})
})
.collect();
// Cache these productions based on the original production step.
self.production_indices_by_step_id
.entry(step_id)
.or_insert(result)
}
fn production_for_id<'a>(
&'a self,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Production {
if let Some(variable_index) = id.variable_index {
&grammar.variables[variable_index].productions[id.production_index]
} else {
&self.productions[id.production_index]
}
}
fn production_step_for_id<'a>(
&'a self,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> Option<&'a ProductionStep> {
self.production_for_id(id, grammar).steps.get(id.step_index)
}
}
pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap {
InlinedProductionMapBuilder {
productions: Vec::new(),
production_indices_by_step_id: HashMap::new(),
}
.build(grammar)
}
#[cfg(test)]
@ -234,7 +234,7 @@ mod tests {
// Inlining variable 1 yields two productions.
assert_eq!(
inline_map
.inlined_productions(&grammar.variables[0].productions[0], 1)
.inlined_productions(&grammar.variables[0].productions[0], 1)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
@ -446,8 +446,7 @@ mod tests {
ProductionStep::new(Symbol::terminal(12))
.with_prec(1, Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(2))
.with_alias("outer_alias", true),
ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true),
]
}],
);