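//! Shrink a freshly-built parse table in place: collapse unit reductions,
//! merge compatible states, and remove states that are no longer referenced.
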
use super::item::LookaheadSet;
use super::token_conflicts::TokenConflictMap;
use crate::grammars::{SyntaxGrammar, VariableType};
use crate::rules::{AliasMap, Symbol};
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
use hashbrown::{HashMap, HashSet};

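/// Shrink the given parse table by removing unit reductions, merging
/// compatible states, and then dropping any states that are no longer
/// referenced.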
pub(crate) fn shrink_parse_table(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    simple_aliases: &AliasMap,
    token_conflict_map: &TokenConflictMap,
    keywords: &LookaheadSet,
) {
    let mut optimizer = Optimizer {
        parse_table,
        syntax_grammar,
        token_conflict_map,
        keywords,
        simple_aliases,
    };
    optimizer.remove_unit_reductions();
    optimizer.merge_compatible_states();
    optimizer.remove_unused_states();
}

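/// Bundles the parse table with the grammar metadata that the three
/// shrinking passes consult.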
struct Optimizer<'a> {
    parse_table: &'a mut ParseTable,
    syntax_grammar: &'a SyntaxGrammar,
    token_conflict_map: &'a TokenConflictMap<'a>,
    keywords: &'a LookaheadSet,
    simple_aliases: &'a AliasMap,
}

impl<'a> Optimizer<'a> {
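    /// Find states whose only terminal actions are unit reductions of a
    /// single hidden, unaliased symbol, and reroute all references to such
    /// states directly to their successors, so that the intermediate states
    /// become unreachable and can later be removed.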
    fn remove_unit_reductions(&mut self) {
        // Collect all symbols that are aliased anywhere in the grammar. Unit
        // reductions of these symbols must be preserved, because the alias
        // affects the resulting syntax tree.
        let mut aliased_symbols = HashSet::new();
        for variable in &self.syntax_grammar.variables {
            for production in &variable.productions {
                for step in &production.steps {
                    if step.alias.is_some() {
                        aliased_symbols.insert(step.symbol);
                    }
                }
            }
        }

        // Find the states whose only terminal actions are unit reductions of
        // a single hidden, unaliased symbol.
        let mut unit_reduction_symbols_by_state = HashMap::new();
        for (i, state) in self.parse_table.states.iter().enumerate() {
            let mut only_unit_reductions = true;
            let mut unit_reduction_symbol = None;
            for (_, entry) in &state.terminal_entries {
                for action in &entry.actions {
                    match action {
                        ParseAction::ShiftExtra => continue,
                        ParseAction::Reduce {
                            child_count: 1,
                            alias_sequence_id: 0,
                            symbol,
                            ..
                        } => {
                            if !self.simple_aliases.contains_key(symbol)
                                && !aliased_symbols.contains(symbol)
                                && self.syntax_grammar.variables[symbol.index].kind
                                    != VariableType::Named
                                && (unit_reduction_symbol.is_none()
                                    || unit_reduction_symbol == Some(symbol))
                            {
                                unit_reduction_symbol = Some(symbol);
                                continue;
                            }
                        }
                        _ => {}
                    }
                    only_unit_reductions = false;
                    break;
                }

                if !only_unit_reductions {
                    break;
                }
            }

            if let Some(symbol) = unit_reduction_symbol {
                if only_unit_reductions {
                    unit_reduction_symbols_by_state.insert(i, *symbol);
                }
            }
        }

        // Rewrite any reference to a unit-reduction state so that it points
        // directly at that state's successor. Chains of unit reductions
        // collapse one link per iteration, so repeat until a fixed point is
        // reached.
        for state in self.parse_table.states.iter_mut() {
            let mut done = false;
            while !done {
                done = true;
                state.update_referenced_states(|other_state_id, state| {
                    if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
                        done = false;
                        state.nonterminal_entries[symbol]
                    } else {
                        other_state_id
                    }
                })
            }
        }
    }

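    /// Merge pairs of states that share the same unfinished-item signature
    /// and whose entries are compatible, repeating until no further pair of
    /// states can be combined.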
    fn merge_compatible_states(&mut self) {
        // Group the states by their unfinished-item signature; only states
        // within the same group are candidates for merging.
        let mut state_ids_by_signature = HashMap::new();
        for (i, state) in self.parse_table.states.iter().enumerate() {
            state_ids_by_signature
                .entry(state.unfinished_item_signature)
                .or_insert(Vec::new())
                .push(i);
        }

        let mut deleted_states = HashSet::new();
        loop {
            let mut state_replacements = HashMap::new();
            for (_, state_ids) in &state_ids_by_signature {
                for i in state_ids {
                    // Only consider each unordered pair once, with `j` being
                    // the earlier (lower-numbered) state of the pair.
                    for j in state_ids {
                        if j == i {
                            break;
                        }
                        if deleted_states.contains(j) || deleted_states.contains(i) {
                            continue;
                        }
                        if self.merge_parse_state(*j, *i) {
                            deleted_states.insert(*i);
                            state_replacements.insert(*i, *j);
                        }
                    }
                }
            }

            if state_replacements.is_empty() {
                break;
            }

            // Redirect all references to the merged-away states.
            for state in self.parse_table.states.iter_mut() {
                state.update_referenced_states(|other_state_id, _| {
                    *state_replacements
                        .get(&other_state_id)
                        .unwrap_or(&other_state_id)
                });
            }
        }
    }

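    /// Try to merge the `right` state into the `left` state. If the two
    /// states are compatible, copy `right`'s extra terminal entries into
    /// `left` and return true; otherwise leave both states unchanged and
    /// return false.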
    fn merge_parse_state(&mut self, left: usize, right: usize) -> bool {
        let left_state = &self.parse_table.states[left];
        let right_state = &self.parse_table.states[right];

        if left_state.nonterminal_entries != right_state.nonterminal_entries {
            return false;
        }

        // Entries present in both states must be identical; entries present
        // only in `left` must be safe to add to `right`.
        for (symbol, left_entry) in &left_state.terminal_entries {
            if let Some(right_entry) = right_state.terminal_entries.get(symbol) {
                if right_entry.actions != left_entry.actions {
                    return false;
                }
            } else if !self.can_add_entry_to_state(right_state, *symbol, left_entry) {
                return false;
            }
        }

        // Entries present only in `right` must be safe to add to `left`.
        let mut symbols_to_add = Vec::new();
        for (symbol, right_entry) in &right_state.terminal_entries {
            if !left_state.terminal_entries.contains_key(symbol) {
                if !self.can_add_entry_to_state(left_state, *symbol, right_entry) {
                    return false;
                }
                symbols_to_add.push(*symbol);
            }
        }

        for symbol in symbols_to_add {
            let entry = self.parse_table.states[right].terminal_entries[&symbol].clone();
            self.parse_table.states[left]
                .terminal_entries
                .insert(symbol, entry);
        }

        true
    }

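    /// Check whether the given lookahead entry can safely be added to a
    /// state during a merge, without changing the language that the parser
    /// accepts.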
    fn can_add_entry_to_state(
        &self,
        state: &ParseState,
        token: Symbol,
        entry: &ParseTableEntry,
    ) -> bool {
        // Do not add external tokens; they could conflict lexically with any of the state's
        // existing lookahead tokens.
        if token.is_external() {
            return false;
        }

        // Only merge parse states by allowing existing reductions to happen
        // with additional lookahead tokens. Do not alter parse states in ways
        // that allow entirely new types of actions to happen.
        if state.terminal_entries.iter().all(|(_, e)| e != entry) {
            return false;
        }
        match entry.actions.last() {
            Some(ParseAction::Reduce { .. }) => {}
            _ => return false,
        }

        // Do not add tokens which are both internal and external. Their validity could
        // influence the behavior of the external scanner.
        if self
            .syntax_grammar
            .external_tokens
            .iter()
            .any(|t| t.corresponding_internal_token == Some(token))
        {
            return false;
        }

        let is_word_token = self.syntax_grammar.word_token == Some(token);
        let is_keyword = self.keywords.contains(&token);

        // Do not add a token if it conflicts with an existing token. The word
        // token and the keywords are exempt from conflicting with each other.
        if token.is_terminal() {
            for existing_token in state.terminal_entries.keys() {
                if (is_word_token && self.keywords.contains(existing_token))
                    || (is_keyword
                        && self.syntax_grammar.word_token.as_ref() == Some(existing_token))
                {
                    continue;
                }
                if self
                    .token_conflict_map
                    .does_conflict(token.index, existing_token.index)
                    || self
                        .token_conflict_map
                        .does_match_same_string(token.index, existing_token.index)
                {
                    return false;
                }
            }
        }

        true
    }

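    /// Remove the states that are no longer referenced by any other state,
    /// renumbering the remaining states so that their ids stay contiguous.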
    fn remove_unused_states(&mut self) {
        let mut state_usage_map = vec![false; self.parse_table.states.len()];

        // The first two states are always kept, whether or not any other
        // state refers to them.
        state_usage_map[0] = true;
        state_usage_map[1] = true;

        for state in &self.parse_table.states {
            for referenced_state in state.referenced_states() {
                state_usage_map[referenced_state] = true;
            }
        }

        // Compute each surviving state's new id: its old id minus the number
        // of unused states that precede it.
        let mut removed_predecessor_count = 0;
        let mut state_replacement_map = vec![0; self.parse_table.states.len()];
        for state_id in 0..self.parse_table.states.len() {
            state_replacement_map[state_id] = state_id - removed_predecessor_count;
            if !state_usage_map[state_id] {
                removed_predecessor_count += 1;
            }
        }

        // Remove the unused states and update the ids of all referenced
        // states in the survivors.
        let mut state_id = 0;
        let mut original_state_id = 0;
        while state_id < self.parse_table.states.len() {
            if state_usage_map[original_state_id] {
                self.parse_table.states[state_id].update_referenced_states(|other_state_id, _| {
                    state_replacement_map[other_state_id]
                });
                state_id += 1;
            } else {
                self.parse_table.states.remove(state_id);
            }
            original_state_id += 1;
        }
    }
}