Allow non-terminal extras

This commit is contained in:
Max Brunsfeld 2019-10-21 13:31:49 -07:00
parent 49c632ae90
commit fcaabea0cf
8 changed files with 274 additions and 83 deletions

View file

@ -7,7 +7,7 @@ use crate::generate::grammars::{
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
use crate::generate::tables::{
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ProductionInfo, ProductionInfoId,
};
use core::ops::Range;
@ -16,17 +16,19 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt::Write;
use std::u32;
// For conflict reporting, each parse state is associated with an example
// sequence of symbols that could lead to that parse state.
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
#[derive(Clone)]
struct AuxiliarySymbolInfo {
auxiliary_symbol: Symbol,
parent_symbols: Vec<Symbol>,
}
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
struct ParseStateQueueEntry {
state_id: ParseStateId,
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
@ -41,6 +43,7 @@ struct ParseTableBuilder<'a> {
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
parse_table: ParseTable,
}
@ -52,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> {
.push(ProductionInfo::default());
// Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false);
// Add the starting state at index 1.
self.add_parse_state(
@ -66,8 +69,40 @@ impl<'a> ParseTableBuilder<'a> {
.iter()
.cloned(),
),
false,
);
// Compute the possible item sets for non-terminal extras.
let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
for extra_non_terminal in self
.syntax_grammar
.extra_tokens
.iter()
.filter(|s| s.is_non_terminal())
{
let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
for production in &variable.productions {
non_terminal_extra_item_sets_by_first_terminal
.entry(production.first_symbol().unwrap())
.or_insert(ParseItemSet::default())
.insert(
ParseItem {
variable_index: extra_non_terminal.index as u32,
production,
step_index: 1,
},
&[Symbol::end()].iter().cloned().collect(),
);
}
}
// Add a state for each starting terminal of a non-terminal extra rule.
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
self.non_terminal_extra_states
.push((terminal, self.parse_table.states.len()));
self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true);
}
while let Some(entry) = self.parse_state_queue.pop_front() {
let item_set = self
.item_set_builder
@ -91,9 +126,15 @@ impl<'a> ParseTableBuilder<'a> {
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
item_set: ParseItemSet<'a>,
is_non_terminal_extra: bool,
) -> ParseStateId {
match self.state_ids_by_item_set.entry(item_set) {
// If an equivalent item set has already been processed, then return
// the existing parse state index.
Entry::Occupied(o) => *o.get(),
// Otherwise, insert a new parse state and add it to the queue of
// parse states to populate.
Entry::Vacant(v) => {
let core = v.key().core();
let core_count = self.core_ids_by_core.len();
@ -116,6 +157,7 @@ impl<'a> ParseTableBuilder<'a> {
terminal_entries: HashMap::new(),
nonterminal_entries: HashMap::new(),
core_id,
is_non_terminal_extra,
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
state_id,
@ -138,7 +180,12 @@ impl<'a> ParseTableBuilder<'a> {
let mut non_terminal_successors = BTreeMap::new();
let mut lookaheads_with_conflicts = TokenSet::new();
// Each item in the item set contributes to either or a Shift action or a Reduce
// action in this state.
for (item, lookaheads) in &item_set.entries {
// If the item is unfinished, then this state has a transition for the item's
// next symbol. Advance the item to its next step and insert the resulting
// item into the successor item set.
if let Some(next_symbol) = item.symbol() {
let successor = item.successor();
if next_symbol.is_non_terminal() {
@ -160,7 +207,10 @@ impl<'a> ParseTableBuilder<'a> {
.or_insert_with(|| ParseItemSet::default())
.insert(successor, lookaheads);
}
} else {
}
// If the item is finished, then add a Reduce action to this state based
// on this item.
else {
let action = if item.is_augmented() {
ParseAction::Accept
} else {
@ -179,6 +229,10 @@ impl<'a> ParseTableBuilder<'a> {
.terminal_entries
.entry(lookahead);
let entry = entry.or_insert_with(|| ParseTableEntry::new());
// While inserting Reduce actions, eagerly resolve conflicts related
// to precedence: avoid inserting lower-precedence reductions, and
// clear the action list when inserting higher-precedence reductions.
if entry.actions.is_empty() {
entry.actions.push(action);
} else if action.precedence() > entry.actions[0].precedence() {
@ -193,12 +247,16 @@ impl<'a> ParseTableBuilder<'a> {
}
}
// Having computed the the successor item sets for each symbol, add a new
// parse state for each of these item sets, and add a corresponding Shift
// action to this state.
for (symbol, next_item_set) in terminal_successors {
preceding_symbols.push(symbol);
let next_state_id = self.add_parse_state(
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
self.parse_table.states[state_id].is_non_terminal_extra,
);
preceding_symbols.pop();
@ -226,13 +284,19 @@ impl<'a> ParseTableBuilder<'a> {
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
self.parse_table.states[state_id].is_non_terminal_extra,
);
preceding_symbols.pop();
self.parse_table.states[state_id]
.nonterminal_entries
.insert(symbol, next_state_id);
.insert(symbol, GotoAction::Goto(next_state_id));
}
// For any symbol with multiple actions, perform conflict resolution.
// This will either
// * choose one action over the others using precedence or associativity
// * keep multiple actions if this conflict has been whitelisted in the grammar
// * fail, terminating the parser generation process
for symbol in lookaheads_with_conflicts.iter() {
self.handle_conflict(
&item_set,
@ -243,15 +307,50 @@ impl<'a> ParseTableBuilder<'a> {
)?;
}
// Finally, add actions for the grammar's `extra` symbols.
let state = &mut self.parse_table.states[state_id];
for extra_token in &self.syntax_grammar.extra_tokens {
state
.terminal_entries
.entry(*extra_token)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::ShiftExtra],
});
let is_non_terminal_extra = state.is_non_terminal_extra;
let is_end_of_non_terminal_extra =
is_non_terminal_extra && state.terminal_entries.len() == 1;
// Add actions for the start tokens of each non-terminal extra rule.
// These actions are added to every state except for the states that are
// alread within non-terminal extras. Non-terminal extras are not allowed
// to nest within each other.
if !is_non_terminal_extra {
for (terminal, state_id) in &self.non_terminal_extra_states {
state
.terminal_entries
.entry(*terminal)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::Shift {
state: *state_id,
is_repetition: false,
}],
});
}
}
// Add ShiftExtra actions for the terminal extra tokens. These actions
// are added to every state except for those at the ends of non-terminal
// extras.
if !is_end_of_non_terminal_extra {
for extra_token in &self.syntax_grammar.extra_tokens {
if extra_token.is_non_terminal() {
state
.nonterminal_entries
.insert(*extra_token, GotoAction::ShiftExtra);
} else {
state
.terminal_entries
.entry(*extra_token)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::ShiftExtra],
});
}
}
}
Ok(())
@ -362,8 +461,8 @@ impl<'a> ParseTableBuilder<'a> {
}
}
// If all reduce actions are left associative, remove the SHIFT action.
// If all reduce actions are right associative, remove the REDUCE actions.
// If all Reduce actions are left associative, remove the SHIFT action.
// If all Reduce actions are right associative, remove the REDUCE actions.
match (has_left, has_non, has_right) {
(true, false, false) => {
entry.actions.pop();
@ -774,6 +873,7 @@ pub(crate) fn build_parse_table<'a>(
lexical_grammar,
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
state_ids_by_item_set: HashMap::new(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),

View file

@ -2,7 +2,9 @@ use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry};
use crate::generate::tables::{
GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
};
use log::info;
use std::collections::{HashMap, HashSet};
use std::mem;
@ -101,7 +103,10 @@ impl<'a> Minimizer<'a> {
state.update_referenced_states(|other_state_id, state| {
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
done = false;
state.nonterminal_entries[symbol]
match state.nonterminal_entries.get(symbol) {
Some(GotoAction::Goto(state_id)) => *state_id,
_ => other_state_id,
}
} else {
other_state_id
}
@ -262,18 +267,24 @@ impl<'a> Minimizer<'a> {
for (symbol, s1) in &state1.nonterminal_entries {
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {} {}",
state1.id,
state2.id,
self.symbol_name(symbol),
s1,
s2,
);
return true;
match (s1, s2) {
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {} {}",
state1.id,
state2.id,
self.symbol_name(symbol),
s1,
s2,
);
return true;
}
}
_ => return true,
}
}
}