2019-06-06 10:47:19 -07:00
|
|
|
use super::item::{ParseItem, ParseItemSet, ParseItemSetCore, TokenSet};
|
2018-12-29 13:57:34 -08:00
|
|
|
use super::item_set_builder::ParseItemSetBuilder;
|
|
|
|
|
use crate::error::{Error, Result};
|
2019-01-11 13:30:45 -08:00
|
|
|
use crate::generate::grammars::{
|
|
|
|
|
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
|
|
|
|
|
};
|
2019-03-27 16:17:02 -07:00
|
|
|
use crate::generate::node_types::VariableInfo;
|
2019-02-07 12:29:20 -08:00
|
|
|
use crate::generate::rules::{Associativity, Symbol, SymbolType};
|
2019-01-07 10:23:01 -08:00
|
|
|
use crate::generate::tables::{
|
2019-03-27 16:17:02 -07:00
|
|
|
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
|
|
|
|
ProductionInfo, ProductionInfoId,
|
2018-12-29 13:57:34 -08:00
|
|
|
};
|
|
|
|
|
use core::ops::Range;
|
2019-08-13 10:08:58 -07:00
|
|
|
use std::collections::hash_map::Entry;
|
|
|
|
|
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
2018-12-29 13:57:34 -08:00
|
|
|
use std::fmt::Write;
|
2019-03-27 16:17:02 -07:00
|
|
|
use std::u32;
|
2018-12-29 13:57:34 -08:00
|
|
|
|
|
|
|
|
/// Records where an auxiliary (generated repeat) non-terminal is used:
/// the auxiliary symbol itself plus the visible non-terminals whose items
/// reference it. Built by `get_auxiliary_node_info` and consulted during
/// conflict reporting so errors mention user-written rules, not generated ones.
#[derive(Clone)]
struct AuxiliarySymbolInfo {
    // The generated (auxiliary) repeat symbol.
    auxiliary_symbol: Symbol,
    // Non-auxiliary variables whose items use `auxiliary_symbol`
    // (see the `!is_auxiliary()` filter in `get_auxiliary_node_info`).
    parent_symbols: Vec<Symbol>,
}
|
|
|
|
|
|
|
|
|
|
// The sequence of symbols consumed along the path that first reached a state.
type SymbolSequence = Vec<Symbol>;

// Auxiliary-symbol usages encountered along such a path.
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
|
|
|
|
|
|
|
|
|
/// Work-queue entry for a parse state whose outgoing actions have not yet
/// been computed (processed by the loop in `ParseTableBuilder::build`).
struct ParseStateQueueEntry {
    // Symbols consumed on the path that first created this state.
    preceding_symbols: SymbolSequence,
    // Auxiliary-symbol usages seen along that path.
    preceding_auxiliary_symbols: AuxiliarySymbolSequence,
    // Index of the state in `parse_table.states`.
    state_id: ParseStateId,
}
|
|
|
|
|
|
|
|
|
|
/// Builds an LR parse table from a syntax grammar, using a worklist of
/// parse-item sets. Consumed by `build`, which returns the finished table.
struct ParseTableBuilder<'a> {
    // Computes transitive closures and FIRST sets of item sets.
    item_set_builder: ParseItemSetBuilder<'a>,
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    // Per-variable field info, used when computing production infos.
    variable_info: &'a Vec<VariableInfo>,
    // Deduplicates item-set *cores*, assigning each distinct core a small id.
    core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
    // Deduplicates full item sets: one parse state per distinct item set.
    state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
    // Inverse mapping: state id -> the item set that produced it.
    item_sets_by_state_id: Vec<ParseItemSet<'a>>,
    // States whose actions still need to be computed.
    parse_state_queue: VecDeque<ParseStateQueueEntry>,
    // The table under construction.
    parse_table: ParseTable,
}
|
|
|
|
|
|
|
|
|
|
impl<'a> ParseTableBuilder<'a> {
|
2019-01-18 15:13:13 -08:00
|
|
|
/// Builds the complete parse table.
///
/// Seeds the table with the error state (id 0) and the start state (id 1),
/// then processes the state queue to a fixpoint, computing the actions for
/// each newly created state. Finally erases precedence/associativity
/// bookkeeping from the finished actions.
///
/// Returns an error if an unresolvable conflict is encountered
/// (propagated from `add_actions` / `handle_conflict`).
fn build(mut self) -> Result<ParseTable> {
    // Ensure that the empty alias sequence has index 0.
    self.parse_table
        .production_infos
        .push(ProductionInfo::default());

    // Add the error state at index 0.
    self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());

    // Add the starting state at index 1. Its single kernel item is the
    // augmented start item, with end-of-input as the only lookahead.
    self.add_parse_state(
        &Vec::new(),
        &Vec::new(),
        ParseItemSet::with(
            [(
                ParseItem::start(),
                [Symbol::end()].iter().cloned().collect(),
            )]
            .iter()
            .cloned(),
        ),
    );

    // Process states breadth-first. `add_actions` may enqueue more states
    // (via `add_parse_state`), so this loop runs until no new states appear.
    while let Some(entry) = self.parse_state_queue.pop_front() {
        let item_set = self
            .item_set_builder
            .transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
        self.add_actions(
            entry.preceding_symbols,
            entry.preceding_auxiliary_symbols,
            entry.state_id,
            item_set,
        )?;
    }

    // Precedences have served their purpose (conflict resolution);
    // clear them so equivalent actions compare equal downstream.
    self.remove_precedences();

    Ok(self.parse_table)
}
|
|
|
|
|
|
|
|
|
|
/// Returns the id of the parse state for `item_set`, creating the state
/// (and enqueueing it for action computation) if this item set has not
/// been seen before.
///
/// `preceding_symbols` / `preceding_auxiliary_symbols` describe the path
/// that reached this state; they are stored on the queue entry so that
/// conflict messages can show the symbol sequence leading to the state.
fn add_parse_state(
    &mut self,
    preceding_symbols: &SymbolSequence,
    preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
    item_set: ParseItemSet<'a>,
) -> ParseStateId {
    match self.state_ids_by_item_set.entry(item_set) {
        // Item set already has a state: reuse it.
        Entry::Occupied(o) => *o.get(),
        Entry::Vacant(v) => {
            // Intern this item set's *core*, assigning a new core id if
            // the core has not been seen before.
            let core = v.key().core();
            let core_count = self.core_ids_by_core.len();
            let core_id = match self.core_ids_by_core.entry(core) {
                Entry::Occupied(e) => *e.get(),
                Entry::Vacant(e) => {
                    e.insert(core_count);
                    core_count
                }
            };

            // New state ids are assigned densely in creation order.
            let state_id = self.parse_table.states.len();
            self.item_sets_by_state_id.push(v.key().clone());
            self.parse_table.states.push(ParseState {
                id: state_id,
                lex_state_id: 0,
                terminal_entries: HashMap::new(),
                nonterminal_entries: HashMap::new(),
                core_id,
            });
            // Defer the (recursive) computation of this state's actions.
            self.parse_state_queue.push_back(ParseStateQueueEntry {
                state_id,
                preceding_symbols: preceding_symbols.clone(),
                preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
            });
            v.insert(state_id);
            state_id
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// Computes the actions for one parse state from its (closed) item set:
/// SHIFT/GOTO entries for successor symbols, REDUCE/ACCEPT entries for
/// completed items, ShiftExtra entries for extra tokens — resolving or
/// reporting any conflicts along the way.
///
/// `preceding_symbols` is taken by value and temporarily extended while
/// creating successor states, then restored via push/pop.
fn add_actions(
    &mut self,
    mut preceding_symbols: SymbolSequence,
    mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
    state_id: ParseStateId,
    item_set: ParseItemSet<'a>,
) -> Result<()> {
    // Successor item sets keyed by the symbol that advances into them.
    let mut terminal_successors = HashMap::new();
    let mut non_terminal_successors = HashMap::new();
    // Lookaheads for which this state currently has multiple actions.
    let mut lookaheads_with_conflicts = HashSet::new();

    for (item, lookaheads) in &item_set.entries {
        if let Some(next_symbol) = item.symbol() {
            // Item is not done: it contributes to a successor state.
            let successor = item.successor();
            if next_symbol.is_non_terminal() {
                // Keep track of where auxiliary non-terminals (repeat symbols) are
                // used within visible symbols. This information may be needed later
                // for conflict resolution.
                if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
                    preceding_auxiliary_symbols
                        .push(self.get_auxiliary_node_info(&item_set, next_symbol));
                }

                non_terminal_successors
                    .entry(next_symbol)
                    .or_insert_with(|| ParseItemSet::default())
                    .insert(successor, lookaheads);
            } else {
                terminal_successors
                    .entry(next_symbol)
                    .or_insert_with(|| ParseItemSet::default())
                    .insert(successor, lookaheads);
            }
        } else {
            // Item is done: add a Reduce action (or Accept for the
            // augmented start item) for every lookahead.
            let action = if item.is_augmented() {
                ParseAction::Accept
            } else {
                ParseAction::Reduce {
                    symbol: Symbol::non_terminal(item.variable_index as usize),
                    child_count: item.step_index as usize,
                    precedence: item.precedence(),
                    associativity: item.associativity(),
                    dynamic_precedence: item.production.dynamic_precedence,
                    production_id: self.get_production_id(item),
                }
            };

            for lookahead in lookaheads.iter() {
                let entry = self.parse_table.states[state_id]
                    .terminal_entries
                    .entry(lookahead);
                let entry = entry.or_insert_with(|| ParseTableEntry::new());
                if entry.actions.is_empty() {
                    entry.actions.push(action);
                } else if action.precedence() > entry.actions[0].precedence() {
                    // Higher-precedence reduce wins outright; any previously
                    // recorded conflict for this lookahead is now resolved.
                    entry.actions.clear();
                    entry.actions.push(action);
                    lookaheads_with_conflicts.remove(&lookahead);
                } else if action.precedence() == entry.actions[0].precedence() {
                    // Equal precedence: a genuine reduce/reduce conflict.
                    // (Lower precedence falls through and is discarded.)
                    entry.actions.push(action);
                    lookaheads_with_conflicts.insert(lookahead);
                }
            }
        }
    }

    // Create SHIFT actions for terminal successors. The successor state is
    // created (or found) first so its id can be stored in the action.
    for (symbol, next_item_set) in terminal_successors {
        preceding_symbols.push(symbol);
        let next_state_id = self.add_parse_state(
            &preceding_symbols,
            &preceding_auxiliary_symbols,
            next_item_set,
        );
        preceding_symbols.pop();

        let entry = self.parse_table.states[state_id]
            .terminal_entries
            .entry(symbol);
        // A pre-existing (reduce) entry for this terminal means a
        // shift/reduce conflict.
        if let Entry::Occupied(e) = &entry {
            if !e.get().actions.is_empty() {
                lookaheads_with_conflicts.insert(symbol);
            }
        }

        // The Shift action is always pushed *last* in the entry's action
        // list; `handle_conflict` relies on this ordering.
        entry
            .or_insert_with(|| ParseTableEntry::new())
            .actions
            .push(ParseAction::Shift {
                state: next_state_id,
                is_repetition: false,
            });
    }

    // GOTO entries for non-terminal successors; these cannot conflict.
    for (symbol, next_item_set) in non_terminal_successors {
        preceding_symbols.push(symbol);
        let next_state_id = self.add_parse_state(
            &preceding_symbols,
            &preceding_auxiliary_symbols,
            next_item_set,
        );
        preceding_symbols.pop();
        self.parse_table.states[state_id]
            .nonterminal_entries
            .insert(symbol, next_state_id);
    }

    // Attempt to resolve each conflict by precedence/associativity;
    // an unresolvable conflict produces an error here.
    for symbol in lookaheads_with_conflicts {
        self.handle_conflict(
            &item_set,
            state_id,
            &preceding_symbols,
            &preceding_auxiliary_symbols,
            symbol,
        )?;
    }

    // Extra tokens may be shifted-and-ignored in any state that doesn't
    // already have an action for them.
    let state = &mut self.parse_table.states[state_id];
    for extra_token in &self.syntax_grammar.extra_tokens {
        state
            .terminal_entries
            .entry(*extra_token)
            .or_insert(ParseTableEntry {
                reusable: true,
                actions: vec![ParseAction::ShiftExtra],
            });
    }

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
/// Attempts to resolve a conflict (multiple actions) for one lookahead in
/// one state, using precedence and associativity. Repeat-rule ambiguity is
/// kept deliberately (marked `is_repetition`); expected conflicts listed in
/// the grammar are tolerated; anything else produces a detailed error
/// message listing the interpretations and possible resolutions.
fn handle_conflict(
    &mut self,
    item_set: &ParseItemSet,
    state_id: ParseStateId,
    preceding_symbols: &SymbolSequence,
    preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
    conflicting_lookahead: Symbol,
) -> Result<()> {
    // Entry must exist: `add_actions` only records conflicts for
    // lookaheads it has populated.
    let entry = self.parse_table.states[state_id]
        .terminal_entries
        .get_mut(&conflicting_lookahead)
        .unwrap();

    // Determine which items in the set conflict with each other, and the
    // precedences associated with SHIFT vs REDUCE actions. There won't
    // be multiple REDUCE actions with different precedences; that is
    // sorted out ahead of time in `add_actions`. But there can still be
    // REDUCE-REDUCE conflicts where all actions have the *same*
    // precedence, and there can still be SHIFT/REDUCE conflicts.
    let reduce_precedence = entry.actions[0].precedence();
    let mut considered_associativity = false;
    // Range of precedences over all items that could shift the lookahead.
    let mut shift_precedence: Option<Range<i32>> = None;
    let mut conflicting_items = HashSet::new();
    for (item, lookaheads) in &item_set.entries {
        if let Some(step) = item.step() {
            // In-progress item: it participates only if the conflicting
            // lookahead can start its next symbol.
            if item.step_index > 0 {
                if self
                    .item_set_builder
                    .first_set(&step.symbol)
                    .contains(&conflicting_lookahead)
                {
                    // u32::MAX marks the augmented start item; skip it.
                    if item.variable_index != u32::MAX {
                        conflicting_items.insert(item);
                    }

                    // Widen the shift precedence range to cover this item.
                    let precedence = item.precedence();
                    if let Some(range) = &mut shift_precedence {
                        if precedence < range.start {
                            range.start = precedence;
                        } else if precedence > range.end {
                            range.end = precedence;
                        }
                    } else {
                        shift_precedence = Some(precedence..precedence);
                    }
                }
            }
        } else if lookaheads.contains(&conflicting_lookahead) {
            // Completed item that would reduce on this lookahead.
            if item.variable_index != u32::MAX {
                conflicting_items.insert(item);
            }
        }
    }

    // A Shift action, if present, is always the *last* action in the entry
    // (see `add_actions`); its presence means this is a shift/reduce conflict.
    if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
        let shift_precedence = shift_precedence.unwrap_or(0..0);

        // If all of the items in the conflict have the same parent symbol,
        // and that parent symbol is auxiliary, then this is just the intentional
        // ambiguity associated with a repeat rule. Resolve that class of ambiguity
        // by leaving it in the parse table, but marking the SHIFT action with
        // an `is_repetition` flag.
        let conflicting_variable_index =
            conflicting_items.iter().next().unwrap().variable_index;
        if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary()
            && conflicting_items
                .iter()
                .all(|item| item.variable_index == conflicting_variable_index)
        {
            *is_repetition = true;
            return Ok(());
        }

        // If the SHIFT action has higher precedence, remove all the REDUCE actions.
        if shift_precedence.start > reduce_precedence
            || (shift_precedence.start == reduce_precedence
                && shift_precedence.end > reduce_precedence)
        {
            entry.actions.drain(0..entry.actions.len() - 1);
        }
        // If the REDUCE actions have higher precedence, remove the SHIFT action.
        else if shift_precedence.end < reduce_precedence
            || (shift_precedence.end == reduce_precedence
                && shift_precedence.start < reduce_precedence)
        {
            entry.actions.pop();
            // Only completed (reduce) items remain relevant to the conflict.
            conflicting_items.retain(|item| item.is_done());
        }
        // If the SHIFT and REDUCE actions have the same precedence, consider
        // the REDUCE actions' associativity.
        else if shift_precedence == (reduce_precedence..reduce_precedence) {
            considered_associativity = true;
            let mut has_left = false;
            let mut has_right = false;
            let mut has_non = false;
            for action in &entry.actions {
                if let ParseAction::Reduce { associativity, .. } = action {
                    match associativity {
                        Some(Associativity::Left) => has_left = true,
                        Some(Associativity::Right) => has_right = true,
                        None => has_non = true,
                    }
                }
            }

            // If all reduce actions are left associative, remove the SHIFT action.
            // If all reduce actions are right associative, remove the REDUCE actions.
            match (has_left, has_non, has_right) {
                (true, false, false) => {
                    entry.actions.pop();
                    conflicting_items.retain(|item| item.is_done());
                }
                (false, false, true) => {
                    entry.actions.drain(0..entry.actions.len() - 1);
                }
                _ => {}
            }
        }
    }

    // If all of the actions but one have been eliminated, then there's no problem.
    let entry = self.parse_table.states[state_id]
        .terminal_entries
        .get_mut(&conflicting_lookahead)
        .unwrap();
    if entry.actions.len() == 1 {
        return Ok(());
    }

    // Determine the set of parent symbols involved in this conflict.
    // Auxiliary (repeat) symbols are mapped back to the visible rules
    // that use them, via the recorded auxiliary-symbol info.
    let mut actual_conflict = Vec::new();
    for item in &conflicting_items {
        let symbol = Symbol::non_terminal(item.variable_index as usize);
        if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
            actual_conflict.extend(
                preceding_auxiliary_symbols
                    .iter()
                    .rev()
                    .find_map(|info| {
                        if info.auxiliary_symbol == symbol {
                            Some(&info.parent_symbols)
                        } else {
                            None
                        }
                    })
                    .unwrap()
                    .iter(),
            );
        } else {
            actual_conflict.push(symbol);
        }
    }
    actual_conflict.sort_unstable();
    actual_conflict.dedup();

    // If this set of symbols has been whitelisted, then there's no error.
    if self
        .syntax_grammar
        .expected_conflicts
        .contains(&actual_conflict)
    {
        return Ok(());
    }

    // --- Unresolvable conflict: build a human-readable error message. ---

    // 1. The symbol sequence that leads to the conflicting state.
    let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
    for symbol in preceding_symbols {
        write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
    }

    write!(
        &mut msg,
        " • {} …\n\n",
        self.symbol_name(&conflicting_lookahead)
    )
    .unwrap();
    write!(&mut msg, "Possible interpretations:\n\n").unwrap();

    // 2. Each conflicting item rendered as a partial parse, with a `•`
    // marking the current position, plus any precedence/associativity note.
    let mut interpretions = conflicting_items
        .iter()
        .map(|item| {
            let mut line = String::new();
            // Symbols consumed *before* this item's production began.
            for preceding_symbol in preceding_symbols
                .iter()
                .take(preceding_symbols.len() - item.step_index as usize)
            {
                write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
            }

            write!(
                &mut line,
                " ({}",
                &self.syntax_grammar.variables[item.variable_index as usize].name
            )
            .unwrap();

            for (j, step) in item.production.steps.iter().enumerate() {
                if j as u32 == item.step_index {
                    write!(&mut line, " •").unwrap();
                }
                write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
            }

            write!(&mut line, ")").unwrap();

            // For a completed item, show the lookahead after the production.
            if item.is_done() {
                write!(
                    &mut line,
                    " • {} …",
                    self.symbol_name(&conflicting_lookahead)
                )
                .unwrap();
            }

            let precedence = item.precedence();
            let associativity = item.associativity();

            let prec_line = if let Some(associativity) = associativity {
                Some(format!(
                    "(precedence: {}, associativity: {:?})",
                    precedence, associativity
                ))
            } else if precedence > 0 {
                Some(format!("(precedence: {})", precedence))
            } else {
                None
            };

            (line, prec_line)
        })
        .collect::<Vec<_>>();

    // Align the precedence annotations into a column.
    let max_interpretation_length = interpretions
        .iter()
        .map(|i| i.0.chars().count())
        .max()
        .unwrap();
    interpretions.sort_unstable();
    for (i, (line, prec_suffix)) in interpretions.into_iter().enumerate() {
        write!(&mut msg, " {}:", i + 1).unwrap();
        msg += &line;
        if let Some(prec_suffix) = prec_suffix {
            for _ in line.chars().count()..max_interpretation_length {
                msg.push(' ');
            }
            msg += " ";
            msg += &prec_suffix;
        }
        msg.push('\n');
    }

    // 3. Suggested resolutions, numbered.
    let mut resolution_count = 0;
    write!(&mut msg, "\nPossible resolutions:\n\n").unwrap();
    // Partition items into those that would shift vs those that would reduce.
    let mut shift_items = Vec::new();
    let mut reduce_items = Vec::new();
    for item in conflicting_items {
        if item.is_done() {
            reduce_items.push(item);
        } else {
            shift_items.push(item);
        }
    }
    shift_items.sort_unstable();
    reduce_items.sort_unstable();
    if actual_conflict.len() > 1 {
        if shift_items.len() > 0 {
            resolution_count += 1;
            write!(
                &mut msg,
                " {}: Specify a higher precedence in",
                resolution_count
            )
            .unwrap();
            for (i, item) in shift_items.iter().enumerate() {
                if i > 0 {
                    write!(&mut msg, " and").unwrap();
                }
                write!(
                    &mut msg,
                    " `{}`",
                    self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
                )
                .unwrap();
            }
            write!(&mut msg, " than in the other rules.\n").unwrap();
        }

        for item in &reduce_items {
            resolution_count += 1;
            write!(
                &mut msg,
                " {}: Specify a higher precedence in `{}` than in the other rules.\n",
                resolution_count,
                self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
            )
            .unwrap();
        }
    }

    // Only suggest associativity if it was actually consulted above.
    if considered_associativity {
        resolution_count += 1;
        write!(
            &mut msg,
            " {}: Specify a left or right associativity in ",
            resolution_count
        )
        .unwrap();
        for (i, item) in reduce_items.iter().enumerate() {
            if i > 0 {
                write!(&mut msg, " and ").unwrap();
            }
            write!(
                &mut msg,
                "`{}`",
                self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
            )
            .unwrap();
        }
        write!(&mut msg, "\n").unwrap();
    }

    // Declaring the conflict as expected is always an option.
    resolution_count += 1;
    write!(
        &mut msg,
        " {}: Add a conflict for these rules: ",
        resolution_count
    )
    .unwrap();
    for (i, symbol) in actual_conflict.iter().enumerate() {
        if i > 0 {
            write!(&mut msg, ", ").unwrap();
        }
        write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
    }
    write!(&mut msg, "\n").unwrap();

    Err(Error::new(msg))
}
|
|
|
|
|
|
|
|
|
|
fn get_auxiliary_node_info(
|
|
|
|
|
&self,
|
|
|
|
|
item_set: &ParseItemSet,
|
|
|
|
|
symbol: Symbol,
|
|
|
|
|
) -> AuxiliarySymbolInfo {
|
|
|
|
|
let parent_symbols = item_set
|
|
|
|
|
.entries
|
2019-01-15 16:37:54 -08:00
|
|
|
.iter()
|
|
|
|
|
.filter_map(|(item, _)| {
|
2019-01-04 12:42:45 -08:00
|
|
|
let variable_index = item.variable_index as usize;
|
|
|
|
|
if item.symbol() == Some(symbol)
|
|
|
|
|
&& !self.syntax_grammar.variables[variable_index].is_auxiliary()
|
|
|
|
|
{
|
|
|
|
|
Some(Symbol::non_terminal(variable_index))
|
2018-12-29 13:57:34 -08:00
|
|
|
} else {
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
AuxiliarySymbolInfo {
|
|
|
|
|
auxiliary_symbol: symbol,
|
|
|
|
|
parent_symbols,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-01-02 16:48:44 -08:00
|
|
|
fn remove_precedences(&mut self) {
|
|
|
|
|
for state in self.parse_table.states.iter_mut() {
|
|
|
|
|
for (_, entry) in state.terminal_entries.iter_mut() {
|
|
|
|
|
for action in entry.actions.iter_mut() {
|
|
|
|
|
match action {
|
|
|
|
|
ParseAction::Reduce {
|
|
|
|
|
precedence,
|
|
|
|
|
associativity,
|
|
|
|
|
..
|
|
|
|
|
} => {
|
|
|
|
|
*precedence = 0;
|
|
|
|
|
*associativity = None;
|
|
|
|
|
}
|
|
|
|
|
_ => {}
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-12-29 13:57:34 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-12 11:06:18 -08:00
|
|
|
fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId {
|
|
|
|
|
let mut production_info = ProductionInfo {
|
2019-02-07 17:18:33 -08:00
|
|
|
alias_sequence: Vec::new(),
|
|
|
|
|
field_map: BTreeMap::new(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
for (i, step) in item.production.steps.iter().enumerate() {
|
2019-02-12 11:06:18 -08:00
|
|
|
production_info.alias_sequence.push(step.alias.clone());
|
2019-02-07 17:18:33 -08:00
|
|
|
if let Some(field_name) = &step.field_name {
|
2019-02-12 11:06:18 -08:00
|
|
|
production_info
|
2019-02-07 17:18:33 -08:00
|
|
|
.field_map
|
|
|
|
|
.entry(field_name.clone())
|
|
|
|
|
.or_insert(Vec::new())
|
|
|
|
|
.push(FieldLocation {
|
|
|
|
|
index: i,
|
|
|
|
|
inherited: false,
|
|
|
|
|
});
|
|
|
|
|
}
|
2019-02-12 11:06:18 -08:00
|
|
|
|
|
|
|
|
if step.symbol.kind == SymbolType::NonTerminal
|
|
|
|
|
&& !self.syntax_grammar.variables[step.symbol.index]
|
|
|
|
|
.kind
|
|
|
|
|
.is_visible()
|
|
|
|
|
{
|
2019-03-27 16:17:02 -07:00
|
|
|
let info = &self.variable_info[step.symbol.index];
|
2019-02-12 11:06:18 -08:00
|
|
|
for (field_name, _) in &info.fields {
|
|
|
|
|
production_info
|
2019-02-07 17:18:33 -08:00
|
|
|
.field_map
|
|
|
|
|
.entry(field_name.clone())
|
|
|
|
|
.or_insert(Vec::new())
|
|
|
|
|
.push(FieldLocation {
|
|
|
|
|
index: i,
|
|
|
|
|
inherited: true,
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-12 11:06:18 -08:00
|
|
|
while production_info.alias_sequence.last() == Some(&None) {
|
|
|
|
|
production_info.alias_sequence.pop();
|
2018-12-29 13:57:34 -08:00
|
|
|
}
|
2019-02-07 17:18:33 -08:00
|
|
|
|
2019-02-08 16:14:18 -08:00
|
|
|
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
|
|
|
|
|
self.parse_table.max_aliased_production_length = item.production.steps.len()
|
2019-01-04 13:01:07 -08:00
|
|
|
}
|
2019-02-07 17:18:33 -08:00
|
|
|
|
2018-12-29 13:57:34 -08:00
|
|
|
if let Some(index) = self
|
|
|
|
|
.parse_table
|
2019-02-12 11:06:18 -08:00
|
|
|
.production_infos
|
2018-12-29 13:57:34 -08:00
|
|
|
.iter()
|
2019-02-12 11:06:18 -08:00
|
|
|
.position(|seq| *seq == production_info)
|
2018-12-29 13:57:34 -08:00
|
|
|
{
|
|
|
|
|
index
|
|
|
|
|
} else {
|
2019-02-12 11:06:18 -08:00
|
|
|
self.parse_table.production_infos.push(production_info);
|
|
|
|
|
self.parse_table.production_infos.len() - 1
|
2018-12-29 13:57:34 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn symbol_name(&self, symbol: &Symbol) -> String {
|
|
|
|
|
match symbol.kind {
|
|
|
|
|
SymbolType::End => "EOF".to_string(),
|
|
|
|
|
SymbolType::External => self.syntax_grammar.external_tokens[symbol.index]
|
|
|
|
|
.name
|
|
|
|
|
.clone(),
|
|
|
|
|
SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(),
|
|
|
|
|
SymbolType::Terminal => {
|
|
|
|
|
let variable = &self.lexical_grammar.variables[symbol.index];
|
|
|
|
|
if variable.kind == VariableType::Named {
|
|
|
|
|
variable.name.clone()
|
|
|
|
|
} else {
|
2019-01-11 13:30:45 -08:00
|
|
|
format!("'{}'", &variable.name)
|
2018-12-29 13:57:34 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-01-18 15:13:13 -08:00
|
|
|
fn populate_following_tokens(
|
|
|
|
|
result: &mut Vec<TokenSet>,
|
|
|
|
|
grammar: &SyntaxGrammar,
|
|
|
|
|
inlines: &InlinedProductionMap,
|
|
|
|
|
builder: &ParseItemSetBuilder,
|
|
|
|
|
) {
|
|
|
|
|
let productions = grammar
|
|
|
|
|
.variables
|
|
|
|
|
.iter()
|
|
|
|
|
.flat_map(|v| &v.productions)
|
|
|
|
|
.chain(&inlines.productions);
|
2019-06-21 13:12:09 -07:00
|
|
|
let all_tokens = (0..result.len())
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(Symbol::terminal)
|
|
|
|
|
.collect::<TokenSet>();
|
2019-01-18 15:13:13 -08:00
|
|
|
for production in productions {
|
|
|
|
|
for i in 1..production.steps.len() {
|
|
|
|
|
let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
|
|
|
|
|
let right_tokens = builder.first_set(&production.steps[i].symbol);
|
|
|
|
|
for left_token in left_tokens.iter() {
|
|
|
|
|
if left_token.is_terminal() {
|
|
|
|
|
result[left_token.index].insert_all_terminals(right_tokens);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-06-20 14:05:15 -07:00
|
|
|
for extra in &grammar.extra_tokens {
|
|
|
|
|
if extra.is_terminal() {
|
|
|
|
|
for entry in result.iter_mut() {
|
|
|
|
|
entry.insert(*extra);
|
|
|
|
|
}
|
|
|
|
|
result[extra.index] = all_tokens.clone();
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-01-18 15:13:13 -08:00
|
|
|
}
|
|
|
|
|
|
2018-12-29 13:57:34 -08:00
|
|
|
pub(crate) fn build_parse_table(
|
|
|
|
|
syntax_grammar: &SyntaxGrammar,
|
|
|
|
|
lexical_grammar: &LexicalGrammar,
|
|
|
|
|
inlines: &InlinedProductionMap,
|
2019-03-27 16:17:02 -07:00
|
|
|
variable_info: &Vec<VariableInfo>,
|
2019-01-07 08:39:47 -08:00
|
|
|
) -> Result<(ParseTable, Vec<TokenSet>)> {
|
2019-01-18 15:13:13 -08:00
|
|
|
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
|
|
|
|
|
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
|
|
|
|
|
populate_following_tokens(
|
|
|
|
|
&mut following_tokens,
|
|
|
|
|
syntax_grammar,
|
|
|
|
|
inlines,
|
|
|
|
|
&item_set_builder,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let table = ParseTableBuilder {
|
2018-12-29 13:57:34 -08:00
|
|
|
syntax_grammar,
|
|
|
|
|
lexical_grammar,
|
2019-01-18 15:13:13 -08:00
|
|
|
item_set_builder,
|
2019-03-27 16:17:02 -07:00
|
|
|
variable_info,
|
2018-12-29 13:57:34 -08:00
|
|
|
state_ids_by_item_set: HashMap::new(),
|
2019-06-06 10:47:19 -07:00
|
|
|
core_ids_by_core: HashMap::new(),
|
2018-12-29 13:57:34 -08:00
|
|
|
item_sets_by_state_id: Vec::new(),
|
|
|
|
|
parse_state_queue: VecDeque::new(),
|
|
|
|
|
parse_table: ParseTable {
|
|
|
|
|
states: Vec::new(),
|
|
|
|
|
symbols: Vec::new(),
|
2019-02-12 11:06:18 -08:00
|
|
|
production_infos: Vec::new(),
|
2019-08-12 14:50:24 -07:00
|
|
|
max_aliased_production_length: 1,
|
2018-12-29 13:57:34 -08:00
|
|
|
},
|
|
|
|
|
}
|
2019-01-18 15:13:13 -08:00
|
|
|
.build()?;
|
|
|
|
|
|
|
|
|
|
Ok((table, following_tokens))
|
2018-12-29 13:57:34 -08:00
|
|
|
}
|