chore(cli): apply clippy fixes
This commit is contained in:
parent
1fb16a72ac
commit
04ff704bca
49 changed files with 1094 additions and 1277 deletions
|
|
@ -10,7 +10,7 @@ use std::collections::hash_map::Entry;
|
|||
use std::collections::{HashMap, VecDeque};
|
||||
use std::mem;
|
||||
|
||||
pub(crate) fn build_lex_table(
|
||||
pub fn build_lex_table(
|
||||
parse_table: &mut ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
|
|
@ -18,14 +18,13 @@ pub(crate) fn build_lex_table(
|
|||
coincident_token_index: &CoincidentTokenIndex,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
) -> (LexTable, LexTable) {
|
||||
let keyword_lex_table;
|
||||
if syntax_grammar.word_token.is_some() {
|
||||
let keyword_lex_table = if syntax_grammar.word_token.is_some() {
|
||||
let mut builder = LexTableBuilder::new(lexical_grammar);
|
||||
builder.add_state_for_tokens(keywords);
|
||||
keyword_lex_table = builder.table;
|
||||
builder.table
|
||||
} else {
|
||||
keyword_lex_table = LexTable::default();
|
||||
}
|
||||
LexTable::default()
|
||||
};
|
||||
|
||||
let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec<ParseStateId>)> = Vec::new();
|
||||
for (i, state) in parse_table.states.iter().enumerate() {
|
||||
|
|
@ -34,7 +33,7 @@ pub(crate) fn build_lex_table(
|
|||
.keys()
|
||||
.filter_map(|token| {
|
||||
if token.is_terminal() {
|
||||
if keywords.contains(&token) {
|
||||
if keywords.contains(token) {
|
||||
syntax_grammar.word_token
|
||||
} else {
|
||||
Some(*token)
|
||||
|
|
@ -48,7 +47,7 @@ pub(crate) fn build_lex_table(
|
|||
.collect();
|
||||
|
||||
let mut did_merge = false;
|
||||
for entry in parse_state_ids_by_token_set.iter_mut() {
|
||||
for entry in &mut parse_state_ids_by_token_set {
|
||||
if merge_token_set(
|
||||
&mut entry.0,
|
||||
&tokens,
|
||||
|
|
@ -198,7 +197,7 @@ impl<'a> LexTableBuilder<'a> {
|
|||
for transition in transitions {
|
||||
if let Some((completed_id, completed_precedence)) = completion {
|
||||
if !TokenConflictMap::prefer_transition(
|
||||
&self.lexical_grammar,
|
||||
self.lexical_grammar,
|
||||
&transition,
|
||||
completed_id,
|
||||
completed_precedence,
|
||||
|
|
@ -248,12 +247,11 @@ fn merge_token_set(
|
|||
{
|
||||
return false;
|
||||
}
|
||||
if !coincident_token_index.contains(symbol, existing_token) {
|
||||
if token_conflict_map.does_overlap(existing_token.index, i)
|
||||
|| token_conflict_map.does_overlap(i, existing_token.index)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if !coincident_token_index.contains(symbol, existing_token)
|
||||
&& (token_conflict_map.does_overlap(existing_token.index, i)
|
||||
|| token_conflict_map.does_overlap(i, existing_token.index))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -315,7 +313,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
let mut new_state = LexState::default();
|
||||
mem::swap(&mut new_state, &mut table.states[state_ids[0]]);
|
||||
|
||||
for (_, advance_action) in new_state.advance_actions.iter_mut() {
|
||||
for (_, advance_action) in &mut new_state.advance_actions {
|
||||
advance_action.state = group_ids_by_state_id[advance_action.state];
|
||||
}
|
||||
if let Some(eof_action) = &mut new_state.eof_action {
|
||||
|
|
@ -324,18 +322,14 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
new_states.push(new_state);
|
||||
}
|
||||
|
||||
for state in parse_table.states.iter_mut() {
|
||||
for state in &mut parse_table.states {
|
||||
state.lex_state_id = group_ids_by_state_id[state.lex_state_id];
|
||||
}
|
||||
|
||||
table.states = new_states;
|
||||
}
|
||||
|
||||
fn lex_states_differ(
|
||||
left: &LexState,
|
||||
right: &LexState,
|
||||
group_ids_by_state_id: &Vec<usize>,
|
||||
) -> bool {
|
||||
fn lex_states_differ(left: &LexState, right: &LexState, group_ids_by_state_id: &[usize]) -> bool {
|
||||
left.advance_actions
|
||||
.iter()
|
||||
.zip(right.advance_actions.iter())
|
||||
|
|
@ -362,7 +356,7 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
.map(|old_id| {
|
||||
let mut state = LexState::default();
|
||||
mem::swap(&mut state, &mut table.states[*old_id]);
|
||||
for (_, advance_action) in state.advance_actions.iter_mut() {
|
||||
for (_, advance_action) in &mut state.advance_actions {
|
||||
advance_action.state = new_ids_by_old_id[advance_action.state];
|
||||
}
|
||||
if let Some(eof_action) = &mut state.eof_action {
|
||||
|
|
@ -373,7 +367,7 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
.collect();
|
||||
|
||||
// Update the parse table's lex state references
|
||||
for state in parse_table.states.iter_mut() {
|
||||
for state in &mut parse_table.states {
|
||||
state.lex_state_id = new_ids_by_old_id[state.lex_state_id];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ use rustc_hash::FxHasher;
|
|||
type SymbolSequence = Vec<Symbol>;
|
||||
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
pub type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AuxiliarySymbolInfo {
|
||||
|
|
@ -75,14 +75,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
self.add_parse_state(
|
||||
&Vec::new(),
|
||||
&Vec::new(),
|
||||
ParseItemSet::with(
|
||||
[(
|
||||
ParseItem::start(),
|
||||
[Symbol::end()].iter().cloned().collect(),
|
||||
)]
|
||||
.iter()
|
||||
.cloned(),
|
||||
),
|
||||
ParseItemSet::with(std::iter::once((
|
||||
ParseItem::start(),
|
||||
std::iter::once(&Symbol::end()).copied().collect(),
|
||||
))),
|
||||
);
|
||||
|
||||
// Compute the possible item sets for non-terminal extras.
|
||||
|
|
@ -97,7 +93,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
for production in &variable.productions {
|
||||
non_terminal_extra_item_sets_by_first_terminal
|
||||
.entry(production.first_symbol().unwrap())
|
||||
.or_insert(ParseItemSet::default())
|
||||
.or_insert_with(ParseItemSet::default)
|
||||
.insert(
|
||||
ParseItem {
|
||||
variable_index: extra_non_terminal.index as u32,
|
||||
|
|
@ -105,9 +101,8 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
step_index: 1,
|
||||
has_preceding_inherited_fields: false,
|
||||
},
|
||||
&[Symbol::end_of_nonterminal_extra()]
|
||||
.iter()
|
||||
.cloned()
|
||||
&std::iter::once(&Symbol::end_of_nonterminal_extra())
|
||||
.copied()
|
||||
.collect(),
|
||||
);
|
||||
}
|
||||
|
|
@ -129,7 +124,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
self.parse_state_info_by_id[entry.state_id].0.clone(),
|
||||
entry.preceding_auxiliary_symbols,
|
||||
entry.state_id,
|
||||
item_set,
|
||||
&item_set,
|
||||
)?;
|
||||
}
|
||||
|
||||
|
|
@ -195,7 +190,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
mut preceding_symbols: SymbolSequence,
|
||||
mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
|
||||
state_id: ParseStateId,
|
||||
item_set: ParseItemSet<'a>,
|
||||
item_set: &ParseItemSet<'a>,
|
||||
) -> Result<()> {
|
||||
let mut terminal_successors = BTreeMap::new();
|
||||
let mut non_terminal_successors = BTreeMap::new();
|
||||
|
|
@ -218,7 +213,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
// for conflict resolution.
|
||||
if variable.is_auxiliary() {
|
||||
preceding_auxiliary_symbols
|
||||
.push(self.get_auxiliary_node_info(&item_set, next_symbol));
|
||||
.push(self.get_auxiliary_node_info(item_set, next_symbol));
|
||||
}
|
||||
|
||||
// For most parse items, the symbols associated with the preceding children
|
||||
|
|
@ -238,12 +233,12 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
non_terminal_successors
|
||||
.entry(next_symbol)
|
||||
.or_insert_with(|| ParseItemSet::default())
|
||||
.or_insert_with(ParseItemSet::default)
|
||||
.insert(successor, lookaheads);
|
||||
} else {
|
||||
terminal_successors
|
||||
.entry(next_symbol)
|
||||
.or_insert_with(|| ParseItemSet::default())
|
||||
.or_insert_with(ParseItemSet::default)
|
||||
.insert(successor, lookaheads);
|
||||
}
|
||||
}
|
||||
|
|
@ -268,7 +263,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
let table_entry = self.parse_table.states[state_id]
|
||||
.terminal_entries
|
||||
.entry(lookahead)
|
||||
.or_insert_with(|| ParseTableEntry::new());
|
||||
.or_insert_with(ParseTableEntry::new);
|
||||
let reduction_info = reduction_infos.entry(lookahead).or_default();
|
||||
|
||||
// While inserting Reduce actions, eagerly resolve conflicts related
|
||||
|
|
@ -278,7 +273,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
table_entry.actions.push(action);
|
||||
} else {
|
||||
match Self::compare_precedence(
|
||||
&self.syntax_grammar,
|
||||
self.syntax_grammar,
|
||||
precedence,
|
||||
&[symbol],
|
||||
&reduction_info.precedence,
|
||||
|
|
@ -333,7 +328,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
|
||||
entry
|
||||
.or_insert_with(|| ParseTableEntry::new())
|
||||
.or_insert_with(ParseTableEntry::new)
|
||||
.actions
|
||||
.push(ParseAction::Shift {
|
||||
state: next_state_id,
|
||||
|
|
@ -361,7 +356,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
// * fail, terminating the parser generation process
|
||||
for symbol in lookaheads_with_conflicts.iter() {
|
||||
self.handle_conflict(
|
||||
&item_set,
|
||||
item_set,
|
||||
state_id,
|
||||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
|
|
@ -444,7 +439,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
item_set: &ParseItemSet,
|
||||
state_id: ParseStateId,
|
||||
preceding_symbols: &SymbolSequence,
|
||||
preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
|
||||
preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
|
||||
conflicting_lookahead: Symbol,
|
||||
reduction_info: &ReductionInfo,
|
||||
) -> Result<()> {
|
||||
|
|
@ -464,29 +459,27 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
let mut conflicting_items = HashSet::new();
|
||||
for (item, lookaheads) in &item_set.entries {
|
||||
if let Some(step) = item.step() {
|
||||
if item.step_index > 0 {
|
||||
if self
|
||||
if item.step_index > 0
|
||||
&& self
|
||||
.item_set_builder
|
||||
.first_set(&step.symbol)
|
||||
.contains(&conflicting_lookahead)
|
||||
{
|
||||
if item.variable_index != u32::MAX {
|
||||
conflicting_items.insert(item);
|
||||
}
|
||||
{
|
||||
if item.variable_index != u32::MAX {
|
||||
conflicting_items.insert(item);
|
||||
}
|
||||
|
||||
let p = (
|
||||
item.precedence(),
|
||||
Symbol::non_terminal(item.variable_index as usize),
|
||||
);
|
||||
if let Err(i) = shift_precedence.binary_search(&p) {
|
||||
shift_precedence.insert(i, p);
|
||||
}
|
||||
let p = (
|
||||
item.precedence(),
|
||||
Symbol::non_terminal(item.variable_index as usize),
|
||||
);
|
||||
if let Err(i) = shift_precedence.binary_search(&p) {
|
||||
shift_precedence.insert(i, p);
|
||||
}
|
||||
}
|
||||
} else if lookaheads.contains(&conflicting_lookahead) {
|
||||
if item.variable_index != u32::MAX {
|
||||
conflicting_items.insert(item);
|
||||
}
|
||||
} else if lookaheads.contains(&conflicting_lookahead) && item.variable_index != u32::MAX
|
||||
{
|
||||
conflicting_items.insert(item);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -512,7 +505,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
let mut shift_is_more = false;
|
||||
for p in shift_precedence {
|
||||
match Self::compare_precedence(
|
||||
&self.syntax_grammar,
|
||||
self.syntax_grammar,
|
||||
p.0,
|
||||
&[p.1],
|
||||
&reduction_info.precedence,
|
||||
|
|
@ -655,11 +648,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
let prec_line = if let Some(associativity) = associativity {
|
||||
Some(format!(
|
||||
"(precedence: {}, associativity: {:?})",
|
||||
precedence, associativity
|
||||
"(precedence: {precedence}, associativity: {associativity:?})",
|
||||
))
|
||||
} else if !precedence.is_none() {
|
||||
Some(format!("(precedence: {})", precedence))
|
||||
Some(format!("(precedence: {precedence})"))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
|
@ -723,24 +715,22 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
};
|
||||
|
||||
if actual_conflict.len() > 1 {
|
||||
if shift_items.len() > 0 {
|
||||
if !shift_items.is_empty() {
|
||||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
" {}: Specify a higher precedence in",
|
||||
resolution_count
|
||||
" {resolution_count}: Specify a higher precedence in",
|
||||
)
|
||||
.unwrap();
|
||||
list_rule_names(&mut msg, &shift_items);
|
||||
write!(&mut msg, " than in the other rules.\n").unwrap();
|
||||
writeln!(&mut msg, " than in the other rules.").unwrap();
|
||||
}
|
||||
|
||||
for item in &reduce_items {
|
||||
resolution_count += 1;
|
||||
write!(
|
||||
writeln!(
|
||||
&mut msg,
|
||||
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
|
||||
resolution_count,
|
||||
" {resolution_count}: Specify a higher precedence in `{}` than in the other rules.",
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
)
|
||||
.unwrap();
|
||||
|
|
@ -751,19 +741,17 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
" {}: Specify a left or right associativity in",
|
||||
resolution_count
|
||||
" {resolution_count}: Specify a left or right associativity in",
|
||||
)
|
||||
.unwrap();
|
||||
list_rule_names(&mut msg, &reduce_items);
|
||||
write!(&mut msg, "\n").unwrap();
|
||||
writeln!(&mut msg).unwrap();
|
||||
}
|
||||
|
||||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
" {}: Add a conflict for these rules: ",
|
||||
resolution_count
|
||||
" {resolution_count}: Add a conflict for these rules: ",
|
||||
)
|
||||
.unwrap();
|
||||
for (i, symbol) in actual_conflict.iter().enumerate() {
|
||||
|
|
@ -772,7 +760,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
|
||||
}
|
||||
write!(&mut msg, "\n").unwrap();
|
||||
writeln!(&mut msg).unwrap();
|
||||
|
||||
Err(anyhow!(msg))
|
||||
}
|
||||
|
|
@ -805,7 +793,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
// and to the default precedence, which is zero.
|
||||
(Precedence::Integer(l), Precedence::Integer(r)) if *l != 0 || *r != 0 => l.cmp(r),
|
||||
(Precedence::Integer(l), Precedence::None) if *l != 0 => l.cmp(&0),
|
||||
(Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(&r),
|
||||
(Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(r),
|
||||
|
||||
// Named precedences can be compared to other named precedences.
|
||||
_ => grammar
|
||||
|
|
@ -872,7 +860,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
production_info
|
||||
.field_map
|
||||
.entry(field_name.clone())
|
||||
.or_insert(Vec::new())
|
||||
.or_default()
|
||||
.push(FieldLocation {
|
||||
index: i,
|
||||
inherited: false,
|
||||
|
|
@ -885,11 +873,11 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.is_visible()
|
||||
{
|
||||
let info = &self.variable_info[step.symbol.index];
|
||||
for (field_name, _) in &info.fields {
|
||||
for field_name in info.fields.keys() {
|
||||
production_info
|
||||
.field_map
|
||||
.entry(field_name.clone())
|
||||
.or_insert(Vec::new())
|
||||
.or_default()
|
||||
.push(FieldLocation {
|
||||
index: i,
|
||||
inherited: true,
|
||||
|
|
@ -903,7 +891,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
|
||||
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
|
||||
self.parse_table.max_aliased_production_length = item.production.steps.len()
|
||||
self.parse_table.max_aliased_production_length = item.production.steps.len();
|
||||
}
|
||||
|
||||
if let Some(index) = self
|
||||
|
|
@ -939,7 +927,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
|
||||
fn populate_following_tokens(
|
||||
result: &mut Vec<TokenSet>,
|
||||
result: &mut [TokenSet],
|
||||
grammar: &SyntaxGrammar,
|
||||
inlines: &InlinedProductionMap,
|
||||
builder: &ParseItemSetBuilder,
|
||||
|
|
@ -950,7 +938,6 @@ fn populate_following_tokens(
|
|||
.flat_map(|v| &v.productions)
|
||||
.chain(&inlines.productions);
|
||||
let all_tokens = (0..result.len())
|
||||
.into_iter()
|
||||
.map(Symbol::terminal)
|
||||
.collect::<TokenSet>();
|
||||
for production in productions {
|
||||
|
|
@ -974,7 +961,7 @@ fn populate_following_tokens(
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn build_parse_table<'a>(
|
||||
pub fn build_parse_table<'a>(
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
inlines: &'a InlinedProductionMap,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use crate::generate::rules::Symbol;
|
|||
use crate::generate::tables::{ParseStateId, ParseTable};
|
||||
use std::fmt;
|
||||
|
||||
pub(crate) struct CoincidentTokenIndex<'a> {
|
||||
pub struct CoincidentTokenIndex<'a> {
|
||||
entries: Vec<Vec<ParseStateId>>,
|
||||
grammar: &'a LexicalGrammar,
|
||||
n: usize,
|
||||
|
|
@ -23,7 +23,7 @@ impl<'a> CoincidentTokenIndex<'a> {
|
|||
for other_symbol in state.terminal_entries.keys() {
|
||||
if other_symbol.is_terminal() {
|
||||
let index = result.index(symbol.index, other_symbol.index);
|
||||
if result.entries[index].last().cloned() != Some(i) {
|
||||
if result.entries[index].last().copied() != Some(i) {
|
||||
result.entries[index].push(i);
|
||||
}
|
||||
}
|
||||
|
|
@ -42,7 +42,8 @@ impl<'a> CoincidentTokenIndex<'a> {
|
|||
!self.entries[self.index(a.index, b.index)].is_empty()
|
||||
}
|
||||
|
||||
fn index(&self, a: usize, b: usize) -> usize {
|
||||
#[must_use]
|
||||
const fn index(&self, a: usize, b: usize) -> usize {
|
||||
if a < b {
|
||||
a * self.n + b
|
||||
} else {
|
||||
|
|
@ -53,20 +54,20 @@ impl<'a> CoincidentTokenIndex<'a> {
|
|||
|
||||
impl<'a> fmt::Debug for CoincidentTokenIndex<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "CoincidentTokenIndex {{\n")?;
|
||||
writeln!(f, "CoincidentTokenIndex {{")?;
|
||||
|
||||
write!(f, " entries: {{\n")?;
|
||||
writeln!(f, " entries: {{")?;
|
||||
for i in 0..self.n {
|
||||
write!(f, " {}: {{\n", self.grammar.variables[i].name)?;
|
||||
writeln!(f, " {}: {{", self.grammar.variables[i].name)?;
|
||||
for j in 0..self.n {
|
||||
write!(
|
||||
writeln!(
|
||||
f,
|
||||
" {}: {:?},\n",
|
||||
" {}: {:?},",
|
||||
self.grammar.variables[j].name,
|
||||
self.entries[self.index(i, j)].len()
|
||||
)?;
|
||||
}
|
||||
write!(f, " }},\n")?;
|
||||
writeln!(f, " }},")?;
|
||||
}
|
||||
write!(f, " }},")?;
|
||||
write!(f, "}}")?;
|
||||
|
|
|
|||
|
|
@ -22,9 +22,9 @@ lazy_static! {
|
|||
};
|
||||
}
|
||||
|
||||
/// A ParseItem represents an in-progress match of a single production in a grammar.
|
||||
/// A [`ParseItem`] represents an in-progress match of a single production in a grammar.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct ParseItem<'a> {
|
||||
pub struct ParseItem<'a> {
|
||||
/// The index of the parent rule within the grammar.
|
||||
pub variable_index: u32,
|
||||
/// The number of symbols that have already been matched.
|
||||
|
|
@ -47,35 +47,35 @@ pub(crate) struct ParseItem<'a> {
|
|||
pub has_preceding_inherited_fields: bool,
|
||||
}
|
||||
|
||||
/// A ParseItemSet represents a set of in-progress matches of productions in a
|
||||
/// A [`ParseItemSet`] represents a set of in-progress matches of productions in a
|
||||
/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that
|
||||
/// are allowed to *follow* the in-progress rule. This object corresponds directly
|
||||
/// to a state in the final parse table.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseItemSet<'a> {
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Default)]
|
||||
pub struct ParseItemSet<'a> {
|
||||
pub entries: Vec<(ParseItem<'a>, TokenSet)>,
|
||||
}
|
||||
|
||||
/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead
|
||||
/// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead
|
||||
/// information. Parse states with the same core are candidates for merging.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseItemSetCore<'a> {
|
||||
pub struct ParseItemSetCore<'a> {
|
||||
pub entries: Vec<ParseItem<'a>>,
|
||||
}
|
||||
|
||||
pub(crate) struct ParseItemDisplay<'a>(
|
||||
pub struct ParseItemDisplay<'a>(
|
||||
pub &'a ParseItem<'a>,
|
||||
pub &'a SyntaxGrammar,
|
||||
pub &'a LexicalGrammar,
|
||||
);
|
||||
|
||||
pub(crate) struct TokenSetDisplay<'a>(
|
||||
pub struct TokenSetDisplay<'a>(
|
||||
pub &'a TokenSet,
|
||||
pub &'a SyntaxGrammar,
|
||||
pub &'a LexicalGrammar,
|
||||
);
|
||||
|
||||
pub(crate) struct ParseItemSetDisplay<'a>(
|
||||
pub struct ParseItemSetDisplay<'a>(
|
||||
pub &'a ParseItemSet<'a>,
|
||||
pub &'a SyntaxGrammar,
|
||||
pub &'a LexicalGrammar,
|
||||
|
|
@ -116,16 +116,19 @@ impl<'a> ParseItem<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn is_done(&self) -> bool {
|
||||
self.step_index as usize == self.production.steps.len()
|
||||
}
|
||||
|
||||
pub fn is_augmented(&self) -> bool {
|
||||
#[must_use]
|
||||
pub const fn is_augmented(&self) -> bool {
|
||||
self.variable_index == u32::MAX
|
||||
}
|
||||
|
||||
/// Create an item like this one, but advanced by one step.
|
||||
pub fn successor(&self) -> ParseItem<'a> {
|
||||
#[must_use]
|
||||
pub const fn successor(&self) -> ParseItem<'a> {
|
||||
ParseItem {
|
||||
variable_index: self.variable_index,
|
||||
production: self.production,
|
||||
|
|
@ -136,8 +139,8 @@ impl<'a> ParseItem<'a> {
|
|||
|
||||
/// Create an item identical to this one, but with a different production.
|
||||
/// This is used when dynamically "inlining" certain symbols in a production.
|
||||
pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
|
||||
let mut result = self.clone();
|
||||
pub const fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
|
||||
let mut result = *self;
|
||||
result.production = production;
|
||||
result
|
||||
}
|
||||
|
|
@ -172,14 +175,6 @@ impl<'a> ParseItemSet<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> Default for ParseItemSet<'a> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
entries: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for ParseItemDisplay<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
if self.0.is_augmented() {
|
||||
|
|
@ -196,10 +191,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
|
|||
if i == self.0.step_index as usize {
|
||||
write!(f, " •")?;
|
||||
if let Some(associativity) = step.associativity {
|
||||
if !step.precedence.is_none() {
|
||||
write!(f, " ({} {:?})", step.precedence, associativity)?;
|
||||
if step.precedence.is_none() {
|
||||
write!(f, " ({associativity:?})")?;
|
||||
} else {
|
||||
write!(f, " ({:?})", associativity)?;
|
||||
write!(f, " ({} {associativity:?})", step.precedence)?;
|
||||
}
|
||||
} else if !step.precedence.is_none() {
|
||||
write!(f, " ({})", step.precedence)?;
|
||||
|
|
@ -211,7 +206,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
|
|||
if let Some(variable) = self.2.variables.get(step.symbol.index) {
|
||||
write!(f, "{}", &variable.name)?;
|
||||
} else {
|
||||
write!(f, "{}-{}", "terminal", step.symbol.index)?;
|
||||
write!(f, "terminal-{}", step.symbol.index)?;
|
||||
}
|
||||
} else if step.symbol.is_external() {
|
||||
write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
|
||||
|
|
@ -228,10 +223,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
|
|||
write!(f, " •")?;
|
||||
if let Some(step) = self.0.production.steps.last() {
|
||||
if let Some(associativity) = step.associativity {
|
||||
if !step.precedence.is_none() {
|
||||
write!(f, " ({} {:?})", step.precedence, associativity)?;
|
||||
if step.precedence.is_none() {
|
||||
write!(f, " ({associativity:?})")?;
|
||||
} else {
|
||||
write!(f, " ({:?})", associativity)?;
|
||||
write!(f, " ({} {associativity:?})", step.precedence)?;
|
||||
}
|
||||
} else if !step.precedence.is_none() {
|
||||
write!(f, " ({})", step.precedence)?;
|
||||
|
|
@ -255,7 +250,7 @@ impl<'a> fmt::Display for TokenSetDisplay<'a> {
|
|||
if let Some(variable) = self.2.variables.get(symbol.index) {
|
||||
write!(f, "{}", &variable.name)?;
|
||||
} else {
|
||||
write!(f, "{}-{}", "terminal", symbol.index)?;
|
||||
write!(f, "terminal-{}", symbol.index)?;
|
||||
}
|
||||
} else if symbol.is_external() {
|
||||
write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
|
||||
|
|
@ -270,7 +265,7 @@ impl<'a> fmt::Display for TokenSetDisplay<'a> {
|
|||
|
||||
impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
for (item, lookaheads) in self.0.entries.iter() {
|
||||
for (item, lookaheads) in &self.0.entries {
|
||||
writeln!(
|
||||
f,
|
||||
"{}\t{}",
|
||||
|
|
@ -288,7 +283,7 @@ impl<'a> Hash for ParseItem<'a> {
|
|||
hasher.write_u32(self.step_index);
|
||||
hasher.write_i32(self.production.dynamic_precedence);
|
||||
hasher.write_usize(self.production.steps.len());
|
||||
hasher.write_i32(self.has_preceding_inherited_fields as i32);
|
||||
hasher.write_i32(i32::from(self.has_preceding_inherited_fields));
|
||||
self.precedence().hash(hasher);
|
||||
self.associativity().hash(hasher);
|
||||
|
||||
|
|
@ -344,7 +339,7 @@ impl<'a> PartialEq for ParseItem<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -364,7 +359,7 @@ impl<'a> Ord for ParseItem<'a> {
|
|||
.len()
|
||||
.cmp(&other.production.steps.len())
|
||||
})
|
||||
.then_with(|| self.precedence().cmp(&other.precedence()))
|
||||
.then_with(|| self.precedence().cmp(other.precedence()))
|
||||
.then_with(|| self.associativity().cmp(&other.associativity()))
|
||||
.then_with(|| {
|
||||
for (i, step) in self.production.steps.iter().enumerate() {
|
||||
|
|
@ -383,7 +378,7 @@ impl<'a> Ord for ParseItem<'a> {
|
|||
return o;
|
||||
}
|
||||
}
|
||||
return Ordering::Equal;
|
||||
Ordering::Equal
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -399,7 +394,7 @@ impl<'a> Eq for ParseItem<'a> {}
|
|||
impl<'a> Hash for ParseItemSet<'a> {
|
||||
fn hash<H: Hasher>(&self, hasher: &mut H) {
|
||||
hasher.write_usize(self.entries.len());
|
||||
for (item, lookaheads) in self.entries.iter() {
|
||||
for (item, lookaheads) in &self.entries {
|
||||
item.hash(hasher);
|
||||
lookaheads.hash(hasher);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ struct FollowSetInfo {
|
|||
propagates_lookaheads: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct ParseItemSetBuilder<'a> {
|
||||
pub struct ParseItemSetBuilder<'a> {
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
first_sets: HashMap<Symbol, TokenSet>,
|
||||
|
|
@ -80,7 +80,10 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
for i in 0..syntax_grammar.variables.len() {
|
||||
let symbol = Symbol::non_terminal(i);
|
||||
|
||||
let first_set = &mut result.first_sets.entry(symbol).or_insert(TokenSet::new());
|
||||
let first_set = result
|
||||
.first_sets
|
||||
.entry(symbol)
|
||||
.or_insert_with(TokenSet::new);
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push(symbol);
|
||||
|
|
@ -88,10 +91,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
if current_symbol.is_terminal() || current_symbol.is_external() {
|
||||
first_set.insert(current_symbol);
|
||||
} else if processed_non_terminals.insert(current_symbol) {
|
||||
for production in syntax_grammar.variables[current_symbol.index]
|
||||
.productions
|
||||
.iter()
|
||||
{
|
||||
for production in &syntax_grammar.variables[current_symbol.index].productions {
|
||||
if let Some(step) = production.steps.first() {
|
||||
symbols_to_process.push(step.symbol);
|
||||
}
|
||||
|
|
@ -100,7 +100,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
}
|
||||
|
||||
// The LAST set is defined in a similar way to the FIRST set.
|
||||
let last_set = &mut result.last_sets.entry(symbol).or_insert(TokenSet::new());
|
||||
let last_set = result.last_sets.entry(symbol).or_insert_with(TokenSet::new);
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push(symbol);
|
||||
|
|
@ -108,10 +108,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
if current_symbol.is_terminal() || current_symbol.is_external() {
|
||||
last_set.insert(current_symbol);
|
||||
} else if processed_non_terminals.insert(current_symbol) {
|
||||
for production in syntax_grammar.variables[current_symbol.index]
|
||||
.productions
|
||||
.iter()
|
||||
{
|
||||
for production in &syntax_grammar.variables[current_symbol.index].productions {
|
||||
if let Some(step) = production.steps.last() {
|
||||
symbols_to_process.push(step.symbol);
|
||||
}
|
||||
|
|
@ -235,7 +232,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
result
|
||||
}
|
||||
|
||||
pub(crate) fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
|
||||
pub fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
|
||||
let mut result = ParseItemSet::default();
|
||||
for (item, lookaheads) in &item_set.entries {
|
||||
if let Some(productions) = self
|
||||
|
|
@ -270,11 +267,9 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
let next_step = item.successor().step();
|
||||
|
||||
// Determine which tokens can follow this non-terminal.
|
||||
let following_tokens = if let Some(next_step) = next_step {
|
||||
let following_tokens = next_step.map_or(lookaheads, |next_step| {
|
||||
self.first_sets.get(&next_step.symbol).unwrap()
|
||||
} else {
|
||||
&lookaheads
|
||||
};
|
||||
});
|
||||
|
||||
// Use the pre-computed *additions* to expand the non-terminal.
|
||||
for addition in &self.transitive_closure_additions[step.symbol.index] {
|
||||
|
|
@ -291,9 +286,9 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
|
||||
impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "ParseItemSetBuilder {{\n")?;
|
||||
writeln!(f, "ParseItemSetBuilder {{")?;
|
||||
|
||||
write!(f, " first_sets: {{\n")?;
|
||||
writeln!(f, " first_sets: {{")?;
|
||||
for (symbol, first_set) in &self.first_sets {
|
||||
let name = match symbol.kind {
|
||||
SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
|
||||
|
|
@ -301,16 +296,15 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
|
|||
SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
|
||||
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
|
||||
};
|
||||
write!(
|
||||
writeln!(
|
||||
f,
|
||||
" first({:?}): {}\n",
|
||||
name,
|
||||
TokenSetDisplay(first_set, &self.syntax_grammar, &self.lexical_grammar)
|
||||
" first({name:?}): {}",
|
||||
TokenSetDisplay(first_set, self.syntax_grammar, self.lexical_grammar)
|
||||
)?;
|
||||
}
|
||||
write!(f, " }}\n")?;
|
||||
writeln!(f, " }}")?;
|
||||
|
||||
write!(f, " last_sets: {{\n")?;
|
||||
writeln!(f, " last_sets: {{")?;
|
||||
for (symbol, last_set) in &self.last_sets {
|
||||
let name = match symbol.kind {
|
||||
SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
|
||||
|
|
@ -318,26 +312,25 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
|
|||
SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
|
||||
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
|
||||
};
|
||||
write!(
|
||||
writeln!(
|
||||
f,
|
||||
" last({:?}): {}\n",
|
||||
name,
|
||||
TokenSetDisplay(last_set, &self.syntax_grammar, &self.lexical_grammar)
|
||||
" last({name:?}): {}",
|
||||
TokenSetDisplay(last_set, self.syntax_grammar, self.lexical_grammar)
|
||||
)?;
|
||||
}
|
||||
write!(f, " }}\n")?;
|
||||
writeln!(f, " }}")?;
|
||||
|
||||
write!(f, " additions: {{\n")?;
|
||||
writeln!(f, " additions: {{")?;
|
||||
for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
|
||||
write!(f, " {}: {{\n", variable.name)?;
|
||||
writeln!(f, " {}: {{", variable.name)?;
|
||||
for addition in &self.transitive_closure_additions[i] {
|
||||
write!(
|
||||
writeln!(
|
||||
f,
|
||||
" {}\n",
|
||||
" {}",
|
||||
ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar)
|
||||
)?;
|
||||
}
|
||||
write!(f, " }},\n")?;
|
||||
writeln!(f, " }},")?;
|
||||
}
|
||||
write!(f, " }},")?;
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use log::info;
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::mem;
|
||||
|
||||
pub(crate) fn minimize_parse_table(
|
||||
pub fn minimize_parse_table(
|
||||
parse_table: &mut ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
|
|
@ -67,9 +67,9 @@ impl<'a> Minimizer<'a> {
|
|||
symbol,
|
||||
..
|
||||
} => {
|
||||
if !self.simple_aliases.contains_key(&symbol)
|
||||
&& !self.syntax_grammar.supertype_symbols.contains(&symbol)
|
||||
&& !aliased_symbols.contains(&symbol)
|
||||
if !self.simple_aliases.contains_key(symbol)
|
||||
&& !self.syntax_grammar.supertype_symbols.contains(symbol)
|
||||
&& !aliased_symbols.contains(symbol)
|
||||
&& self.syntax_grammar.variables[symbol.index].kind
|
||||
!= VariableType::Named
|
||||
&& (unit_reduction_symbol.is_none()
|
||||
|
|
@ -97,21 +97,22 @@ impl<'a> Minimizer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
for state in self.parse_table.states.iter_mut() {
|
||||
for state in &mut self.parse_table.states {
|
||||
let mut done = false;
|
||||
while !done {
|
||||
done = true;
|
||||
state.update_referenced_states(|other_state_id, state| {
|
||||
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
|
||||
done = false;
|
||||
match state.nonterminal_entries.get(symbol) {
|
||||
Some(GotoAction::Goto(state_id)) => *state_id,
|
||||
_ => other_state_id,
|
||||
}
|
||||
} else {
|
||||
other_state_id
|
||||
}
|
||||
})
|
||||
unit_reduction_symbols_by_state.get(&other_state_id).map_or(
|
||||
other_state_id,
|
||||
|symbol| {
|
||||
done = false;
|
||||
match state.nonterminal_entries.get(symbol) {
|
||||
Some(GotoAction::Goto(state_id)) => *state_id,
|
||||
_ => other_state_id,
|
||||
}
|
||||
},
|
||||
)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -198,7 +199,7 @@ impl<'a> Minimizer<'a> {
|
|||
&self,
|
||||
left_state: &ParseState,
|
||||
right_state: &ParseState,
|
||||
group_ids_by_state_id: &Vec<ParseStateId>,
|
||||
group_ids_by_state_id: &[ParseStateId],
|
||||
) -> bool {
|
||||
for (token, left_entry) in &left_state.terminal_entries {
|
||||
if let Some(right_entry) = right_state.terminal_entries.get(token) {
|
||||
|
|
@ -223,15 +224,15 @@ impl<'a> Minimizer<'a> {
|
|||
}
|
||||
|
||||
for token in right_state.terminal_entries.keys() {
|
||||
if !left_state.terminal_entries.contains_key(token) {
|
||||
if self.token_conflicts(
|
||||
if !left_state.terminal_entries.contains_key(token)
|
||||
&& self.token_conflicts(
|
||||
left_state.id,
|
||||
right_state.id,
|
||||
left_state.terminal_entries.keys(),
|
||||
*token,
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -242,7 +243,7 @@ impl<'a> Minimizer<'a> {
|
|||
&self,
|
||||
state1: &ParseState,
|
||||
state2: &ParseState,
|
||||
group_ids_by_state_id: &Vec<ParseStateId>,
|
||||
group_ids_by_state_id: &[ParseStateId],
|
||||
) -> bool {
|
||||
for (token, entry1) in &state1.terminal_entries {
|
||||
if let ParseAction::Shift { state: s1, .. } = entry1.actions.last().unwrap() {
|
||||
|
|
@ -252,12 +253,10 @@ impl<'a> Minimizer<'a> {
|
|||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 != group2 {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
"split states {} {} - successors for {} are split: {s1} {s2}",
|
||||
state1.id,
|
||||
state2.id,
|
||||
self.symbol_name(token),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -275,12 +274,10 @@ impl<'a> Minimizer<'a> {
|
|||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 != group2 {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
"split states {} {} - successors for {} are split: {s1} {s2}",
|
||||
state1.id,
|
||||
state2.id,
|
||||
self.symbol_name(symbol),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -300,16 +297,14 @@ impl<'a> Minimizer<'a> {
|
|||
token: &Symbol,
|
||||
entry1: &ParseTableEntry,
|
||||
entry2: &ParseTableEntry,
|
||||
group_ids_by_state_id: &Vec<ParseStateId>,
|
||||
group_ids_by_state_id: &[ParseStateId],
|
||||
) -> bool {
|
||||
// To be compatible, entries need to have the same actions.
|
||||
let actions1 = &entry1.actions;
|
||||
let actions2 = &entry2.actions;
|
||||
if actions1.len() != actions2.len() {
|
||||
info!(
|
||||
"split states {} {} - differing action counts for token {}",
|
||||
state_id1,
|
||||
state_id2,
|
||||
"split states {state_id1} {state_id2} - differing action counts for token {}",
|
||||
self.symbol_name(token)
|
||||
);
|
||||
return true;
|
||||
|
|
@ -334,22 +329,15 @@ impl<'a> Minimizer<'a> {
|
|||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 == group2 && is_repetition1 == is_repetition2 {
|
||||
continue;
|
||||
} else {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
state_id1,
|
||||
state_id2,
|
||||
self.symbol_name(token),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
info!(
|
||||
"split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}",
|
||||
self.symbol_name(token),
|
||||
);
|
||||
return true;
|
||||
} else if action1 != action2 {
|
||||
info!(
|
||||
"split states {} {} - unequal actions for {}",
|
||||
state_id1,
|
||||
state_id2,
|
||||
"split states {state_id1} {state_id2} - unequal actions for {}",
|
||||
self.symbol_name(token),
|
||||
);
|
||||
return true;
|
||||
|
|
@ -367,10 +355,7 @@ impl<'a> Minimizer<'a> {
|
|||
new_token: Symbol,
|
||||
) -> bool {
|
||||
if new_token == Symbol::end_of_nonterminal_extra() {
|
||||
info!(
|
||||
"split states {} {} - end of non-terminal extra",
|
||||
left_id, right_id,
|
||||
);
|
||||
info!("split states {left_id} {right_id} - end of non-terminal extra",);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -378,9 +363,7 @@ impl<'a> Minimizer<'a> {
|
|||
// existing lookahead tokens.
|
||||
if new_token.is_external() {
|
||||
info!(
|
||||
"split states {} {} - external token {}",
|
||||
left_id,
|
||||
right_id,
|
||||
"split states {left_id} {right_id} - external token {}",
|
||||
self.symbol_name(&new_token),
|
||||
);
|
||||
return true;
|
||||
|
|
@ -395,9 +378,7 @@ impl<'a> Minimizer<'a> {
|
|||
.any(|external| external.corresponding_internal_token == Some(new_token))
|
||||
{
|
||||
info!(
|
||||
"split states {} {} - internal/external token {}",
|
||||
left_id,
|
||||
right_id,
|
||||
"split states {left_id} {right_id} - internal/external token {}",
|
||||
self.symbol_name(&new_token),
|
||||
);
|
||||
return true;
|
||||
|
|
@ -405,27 +386,24 @@ impl<'a> Minimizer<'a> {
|
|||
|
||||
// Do not add a token if it conflicts with an existing token.
|
||||
for token in existing_tokens {
|
||||
if token.is_terminal() {
|
||||
if !(self.syntax_grammar.word_token == Some(*token)
|
||||
if token.is_terminal()
|
||||
&& !(self.syntax_grammar.word_token == Some(*token)
|
||||
&& self.keywords.contains(&new_token))
|
||||
&& !(self.syntax_grammar.word_token == Some(new_token)
|
||||
&& self.keywords.contains(token))
|
||||
&& (self
|
||||
&& !(self.syntax_grammar.word_token == Some(new_token)
|
||||
&& self.keywords.contains(token))
|
||||
&& (self
|
||||
.token_conflict_map
|
||||
.does_conflict(new_token.index, token.index)
|
||||
|| self
|
||||
.token_conflict_map
|
||||
.does_conflict(new_token.index, token.index)
|
||||
|| self
|
||||
.token_conflict_map
|
||||
.does_match_same_string(new_token.index, token.index))
|
||||
{
|
||||
info!(
|
||||
"split states {} {} - token {} conflicts with {}",
|
||||
left_id,
|
||||
right_id,
|
||||
self.symbol_name(&new_token),
|
||||
self.symbol_name(token),
|
||||
);
|
||||
return true;
|
||||
}
|
||||
.does_match_same_string(new_token.index, token.index))
|
||||
{
|
||||
info!(
|
||||
"split states {left_id} {right_id} - token {} conflicts with {}",
|
||||
self.symbol_name(&new_token),
|
||||
self.symbol_name(token),
|
||||
);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
pub(crate) mod build_lex_table;
|
||||
pub(crate) mod build_parse_table;
|
||||
pub mod build_lex_table;
|
||||
pub mod build_parse_table;
|
||||
mod coincident_tokens;
|
||||
mod item;
|
||||
mod item_set_builder;
|
||||
|
|
@ -20,7 +20,7 @@ use anyhow::Result;
|
|||
use log::info;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
|
||||
pub(crate) fn build_tables(
|
||||
pub fn build_tables(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
|
|
@ -69,8 +69,8 @@ pub(crate) fn build_tables(
|
|||
|
||||
if let Some(report_symbol_name) = report_symbol_name {
|
||||
report_state_info(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
&parse_table,
|
||||
&parse_state_info,
|
||||
report_symbol_name,
|
||||
|
|
@ -98,9 +98,8 @@ fn populate_error_state(
|
|||
// First identify the *conflict-free tokens*: tokens that do not overlap with
|
||||
// any other token in any way, besides matching exactly the same string.
|
||||
let conflict_free_tokens: TokenSet = (0..n)
|
||||
.into_iter()
|
||||
.filter_map(|i| {
|
||||
let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
|
||||
let conflicts_with_other_tokens = (0..n).any(|j| {
|
||||
j != i
|
||||
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
|
||||
&& token_conflict_map.does_match_shorter_or_longer(i, j)
|
||||
|
|
@ -126,18 +125,19 @@ fn populate_error_state(
|
|||
// the *conflict-free tokens* identified above.
|
||||
for i in 0..n {
|
||||
let symbol = Symbol::terminal(i);
|
||||
if !conflict_free_tokens.contains(&symbol) && !keywords.contains(&symbol) {
|
||||
if syntax_grammar.word_token != Some(symbol) {
|
||||
if let Some(t) = conflict_free_tokens.iter().find(|t| {
|
||||
!coincident_token_index.contains(symbol, *t)
|
||||
&& token_conflict_map.does_conflict(symbol.index, t.index)
|
||||
}) {
|
||||
info!(
|
||||
"error recovery - exclude token {} because of conflict with {}",
|
||||
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
|
||||
);
|
||||
continue;
|
||||
}
|
||||
if !conflict_free_tokens.contains(&symbol)
|
||||
&& !keywords.contains(&symbol)
|
||||
&& syntax_grammar.word_token != Some(symbol)
|
||||
{
|
||||
if let Some(t) = conflict_free_tokens.iter().find(|t| {
|
||||
!coincident_token_index.contains(symbol, *t)
|
||||
&& token_conflict_map.does_conflict(symbol.index, t.index)
|
||||
}) {
|
||||
info!(
|
||||
"error recovery - exclude token {} because of conflict with {}",
|
||||
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
info!(
|
||||
|
|
@ -361,7 +361,7 @@ fn mark_fragile_tokens(
|
|||
) {
|
||||
let n = lexical_grammar.variables.len();
|
||||
let mut valid_tokens_mask = Vec::with_capacity(n);
|
||||
for state in parse_table.states.iter_mut() {
|
||||
for state in &mut parse_table.states {
|
||||
valid_tokens_mask.clear();
|
||||
valid_tokens_mask.resize(n, false);
|
||||
for token in state.terminal_entries.keys() {
|
||||
|
|
@ -369,14 +369,12 @@ fn mark_fragile_tokens(
|
|||
valid_tokens_mask[token.index] = true;
|
||||
}
|
||||
}
|
||||
for (token, entry) in state.terminal_entries.iter_mut() {
|
||||
for (token, entry) in &mut state.terminal_entries {
|
||||
if token.is_terminal() {
|
||||
for (i, is_valid) in valid_tokens_mask.iter().enumerate() {
|
||||
if *is_valid {
|
||||
if token_conflict_map.does_overlap(i, token.index) {
|
||||
entry.reusable = false;
|
||||
break;
|
||||
}
|
||||
if *is_valid && token_conflict_map.does_overlap(i, token.index) {
|
||||
entry.reusable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -388,7 +386,7 @@ fn report_state_info<'a>(
|
|||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
parse_table: &ParseTable,
|
||||
parse_state_info: &Vec<ParseStateInfo<'a>>,
|
||||
parse_state_info: &[ParseStateInfo<'a>],
|
||||
report_symbol_name: &'a str,
|
||||
) {
|
||||
let mut all_state_indices = BTreeSet::new();
|
||||
|
|
@ -399,7 +397,7 @@ fn report_state_info<'a>(
|
|||
for (i, state) in parse_table.states.iter().enumerate() {
|
||||
all_state_indices.insert(i);
|
||||
let item_set = &parse_state_info[state.id];
|
||||
for (item, _) in item_set.1.entries.iter() {
|
||||
for (item, _) in &item_set.1.entries {
|
||||
if !item.is_augmented() {
|
||||
symbols_with_state_indices[item.variable_index as usize]
|
||||
.1
|
||||
|
|
@ -424,7 +422,7 @@ fn report_state_info<'a>(
|
|||
width = max_symbol_name_length
|
||||
);
|
||||
}
|
||||
eprintln!("");
|
||||
eprintln!();
|
||||
|
||||
let state_indices = if report_symbol_name == "*" {
|
||||
Some(&all_state_indices)
|
||||
|
|
@ -441,14 +439,14 @@ fn report_state_info<'a>(
|
|||
};
|
||||
|
||||
if let Some(state_indices) = state_indices {
|
||||
let mut state_indices = state_indices.into_iter().cloned().collect::<Vec<_>>();
|
||||
let mut state_indices = state_indices.iter().copied().collect::<Vec<_>>();
|
||||
state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i));
|
||||
|
||||
for state_index in state_indices {
|
||||
let id = parse_table.states[state_index].id;
|
||||
let (preceding_symbols, item_set) = &parse_state_info[id];
|
||||
eprintln!("state index: {}", state_index);
|
||||
eprintln!("state id: {}", id);
|
||||
eprintln!("state index: {state_index}");
|
||||
eprintln!("state id: {id}");
|
||||
eprint!("symbol sequence:");
|
||||
for symbol in preceding_symbols {
|
||||
let name = if symbol.is_terminal() {
|
||||
|
|
@ -458,11 +456,11 @@ fn report_state_info<'a>(
|
|||
} else {
|
||||
&syntax_grammar.variables[symbol.index].name
|
||||
};
|
||||
eprint!(" {}", name);
|
||||
eprint!(" {name}");
|
||||
}
|
||||
eprintln!(
|
||||
"\nitems:\n{}",
|
||||
self::item::ParseItemSetDisplay(&item_set, syntax_grammar, lexical_grammar,),
|
||||
self::item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar,),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ struct TokenConflictStatus {
|
|||
matches_different_string: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct TokenConflictMap<'a> {
|
||||
pub struct TokenConflictMap<'a> {
|
||||
n: usize,
|
||||
status_matrix: Vec<TokenConflictStatus>,
|
||||
following_tokens: Vec<TokenSet>,
|
||||
|
|
@ -104,19 +104,17 @@ impl<'a> TokenConflictMap<'a> {
|
|||
}
|
||||
|
||||
pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool {
|
||||
if left.0 > right.0 {
|
||||
return true;
|
||||
} else if left.0 < right.0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
match grammar.variables[left.1]
|
||||
.implicit_precedence
|
||||
.cmp(&grammar.variables[right.1].implicit_precedence)
|
||||
{
|
||||
match left.0.cmp(&right.0) {
|
||||
Ordering::Less => false,
|
||||
Ordering::Greater => true,
|
||||
Ordering::Equal => left.1 < right.1,
|
||||
Ordering::Equal => match grammar.variables[left.1]
|
||||
.implicit_precedence
|
||||
.cmp(&grammar.variables[right.1].implicit_precedence)
|
||||
{
|
||||
Ordering::Less => false,
|
||||
Ordering::Greater => true,
|
||||
Ordering::Equal => left.1 < right.1,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -135,10 +133,9 @@ impl<'a> TokenConflictMap<'a> {
|
|||
return false;
|
||||
}
|
||||
if has_separator_transitions
|
||||
&& grammar
|
||||
&& !grammar
|
||||
.variable_indices_for_nfa_states(&t.states)
|
||||
.position(|i| i == completed_id)
|
||||
.is_none()
|
||||
.any(|i| i == completed_id)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
@ -149,53 +146,53 @@ impl<'a> TokenConflictMap<'a> {
|
|||
|
||||
impl<'a> fmt::Debug for TokenConflictMap<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "TokenConflictMap {{\n")?;
|
||||
writeln!(f, "TokenConflictMap {{")?;
|
||||
|
||||
let syntax_grammar = SyntaxGrammar::default();
|
||||
|
||||
write!(f, " following_tokens: {{\n")?;
|
||||
writeln!(f, " following_tokens: {{")?;
|
||||
for (i, following_tokens) in self.following_tokens.iter().enumerate() {
|
||||
write!(
|
||||
writeln!(
|
||||
f,
|
||||
" follow({:?}): {},\n",
|
||||
" follow({:?}): {},",
|
||||
self.grammar.variables[i].name,
|
||||
TokenSetDisplay(following_tokens, &syntax_grammar, &self.grammar)
|
||||
TokenSetDisplay(following_tokens, &syntax_grammar, self.grammar)
|
||||
)?;
|
||||
}
|
||||
write!(f, " }},\n")?;
|
||||
writeln!(f, " }},")?;
|
||||
|
||||
write!(f, " starting_characters: {{\n")?;
|
||||
writeln!(f, " starting_characters: {{")?;
|
||||
for i in 0..self.n {
|
||||
write!(
|
||||
writeln!(
|
||||
f,
|
||||
" {:?}: {:?},\n",
|
||||
" {:?}: {:?},",
|
||||
self.grammar.variables[i].name, self.starting_chars_by_index[i]
|
||||
)?;
|
||||
}
|
||||
write!(f, " }},\n")?;
|
||||
writeln!(f, " }},")?;
|
||||
|
||||
write!(f, " following_characters: {{\n")?;
|
||||
writeln!(f, " following_characters: {{")?;
|
||||
for i in 0..self.n {
|
||||
write!(
|
||||
writeln!(
|
||||
f,
|
||||
" {:?}: {:?},\n",
|
||||
" {:?}: {:?},",
|
||||
self.grammar.variables[i].name, self.following_chars_by_index[i]
|
||||
)?;
|
||||
}
|
||||
write!(f, " }},\n")?;
|
||||
writeln!(f, " }},")?;
|
||||
|
||||
write!(f, " status_matrix: {{\n")?;
|
||||
writeln!(f, " status_matrix: {{")?;
|
||||
for i in 0..self.n {
|
||||
write!(f, " {:?}: {{\n", self.grammar.variables[i].name)?;
|
||||
writeln!(f, " {:?}: {{", self.grammar.variables[i].name)?;
|
||||
for j in 0..self.n {
|
||||
write!(
|
||||
writeln!(
|
||||
f,
|
||||
" {:?}: {:?},\n",
|
||||
" {:?}: {:?},",
|
||||
self.grammar.variables[j].name,
|
||||
self.status_matrix[matrix_index(self.n, i, j)]
|
||||
)?;
|
||||
}
|
||||
write!(f, " }},\n")?;
|
||||
writeln!(f, " }},")?;
|
||||
}
|
||||
write!(f, " }},")?;
|
||||
write!(f, "}}")?;
|
||||
|
|
@ -203,7 +200,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
|
||||
const fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
|
||||
variable_count * i + j
|
||||
}
|
||||
|
||||
|
|
@ -221,8 +218,8 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<C
|
|||
}
|
||||
|
||||
fn get_following_chars(
|
||||
starting_chars: &Vec<CharacterSet>,
|
||||
following_tokens: &Vec<TokenSet>,
|
||||
starting_chars: &[CharacterSet],
|
||||
following_tokens: &[TokenSet],
|
||||
) -> Vec<CharacterSet> {
|
||||
following_tokens
|
||||
.iter()
|
||||
|
|
@ -241,7 +238,7 @@ fn get_following_chars(
|
|||
fn compute_conflict_status(
|
||||
cursor: &mut NfaCursor,
|
||||
grammar: &LexicalGrammar,
|
||||
following_chars: &Vec<CharacterSet>,
|
||||
following_chars: &[CharacterSet],
|
||||
i: usize,
|
||||
j: usize,
|
||||
) -> (TokenConflictStatus, TokenConflictStatus) {
|
||||
|
|
@ -330,9 +327,8 @@ fn compute_conflict_status(
|
|||
if variable_id == completed_id {
|
||||
successor_contains_completed_id = true;
|
||||
break;
|
||||
} else {
|
||||
advanced_id = Some(variable_id);
|
||||
}
|
||||
advanced_id = Some(variable_id);
|
||||
}
|
||||
|
||||
// Determine which action is preferred: matching the already complete
|
||||
|
|
@ -357,12 +353,10 @@ fn compute_conflict_status(
|
|||
result.1.does_match_valid_continuation = true;
|
||||
}
|
||||
}
|
||||
} else if completed_id == i {
|
||||
result.0.matches_prefix = true;
|
||||
} else {
|
||||
if completed_id == i {
|
||||
result.0.matches_prefix = true;
|
||||
} else {
|
||||
result.1.matches_prefix = true;
|
||||
}
|
||||
result.1.matches_prefix = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue