use super::grammars::VariableType; use smallbitvec::SmallBitVec; use std::iter::FromIterator; use std::{collections::HashMap, fmt}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum SymbolType { External, End, EndOfNonTerminalExtra, Terminal, NonTerminal, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum Associativity { Left, Right, } #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct Alias { pub value: String, pub is_named: bool, } #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum Precedence { None, Integer(i32), Name(String), } pub(crate) type AliasMap = HashMap; #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] pub(crate) struct MetadataParams { pub precedence: Precedence, pub dynamic_precedence: i32, pub associativity: Option, pub is_token: bool, pub is_string: bool, pub is_active: bool, pub is_main_token: bool, pub alias: Option, pub field_name: Option, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct Symbol { pub kind: SymbolType, pub index: usize, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub(crate) enum Rule { Blank, String(String), Pattern(String), NamedSymbol(String), Symbol(Symbol), Choice(Vec), Metadata { params: MetadataParams, rule: Box, }, Repeat(Box), Seq(Vec), } // Because tokens are represented as small (~400 max) unsigned integers, // sets of tokens can be efficiently represented as bit vectors with each // index correspoding to a token, and each value representing whether or not // the token is present in the set. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub(crate) struct TokenSet { terminal_bits: SmallBitVec, external_bits: SmallBitVec, eof: bool, end_of_nonterminal_extra: bool, } impl Rule { pub fn field(name: String, content: Rule) -> Self { add_metadata(content, move |params| { params.field_name = Some(name); }) } pub fn alias(content: Rule, value: String, is_named: bool) -> Self { add_metadata(content, move |params| { params.alias = Some(Alias { is_named, value }); }) } pub fn token(content: Rule) -> Self { add_metadata(content, |params| { params.is_token = true; }) } pub fn immediate_token(content: Rule) -> Self { add_metadata(content, |params| { params.is_token = true; params.is_main_token = true; }) } pub fn prec(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { params.precedence = value; }) } pub fn prec_left(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Left); params.precedence = value; }) } pub fn prec_right(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Right); params.precedence = value; }) } pub fn prec_dynamic(value: i32, content: Rule) -> Self { add_metadata(content, |params| { params.dynamic_precedence = value; }) } pub fn repeat(rule: Rule) -> Self { Rule::Repeat(Box::new(rule)) } pub fn choice(rules: Vec) -> Self { let mut elements = Vec::with_capacity(rules.len()); for rule in rules { choice_helper(&mut elements, rule); } Rule::Choice(elements) } pub fn seq(rules: Vec) -> Self { Rule::Seq(rules) } } impl Alias { pub fn kind(&self) -> VariableType { if self.is_named { VariableType::Named } else { VariableType::Anonymous } } } impl Precedence { pub fn is_none(&self) -> bool { matches!(self, Precedence::None) } } #[cfg(test)] impl Rule { pub fn terminal(index: usize) -> Self { Rule::Symbol(Symbol::terminal(index)) } pub fn non_terminal(index: usize) -> Self { Rule::Symbol(Symbol::non_terminal(index)) } pub fn external(index: usize) -> Self { Rule::Symbol(Symbol::external(index)) } pub fn named(name: &'static str) -> Self { Rule::NamedSymbol(name.to_string()) } pub fn string(value: &'static str) -> Self { Rule::String(value.to_string()) } pub fn pattern(value: &'static str) -> Self { Rule::Pattern(value.to_string()) } } impl Symbol { pub fn is_terminal(&self) -> bool { self.kind == SymbolType::Terminal } pub fn is_non_terminal(&self) -> bool { self.kind == SymbolType::NonTerminal } pub fn is_external(&self) -> bool { self.kind == SymbolType::External } pub fn is_eof(&self) -> bool { self.kind == SymbolType::End } pub fn non_terminal(index: usize) -> Self { Symbol { kind: SymbolType::NonTerminal, index, } } pub fn terminal(index: usize) -> Self { Symbol { kind: SymbolType::Terminal, index, } } pub fn external(index: usize) -> Self { Symbol { kind: SymbolType::External, index, } } pub fn end() -> Self { Symbol { kind: SymbolType::End, index: 0, } } pub fn end_of_nonterminal_extra() -> Self { Symbol { kind: SymbolType::EndOfNonTerminalExtra, index: 0, } } } impl From for Rule { fn from(symbol: Symbol) -> Self { Rule::Symbol(symbol) } } impl TokenSet { pub fn new() -> Self { Self { terminal_bits: SmallBitVec::new(), external_bits: SmallBitVec::new(), eof: false, end_of_nonterminal_extra: false, } } pub fn iter<'a>(&'a self) -> impl Iterator + 'a { self.terminal_bits .iter() .enumerate() .filter_map(|(i, value)| { if value { Some(Symbol::terminal(i)) } else { None } }) .chain( self.external_bits .iter() .enumerate() .filter_map(|(i, value)| { if value { Some(Symbol::external(i)) } else { None } }), ) .chain(if self.eof { Some(Symbol::end()) } else { None }) .chain(if self.end_of_nonterminal_extra { Some(Symbol::end_of_nonterminal_extra()) } else { None }) } pub fn terminals<'a>(&'a self) -> impl Iterator + 'a { self.terminal_bits .iter() .enumerate() .filter_map(|(i, value)| { if value { Some(Symbol::terminal(i)) } else { None } }) } pub fn contains(&self, symbol: &Symbol) -> bool { match symbol.kind { SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false), SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false), SymbolType::End => self.eof, SymbolType::EndOfNonTerminalExtra => self.end_of_nonterminal_extra, } } pub fn contains_terminal(&self, index: usize) -> bool { self.terminal_bits.get(index).unwrap_or(false) } pub fn insert(&mut self, other: Symbol) { let vec = match other.kind { SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), SymbolType::Terminal => &mut self.terminal_bits, SymbolType::External => &mut self.external_bits, SymbolType::End => { self.eof = true; return; } SymbolType::EndOfNonTerminalExtra => { self.end_of_nonterminal_extra = true; return; } }; if other.index >= vec.len() { vec.resize(other.index + 1, false); } vec.set(other.index, true); } pub fn remove(&mut self, other: &Symbol) { let vec = match other.kind { SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), SymbolType::Terminal => &mut self.terminal_bits, SymbolType::External => &mut self.external_bits, SymbolType::End => { self.eof = false; return; } SymbolType::EndOfNonTerminalExtra => { self.end_of_nonterminal_extra = false; return; } }; if other.index < vec.len() { vec.set(other.index, false); } } pub fn is_empty(&self) -> bool { !self.eof && !self.end_of_nonterminal_extra && !self.terminal_bits.iter().any(|a| a) && !self.external_bits.iter().any(|a| a) } pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool { let mut result = false; if other.terminal_bits.len() > self.terminal_bits.len() { self.terminal_bits.resize(other.terminal_bits.len(), false); } for (i, element) in other.terminal_bits.iter().enumerate() { if element { result |= !self.terminal_bits[i]; self.terminal_bits.set(i, element); } } result } fn insert_all_externals(&mut self, other: &TokenSet) -> bool { let mut result = false; if other.external_bits.len() > self.external_bits.len() { self.external_bits.resize(other.external_bits.len(), false); } for (i, element) in other.external_bits.iter().enumerate() { if element { result |= !self.external_bits[i]; self.external_bits.set(i, element); } } result } pub fn insert_all(&mut self, other: &TokenSet) -> bool { let mut result = false; if other.eof { result |= !self.eof; self.eof = true; } if other.end_of_nonterminal_extra { result |= !self.end_of_nonterminal_extra; self.end_of_nonterminal_extra = true; } result |= self.insert_all_terminals(other); result |= self.insert_all_externals(other); result } } impl FromIterator for TokenSet { fn from_iter>(iter: T) -> Self { let mut result = Self::new(); for symbol in iter { result.insert(symbol); } result } } fn add_metadata(input: Rule, f: T) -> Rule { match input { Rule::Metadata { rule, mut params } if !params.is_token => { f(&mut params); Rule::Metadata { rule, params } } _ => { let mut params = MetadataParams::default(); f(&mut params); Rule::Metadata { rule: Box::new(input), params, } } } } fn choice_helper(result: &mut Vec, rule: Rule) { match rule { Rule::Choice(elements) => { for element in elements { choice_helper(result, element); } } _ => { if !result.contains(&rule) { result.push(rule); } } } } impl fmt::Display for Precedence { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Precedence::Integer(i) => write!(f, "{}", i), Precedence::Name(s) => write!(f, "'{}'", s), Precedence::None => write!(f, "none"), } } } impl Default for Precedence { fn default() -> Self { Precedence::None } }