Move TokenSet to rules module
This commit is contained in:
parent
c5fc9d7dcb
commit
f371507d39
8 changed files with 169 additions and 168 deletions
|
|
@ -1,10 +1,9 @@
|
|||
use super::coincident_tokens::CoincidentTokenIndex;
|
||||
use super::item::TokenSet;
|
||||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::generate::dedup::split_state_id_groups;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::rules::Symbol;
|
||||
use crate::generate::rules::{Symbol, TokenSet};
|
||||
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
|
||||
use log::info;
|
||||
use std::collections::hash_map::Entry;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
use super::item::{ParseItem, ParseItemSet, ParseItemSetCore, TokenSet};
|
||||
use super::item::{ParseItem, ParseItemSet, ParseItemSetCore};
|
||||
use super::item_set_builder::ParseItemSetBuilder;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::generate::grammars::{
|
||||
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
|
||||
};
|
||||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType};
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
|
||||
use crate::generate::tables::{
|
||||
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
ProductionInfo, ProductionInfoId,
|
||||
|
|
|
|||
|
|
@ -1,12 +1,9 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
|
||||
use crate::generate::rules::Associativity;
|
||||
use crate::generate::rules::{Symbol, SymbolType};
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
|
||||
use lazy_static::lazy_static;
|
||||
use smallbitvec::SmallBitVec;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::iter::FromIterator;
|
||||
use std::u32;
|
||||
|
||||
lazy_static! {
|
||||
|
|
@ -25,17 +22,6 @@ lazy_static! {
|
|||
};
|
||||
}
|
||||
|
||||
// Because tokens are represented as small (~400 max) unsigned integers,
|
||||
// sets of tokens can be efficiently represented as bit vectors with each
|
||||
// index correspoding to a token, and each value representing whether or not
|
||||
// the token is present in the set.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub(crate) struct TokenSet {
|
||||
terminal_bits: SmallBitVec,
|
||||
external_bits: SmallBitVec,
|
||||
eof: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct ParseItem<'a> {
|
||||
pub variable_index: u32,
|
||||
|
|
@ -72,148 +58,6 @@ pub(crate) struct ParseItemSetDisplay<'a>(
|
|||
pub &'a LexicalGrammar,
|
||||
);
|
||||
|
||||
impl TokenSet {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
terminal_bits: SmallBitVec::new(),
|
||||
external_bits: SmallBitVec::new(),
|
||||
eof: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
|
||||
self.terminal_bits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, value)| {
|
||||
if value {
|
||||
Some(Symbol::terminal(i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.chain(
|
||||
self.external_bits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, value)| {
|
||||
if value {
|
||||
Some(Symbol::external(i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}),
|
||||
)
|
||||
.chain(if self.eof { Some(Symbol::end()) } else { None })
|
||||
}
|
||||
|
||||
pub fn terminals<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
|
||||
self.terminal_bits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, value)| {
|
||||
if value {
|
||||
Some(Symbol::terminal(i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn contains(&self, symbol: &Symbol) -> bool {
|
||||
match symbol.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
|
||||
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
|
||||
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
|
||||
SymbolType::End => self.eof,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains_terminal(&self, index: usize) -> bool {
|
||||
self.terminal_bits.get(index).unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, other: Symbol) {
|
||||
let vec = match other.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
|
||||
SymbolType::Terminal => &mut self.terminal_bits,
|
||||
SymbolType::External => &mut self.external_bits,
|
||||
SymbolType::End => {
|
||||
self.eof = true;
|
||||
return;
|
||||
}
|
||||
};
|
||||
if other.index >= vec.len() {
|
||||
vec.resize(other.index + 1, false);
|
||||
}
|
||||
vec.set(other.index, true);
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, other: &Symbol) {
|
||||
let vec = match other.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
|
||||
SymbolType::Terminal => &mut self.terminal_bits,
|
||||
SymbolType::External => &mut self.external_bits,
|
||||
SymbolType::End => {
|
||||
self.eof = false;
|
||||
return;
|
||||
}
|
||||
};
|
||||
if other.index < vec.len() {
|
||||
vec.set(other.index, false);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
|
||||
let mut result = false;
|
||||
if other.terminal_bits.len() > self.terminal_bits.len() {
|
||||
self.terminal_bits.resize(other.terminal_bits.len(), false);
|
||||
}
|
||||
for (i, element) in other.terminal_bits.iter().enumerate() {
|
||||
if element {
|
||||
result |= !self.terminal_bits[i];
|
||||
self.terminal_bits.set(i, element);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
|
||||
let mut result = false;
|
||||
if other.external_bits.len() > self.external_bits.len() {
|
||||
self.external_bits.resize(other.external_bits.len(), false);
|
||||
}
|
||||
for (i, element) in other.external_bits.iter().enumerate() {
|
||||
if element {
|
||||
result |= !self.external_bits[i];
|
||||
self.external_bits.set(i, element);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn insert_all(&mut self, other: &TokenSet) -> bool {
|
||||
let mut result = false;
|
||||
if other.eof {
|
||||
result |= !self.eof;
|
||||
self.eof = true;
|
||||
}
|
||||
result |= self.insert_all_terminals(other);
|
||||
result |= self.insert_all_externals(other);
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a `TokenSet` by inserting each yielded symbol in turn.
impl FromIterator<Symbol> for TokenSet {
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        iter.into_iter().fold(Self::new(), |mut set, symbol| {
            set.insert(symbol);
            set
        })
    }
}
|
||||
|
||||
impl<'a> ParseItem<'a> {
|
||||
pub fn start() -> Self {
|
||||
ParseItem {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet, TokenSetDisplay};
|
||||
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Symbol, SymbolType};
|
||||
use crate::generate::rules::{Symbol, SymbolType, TokenSet};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
use super::item::TokenSet;
|
||||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::generate::dedup::split_state_id_groups;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::generate::rules::{AliasMap, Symbol};
|
||||
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
|
||||
use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry};
|
||||
use log::info;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
|
|
|||
|
|
@ -9,14 +9,13 @@ mod token_conflicts;
|
|||
use self::build_lex_table::build_lex_table;
|
||||
use self::build_parse_table::build_parse_table;
|
||||
use self::coincident_tokens::CoincidentTokenIndex;
|
||||
use self::item::TokenSet;
|
||||
use self::minimize_parse_table::minimize_parse_table;
|
||||
use self::token_conflicts::TokenConflictMap;
|
||||
use crate::error::Result;
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
|
||||
use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet};
|
||||
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use log::info;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use crate::generate::build_tables::item::{TokenSet, TokenSetDisplay};
|
||||
use crate::generate::build_tables::item::{TokenSetDisplay};
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
|
||||
use crate::generate::rules::TokenSet;
|
||||
use std::collections::HashSet;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
use smallbitvec::SmallBitVec;
|
||||
use std::collections::HashMap;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub(crate) enum SymbolType {
|
||||
|
|
@ -57,6 +59,17 @@ pub(crate) enum Rule {
|
|||
Seq(Vec<Rule>),
|
||||
}
|
||||
|
||||
// Because tokens are represented as small (~400 max) unsigned integers,
|
||||
// sets of tokens can be efficiently represented as bit vectors with each
|
||||
// index correspoding to a token, and each value representing whether or not
|
||||
// the token is present in the set.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub(crate) struct TokenSet {
|
||||
terminal_bits: SmallBitVec,
|
||||
external_bits: SmallBitVec,
|
||||
eof: bool,
|
||||
}
|
||||
|
||||
impl Rule {
|
||||
pub fn field(name: String, content: Rule) -> Self {
|
||||
add_metadata(content, move |params| {
|
||||
|
|
@ -205,6 +218,152 @@ impl From<Symbol> for Rule {
|
|||
}
|
||||
}
|
||||
|
||||
impl TokenSet {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
terminal_bits: SmallBitVec::new(),
|
||||
external_bits: SmallBitVec::new(),
|
||||
eof: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
|
||||
self.terminal_bits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, value)| {
|
||||
if value {
|
||||
Some(Symbol::terminal(i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.chain(
|
||||
self.external_bits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, value)| {
|
||||
if value {
|
||||
Some(Symbol::external(i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}),
|
||||
)
|
||||
.chain(if self.eof { Some(Symbol::end()) } else { None })
|
||||
}
|
||||
|
||||
pub fn terminals<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
|
||||
self.terminal_bits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, value)| {
|
||||
if value {
|
||||
Some(Symbol::terminal(i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn contains(&self, symbol: &Symbol) -> bool {
|
||||
match symbol.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
|
||||
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
|
||||
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
|
||||
SymbolType::End => self.eof,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains_terminal(&self, index: usize) -> bool {
|
||||
self.terminal_bits.get(index).unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, other: Symbol) {
|
||||
let vec = match other.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
|
||||
SymbolType::Terminal => &mut self.terminal_bits,
|
||||
SymbolType::External => &mut self.external_bits,
|
||||
SymbolType::End => {
|
||||
self.eof = true;
|
||||
return;
|
||||
}
|
||||
};
|
||||
if other.index >= vec.len() {
|
||||
vec.resize(other.index + 1, false);
|
||||
}
|
||||
vec.set(other.index, true);
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, other: &Symbol) {
|
||||
let vec = match other.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
|
||||
SymbolType::Terminal => &mut self.terminal_bits,
|
||||
SymbolType::External => &mut self.external_bits,
|
||||
SymbolType::End => {
|
||||
self.eof = false;
|
||||
return;
|
||||
}
|
||||
};
|
||||
if other.index < vec.len() {
|
||||
vec.set(other.index, false);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
!self.eof && !self.terminal_bits.iter().any(|a| a) && !self.external_bits.iter().any(|a| a)
|
||||
}
|
||||
|
||||
pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
|
||||
let mut result = false;
|
||||
if other.terminal_bits.len() > self.terminal_bits.len() {
|
||||
self.terminal_bits.resize(other.terminal_bits.len(), false);
|
||||
}
|
||||
for (i, element) in other.terminal_bits.iter().enumerate() {
|
||||
if element {
|
||||
result |= !self.terminal_bits[i];
|
||||
self.terminal_bits.set(i, element);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
|
||||
let mut result = false;
|
||||
if other.external_bits.len() > self.external_bits.len() {
|
||||
self.external_bits.resize(other.external_bits.len(), false);
|
||||
}
|
||||
for (i, element) in other.external_bits.iter().enumerate() {
|
||||
if element {
|
||||
result |= !self.external_bits[i];
|
||||
self.external_bits.set(i, element);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn insert_all(&mut self, other: &TokenSet) -> bool {
|
||||
let mut result = false;
|
||||
if other.eof {
|
||||
result |= !self.eof;
|
||||
self.eof = true;
|
||||
}
|
||||
result |= self.insert_all_terminals(other);
|
||||
result |= self.insert_all_externals(other);
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a `TokenSet` by inserting each yielded symbol in turn.
impl FromIterator<Symbol> for TokenSet {
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        iter.into_iter().fold(Self::new(), |mut set, symbol| {
            set.insert(symbol);
            set
        })
    }
}
|
||||
|
||||
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
|
||||
match input {
|
||||
Rule::Metadata { rule, mut params } => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue