Move TokenSet to rules module

This commit is contained in:
Max Brunsfeld 2019-08-29 15:25:45 -07:00
parent c5fc9d7dcb
commit f371507d39
8 changed files with 169 additions and 168 deletions

View file

@ -1,10 +1,9 @@
use super::coincident_tokens::CoincidentTokenIndex;
use super::item::TokenSet;
use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::rules::Symbol;
use crate::generate::rules::{Symbol, TokenSet};
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
use log::info;
use std::collections::hash_map::Entry;

View file

@ -1,11 +1,11 @@
use super::item::{ParseItem, ParseItemSet, ParseItemSetCore, TokenSet};
use super::item::{ParseItem, ParseItemSet, ParseItemSetCore};
use super::item_set_builder::ParseItemSetBuilder;
use crate::error::{Error, Result};
use crate::generate::grammars::{
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
};
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{Associativity, Symbol, SymbolType};
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
use crate::generate::tables::{
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ProductionInfo, ProductionInfoId,

View file

@ -1,12 +1,9 @@
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::generate::rules::Associativity;
use crate::generate::rules::{Symbol, SymbolType};
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
use lazy_static::lazy_static;
use smallbitvec::SmallBitVec;
use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::iter::FromIterator;
use std::u32;
lazy_static! {
@ -25,17 +22,6 @@ lazy_static! {
};
}
// Because tokens are represented as small (~400 max) unsigned integers,
// sets of tokens can be efficiently represented as bit vectors with each
// index corresponding to a token, and each value representing whether or not
// the token is present in the set.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct TokenSet {
// Membership bits for terminal tokens, indexed by terminal index.
terminal_bits: SmallBitVec,
// Membership bits for external tokens, indexed by external token index.
external_bits: SmallBitVec,
// Whether the end-of-file symbol is in the set (it has no index).
eof: bool,
}
#[derive(Clone, Copy, Debug)]
pub(crate) struct ParseItem<'a> {
pub variable_index: u32,
@ -72,148 +58,6 @@ pub(crate) struct ParseItemSetDisplay<'a>(
pub &'a LexicalGrammar,
);
impl TokenSet {
// Creates an empty set: no terminals, no externals, no EOF.
pub fn new() -> Self {
Self {
terminal_bits: SmallBitVec::new(),
external_bits: SmallBitVec::new(),
eof: false,
}
}
// Iterates over every symbol in the set: terminals first, then
// externals, then the end-of-file symbol if present.
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
.chain(
self.external_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::external(i))
} else {
None
}
}),
)
.chain(if self.eof { Some(Symbol::end()) } else { None })
}
// Iterates over only the terminal symbols in the set.
pub fn terminals<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
}
// Returns whether the given symbol is in the set.
// Panics if given a non-terminal, which a TokenSet cannot store.
pub fn contains(&self, symbol: &Symbol) -> bool {
match symbol.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
// Indices beyond the bit vector's current length are simply absent.
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
SymbolType::End => self.eof,
}
}
// Returns whether the terminal with the given index is in the set.
pub fn contains_terminal(&self, index: usize) -> bool {
self.terminal_bits.get(index).unwrap_or(false)
}
// Adds a symbol to the set, growing the backing bit vector as needed.
// Panics if given a non-terminal.
pub fn insert(&mut self, other: Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
// EOF is tracked by a dedicated flag, not a bit index.
self.eof = true;
return;
}
};
if other.index >= vec.len() {
vec.resize(other.index + 1, false);
}
vec.set(other.index, true);
}
// Removes a symbol from the set. Panics if given a non-terminal.
pub fn remove(&mut self, other: &Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = false;
return;
}
};
// Out-of-range indices were never present; nothing to clear.
if other.index < vec.len() {
vec.set(other.index, false);
}
}
// Adds all of `other`'s terminals to this set.
// Returns true if at least one terminal was newly added.
pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.terminal_bits.len() > self.terminal_bits.len() {
self.terminal_bits.resize(other.terminal_bits.len(), false);
}
for (i, element) in other.terminal_bits.iter().enumerate() {
if element {
// Record whether this bit was previously unset.
result |= !self.terminal_bits[i];
self.terminal_bits.set(i, element);
}
}
result
}
// Adds all of `other`'s external tokens to this set.
// Returns true if at least one external token was newly added.
fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.external_bits.len() > self.external_bits.len() {
self.external_bits.resize(other.external_bits.len(), false);
}
for (i, element) in other.external_bits.iter().enumerate() {
if element {
result |= !self.external_bits[i];
self.external_bits.set(i, element);
}
}
result
}
// Adds every symbol of `other` (terminals, externals, and EOF) to this
// set. Returns true if any symbol was newly added.
pub fn insert_all(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.eof {
result |= !self.eof;
self.eof = true;
}
result |= self.insert_all_terminals(other);
result |= self.insert_all_externals(other);
result
}
}
impl FromIterator<Symbol> for TokenSet {
    // Collects an iterator of symbols into a fresh TokenSet by folding
    // each symbol into an initially-empty set.
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        iter.into_iter().fold(Self::new(), |mut set, symbol| {
            set.insert(symbol);
            set
        })
    }
}
impl<'a> ParseItem<'a> {
pub fn start() -> Self {
ParseItem {

View file

@ -1,6 +1,6 @@
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet, TokenSetDisplay};
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Symbol, SymbolType};
use crate::generate::rules::{Symbol, SymbolType, TokenSet};
use std::collections::{HashMap, HashSet};
use std::fmt;

View file

@ -1,8 +1,7 @@
use super::item::TokenSet;
use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use crate::generate::rules::{AliasMap, Symbol};
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry};
use log::info;
use std::collections::{HashMap, HashSet};

View file

@ -9,14 +9,13 @@ mod token_conflicts;
use self::build_lex_table::build_lex_table;
use self::build_parse_table::build_parse_table;
use self::coincident_tokens::CoincidentTokenIndex;
use self::item::TokenSet;
use self::minimize_parse_table::minimize_parse_table;
use self::token_conflicts::TokenConflictMap;
use crate::error::Result;
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
use log::info;

View file

@ -1,6 +1,7 @@
use crate::generate::build_tables::item::{TokenSet, TokenSetDisplay};
use crate::generate::build_tables::item::{TokenSetDisplay};
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::generate::rules::TokenSet;
use std::collections::HashSet;
use std::cmp::Ordering;
use std::fmt;

View file

@ -1,4 +1,6 @@
use smallbitvec::SmallBitVec;
use std::collections::HashMap;
use std::iter::FromIterator;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum SymbolType {
@ -57,6 +59,17 @@ pub(crate) enum Rule {
Seq(Vec<Rule>),
}
// Because tokens are represented as small (~400 max) unsigned integers,
// sets of tokens can be efficiently represented as bit vectors with each
// index corresponding to a token, and each value representing whether or not
// the token is present in the set.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct TokenSet {
// Membership bits for terminal tokens, indexed by terminal index.
terminal_bits: SmallBitVec,
// Membership bits for external tokens, indexed by external token index.
external_bits: SmallBitVec,
// Whether the end-of-file symbol is in the set (it has no index).
eof: bool,
}
impl Rule {
pub fn field(name: String, content: Rule) -> Self {
add_metadata(content, move |params| {
@ -205,6 +218,152 @@ impl From<Symbol> for Rule {
}
}
impl TokenSet {
// Creates an empty set: no terminals, no externals, no EOF.
pub fn new() -> Self {
Self {
terminal_bits: SmallBitVec::new(),
external_bits: SmallBitVec::new(),
eof: false,
}
}
// Iterates over every symbol in the set: terminals first, then
// externals, then the end-of-file symbol if present.
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
.chain(
self.external_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::external(i))
} else {
None
}
}),
)
.chain(if self.eof { Some(Symbol::end()) } else { None })
}
// Iterates over only the terminal symbols in the set.
pub fn terminals<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
}
// Returns whether the given symbol is in the set.
// Panics if given a non-terminal, which a TokenSet cannot store.
pub fn contains(&self, symbol: &Symbol) -> bool {
match symbol.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
// Indices beyond the bit vector's current length are simply absent.
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
SymbolType::End => self.eof,
}
}
// Returns whether the terminal with the given index is in the set.
pub fn contains_terminal(&self, index: usize) -> bool {
self.terminal_bits.get(index).unwrap_or(false)
}
// Adds a symbol to the set, growing the backing bit vector as needed.
// Panics if given a non-terminal.
pub fn insert(&mut self, other: Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
// EOF is tracked by a dedicated flag, not a bit index.
self.eof = true;
return;
}
};
if other.index >= vec.len() {
vec.resize(other.index + 1, false);
}
vec.set(other.index, true);
}
// Removes a symbol from the set. Panics if given a non-terminal.
pub fn remove(&mut self, other: &Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = false;
return;
}
};
// Out-of-range indices were never present; nothing to clear.
if other.index < vec.len() {
vec.set(other.index, false);
}
}
// Returns true if the set contains no symbols at all: no EOF and no
// set bit in either bit vector.
pub fn is_empty(&self) -> bool {
!self.eof && !self.terminal_bits.iter().any(|a| a) && !self.external_bits.iter().any(|a| a)
}
// Adds all of `other`'s terminals to this set.
// Returns true if at least one terminal was newly added.
pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.terminal_bits.len() > self.terminal_bits.len() {
self.terminal_bits.resize(other.terminal_bits.len(), false);
}
for (i, element) in other.terminal_bits.iter().enumerate() {
if element {
// Record whether this bit was previously unset.
result |= !self.terminal_bits[i];
self.terminal_bits.set(i, element);
}
}
result
}
// Adds all of `other`'s external tokens to this set.
// Returns true if at least one external token was newly added.
fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.external_bits.len() > self.external_bits.len() {
self.external_bits.resize(other.external_bits.len(), false);
}
for (i, element) in other.external_bits.iter().enumerate() {
if element {
result |= !self.external_bits[i];
self.external_bits.set(i, element);
}
}
result
}
// Adds every symbol of `other` (terminals, externals, and EOF) to this
// set. Returns true if any symbol was newly added.
pub fn insert_all(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.eof {
result |= !self.eof;
self.eof = true;
}
result |= self.insert_all_terminals(other);
result |= self.insert_all_externals(other);
result
}
}
impl FromIterator<Symbol> for TokenSet {
    // Builds a TokenSet containing every symbol yielded by the iterator.
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        let mut set = Self::new();
        iter.into_iter().for_each(|symbol| set.insert(symbol));
        set
    }
}
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
match input {
Rule::Metadata { rule, mut params } => {