Implement parse state merging
This commit is contained in:
parent
c6b9e97c58
commit
a46b8fcb46
9 changed files with 364 additions and 40 deletions
|
|
@ -7,7 +7,8 @@ use crate::tables::{
|
|||
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::hash::Hasher;
|
||||
use std::collections::hash_map::{Entry, DefaultHasher};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::fmt::Write;
|
||||
|
||||
|
|
@ -44,14 +45,13 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
self.parse_table.alias_sequences.push(Vec::new());
|
||||
|
||||
// Ensure that the error state has index 0.
|
||||
let error_state_id =
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
|
||||
self.add_parse_state(
|
||||
&Vec::new(),
|
||||
&Vec::new(),
|
||||
ParseItemSet::with(
|
||||
[(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]
|
||||
[(ParseItem::start(), LookaheadSet::with([Symbol::end()].iter().cloned()))]
|
||||
.iter()
|
||||
.cloned(),
|
||||
),
|
||||
|
|
@ -78,6 +78,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
let mut hasher = DefaultHasher::new();
|
||||
item_set.hash_unfinished_items(&mut hasher);
|
||||
let unfinished_item_signature = hasher.finish();
|
||||
|
||||
match self.state_ids_by_item_set.entry(item_set) {
|
||||
Entry::Occupied(o) => *o.get(),
|
||||
Entry::Vacant(v) => {
|
||||
|
|
@ -87,6 +91,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
lex_state_id: 0,
|
||||
terminal_entries: HashMap::new(),
|
||||
nonterminal_entries: HashMap::new(),
|
||||
unfinished_item_signature,
|
||||
});
|
||||
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
||||
state_id,
|
||||
|
|
|
|||
36
src/build_tables/coincident_tokens.rs
Normal file
36
src/build_tables/coincident_tokens.rs
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
use crate::rules::Symbol;
|
||||
use crate::tables::{ParseStateId, ParseTable};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
pub(crate) struct CoincidentTokenIndex {
|
||||
entries: HashMap<(Symbol, Symbol), HashSet<ParseStateId>>,
|
||||
empty: HashSet<ParseStateId>,
|
||||
}
|
||||
|
||||
impl CoincidentTokenIndex {
|
||||
pub fn new(table: &ParseTable) -> Self {
|
||||
let mut entries = HashMap::new();
|
||||
for (i, state) in table.states.iter().enumerate() {
|
||||
for symbol in state.terminal_entries.keys() {
|
||||
for other_symbol in state.terminal_entries.keys() {
|
||||
entries
|
||||
.entry((*symbol, *other_symbol))
|
||||
.or_insert(HashSet::new())
|
||||
.insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
Self {
|
||||
entries,
|
||||
empty: HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn states_with(&self, a: Symbol, b: Symbol) -> &HashSet<ParseStateId> {
|
||||
self.entries.get(&(a, b)).unwrap_or(&self.empty)
|
||||
}
|
||||
|
||||
pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
|
||||
self.entries.contains_key(&(a, b))
|
||||
}
|
||||
}
|
||||
|
|
@ -2,11 +2,11 @@ use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}
|
|||
use crate::rules::Associativity;
|
||||
use crate::rules::{Symbol, SymbolType};
|
||||
use smallbitvec::SmallBitVec;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::u32;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
lazy_static! {
|
||||
static ref START_PRODUCTION: Production = Production {
|
||||
|
|
@ -85,10 +85,10 @@ impl LookaheadSet {
|
|||
.chain(if self.eof { Some(Symbol::end()) } else { None })
|
||||
}
|
||||
|
||||
pub fn with<'a>(symbols: impl IntoIterator<Item = &'a Symbol>) -> Self {
|
||||
pub fn with(symbols: impl IntoIterator<Item = Symbol>) -> Self {
|
||||
let mut result = Self::new();
|
||||
for symbol in symbols {
|
||||
result.insert(*symbol);
|
||||
result.insert(symbol);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
|
@ -219,6 +219,21 @@ impl<'a> ParseItemSet<'a> {
|
|||
result
|
||||
}
|
||||
|
||||
pub fn hash_unfinished_items(&self, h: &mut impl Hasher) {
|
||||
let mut previous_variable_index = u32::MAX;
|
||||
let mut previous_step_index = u32::MAX;
|
||||
for item in self.entries.keys() {
|
||||
if item.step().is_none() && item.variable_index != previous_variable_index
|
||||
|| item.step_index != previous_step_index
|
||||
{
|
||||
h.write_u32(item.variable_index);
|
||||
h.write_u32(item.step_index);
|
||||
previous_variable_index = item.variable_index;
|
||||
previous_step_index = item.step_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn display_with(
|
||||
&'a self,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
|
|
@ -369,11 +384,18 @@ impl<'a> Ord for ParseItem<'a> {
|
|||
if o != Ordering::Equal {
|
||||
return o;
|
||||
}
|
||||
let o = self.production.dynamic_precedence.cmp(&other.production.dynamic_precedence);
|
||||
let o = self
|
||||
.production
|
||||
.dynamic_precedence
|
||||
.cmp(&other.production.dynamic_precedence);
|
||||
if o != Ordering::Equal {
|
||||
return o;
|
||||
}
|
||||
let o = self.production.steps.len().cmp(&other.production.steps.len());
|
||||
let o = self
|
||||
.production
|
||||
.steps
|
||||
.len()
|
||||
.cmp(&other.production.steps.len());
|
||||
if o != Ordering::Equal {
|
||||
return o;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,20 @@
|
|||
use crate::error::Result;
|
||||
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::rules::{AliasMap, Symbol};
|
||||
use crate::tables::{LexTable, ParseTable};
|
||||
|
||||
mod build_parse_table;
|
||||
mod coincident_tokens;
|
||||
mod item;
|
||||
mod item_set_builder;
|
||||
mod lex_table_builder;
|
||||
mod shrink_parse_table;
|
||||
mod token_conflict_map;
|
||||
mod token_conflicts;
|
||||
|
||||
use self::build_parse_table::build_parse_table;
|
||||
use self::coincident_tokens::CoincidentTokenIndex;
|
||||
use self::item::LookaheadSet;
|
||||
use self::shrink_parse_table::shrink_parse_table;
|
||||
use self::token_conflict_map::TokenConflictMap;
|
||||
use self::token_conflicts::TokenConflictMap;
|
||||
use crate::error::Result;
|
||||
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::rules::{AliasMap, Symbol};
|
||||
use crate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
|
||||
pub(crate) fn build_tables(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
|
|
@ -23,6 +25,76 @@ pub(crate) fn build_tables(
|
|||
let (mut parse_table, following_tokens) =
|
||||
build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
|
||||
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
|
||||
shrink_parse_table(&mut parse_table, syntax_grammar, simple_aliases);
|
||||
let coincident_token_index = CoincidentTokenIndex::new(&parse_table);
|
||||
populate_error_state(
|
||||
&mut parse_table,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
&coincident_token_index,
|
||||
&token_conflict_map,
|
||||
);
|
||||
shrink_parse_table(
|
||||
&mut parse_table,
|
||||
syntax_grammar,
|
||||
simple_aliases,
|
||||
&token_conflict_map,
|
||||
);
|
||||
Ok((parse_table, LexTable::default(), LexTable::default(), None))
|
||||
}
|
||||
|
||||
fn populate_error_state(
|
||||
parse_table: &mut ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
coincident_token_index: &CoincidentTokenIndex,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
) {
|
||||
let state = &mut parse_table.states[0];
|
||||
let n = lexical_grammar.variables.len();
|
||||
let conflict_free_tokens = LookaheadSet::with((0..n).into_iter().filter_map(|i| {
|
||||
let conflicts_with_other_tokens = (0..n).into_iter().all(|j| {
|
||||
j == i
|
||||
|| coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
|
||||
|| !token_conflict_map.does_conflict(i, j)
|
||||
});
|
||||
if conflicts_with_other_tokens {
|
||||
None
|
||||
} else {
|
||||
Some(Symbol::terminal(i))
|
||||
}
|
||||
}));
|
||||
|
||||
let recover_entry = ParseTableEntry {
|
||||
reusable: false,
|
||||
actions: vec![ParseAction::Recover],
|
||||
};
|
||||
|
||||
for i in 0..n {
|
||||
let symbol = Symbol::terminal(i);
|
||||
let can_be_used_for_recovery = conflict_free_tokens.contains(&symbol)
|
||||
|| conflict_free_tokens.iter().all(|t| {
|
||||
coincident_token_index.contains(symbol, t)
|
||||
|| !token_conflict_map.does_conflict(i, t.index)
|
||||
});
|
||||
if can_be_used_for_recovery {
|
||||
eprintln!("include {}", &lexical_grammar.variables[symbol.index].name);
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(symbol)
|
||||
.or_insert_with(|| recover_entry.clone());
|
||||
} else {
|
||||
eprintln!("exclude {}", &lexical_grammar.variables[symbol.index].name);
|
||||
}
|
||||
}
|
||||
|
||||
for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() {
|
||||
if external_token.corresponding_internal_token.is_none() {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(Symbol::external(i))
|
||||
.or_insert_with(|| recover_entry.clone());
|
||||
}
|
||||
}
|
||||
|
||||
state.terminal_entries.insert(Symbol::end(), recover_entry);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,17 @@
|
|||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::grammars::{SyntaxGrammar, VariableType};
|
||||
use crate::rules::AliasMap;
|
||||
use crate::tables::{ParseAction, ParseTable};
|
||||
use crate::rules::{AliasMap, Symbol};
|
||||
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
pub(crate) fn shrink_parse_table(
|
||||
parse_table: &mut ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
) {
|
||||
remove_unit_reductions(parse_table, syntax_grammar, simple_aliases);
|
||||
merge_compatible_states(parse_table, syntax_grammar, token_conflict_map);
|
||||
remove_unused_states(parse_table);
|
||||
}
|
||||
|
||||
|
|
@ -86,6 +89,157 @@ fn remove_unit_reductions(
|
|||
}
|
||||
}
|
||||
|
||||
fn merge_compatible_states(
|
||||
parse_table: &mut ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
) {
|
||||
let mut state_ids_by_signature = HashMap::new();
|
||||
for (i, state) in parse_table.states.iter().enumerate() {
|
||||
state_ids_by_signature
|
||||
.entry(state.unfinished_item_signature)
|
||||
.or_insert(Vec::new())
|
||||
.push(i);
|
||||
}
|
||||
|
||||
let mut deleted_states = HashSet::new();
|
||||
loop {
|
||||
let mut state_replacements = HashMap::new();
|
||||
for (_, state_ids) in &state_ids_by_signature {
|
||||
for i in state_ids {
|
||||
for j in state_ids {
|
||||
if j == i {
|
||||
break;
|
||||
}
|
||||
if deleted_states.contains(j) || deleted_states.contains(i) {
|
||||
continue;
|
||||
}
|
||||
if merge_parse_state(syntax_grammar, token_conflict_map, parse_table, *j, *i) {
|
||||
deleted_states.insert(*i);
|
||||
state_replacements.insert(*i, *j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if state_replacements.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
for state in parse_table.states.iter_mut() {
|
||||
state.update_referenced_states(|other_state_id, _| {
|
||||
*state_replacements
|
||||
.get(&other_state_id)
|
||||
.unwrap_or(&other_state_id)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_parse_state(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
parse_table: &mut ParseTable,
|
||||
left: usize,
|
||||
right: usize,
|
||||
) -> bool {
|
||||
let left_state = &parse_table.states[left];
|
||||
let right_state = &parse_table.states[right];
|
||||
|
||||
if left_state.nonterminal_entries != right_state.nonterminal_entries {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (symbol, left_entry) in &left_state.terminal_entries {
|
||||
if let Some(right_entry) = right_state.terminal_entries.get(symbol) {
|
||||
if right_entry.actions != left_entry.actions {
|
||||
return false;
|
||||
}
|
||||
} else if !can_add_entry_to_state(
|
||||
syntax_grammar,
|
||||
token_conflict_map,
|
||||
right_state,
|
||||
*symbol,
|
||||
left_entry,
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("maybe merge {} {}", left, right);
|
||||
|
||||
let mut symbols_to_add = Vec::new();
|
||||
for (symbol, right_entry) in &right_state.terminal_entries {
|
||||
if !left_state.terminal_entries.contains_key(&symbol) {
|
||||
if !can_add_entry_to_state(
|
||||
syntax_grammar,
|
||||
token_conflict_map,
|
||||
left_state,
|
||||
*symbol,
|
||||
right_entry,
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
symbols_to_add.push(*symbol);
|
||||
}
|
||||
}
|
||||
|
||||
for symbol in symbols_to_add {
|
||||
let entry = parse_table.states[right].terminal_entries[&symbol].clone();
|
||||
parse_table.states[left]
|
||||
.terminal_entries
|
||||
.insert(symbol, entry);
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
fn can_add_entry_to_state(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
state: &ParseState,
|
||||
token: Symbol,
|
||||
entry: &ParseTableEntry,
|
||||
) -> bool {
|
||||
// Do not add external tokens; they could conflict lexically with any of the state's
|
||||
// existing lookahead tokens.
|
||||
if token.is_external() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Only merge parse states by allowing existing reductions to happen
|
||||
// with additional lookahead tokens. Do not alter parse states in ways
|
||||
// that allow entirely new types of actions to happen.
|
||||
if state.terminal_entries.iter().all(|(_, e)| e != entry) {
|
||||
return false;
|
||||
}
|
||||
match entry.actions.last() {
|
||||
Some(ParseAction::Reduce { .. }) => {}
|
||||
_ => return false,
|
||||
}
|
||||
|
||||
// Do not add tokens which are both internal and external. Their validity could
|
||||
// influence the behavior of the external scanner.
|
||||
if syntax_grammar
|
||||
.external_tokens
|
||||
.iter()
|
||||
.any(|t| t.corresponding_internal_token == Some(token))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Do not add a token if it conflicts with an existing token.
|
||||
if token.is_terminal() {
|
||||
for existing_token in state.terminal_entries.keys() {
|
||||
if token_conflict_map.does_conflict(token.index, existing_token.index) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
fn remove_unused_states(parse_table: &mut ParseTable) {
|
||||
let mut state_usage_map = vec![false; parse_table.states.len()];
|
||||
for state in &parse_table.states {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use std::fmt;
|
|||
struct TokenConflictStatus {
|
||||
does_overlap: bool,
|
||||
does_match_valid_continuation: bool,
|
||||
does_match_separators: bool,
|
||||
matches_same_string: bool,
|
||||
}
|
||||
|
||||
|
|
@ -46,8 +47,9 @@ impl TokenConflictMap {
|
|||
self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
|
||||
}
|
||||
|
||||
pub fn does_match_valid_continuation(&self, i: usize, j: usize) -> bool {
|
||||
self.status_matrix[matrix_index(self.n, i, j)].does_match_valid_continuation
|
||||
pub fn does_conflict(&self, i: usize, j: usize) -> bool {
|
||||
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
|
||||
entry.does_match_valid_continuation || entry.does_match_separators
|
||||
}
|
||||
|
||||
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
|
||||
|
|
@ -207,10 +209,15 @@ fn compute_conflict_status(
|
|||
if chars.does_intersect(&following_chars[j]) {
|
||||
result.0.does_match_valid_continuation = true;
|
||||
}
|
||||
if cursor.in_separator() {
|
||||
result.0.does_match_separators = true;
|
||||
}
|
||||
} else {
|
||||
result.1.does_overlap = true;
|
||||
if chars.does_intersect(&following_chars[i]) {
|
||||
result.1.does_match_valid_continuation = true;
|
||||
} else {
|
||||
result.1.does_match_separators = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -326,9 +333,9 @@ mod tests {
|
|||
let token_map = TokenConflictMap::new(
|
||||
&grammar,
|
||||
vec![
|
||||
LookaheadSet::with(&[Symbol::terminal(var("identifier"))]),
|
||||
LookaheadSet::with(&[Symbol::terminal(var("in"))]),
|
||||
LookaheadSet::with(&[Symbol::terminal(var("identifier"))]),
|
||||
LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()),
|
||||
LookaheadSet::with([Symbol::terminal(var("in"))].iter().cloned()),
|
||||
LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()),
|
||||
],
|
||||
);
|
||||
|
||||
|
|
@ -338,12 +345,12 @@ mod tests {
|
|||
|
||||
// Depending on what character follows, the string "in" may be treated as part of an
|
||||
// `identifier` token.
|
||||
assert!(token_map.does_match_valid_continuation(var("identifier"), var("in")));
|
||||
assert!(token_map.does_conflict(var("identifier"), var("in")));
|
||||
|
||||
// Depending on what character follows, the string "instanceof" may be treated as part of
|
||||
// an `identifier` token.
|
||||
assert!(token_map.does_match_valid_continuation(var("identifier"), var("instanceof")));
|
||||
assert!(token_map.does_match_valid_continuation(var("instanceof"), var("in")));
|
||||
assert!(token_map.does_conflict(var("identifier"), var("instanceof")));
|
||||
assert!(token_map.does_conflict(var("instanceof"), var("in")));
|
||||
}
|
||||
|
||||
fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize {
|
||||
41
src/nfa.rs
41
src/nfa.rs
|
|
@ -86,15 +86,34 @@ impl CharacterSet {
|
|||
}
|
||||
|
||||
pub fn add(self, other: &CharacterSet) -> Self {
|
||||
if let CharacterSet::Include(other_chars) = other {
|
||||
if let CharacterSet::Include(mut chars) = self {
|
||||
chars.extend(other_chars);
|
||||
chars.sort_unstable();
|
||||
chars.dedup();
|
||||
return CharacterSet::Include(chars);
|
||||
}
|
||||
match self {
|
||||
CharacterSet::Include(mut chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
chars.extend(other_chars);
|
||||
chars.sort_unstable();
|
||||
chars.dedup();
|
||||
CharacterSet::Include(chars)
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
let excluded_chars = other_chars
|
||||
.iter()
|
||||
.cloned()
|
||||
.filter(|c| !chars.contains(&c))
|
||||
.collect();
|
||||
CharacterSet::Exclude(excluded_chars)
|
||||
}
|
||||
},
|
||||
CharacterSet::Exclude(mut chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
chars.retain(|c| !other_chars.contains(&c));
|
||||
CharacterSet::Exclude(chars)
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
chars.retain(|c| other_chars.contains(&c));
|
||||
CharacterSet::Exclude(chars)
|
||||
},
|
||||
},
|
||||
}
|
||||
panic!("Called add with a negated character set");
|
||||
}
|
||||
|
||||
pub fn does_intersect(&self, other: &CharacterSet) -> bool {
|
||||
|
|
@ -458,6 +477,9 @@ mod tests {
|
|||
(CharacterSet::empty().add_char('f'), 0, 4),
|
||||
],
|
||||
vec![
|
||||
(CharacterSet::empty().add_char('d'), 0, vec![1, 2]),
|
||||
(CharacterSet::empty().add_char('f'), 0, vec![1, 4]),
|
||||
(CharacterSet::empty().add_char('i'), 0, vec![1, 3]),
|
||||
(
|
||||
CharacterSet::empty()
|
||||
.add_range('a', 'c')
|
||||
|
|
@ -467,9 +489,6 @@ mod tests {
|
|||
0,
|
||||
vec![1],
|
||||
),
|
||||
(CharacterSet::empty().add_char('d'), 0, vec![1, 2]),
|
||||
(CharacterSet::empty().add_char('f'), 0, vec![1, 4]),
|
||||
(CharacterSet::empty().add_char('i'), 0, vec![1, 3]),
|
||||
],
|
||||
),
|
||||
];
|
||||
|
|
|
|||
|
|
@ -164,12 +164,20 @@ impl NfaBuilder {
|
|||
Err(Error::regex("Unicode character classes are not supported"))
|
||||
}
|
||||
Class::Perl(class) => {
|
||||
self.push_advance(self.expand_perl_character_class(&class.kind), next_state_id);
|
||||
let mut chars = self.expand_perl_character_class(&class.kind);
|
||||
if class.negated {
|
||||
chars = chars.negate();
|
||||
}
|
||||
self.push_advance(chars, next_state_id);
|
||||
Ok(true)
|
||||
}
|
||||
Class::Bracketed(class) => match &class.kind {
|
||||
ClassSet::Item(item) => {
|
||||
self.push_advance(self.expand_character_class(&item)?, next_state_id);
|
||||
let mut chars = self.expand_character_class(&item)?;
|
||||
if class.negated {
|
||||
chars = chars.negate();
|
||||
}
|
||||
self.push_advance(chars, next_state_id);
|
||||
Ok(true)
|
||||
}
|
||||
ClassSet::BinaryOp(_) => Err(Error::regex(
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ pub(crate) struct ParseState {
|
|||
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
|
||||
pub nonterminal_entries: HashMap<Symbol, ParseStateId>,
|
||||
pub lex_state_id: usize,
|
||||
pub unfinished_item_signature: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue