tree-sitter/src/nfa.rs

162 lines
4.3 KiB
Rust
Raw Normal View History

2018-12-08 13:44:11 -08:00
use std::fmt;
use std::char;
/// A set of characters, stored either as the characters it contains or as
/// the characters it leaves out.
///
/// The builder methods on this type keep the character list sorted and free
/// of duplicates.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum CharacterSet {
/// Contains exactly the listed characters.
Include(Vec<char>),
/// Contains every character except the listed ones.
Exclude(Vec<char>),
}
2018-12-08 23:35:48 -08:00
#[derive(Debug, PartialEq, Eq)]
2018-12-08 13:44:11 -08:00
/// A single state of an [`Nfa`]. The `u32` payloads are indices into
/// `Nfa::states`.
pub enum NfaState {
/// Consumes one character belonging to the set, then moves to the state at
/// the given index.
Advance(CharacterSet, u32),
/// Branches to both given states without consuming any input.
Split(u32, u32),
/// Marks a successful match.
Accept,
}
2018-12-08 23:35:48 -08:00
#[derive(PartialEq, Eq)]
2018-12-08 13:44:11 -08:00
/// A nondeterministic finite automaton stored as a flat list of states.
///
/// States refer to each other by index; the start state is the last element
/// of `states`.
pub struct Nfa {
pub states: Vec<NfaState>
}
/// Tracks the set of states an [`Nfa`] may currently be in while input is fed
/// to it one character at a time.
#[derive(Debug)]
pub struct NfaCursor<'a> {
// Sorted, duplicate-free indices of the current states. `Split` states are
// expanded into their targets rather than stored here.
indices: Vec<u32>,
// The automaton being traversed.
nfa: &'a Nfa,
}
impl CharacterSet {
    /// Returns a set that contains no characters.
    pub fn empty() -> Self {
        CharacterSet::Include(Vec::new())
    }

    /// Returns a set that contains every character.
    pub fn all() -> Self {
        CharacterSet::Exclude(Vec::new())
    }

    /// Returns the complement of this set: the character list is kept and
    /// only the variant is flipped.
    pub fn negate(self) -> CharacterSet {
        match self {
            CharacterSet::Include(chars) => CharacterSet::Exclude(chars),
            CharacterSet::Exclude(chars) => CharacterSet::Include(chars),
        }
    }

    /// Returns this set with `c` added, keeping the character list sorted.
    ///
    /// # Panics
    ///
    /// Panics if the set is a negated (`Exclude`) set.
    pub fn add_char(self, c: char) -> Self {
        match self {
            CharacterSet::Include(mut chars) => {
                // `Err` carries the position at which `c` keeps the list sorted;
                // `Ok` means the character is already present.
                if let Err(position) = chars.binary_search(&c) {
                    chars.insert(position, c);
                }
                CharacterSet::Include(chars)
            }
            CharacterSet::Exclude(_) => panic!("Called add with a negated character set"),
        }
    }

    /// Returns this set with every character from `start` to `end`
    /// (both inclusive) added. An inverted range (`start > end`) adds nothing.
    ///
    /// Code points inside the range that are not valid `char`s (the UTF-16
    /// surrogates U+D800..=U+DFFF) are skipped, so a range spanning that gap
    /// no longer panics.
    ///
    /// # Panics
    ///
    /// Panics if the set is a negated (`Exclude`) set.
    pub fn add_range(self, start: char, end: char) -> Self {
        match self {
            CharacterSet::Include(mut chars) => {
                chars.extend(((start as u32)..=(end as u32)).filter_map(char::from_u32));
                chars.sort_unstable();
                chars.dedup();
                CharacterSet::Include(chars)
            }
            CharacterSet::Exclude(_) => panic!("Called add with a negated character set"),
        }
    }

    /// Returns the union of this set and `other`.
    ///
    /// # Panics
    ///
    /// Panics if either set is a negated (`Exclude`) set.
    pub fn add(self, other: CharacterSet) -> Self {
        match (self, other) {
            (CharacterSet::Include(mut chars), CharacterSet::Include(other_chars)) => {
                chars.extend(other_chars);
                chars.sort_unstable();
                chars.dedup();
                CharacterSet::Include(chars)
            }
            _ => panic!("Called add with a negated character set"),
        }
    }

    /// Returns `true` if `c` is a member of this set.
    pub fn contains(&self, c: char) -> bool {
        // A linear scan is used on purpose: the variants are public, so the
        // list is not guaranteed to be sorted.
        match self {
            CharacterSet::Include(chars) => chars.contains(&c),
            CharacterSet::Exclude(chars) => !chars.contains(&c),
        }
    }
}
impl Nfa {
    /// Creates an automaton consisting of a single `Accept` state.
    pub fn new() -> Self {
        Nfa { states: vec![NfaState::Accept] }
    }

    /// Returns the index of the start state, which is the most recently
    /// added state (the last element of `states`).
    ///
    /// `states` is expected to be non-empty, as it is for every automaton
    /// built through [`Nfa::new`] and [`Nfa::prepend`].
    pub fn start_index(&self) -> u32 {
        self.states.len() as u32 - 1
    }

    /// Adds a new start state in front of the current one.
    ///
    /// `f` receives the index of the current start state and returns the
    /// state that becomes the new start. It is invoked exactly once, so any
    /// `FnOnce` closure is accepted; this lets callers move owned data (such
    /// as a `CharacterSet`) into the new state without cloning it.
    pub fn prepend(&mut self, f: impl FnOnce(u32) -> NfaState) {
        let previous_start = self.start_index();
        self.states.push(f(previous_start));
    }
}
impl fmt::Debug for Nfa {
    /// Formats the automaton as `Nfa { states: {0: <state>, 1: <state>, ...} }`,
    /// labelling every state with its index.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("Nfa { states: {")?;
        // Empty before the first entry, ", " before every later one.
        let mut separator = "";
        for (position, state) in self.states.iter().enumerate() {
            write!(f, "{}{}: {:?}", separator, position, state)?;
            separator = ", ";
        }
        f.write_str("} }")
    }
}
impl<'a> NfaCursor<'a> {
    /// Creates a cursor positioned on the start state of `nfa`, with any
    /// `Split` states reachable from it already expanded.
    pub fn new(nfa: &'a Nfa) -> Self {
        let mut result = Self { nfa, indices: Vec::new() };
        result.add_indices(&mut vec![nfa.start_index()]);
        result
    }

    /// Feeds the character `c` to the automaton.
    ///
    /// Every current `Advance` state whose character set contains `c` is
    /// replaced by its successor; all other current states are dropped.
    /// Returns `true` if at least one state consumed the character.
    pub fn advance(&mut self, c: char) -> bool {
        let nfa = self.nfa;
        let mut successors: Vec<u32> = self
            .indices
            .iter()
            .filter_map(|&index| match &nfa.states[index as usize] {
                NfaState::Advance(chars, next_index) if chars.contains(c) => Some(*next_index),
                NfaState::Advance(..) | NfaState::Split(..) | NfaState::Accept => None,
            })
            .collect();
        let advanced = !successors.is_empty();
        self.indices.clear();
        self.add_indices(&mut successors);
        advanced
    }

    /// Returns `true` if any of the current states is an `Accept` state.
    pub fn is_done(&self) -> bool {
        self.indices.iter().any(|&index| match self.nfa.states[index as usize] {
            NfaState::Accept => true,
            NfaState::Advance(..) | NfaState::Split(..) => false,
        })
    }

    /// Adds the given state indices to the cursor's current state set.
    ///
    /// `Split` states are not stored; their two targets are followed instead.
    /// The remaining states are inserted into the sorted, duplicate-free
    /// `indices` list. `new_indices` is used as the work list and is empty
    /// when this method returns.
    ///
    /// Each `Split` state is expanded at most once per call, so a cycle made
    /// only of `Split` states terminates instead of looping forever.
    pub fn add_indices(&mut self, new_indices: &mut Vec<u32>) {
        let nfa = self.nfa;
        // Sorted indices of the `Split` states already expanded in this call.
        let mut expanded_splits: Vec<u32> = Vec::new();
        while let Some(index) = new_indices.pop() {
            match &nfa.states[index as usize] {
                NfaState::Split(left, right) => {
                    if let Err(position) = expanded_splits.binary_search(&index) {
                        expanded_splits.insert(position, index);
                        new_indices.push(*left);
                        new_indices.push(*right);
                    }
                }
                NfaState::Advance(..) | NfaState::Accept => {
                    if let Err(position) = self.indices.binary_search(&index) {
                        self.indices.insert(position, index);
                    }
                }
            }
        }
    }
}