Use binary search in generated character set functions

This commit is contained in:
Max Brunsfeld 2021-02-17 13:08:56 -08:00
parent f5a4c14dbe
commit 6132a10b1c
3 changed files with 186 additions and 9 deletions

View file

@ -0,0 +1,130 @@
use std::ops::Range;
/// A decision tree of character comparisons describing a set of characters.
///
/// A tree is evaluated against a single character: each `Compare` node tests
/// the character and continues into one of its two branches. A missing branch
/// (`None`) means the character is rejected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CharacterTree {
    /// Leaf node: the character is accepted.
    Yes,
    /// Interior node: test the character against `value` using `operator`.
    Compare {
        /// The character that the input is compared with.
        value: char,
        /// The comparison applied as `input <operator> value`.
        operator: Comparator,
        /// Subtree followed when the comparison holds.
        consequence: Option<Box<CharacterTree>>,
        /// Subtree followed when the comparison does not hold.
        alternative: Option<Box<CharacterTree>>,
    },
}

/// The comparison operators that a `CharacterTree::Compare` node can apply.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Comparator {
    /// `input < value`
    Less,
    /// `input <= value`
    LessOrEqual,
    /// `input == value`
    Equal,
    /// `input >= value`
    GreaterOrEqual,
}
impl CharacterTree {
    /// Builds a comparison tree that accepts exactly the characters covered by
    /// `ranges`, treating both `start` and `end` of every range as inclusive.
    ///
    /// Returns `None` for an empty slice. A single range becomes either one
    /// equality test (when `start == end`) or a lower-bound test followed by an
    /// upper-bound test. Longer slices are split around their middle range so
    /// that each comparison discards roughly half of the remaining ranges.
    ///
    /// NOTE(review): the splitting only makes sense if `ranges` is sorted and
    /// non-overlapping; nothing here checks that — confirm with callers.
    pub fn from_ranges(ranges: &[Range<char>]) -> Option<Self> {
        // Every accepting path ends in the same leaf.
        let accept = || Some(Box::new(CharacterTree::Yes));

        match ranges {
            [] => None,

            // One character: a single equality test is enough.
            [single] if single.start == single.end => Some(CharacterTree::Compare {
                value: single.start,
                operator: Comparator::Equal,
                consequence: accept(),
                alternative: None,
            }),

            // One proper range: `c >= start`, then `c <= end`.
            [single] => {
                let within_upper_bound = CharacterTree::Compare {
                    value: single.end,
                    operator: Comparator::LessOrEqual,
                    consequence: accept(),
                    alternative: None,
                };
                Some(CharacterTree::Compare {
                    value: single.start,
                    operator: Comparator::GreaterOrEqual,
                    consequence: Some(Box::new(within_upper_bound)),
                    alternative: None,
                })
            }

            // Several ranges: pivot on the middle one. Characters below its
            // start go to the left half; the rest either fall inside the pivot
            // range or are handed to the right half.
            _ => {
                let pivot_index = ranges.len() / 2;
                let pivot = &ranges[pivot_index];
                let below = Self::from_ranges(&ranges[..pivot_index]);
                let above = Self::from_ranges(&ranges[pivot_index + 1..]);

                let pivot_or_above = CharacterTree::Compare {
                    value: pivot.end,
                    operator: Comparator::LessOrEqual,
                    consequence: accept(),
                    alternative: above.map(Box::new),
                };
                Some(CharacterTree::Compare {
                    value: pivot.start,
                    operator: Comparator::Less,
                    consequence: below.map(Box::new),
                    alternative: Some(Box::new(pivot_or_above)),
                })
            }
        }
    }

    /// Evaluates the tree for `c`: `true` if a `Yes` leaf is reached, `false`
    /// if the walk runs into a missing branch. Only compiled for tests.
    #[cfg(test)]
    fn contains(&self, c: char) -> bool {
        let mut node = self;
        loop {
            match node {
                CharacterTree::Yes => return true,
                CharacterTree::Compare {
                    value,
                    operator,
                    consequence,
                    alternative,
                } => {
                    let holds = match operator {
                        Comparator::Less => c < *value,
                        Comparator::LessOrEqual => c <= *value,
                        Comparator::Equal => c == *value,
                        Comparator::GreaterOrEqual => c >= *value,
                    };
                    let next = if holds { consequence } else { alternative };
                    match next {
                        Some(subtree) => node = &**subtree,
                        // A missing branch rejects the character.
                        None => return false,
                    }
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks membership for every letter `a`..=`z`, plus the characters just
    /// outside that span, against a tree built from five ranges. Range ends
    /// are inclusive, and `'u'..'u'` / `'z'..'z'` denote single characters.
    #[test]
    fn test_character_tree_simple() {
        let tree = CharacterTree::from_ranges(&['a'..'d', 'h'..'l', 'p'..'r', 'u'..'u', 'z'..'z'])
            .unwrap();

        for c in "abcdhijklpqruz".chars() {
            assert!(tree.contains(c), "expected {:?} to be in the set", c);
        }

        // '`' and '{' are the characters immediately before 'a' and after 'z'.
        for c in "`efgmnostvwxy{".chars() {
            assert!(!tree.contains(c), "expected {:?} to be outside the set", c);
        }
    }
}

View file

@ -1,4 +1,5 @@
mod build_tables;
mod char_tree;
mod dedup;
mod grammars;
mod nfa;

View file

@ -1,3 +1,4 @@
use super::char_tree::{CharacterTree, Comparator};
use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
use super::tables::{
@ -714,7 +715,7 @@ impl Generator {
if info.usage_count > 1 {
add_line!(
self,
"static inline bool {}_character_set_{}(int32_t lookahead) {{",
"static inline bool {}_character_set_{}(int32_t c) {{",
self.symbol_ids[&info.symbol],
info.index
);
@ -722,7 +723,8 @@ impl Generator {
add_line!(self, "return");
indent!(self);
add_whitespace!(self);
self.add_character_range_conditions(&info.ranges, true, 0);
let tree = CharacterTree::from_ranges(&info.ranges);
self.add_character_tree(tree.as_ref());
add!(self, ";\n");
dedent!(self);
dedent!(self);
@ -844,16 +846,15 @@ impl Generator {
ranges: &[Range<char>],
is_included: bool,
indent_count: usize,
) -> bool {
) {
let mut line_break = "\n".to_string();
for _ in 0..self.indent_level + indent_count {
line_break.push_str(" ");
}
let mut did_add = false;
for range in ranges {
for (i, range) in ranges.iter().enumerate() {
if is_included {
if did_add {
if i > 0 {
add!(self, " ||{}", line_break);
}
if range.end == range.start {
@ -872,7 +873,7 @@ impl Generator {
add!(self, ")");
}
} else {
if did_add {
if i > 0 {
add!(self, " &&{}", line_break);
}
if range.end == range.start {
@ -896,9 +897,54 @@ impl Generator {
}
}
}
did_add = true;
}
did_add
}
/// Writes `tree` to the output as a boolean expression over a variable
/// named `c`.
///
/// * `None` is written as `false` and a `Yes` leaf as `true`.
/// * A `Compare` node is written as `(c <op> <value>)`. If it has an
///   alternative branch, the node becomes a `? … : …` conditional with each
///   arm on its own line, one indentation level deeper. Otherwise the
///   consequence is appended with `&&`, unless it is a bare `Yes`, in which
///   case the comparison alone is the whole expression.
fn add_character_tree(&mut self, tree: Option<&CharacterTree>) {
    // Handle the two terminal cases up front.
    let (value, operator, consequence, alternative) = match tree {
        None => {
            add!(self, "false");
            return;
        }
        Some(CharacterTree::Yes) => {
            add!(self, "true");
            return;
        }
        Some(CharacterTree::Compare {
            value,
            operator,
            consequence,
            alternative,
        }) => (
            *value,
            operator,
            consequence.as_deref(),
            alternative.as_deref(),
        ),
    };

    let symbol = match operator {
        Comparator::Less => "<",
        Comparator::LessOrEqual => "<=",
        Comparator::Equal => "==",
        Comparator::GreaterOrEqual => ">=",
    };
    add!(self, "(c {} ", symbol);
    self.add_character(value);
    add!(self, ")");

    if alternative.is_some() {
        // Two-way branch: emit both arms on their own, deeper-indented lines.
        add!(self, "\n");
        indent!(self);
        add_whitespace!(self);
        add!(self, "? ");
        self.add_character_tree(consequence);
        add!(self, "\n");
        add_whitespace!(self);
        add!(self, ": ");
        self.add_character_tree(alternative);
        dedent!(self);
    } else if !matches!(consequence, Some(CharacterTree::Yes)) {
        // No alternative: the comparison must hold *and* the consequence too.
        // A plain `Yes` consequence adds nothing, so it is left out.
        add!(self, " && ");
        self.add_character_tree(consequence);
    }
}
fn add_advance_action(&mut self, action: &AdvanceAction) {