Use static arrays and a fixed binary search for large char set checks
This commit is contained in:
parent
0fc92c9a7d
commit
39be6972fe
4 changed files with 49 additions and 207 deletions
|
|
@ -1,133 +0,0 @@
|
|||
use std::ops::Range;
|
||||
|
||||
/// A set of characters represented as a balanced binary tree of comparisons.
///
/// This is used as an intermediate step in generating efficient code for
/// matching a given character set.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CharacterTree {
    /// Terminal node: the character being tested belongs to the set.
    Yes,
    /// Decision node: evaluates `c <operator> value` for the character `c`.
    Compare {
        /// Right-hand side of the comparison.
        value: char,
        /// Which comparison to perform.
        operator: Comparator,
        /// Subtree to follow when the comparison holds; `None` means "not in the set".
        consequence: Option<Box<CharacterTree>>,
        /// Subtree to follow when the comparison fails; `None` means "not in the set".
        alternative: Option<Box<CharacterTree>>,
    },
}

/// The comparison performed by a [`CharacterTree::Compare`] node, read as
/// `c <operator> value`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Comparator {
    Less,
    LessOrEqual,
    Equal,
    GreaterOrEqual,
}

impl CharacterTree {
    /// Builds a comparison tree that matches every character covered by `ranges`.
    ///
    /// Each `Range` is treated as *inclusive on both ends*: `'a'..'d'` matches
    /// `'d'` too, and `'u'..'u'` denotes the single character `'u'`.
    ///
    /// Returns `None` when `ranges` is empty.
    ///
    /// The tree is built by splitting the slice at its midpoint and branching on
    /// `c < mid.start`, so the result is only meaningful when `ranges` is sorted
    /// in ascending order and the ranges do not overlap. This is not checked.
    pub fn from_ranges(ranges: &[Range<char>]) -> Option<Self> {
        match ranges {
            [] => None,
            // A one-character range needs only a single equality test.
            [only] if only.start == only.end => {
                Some(Self::accept_if(Comparator::Equal, only.start, None))
            }
            // A lone range becomes `c >= start && c <= end`.
            [only] => Some(Self::Compare {
                operator: Comparator::GreaterOrEqual,
                value: only.start,
                consequence: Some(Box::new(Self::accept_if(
                    Comparator::LessOrEqual,
                    only.end,
                    None,
                ))),
                alternative: None,
            }),
            _ => {
                let mid = ranges.len() / 2;
                let mid_range = &ranges[mid];
                // Everything below the middle range goes left; otherwise the
                // character is either inside the middle range or belongs right.
                Some(Self::Compare {
                    operator: Comparator::Less,
                    value: mid_range.start,
                    consequence: Self::from_ranges(&ranges[..mid]).map(Box::new),
                    alternative: Some(Box::new(Self::accept_if(
                        Comparator::LessOrEqual,
                        mid_range.end,
                        Self::from_ranges(&ranges[mid + 1..]),
                    ))),
                })
            }
        }
    }

    /// Builds a node that answers `Yes` when `c <operator> value` holds and
    /// otherwise defers to `alternative`.
    fn accept_if(operator: Comparator, value: char, alternative: Option<Self>) -> Self {
        Self::Compare {
            value,
            operator,
            consequence: Some(Box::new(Self::Yes)),
            alternative: alternative.map(Box::new),
        }
    }

    /// Evaluates the tree for `c`. Only compiled for tests, where it serves as a
    /// reference interpreter for the structure produced by `from_ranges`.
    #[cfg(test)]
    fn contains(&self, c: char) -> bool {
        match self {
            Self::Yes => true,
            Self::Compare {
                value,
                operator,
                consequence,
                alternative,
            } => {
                let holds = match operator {
                    Comparator::Less => c < *value,
                    Comparator::LessOrEqual => c <= *value,
                    Comparator::Equal => c == *value,
                    Comparator::GreaterOrEqual => c >= *value,
                };
                let branch = if holds { consequence } else { alternative };
                // A missing branch means the character is not in the set.
                branch
                    .as_deref()
                    .map_or(false, |subtree| subtree.contains(c))
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks membership for every character inside the five ranges, plus the
    /// characters in the gaps between them and just outside both ends.
    #[test]
    fn test_character_tree_simple() {
        let tree = CharacterTree::from_ranges(&['a'..'d', 'h'..'l', 'p'..'r', 'u'..'u', 'z'..'z'])
            .unwrap();

        // Ranges are inclusive on both ends, so 'd', 'l' and 'r' are members.
        for c in "abcdhijklpqruz".chars() {
            assert!(tree.contains(c), "expected {:?} to be in the set", c);
        }

        // '`' precedes 'a' and '{' follows 'z'; the rest fall in the gaps.
        for c in "`efgmnostvwxy{".chars() {
            assert!(!tree.contains(c), "expected {:?} to be outside the set", c);
        }
    }
}
|
||||
|
|
@ -17,7 +17,6 @@ use render::render_c_code;
|
|||
use rules::AliasMap;
|
||||
|
||||
mod build_tables;
|
||||
mod char_tree;
|
||||
mod dedup;
|
||||
mod grammar_files;
|
||||
mod grammars;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
use super::{
|
||||
char_tree::{CharacterTree, Comparator},
|
||||
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
|
||||
rules::{Alias, AliasMap, Symbol, SymbolType},
|
||||
tables::{
|
||||
|
|
@ -737,21 +736,7 @@ impl Generator {
|
|||
let mut sorted_large_char_sets = large_character_sets.iter().collect::<Vec<_>>();
|
||||
sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index));
|
||||
for info in sorted_large_char_sets {
|
||||
add_line!(
|
||||
self,
|
||||
"static inline bool {}_character_set_{}(int32_t c) {{",
|
||||
self.symbol_ids[&info.symbol],
|
||||
info.index
|
||||
);
|
||||
indent!(self);
|
||||
add_whitespace!(self);
|
||||
add!(self, "return ");
|
||||
let tree = CharacterTree::from_ranges(&info.ranges);
|
||||
self.add_character_tree(tree.as_ref());
|
||||
add!(self, ";\n");
|
||||
dedent!(self);
|
||||
add_line!(self, "}}");
|
||||
add_line!(self, "");
|
||||
self.add_character_set(info);
|
||||
}
|
||||
|
||||
add_line!(
|
||||
|
|
@ -836,9 +821,10 @@ impl Generator {
|
|||
}
|
||||
add!(
|
||||
self,
|
||||
"{}_character_set_{}(lookahead)) ",
|
||||
"set_contains({}_character_set_{}, {}, lookahead)) ",
|
||||
self.symbol_ids[&info.symbol],
|
||||
info.index
|
||||
info.index,
|
||||
info.ranges.len(),
|
||||
);
|
||||
self.add_advance_action(&action);
|
||||
add!(self, "\n");
|
||||
|
|
@ -931,64 +917,31 @@ impl Generator {
|
|||
}
|
||||
}
|
||||
|
||||
fn add_character_tree(&mut self, tree: Option<&CharacterTree>) {
|
||||
match tree {
|
||||
Some(CharacterTree::Compare {
|
||||
value,
|
||||
operator,
|
||||
consequence,
|
||||
alternative,
|
||||
}) => {
|
||||
let op = match operator {
|
||||
Comparator::Less => "<",
|
||||
Comparator::LessOrEqual => "<=",
|
||||
Comparator::Equal => "==",
|
||||
Comparator::GreaterOrEqual => ">=",
|
||||
};
|
||||
let consequence = consequence.as_ref().map(Box::as_ref);
|
||||
let alternative = alternative.as_ref().map(Box::as_ref);
|
||||
|
||||
let simple = alternative.is_none() && consequence == Some(&CharacterTree::Yes);
|
||||
|
||||
if !simple {
|
||||
add!(self, "(");
|
||||
}
|
||||
|
||||
add!(self, "c {op} ");
|
||||
self.add_character(*value);
|
||||
|
||||
if !simple {
|
||||
if alternative.is_none() {
|
||||
add!(self, " && ");
|
||||
self.add_character_tree(consequence);
|
||||
} else if consequence == Some(&CharacterTree::Yes) {
|
||||
add!(self, " || ");
|
||||
self.add_character_tree(alternative);
|
||||
} else {
|
||||
add!(self, "\n");
|
||||
indent!(self);
|
||||
add_whitespace!(self);
|
||||
add!(self, "? ");
|
||||
self.add_character_tree(consequence);
|
||||
add!(self, "\n");
|
||||
add_whitespace!(self);
|
||||
add!(self, ": ");
|
||||
self.add_character_tree(alternative);
|
||||
dedent!(self);
|
||||
}
|
||||
}
|
||||
|
||||
if !simple {
|
||||
add!(self, ")");
|
||||
fn add_character_set(&mut self, info: &LargeCharacterSetInfo) {
|
||||
add_line!(
|
||||
self,
|
||||
"static TSCharacterRange {}_character_set_{}[] = {{",
|
||||
self.symbol_ids[&info.symbol],
|
||||
info.index
|
||||
);
|
||||
indent!(self);
|
||||
for chunk in info.ranges.chunks(8) {
|
||||
add_whitespace!(self);
|
||||
for (i, range) in chunk.iter().enumerate() {
|
||||
if i > 0 {
|
||||
add!(self, " ");
|
||||
}
|
||||
add!(self, "{{");
|
||||
self.add_character(range.start);
|
||||
add!(self, ", ");
|
||||
self.add_character(range.end);
|
||||
add!(self, "}},");
|
||||
}
|
||||
Some(CharacterTree::Yes) => {
|
||||
add!(self, "true");
|
||||
}
|
||||
None => {
|
||||
add!(self, "false");
|
||||
}
|
||||
add!(self, "\n");
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_advance_action(&mut self, action: &AdvanceAction) {
|
||||
|
|
|
|||
|
|
@ -86,6 +86,11 @@ typedef union {
|
|||
} entry;
|
||||
} TSParseActionEntry;
|
||||
|
||||
/*
 * One entry of a character-set table: a range of character values.
 * Both bounds are part of the range; set_contains() tests membership with
 * `lookahead >= start && lookahead <= end`.
 */
typedef struct {
|
||||
/* First character value in the range (inclusive). */
int32_t start;
|
||||
/* Last character value in the range (inclusive). */
int32_t end;
|
||||
} TSCharacterRange;
|
||||
|
||||
struct TSLanguage {
|
||||
uint32_t version;
|
||||
uint32_t symbol_count;
|
||||
|
|
@ -125,6 +130,24 @@ struct TSLanguage {
|
|||
const TSStateId *primary_state_ids;
|
||||
};
|
||||
|
||||
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
|
||||
uint32_t index = 0;
|
||||
uint32_t size = len - index;
|
||||
while (size > 1) {
|
||||
uint32_t half_size = size / 2;
|
||||
uint32_t mid_index = index + half_size;
|
||||
TSCharacterRange *range = &ranges[mid_index];
|
||||
if (lookahead >= range->start && lookahead <= range->end) {
|
||||
return true;
|
||||
} else if (lookahead > range->end) {
|
||||
index = mid_index;
|
||||
}
|
||||
size -= half_size;
|
||||
}
|
||||
TSCharacterRange *range = &ranges[index];
|
||||
return (lookahead >= range->start && lookahead <= range->end);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lexer Macros
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue