diff --git a/cli/src/generate/char_tree.rs b/cli/src/generate/char_tree.rs
new file mode 100644
index 00000000..af86f28e
--- /dev/null
+++ b/cli/src/generate/char_tree.rs
@@ -0,0 +1,147 @@
+use std::ops::Range;
+
+/// A binary decision tree that tests whether a character belongs to a set of ranges.
+///
+/// A missing (`None`) branch means the character is not in the set.
+#[derive(Debug, PartialEq, Eq)]
+pub enum CharacterTree {
+    /// The character is in the set.
+    Yes,
+    /// Compare the character against `value` using `operator`; continue with
+    /// `consequence` when the comparison holds and with `alternative` otherwise.
+    Compare {
+        value: char,
+        operator: Comparator,
+        consequence: Option<Box<CharacterTree>>,
+        alternative: Option<Box<CharacterTree>>,
+    },
+}
+
+/// The comparison applied at a `CharacterTree::Compare` node.
+#[derive(Debug, PartialEq, Eq)]
+pub enum Comparator {
+    Less,
+    LessOrEqual,
+    Equal,
+    GreaterOrEqual,
+}
+
+impl CharacterTree {
+    /// Builds a decision tree for `ranges` by recursively splitting on the middle range.
+    ///
+    /// Both `start` and `end` of every range count as members of the set. The ranges
+    /// must be sorted and non-overlapping, because the ranges before the middle one are
+    /// only consulted when the character is less than the middle range's `start`.
+    /// Returns `None` for an empty slice.
+    pub fn from_ranges(ranges: &[Range<char>]) -> Option<Self> {
+        match ranges.len() {
+            0 => None,
+            1 => {
+                let range = &ranges[0];
+                if range.start == range.end {
+                    Some(CharacterTree::Compare {
+                        operator: Comparator::Equal,
+                        value: range.start,
+                        consequence: Some(Box::new(CharacterTree::Yes)),
+                        alternative: None,
+                    })
+                } else {
+                    Some(CharacterTree::Compare {
+                        operator: Comparator::GreaterOrEqual,
+                        value: range.start,
+                        consequence: Some(Box::new(CharacterTree::Compare {
+                            operator: Comparator::LessOrEqual,
+                            value: range.end,
+                            consequence: Some(Box::new(CharacterTree::Yes)),
+                            alternative: None,
+                        })),
+                        alternative: None,
+                    })
+                }
+            }
+            len => {
+                let mid = len / 2;
+                let mid_range = &ranges[mid];
+                Some(CharacterTree::Compare {
+                    operator: Comparator::Less,
+                    value: mid_range.start,
+                    consequence: Self::from_ranges(&ranges[0..mid]).map(Box::new),
+                    alternative: Some(Box::new(CharacterTree::Compare {
+                        operator: Comparator::LessOrEqual,
+                        value: mid_range.end,
+                        consequence: Some(Box::new(CharacterTree::Yes)),
+                        alternative: Self::from_ranges(&ranges[(mid + 1)..]).map(Box::new),
+                    })),
+                })
+            }
+        }
+    }
+
+    /// Evaluates the tree for `c`, returning whether `c` is in the set (test-only).
+    #[cfg(test)]
+    fn contains(&self, c: char) -> bool {
+        match self {
+            CharacterTree::Yes => true,
+            CharacterTree::Compare {
+                value,
+                operator,
+                alternative,
+                consequence,
+            } => {
+                let condition = match operator {
+                    Comparator::Less => c < *value,
+                    Comparator::LessOrEqual => c <= *value,
+                    Comparator::Equal => c == *value,
+                    Comparator::GreaterOrEqual => c >= *value,
+                };
+                if condition { consequence } else { alternative }
+                    .as_ref()
+                    .map_or(false, |a| a.contains(c))
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_character_tree_simple() {
+        let tree = CharacterTree::from_ranges(&['a'..'d', 'h'..'l', 'p'..'r', 'u'..'u', 'z'..'z'])
+            .unwrap();
+
+        assert!(tree.contains('a'));
+        assert!(tree.contains('b'));
+        assert!(tree.contains('c'));
+        assert!(tree.contains('d'));
+
+        assert!(!tree.contains('e'));
+        assert!(!tree.contains('f'));
+        assert!(!tree.contains('g'));
+
+        assert!(tree.contains('h'));
+        assert!(tree.contains('i'));
+        assert!(tree.contains('j'));
+        assert!(tree.contains('k'));
+        assert!(tree.contains('l'));
+
+        assert!(!tree.contains('m'));
+        assert!(!tree.contains('n'));
+        assert!(!tree.contains('o'));
+
+        assert!(tree.contains('p'));
+        assert!(tree.contains('q'));
+        assert!(tree.contains('r'));
+
+        assert!(!tree.contains('s'));
+        assert!(!tree.contains('t'));
+
+        assert!(tree.contains('u'));
+
+        assert!(!tree.contains('v'));
+        assert!(!tree.contains('y'));
+
+        assert!(tree.contains('z'));
+    }
+}
diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs
index 830c4a65..9e9adb44 100644
--- a/cli/src/generate/mod.rs
+++ b/cli/src/generate/mod.rs
@@ -1,4 +1,5 @@
 mod build_tables;
+mod char_tree;
 mod dedup;
 mod grammars;
 mod nfa;
diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs
index 362f357c..eb45eab5 100644
--- a/cli/src/generate/render.rs
+++ b/cli/src/generate/render.rs
@@ -1,3 +1,4 @@
+use super::char_tree::{CharacterTree, Comparator};
 use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
 use super::rules::{Alias, AliasMap, Symbol, SymbolType};
 use super::tables::{
@@ -714,7 +715,7 @@ impl Generator {
             if info.usage_count > 1 {
                 add_line!(
                     self,
-                    "static inline bool {}_character_set_{}(int32_t lookahead) {{",
+                    "static inline bool {}_character_set_{}(int32_t c) {{",
                     self.symbol_ids[&info.symbol],
                     info.index
                 );
@@ -722,7 +723,8 @@ impl Generator {
                 add_line!(self, "return");
                 indent!(self);
                 add_whitespace!(self);
-                self.add_character_range_conditions(&info.ranges, true, 0);
+                let tree = CharacterTree::from_ranges(&info.ranges);
+                self.add_character_tree(tree.as_ref());
                 add!(self, ";\n");
                 dedent!(self);
                 dedent!(self);
@@ -844,16 +846,15 @@ impl Generator {
         ranges: &[Range<char>],
         is_included: bool,
         indent_count: usize,
-    ) -> bool {
+    ) {
         let mut line_break = "\n".to_string();
         for _ in 0..self.indent_level + indent_count {
             line_break.push_str("  ");
         }
 
-        let mut did_add = false;
-        for range in ranges {
+        for (i, range) in ranges.iter().enumerate() {
             if is_included {
-                if did_add {
+                if i > 0 {
                     add!(self, " ||{}", line_break);
                 }
                 if range.end == range.start {
@@ -872,7 +873,7 @@ impl Generator {
                     add!(self, ")");
                 }
             } else {
-                if did_add {
+                if i > 0 {
                     add!(self, " &&{}", line_break);
                 }
                 if range.end == range.start {
@@ -896,9 +897,58 @@ impl Generator {
                     }
                 }
             }
-            did_add = true;
         }
-        did_add
+    }
+
+    /// Writes `tree` as a C boolean expression over the variable `c`.
+    ///
+    /// Nodes with an `alternative` become multi-line `?:` conditionals; nodes without one
+    /// become `&&` chains. A missing tree is written as `false`.
+    fn add_character_tree(&mut self, tree: Option<&CharacterTree>) {
+        match tree {
+            Some(CharacterTree::Compare {
+                value,
+                operator,
+                consequence,
+                alternative,
+            }) => {
+                let op = match operator {
+                    Comparator::Less => "<",
+                    Comparator::LessOrEqual => "<=",
+                    Comparator::Equal => "==",
+                    Comparator::GreaterOrEqual => ">=",
+                };
+                add!(self, "(c {} ", op);
+                self.add_character(*value);
+                add!(self, ")");
+
+                let consequence = consequence.as_ref().map(Box::as_ref);
+                let alternative = alternative.as_ref().map(Box::as_ref);
+                if alternative.is_none() {
+                    if consequence != Some(&CharacterTree::Yes) {
+                        add!(self, " && ");
+                        self.add_character_tree(consequence);
+                    }
+                } else {
+                    add!(self, "\n");
+                    indent!(self);
+                    add_whitespace!(self);
+                    add!(self, "? ");
+                    self.add_character_tree(consequence);
+                    add!(self, "\n");
+                    add_whitespace!(self);
+                    add!(self, ": ");
+                    self.add_character_tree(alternative);
+                    dedent!(self);
+                }
+            }
+            Some(CharacterTree::Yes) => {
+                add!(self, "true");
+            }
+            None => {
+                add!(self, "false");
+            }
+        }
     }
 
     fn add_advance_action(&mut self, action: &AdvanceAction) {