Handle Unicode property escapes inside bracketed character classes

Refs #906
This commit is contained in:
Max Brunsfeld 2021-02-18 22:27:44 -08:00
parent 29bc26ecd5
commit 2f28a35e1b

View file

@ -368,6 +368,13 @@ impl NfaBuilder {
Ok(result)
}
ClassSetItem::Perl(class) => Ok(self.expand_perl_character_class(&class.kind)),
ClassSetItem::Unicode(class) => {
let mut set = self.expand_unicode_character_class(&class.kind)?;
if class.negated {
set = set.negate();
}
Ok(set)
}
_ => Err(Error::regex(format!(
"Unsupported character class syntax {:?}",
item
@ -709,7 +716,7 @@ mod tests {
Row {
rules: vec![
Rule::pattern(r#"\p{L}+\P{L}+"#),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+\p{White_Space}*"#),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#),
],
separators: vec![],
examples: vec![
@ -717,6 +724,12 @@ mod tests {
("ბΨƁ___ƀƔ", Some((0, "ბΨƁ___"))),
],
},
// unicode property escapes in bracketed sets
Row {
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)],
separators: vec![],
examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))],
},
// unicode character escapes
Row {
rules: vec![