From 5b630054c6999c134b3d2b2152b09424928efac4 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Wed, 17 Feb 2021 17:22:33 -0800
Subject: [PATCH] Handle negated unicode property escapes in regexes

Refs #380
---
 cli/src/generate/prepare_grammar/expand_tokens.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs
index b948ddfc..5580eb72 100644
--- a/cli/src/generate/prepare_grammar/expand_tokens.rs
+++ b/cli/src/generate/prepare_grammar/expand_tokens.rs
@@ -14,7 +14,7 @@ use std::i32;
 
 lazy_static! {
     static ref CURLY_BRACE_REGEX: Regex =
-        Regex::new(r#"(^|[^\\p])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap();
+        Regex::new(r#"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap();
     static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
         serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
     static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
@@ -705,6 +705,18 @@ mod tests {
                     (r#"'\'a"#, Some((2, r#"'\'"#))),
                 ],
             },
+            // unicode property escapes, both plain (`\p{...}`) and negated (`\P{...}`)
+            Row {
+                rules: vec![
+                    Rule::pattern(r#"\p{L}+\P{L}+"#),
+                    Rule::pattern(r#"\p{White_Space}+\P{White_Space}+\p{White_Space}*"#),
+                ],
+                separators: vec![],
+                examples: vec![
+                    ("  123   abc", Some((1, "  123   "))),
+                    ("ბΨƁ___ƀƔ", Some((0, "ბΨƁ___"))),
+                ],
+            },
             // allowing un-escaped curly braces
             Row {
                 rules: vec![