diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js
index ba3962cd..fa60dfa7 100644
--- a/cli/src/generate/dsl.js
+++ b/cli/src/generate/dsl.js
@@ -1,5 +1,4 @@
 const UNICODE_ESCAPE_PATTERN = /\\u([0-9a-f]{4})/gi;
-const DELIMITER_ESCAPE_PATTERN = /\\\//g;
 
 function alias(rule, value) {
   const result = {
@@ -150,10 +149,6 @@ function normalize(value) {
       return {
         type: 'PATTERN',
         value: value.source
-          .replace(
-            DELIMITER_ESCAPE_PATTERN,
-            '/'
-          )
           .replace(
             UNICODE_ESCAPE_PATTERN,
             (match, group) => String.fromCharCode(parseInt(group, 16))
diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs
index e269df6d..1e2ef2e5 100644
--- a/cli/src/generate/prepare_grammar/expand_tokens.rs
+++ b/cli/src/generate/prepare_grammar/expand_tokens.rs
@@ -6,8 +6,21 @@ use crate::generate::rules::Rule;
 use regex_syntax::ast::{
     parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange,
 };
+use regex::Regex;
 use std::i32;
 
+lazy_static! {
+    // Matches a `{...}` group that is not preceded by a backslash and whose
+    // contents include at least one character other than a digit or a comma,
+    // i.e. braces that cannot be a repetition count such as `{3}` or `{2,5}`.
+    static ref CURLY_BRACE_REGEX: Regex =
+        Regex::new(r#"(^|[^\\])\{([^}]*[^0-9,}][^}]*)\}"#).unwrap();
+}
+
+// Characters whose backslash escape is redundant; `preprocess_regex` drops the
+// backslash in front of them before the pattern is handed to the regex parser.
+const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
+
 struct NfaBuilder {
     nfa: Nfa,
     is_sep: bool,
@@ -35,6 +48,39 @@ fn get_completion_precedence(rule: &Rule) -> i32 {
     }
 }
 
+/// Rewrites a grammar-supplied regex into a form the regex parser accepts.
+///
+/// Two transformations are applied:
+/// 1. Every un-escaped `{...}` group that is not a repetition count is turned
+///    into escaped literal braces (`\{...\}`).
+/// 2. The backslash is dropped from redundant escapes of the characters in
+///    `ALLOWED_REDUNDANT_ESCAPED_CHARS` (e.g. `\/` becomes `/`); every other
+///    escape sequence, including a trailing lone backslash, is kept as-is.
+fn preprocess_regex(content: &str) -> String {
+    // `replace_all`, not `replace`: a pattern may contain several brace groups.
+    let content = CURLY_BRACE_REGEX.replace_all(content, "$1\\{$2\\}");
+    let mut result = String::with_capacity(content.len());
+    let mut is_escaped = false;
+    for c in content.chars() {
+        if is_escaped {
+            // Keep the backslash unless the escape is a known-redundant one.
+            if !ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) {
+                result.push('\\');
+            }
+            result.push(c);
+            is_escaped = false;
+        } else if c == '\\' {
+            is_escaped = true;
+        } else {
+            result.push(c);
+        }
+    }
+    if is_escaped {
+        result.push('\\');
+    }
+    result
+}
+
 pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result {
     let mut builder = NfaBuilder {
         nfa: Nfa::new(),
@@ -90,6 +136,7 @@ impl NfaBuilder {
     fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result {
         match rule {
             Rule::Pattern(s) => {
+                let s = preprocess_regex(s);
                 let ast = parse::Parser::new()
                     .parse(&s)
                     .map_err(|e| Error(e.to_string()))?;
@@ -586,6 +633,43 @@ mod tests {
                     ("12e34", Some((0, "12e34"))),
                 ],
             },
+            // Allowing unrecognized escape sequences
+            Row {
+                rules: vec![
+                    // Escaped forward slash (used in JS because '/' is the regex delimiter)
+                    Rule::pattern(r#"\/"#),
+                    // Escaped quotes
+                    Rule::pattern(r#"\"\'"#),
+                    // Quote preceded by a literal backslash
+                    Rule::pattern(r#"[\\']+"#),
+                ],
+                separators: vec![],
+                examples: vec![
+                    ("/", Some((0, "/"))),
+                    ("\"\'", Some((1, "\"\'"))),
+                    (r#"'\'a"#, Some((2, r#"'\'"#))),
+                ],
+            },
+            // Allowing un-escaped curly braces
+            Row {
+                rules: vec![
+                    // Un-escaped curly braces
+                    Rule::pattern(r#"u{[0-9a-fA-F]+}"#),
+                    // Already-escaped curly braces
+                    Rule::pattern(r#"\{[ab]{3}\}"#),
+                    // More than one pair of un-escaped curly braces
+                    Rule::pattern(r#"v{[a-z]+}-{[a-z]+}"#),
+                    // Bounded repetition must not be mistaken for literal braces
+                    Rule::pattern(r#"c{2,3}"#),
+                ],
+                separators: vec![],
+                examples: vec![
+                    ("u{1234} ok", Some((0, "u{1234}"))),
+                    ("{aba}}", Some((1, "{aba}"))),
+                    ("v{ab}-{cd}", Some((2, "v{ab}-{cd}"))),
+                    ("cccc", Some((3, "ccc"))),
+                ],
+            },
         ];
 
         for Row {