From ab00f1b0dabbf7ad849bb0734b6ffff7dcd87c2a Mon Sep 17 00:00:00 2001
From: Timothy Clem
Date: Tue, 31 Jan 2017 15:03:48 -0800
Subject: [PATCH] Add support for \W and \D negated character classes too

---
 spec/compiler/prepare_grammar/parse_regex_spec.cc | 13 +++++++++++--
 src/compiler/prepare_grammar/parse_regex.cc       |  9 +++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/spec/compiler/prepare_grammar/parse_regex_spec.cc b/spec/compiler/prepare_grammar/parse_regex_spec.cc
index a685c68d..4d226619 100644
--- a/spec/compiler/prepare_grammar/parse_regex_spec.cc
+++ b/spec/compiler/prepare_grammar/parse_regex_spec.cc
@@ -30,7 +30,7 @@ describe("parse_regex", []() {
 
     {
       "character classes",
-      "\\w-\\d-\\s-\\S",
+      "\\w-\\d-\\s-\\W-\\D-\\S",
       seq({
         character({
           'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
@@ -43,7 +43,16 @@ describe("parse_regex", []() {
         character({ '-' }),
         character({ ' ', '\t', '\r', '\n' }),
         character({ '-' }),
-        character({ ' ', '\t', '\r', '\n' }, false)
+        character({
+          'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+          'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+          'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+          '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }, false),
+        character({ '-' }),
+        character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false),
+        character({ '-' }),
+        character({ ' ', '\t', '\r', '\n' }, false),
       })
     },
 
diff --git a/src/compiler/prepare_grammar/parse_regex.cc b/src/compiler/prepare_grammar/parse_regex.cc
index 62c6ad2f..9fdab0d1 100644
--- a/src/compiler/prepare_grammar/parse_regex.cc
+++ b/src/compiler/prepare_grammar/parse_regex.cc
@@ -187,8 +187,17 @@ class PatternParser {
           .include('A', 'Z')
           .include('0', '9')
           .include('_');
+      case 'W':
+        return CharacterSet()
+          .include_all()
+          .exclude('a', 'z')
+          .exclude('A', 'Z')
+          .exclude('0', '9')
+          .exclude('_');
       case 'd':
         return CharacterSet().include('0', '9');
+      case 'D':
+        return CharacterSet().include_all().exclude('0', '9');
       case 's':
         return CharacterSet().include(' ').include('\t').include('\n').include(
           '\r');