Fix some regex parsing bugs

* Allow escape sequences to be used as the endpoints of character class ranges (e.g. `[\0-\n]`)
* Don't give special meaning to dashes outside of character classes (e.g. `a-b` now parses as the literal characters `a`, `-`, `b` in sequence)
This commit is contained in:
Max Brunsfeld 2018-04-06 12:46:06 -07:00
parent 345e344377
commit 1ca261c79b
2 changed files with 64 additions and 27 deletions

View file

@ -110,6 +110,30 @@ describe("parse_regex", []() {
CharacterSet{{'\\'}}
},
{
"dashes",
"a-b",
Rule::seq({
CharacterSet{{'a'}},
CharacterSet{{'-'}},
CharacterSet{{'b'}}
})
},
{
"literal dashes in character classes",
"[a-][\\d-a][\\S-a]",
Rule::seq({
CharacterSet{{'a', '-'}},
CharacterSet().include('0', '9').include('-').include('a'),
CharacterSet().include_all()
.exclude(' ')
.exclude('\t')
.exclude('\r')
.exclude('\n')
})
},
{
"character groups in sequences",
"x([^x]|\\\\x)*x",
@ -171,6 +195,12 @@ describe("parse_regex", []() {
CharacterSet{{'[', '\\', ']'}}
},
{
"escaped characters in ranges",
"[\\0-\\n]",
CharacterSet().include(0, '\n')
},
{
"escaped periods",
"a\\.",