// Specs for prepare_grammar::parse_regex.
#include "compiler/compiler_spec_helper.h"
|
|
#include "compiler/prepare_grammar/parse_regex.h"
|
|
|
|
START_TEST
|
|
|
|
using namespace rules;
|
|
using prepare_grammar::parse_regex;
|
|
|
|
describe("parse_regex", []() {
|
|
struct ValidInputRow {
|
|
string description;
|
|
string pattern;
|
|
rule_ptr rule;
|
|
};
|
|
|
|
// Patterns that must parse successfully, each paired with the exact rule
// tree that parse_regex is expected to produce. Every description becomes
// part of a spec name ("parses <description>").
vector<ValidInputRow> valid_inputs = {
  {
    "character sets",
    "[aAeE]",
    character({ 'a', 'A', 'e', 'E' })
  },

  // The wildcard is expected as "any character except newline"; the
  // trailing `false` marks the listed characters as excluded (compare the
  // negated-class cases below).
  {
    "'.' characters as wildcards",
    ".",
    character({ '\n' }, false)
  },

  // Word, digit and whitespace classes expand to explicit character lists.
  {
    "character classes",
    "\\w-\\d-\\s",
    seq({
      character({
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_',
      }),
      character({ '-' }),
      character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }),
      character({ '-' }),
      character({ ' ', '\t', '\r', '\n' }) })
  },

  {
    "choices",
    "ab|cd|ef",
    choice({
      seq({
        character({ 'a' }),
        character({ 'b' }),
      }),
      seq({
        character({ 'c' }),
        character({ 'd' })
      }),
      seq({
        character({ 'e' }),
        character({ 'f' })
      })
    })
  },

  {
    "simple sequences",
    "abc",
    seq({
      character({ 'a' }),
      character({ 'b' }),
      character({ 'c' }) })
  },

  {
    "character ranges",
    "[12a-dA-D3]",
    character({
      '1', '2', '3',
      'a', 'b', 'c', 'd',
      'A', 'B', 'C', 'D' })
  },

  {
    "negated characters",
    "[^a\\d]",
    character({ 'a', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false)
  },

  {
    "backslashes",
    "\\\\",
    character({ '\\' })
  },

  {
    "character groups in sequences",
    "x([^x]|\\\\x)*x",
    seq({
      character({ 'x' }),
      repeat(choice({
        character({ 'x' }, false),
        seq({ character({ '\\' }), character({ 'x' }) })
      })),
      character({ 'x' })
    })
  },

  {
    "choices in sequences",
    "(a|b)cd",
    seq({
      choice({
        character({ 'a' }),
        character({ 'b' }),
      }),
      character({ 'c' }),
      character({ 'd' })
    })
  },

  {
    "escaped parentheses",
    "a\\(b",
    seq({
      character({ 'a' }),
      character({ '(' }),
      character({ 'b' })
    })
  },

  {
    "escaped periods",
    "a\\.",
    seq({
      character({ 'a' }),
      character({ '.' })
    })
  },

  {
    "escaped characters",
    "\\t\\n\\r",
    seq({
      character({ '\t' }),
      character({ '\n' }),
      character({ '\r' }),
    })
  },

  // A `+` repeat is expected as the operand followed by a repeat of it.
  {
    "plus repeats",
    "(ab)+(cd)+",
    seq({
      seq({
        seq({ character({ 'a' }), character({ 'b' }) }),
        repeat(seq({ character({ 'a' }), character({ 'b' }) })),
      }),
      seq({
        seq({ character({ 'c' }), character({ 'd' }) }),
        repeat(seq({ character({ 'c' }), character({ 'd' }) })),
      }),
    })
  },

  {
    "asterisk repeats",
    "(ab)*(cd)*",
    seq({
      repeat(seq({ character({ 'a' }), character({ 'b' }) })),
      repeat(seq({ character({ 'c' }), character({ 'd' }) })),
    })
  },

  // A `?` suffix is expected as a choice between the operand and blank().
  {
    "optional rules",
    "a(bc)?",
    seq({
      character({ 'a' }),
      choice({
        seq({ character({ 'b' }), character({ 'c' }) }),
        blank()
      })
    })
  },

  {
    "choices containing negated character classes",
    "/([^/]|(\\\\/))*/",
    seq({
      character({ '/' }),
      repeat(choice({
        character({ '/' }, false),
        seq({ character({ '\\' }), character({ '/' }) }),
      })),
      character({ '/' }),
    }),
  }
};
|
|
|
|
struct InvalidInputRow {
|
|
string description;
|
|
string pattern;
|
|
const char *message;
|
|
};
|
|
|
|
// Malformed patterns, each paired with a fragment that must appear in the
// error message reported for it. Every description becomes part of a spec
// name ("handles invalid regexes with <description>"), so descriptions are
// kept unique to make a failing spec identifiable from its name alone.
vector<InvalidInputRow> invalid_inputs = {
  {
    "mismatched open parens",
    "(a",
    "unmatched open paren",
  },
  {
    "mismatched nested open parens",
    "((a) (b)",
    "unmatched open paren",
  },
  {
    "mismatched close parens",
    "a)",
    "unmatched close paren",
  },
  {
    "mismatched nested close parens",
    "((a) b))",
    "unmatched close paren",
  },
  {
    "mismatched open brackets for character classes",
    "[a",
    "unmatched open square bracket",
  },
  {
    "mismatched close brackets for character classes",
    "a]",
    "unmatched close square bracket",
  },
};
|
|
|
|
// Register one spec per valid pattern: the rule produced by parse_regex
// (first element of its result) must match the expected rule of the case.
for (auto &valid_case : valid_inputs) {
  const auto spec_name = "parses " + valid_case.description;
  it(spec_name.c_str(), [&]() {
    auto parsed = parse_regex(valid_case.pattern);
    AssertThat(parsed.first, EqualsPointer(valid_case.rule));
  });
}
|
|
|
|
// Register one spec per malformed pattern: parse_regex must report an error
// (second element of its result) whose message contains the expected text.
for (auto &invalid_case : invalid_inputs) {
  const auto spec_name = "handles invalid regexes with " + invalid_case.description;
  it(spec_name.c_str(), [&]() {
    auto outcome = parse_regex(invalid_case.pattern);

    // Typed null pointer so the comparison is made against the error
    // pointer type rather than a bare nullptr.
    const GrammarError *no_error = nullptr;
    AssertThat(outcome.second, !Equals(no_error));
    AssertThat(outcome.second->message, Contains(invalid_case.message));
  });
}
|
|
});
|
|
|
|
END_TEST
|