// C++ source — 177 lines, 5 KiB
#include "compiler_spec_helper.h"
|
|
#include "compiler/rules/pattern.h"
|
|
#include "compiler/rules/character_set.h"
|
|
|
|
using namespace rules;
|
|
|
|
START_TEST
|
|
|
|
describe("parsing regex pattern rules", []() {
|
|
it("parses simple strings", [&]() {
  Pattern pattern("abc");

  // Expectation: one single-character rule per literal in the pattern,
  // combined in order into a sequence.
  const auto expected = seq({
    character({ 'a' }),
    character({ 'b' }),
    character({ 'c' })
  });

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
|
|
it("parses wildcard '.' characters", [&]() {
  Pattern pattern(".");

  // Expectation: the complement of the set that contains only '\n'.
  const auto everything_but_newline = CharacterSet({'\n'}).complement().copy();

  AssertThat(pattern.to_rule_tree(), EqualsPointer(everything_but_newline));
});
|
|
|
|
it("parses character classes", []() {
  Pattern pattern("\\w-\\d");

  // Expectation encoded below: `\w` yields the ranges a-z, A-Z and 0-9,
  // the `-` between the two classes is a plain character, and `\d`
  // yields the range 0-9.
  const auto word_chars = character({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'} });
  const auto dash = character({ '-' });
  const auto digits = character({ {'0', '9'} });

  AssertThat(
    pattern.to_rule_tree(),
    EqualsPointer(seq({ word_chars, dash, digits })));
});
|
|
|
|
it("parses choices", []() {
  Pattern pattern("ab|cd|ef");

  // Each `|`-separated alternative is expected to be its own two-character
  // sequence; the three sequences are then wrapped in a single choice.
  const auto first_alternative = seq({ character({ 'a' }), character({ 'b' }) });
  const auto second_alternative = seq({ character({ 'c' }), character({ 'd' }) });
  const auto third_alternative = seq({ character({ 'e' }), character({ 'f' }) });

  const auto expected = choice({
    first_alternative,
    second_alternative,
    third_alternative
  });

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
|
|
it("parses character sets", []() {
  Pattern pattern("[aAeE]");

  // A bracketed list is expected to collapse into one character rule
  // holding every listed character.
  const auto expected = character({ 'a', 'A', 'e', 'E' });

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
|
|
it("parses character ranges", []() {
  Pattern pattern("[12a-dA-D3]");

  // The individually listed '1', '2' and '3' are expected to compare equal
  // to the range '1'-'3'; the explicit ranges a-d and A-D are kept as given.
  const auto expected = character({ {'1', '3'}, {'a', 'd'}, {'A', 'D'} });

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
|
|
it("parses negated characters", []() {
  Pattern pattern("[^a\\d]");

  // The expectation lists 'a' and 0-9 and passes `false` as the second
  // argument of `character` — presumably the flag that marks the listed
  // characters as excluded; confirm against the helper's declaration.
  const auto expected = character({ {'a'}, {'0', '9'} }, false);

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
|
|
it("parses backslashes", []() {
  // The C++ literal below is the two-character pattern `\\`.
  Pattern pattern("\\\\");

  // Expectation: a single rule matching one backslash character.
  const auto expected = character({ '\\' });

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
|
|
it("parses character groups in sequences", []() {
  // Un-escaped, the pattern reads: "([^"]|\\")*"
  Pattern pattern("\"([^\"]|\\\\\")*\"");

  // Inside the quotes, the repeated group is a choice between the `[^"]`
  // expectation (written with `false` as the second argument) and a
  // backslash followed by a quote.
  const auto non_quote = character({ '"' }, false);
  const auto escaped_quote = seq({ character({ '\\' }), character({ '"' }) });

  const auto expected = seq({
    character({ '"' }),
    repeat(choice({ non_quote, escaped_quote })),
    character({ '"' })
  });

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
|
|
it("parses choices in sequences", []() {
  Pattern pattern("(a|b)cd");

  // The parenthesised alternation is expected to be the first element of
  // the sequence, followed by the two trailing literal characters.
  const auto a_or_b = choice({ character({ 'a' }), character({ 'b' }) });

  const auto expected = seq({
    a_or_b,
    character({ 'c' }),
    character({ 'd' })
  });

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
|
|
it("parses special characters when they are escaped", []() {
  // An escaped '(' is expected to be an ordinary character rule.
  Pattern paren_pattern("a\\(b");
  const auto expected_paren = seq({
    character({ 'a' }),
    character({ '(' }),
    character({ 'b' })
  });
  AssertThat(paren_pattern.to_rule_tree(), EqualsPointer(expected_paren));

  // Likewise an escaped '.' is expected to be the literal dot character.
  Pattern dot_pattern("a\\.");
  const auto expected_dot = seq({
    character({ 'a' }),
    character({ '.' })
  });
  AssertThat(dot_pattern.to_rule_tree(), EqualsPointer(expected_dot));
});
|
|
|
|
it("parses repeating rules", []() {
  // `+`: each group is expected as the group itself followed by a repeat
  // of that same group.
  Pattern plus_pattern("(ab)+(cd)+");
  const auto one_or_more_ab = seq({
    seq({ character({ 'a' }), character({ 'b' }) }),
    repeat(seq({ character({ 'a' }), character({ 'b' }) }))
  });
  const auto one_or_more_cd = seq({
    seq({ character({ 'c' }), character({ 'd' }) }),
    repeat(seq({ character({ 'c' }), character({ 'd' }) }))
  });
  AssertThat(
    plus_pattern.to_rule_tree(),
    EqualsPointer(seq({ one_or_more_ab, one_or_more_cd })));

  // `*`: each group is expected as a bare repeat of the group.
  Pattern star_pattern("(ab)*(cd)*");
  const auto any_number_of_ab = repeat(seq({ character({ 'a' }), character({ 'b' }) }));
  const auto any_number_of_cd = repeat(seq({ character({ 'c' }), character({ 'd' }) }));
  AssertThat(
    star_pattern.to_rule_tree(),
    EqualsPointer(seq({ any_number_of_ab, any_number_of_cd })));
});
|
|
|
|
it("parses optional rules", []() {
  Pattern pattern("a(bc)?");

  // `?` is expected as a choice between the group and `blank()`.
  const auto optional_bc = choice({
    seq({ character({ 'b' }), character({ 'c' }) }),
    blank()
  });

  const auto expected = seq({
    character({ 'a' }),
    optional_bc
  });

  AssertThat(pattern.to_rule_tree(), EqualsPointer(expected));
});
|
|
});
|
|
|
|
END_TEST
|