Simplify lex item set transitions code

This commit is contained in:
Max Brunsfeld 2015-10-10 12:33:12 -07:00
parent 25791085c3
commit db9966b57c
13 changed files with 378 additions and 428 deletions

View file

@ -40,36 +40,256 @@ describe("LexItem", []() {
});
});
describe("lex_item_set_transitions", [&]() {
describe("when two items in the set have transitions on the same character", [&]() {
it("merges the transitions by computing the union of the two item sets", [&]() {
LexItemSet set1({
LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
});
describe("LexItemSet::transitions()", [&]() {
it("handles single characters", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), character({ 'x' })),
});
AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
{
CharacterSet().include('a', 'd'),
CharacterSet().include('x'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
}
})));
});
// A four-character sequence must transition on its first character only,
// leaving a sequence of the remaining three characters.
it("handles sequences", [&]() {
  LexItemSet items({
    LexItem(Symbol(1), seq({
      character({ 'w' }),
      character({ 'x' }),
      character({ 'y' }),
      character({ 'z' }),
    })),
  });

  const map<CharacterSet, LexItemSet> expected({
    {
      CharacterSet().include('w'),
      LexItemSet({
        LexItem(Symbol(1), seq({
          character({ 'x' }),
          character({ 'y' }),
          character({ 'z' }),
        })),
      })
    }
  });

  AssertThat(items.transitions(), Equals(expected));
});
// When the head of a sequence is a choice that may be blank, the expected
// transitions cover both the optional head ('x') and the element after it ('y').
it("handles sequences where the left hand side can be blank", [&]() {
  LexItemSet items({
    LexItem(Symbol(1), seq({
      choice({
        character({ 'x' }),
        blank(),
      }),
      character({ 'y' }),
      character({ 'z' }),
    })),
  });

  const map<CharacterSet, LexItemSet> expected({
    {
      // The optional 'x' was consumed: 'y' then 'z' remain.
      CharacterSet().include('x'),
      LexItemSet({
        LexItem(Symbol(1), seq({
          character({ 'y' }),
          character({ 'z' }),
        })),
      })
    },
    {
      // The optional head was skipped and 'y' consumed: only 'z' remains.
      CharacterSet().include('y'),
      LexItemSet({
        LexItem(Symbol(1), character({ 'z' })),
      })
    }
  });

  AssertThat(items.transitions(), Equals(expected));
});
// An item whose rule is blank() must produce no transitions at all.
it("handles blanks", [&]() {
  LexItemSet items({
    LexItem(Symbol(1), blank()),
  });

  const auto outgoing = items.transitions();
  AssertThat(outgoing, IsEmpty());
});
// Two repeat items for different symbols: each must transition on the first
// character of its repeated content.
it("handles repeats", [&]() {
  LexItemSet items({
    LexItem(Symbol(1), repeat(seq({
      character({ 'a' }),
      character({ 'b' }),
    }))),
    LexItem(Symbol(2), repeat(character({ 'c' }))),
  });

  const map<CharacterSet, LexItemSet> expected({
    {
      // After 'a': finish the current "ab", then the whole repeat again.
      CharacterSet().include('a'),
      LexItemSet({
        LexItem(Symbol(1), seq({
          character({ 'b' }),
          repeat(seq({
            character({ 'a' }),
            character({ 'b' }),
          }))
        }))
      })
    },
    {
      // After 'c': the same single-character repeat is expected again.
      CharacterSet().include('c'),
      LexItemSet({
        LexItem(Symbol(2), repeat(character({ 'c' }))),
      })
    }
  });

  AssertThat(items.transitions(), Equals(expected));
});
// One item choosing between two sequences whose leading character sets overlap
// ('a'-'d' vs 'c'-'f'). The expected map splits the characters into three
// disjoint ranges: only the first branch, both branches, only the second.
// (Stray entries expecting Symbol(2) items and a 'g'-'x' range were left in
// this block; the input holds only Symbol(1) and no character past 'f', so
// they are removed and the 'e'-'f' entry now expects the 'y' continuation.)
it("handles choices between overlapping character sets", [&]() {
  LexItemSet item_set({
    LexItem(Symbol(1), choice({
      seq({
        character({ 'a', 'b', 'c', 'd' }),
        character({ 'x' }),
      }),
      seq({
        character({ 'c', 'd', 'e', 'f' }),
        character({ 'y' }),
      }),
    }))
  });

  AssertThat(
    item_set.transitions(),
    Equals(map<CharacterSet, LexItemSet>({
      {
        CharacterSet().include('a', 'b'),
        LexItemSet({
          LexItem(Symbol(1), character({ 'x' })),
        })
      },
      {
        CharacterSet().include('c', 'd'),
        LexItemSet({
          LexItem(Symbol(1), character({ 'x' })),
          LexItem(Symbol(1), character({ 'y' })),
        })
      },
      {
        CharacterSet().include('e', 'f'),
        LexItemSet({
          LexItem(Symbol(1), character({ 'y' })),
        })
      },
    })));
});
// One branch's leading set ('b'-'d') lies entirely inside the other's
// ('a'-'f'): the shared characters must lead to both continuations, the
// leftover characters only to the wider branch's continuation.
it("handles choices between a subset and a superset of characters", [&]() {
  LexItemSet items({
    LexItem(Symbol(1), choice({
      seq({
        character({ 'b', 'c', 'd' }),
        character({ 'x' }),
      }),
      seq({
        character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
        character({ 'y' }),
      }),
    })),
  });

  const map<CharacterSet, LexItemSet> expected({
    {
      CharacterSet().include('a').include('e', 'f'),
      LexItemSet({
        LexItem(Symbol(1), character({ 'y' })),
      })
    },
    {
      CharacterSet().include('b', 'd'),
      LexItemSet({
        LexItem(Symbol(1), character({ 'x' })),
        LexItem(Symbol(1), character({ 'y' })),
      })
    },
  });

  AssertThat(items.transitions(), Equals(expected));
});
// Mixes an excluding character rule (character({ '/' }, false)) with an
// including one ('\\'). The expected map separates the backslash from every
// other non-slash character.
it("handles choices between whitelisted and blacklisted character sets", [&]() {
  LexItemSet items({
    LexItem(Symbol(1), seq({
      choice({
        character({ '/' }, false),
        seq({
          character({ '\\' }),
          character({ '/' }),
        }),
      }),
      character({ '/' }),
    }))
  });

  const map<CharacterSet, LexItemSet> expected({
    {
      CharacterSet().include_all().exclude('/').exclude('\\'),
      LexItemSet({
        LexItem(Symbol(1), character({ '/' })),
      })
    },
    {
      // A backslash matches both branches of the choice, so two items remain.
      CharacterSet().include('\\'),
      LexItemSet({
        LexItem(Symbol(1), character({ '/' })),
        LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
      })
    },
  });

  AssertThat(items.transitions(), Equals(expected));
});
// Two items for different symbols with overlapping sets ('a'-'f', 'e'-'i'):
// the shared range 'e'-'f' must map to a set containing both items.
it("handles different items with overlapping character sets", [&]() {
  LexItemSet items({
    LexItem(Symbol(1), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
    LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
  });

  const map<CharacterSet, LexItemSet> expected({
    {
      CharacterSet().include('a', 'd'),
      LexItemSet({
        LexItem(Symbol(1), blank()),
      })
    },
    {
      CharacterSet().include('e', 'f'),
      LexItemSet({
        LexItem(Symbol(1), blank()),
        LexItem(Symbol(2), blank()),
      })
    },
    {
      CharacterSet().include('g', 'i'),
      LexItemSet({
        LexItem(Symbol(2), blank()),
      })
    },
  });

  AssertThat(items.transitions(), Equals(expected));
});
});

View file

@ -1,90 +0,0 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/merge_transitions.h"
using namespace rules;
using namespace build_tables;
START_TEST
describe("merge_transition", []() {
// Map type used by every case in this spec: character set -> int value.
using int_map = map<CharacterSet, int>;

// Forwards to merge_transition<int>, supplying a callback that ORs the
// right-hand value into the left-hand one.
auto do_merge = [&](int_map *left, const pair<CharacterSet, int> &new_pair) {
  auto combine_bits = [](int *existing, const int *incoming) {
    *existing |= *incoming;
  };
  merge_transition<int>(left, new_pair, combine_bits);
};
// Merging pairs whose character sets touch none of the existing keys: the
// expected result is the original entries plus the two new ones, unchanged.
describe("when none of the transitions intersect", [&]() {
  it("returns the union of the two sets of transitions", [&]() {
    int_map transitions({
      { CharacterSet().include('a').include('c'), 1 },
      { CharacterSet().include('x').include('y'), 2 },
      { CharacterSet().include('1').include('9'), 4 },
    });

    do_merge(&transitions, { CharacterSet().include(' '), 8 });
    do_merge(&transitions, { CharacterSet().include('\t'), 16 });

    const int_map expected({
      { CharacterSet().include('a').include('c'), 1 },
      { CharacterSet().include('x').include('y'), 2 },
      { CharacterSet().include('1').include('9'), 4 },
      { CharacterSet().include(' '), 8 },
      { CharacterSet().include('\t'), 16 },
    });
    AssertThat(transitions, Equals(expected));
  });
});
// Merging a single character that falls inside an existing key: the expected
// map carves that character out into its own entry holding the OR of both
// values (1|4 == 5, 2|8 == 10), while the remainder keeps the old value.
describe("when transitions intersect", [&]() {
  it("merges the intersecting transitions using the provided function", [&]() {
    int_map transitions({
      { CharacterSet().include('a', 'f').include('A', 'F'), 1 },
      { CharacterSet().include('0', '9'), 2 },
    });

    do_merge(&transitions, { CharacterSet().include('c'), 4 });
    do_merge(&transitions, { CharacterSet().include('3'), 8 });

    const int_map expected({
      {
        CharacterSet()
          .include('a', 'b')
          .include('d', 'f')
          .include('A', 'F'),
        1
      },
      {
        CharacterSet().include('c'),
        5
      },
      {
        CharacterSet().include('0', '2').include('4', '9'),
        2
      },
      {
        CharacterSet().include('3'),
        10
      },
    });
    AssertThat(transitions, Equals(expected));
  });
});
// Two successive merges each hit a different character of the same original
// key; the expected map has one entry per character with the OR-ed values
// (1|2 == 3, 1|4 == 5).
describe("when two of the right transitions intersect the same left transition", [&]() {
  it("splits the left-hand transition correctly", [&]() {
    int_map transitions({
      { CharacterSet().include('a').include('c'), 1 },
    });

    do_merge(&transitions, { CharacterSet().include('a'), 2 });
    do_merge(&transitions, { CharacterSet().include('c'), 4 });

    const int_map expected({
      { CharacterSet().include('a'), 3 },
      { CharacterSet().include('c'), 5 },
    });
    AssertThat(transitions, Equals(expected));
  });
});
});
END_TEST

View file

@ -1,173 +0,0 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/rules/metadata.h"
using namespace rules;
using namespace build_tables;
class transition_map : public std::map<CharacterSet, rule_ptr> {
public:
bool operator==(const std::map<CharacterSet, rule_ptr> &other) const {
if (this->size() != other.size()) return false;
for (const auto &pair : *this) {
auto other_pair = other.find(pair.first);
if (other_pair == other.end()) return false;
if (!pair.second->operator==(*other_pair->second)) return false;
}
return true;
}
transition_map(const std::initializer_list<std::pair<const CharacterSet, rule_ptr>> &list) :
std::map<CharacterSet, rule_ptr>(list) {}
};
START_TEST
describe("rule_transitions", []() {
// A single-character rule is expected to transition on that character to blank().
it("handles single characters", [&]() {
  const transition_map expected({
    { CharacterSet().include('1'), blank() }
  });

  AssertThat(rule_transitions(character({ '1' })), Equals(expected));
});
// A two-element sequence is expected to transition on its first character,
// leaving just the second element.
it("handles sequences", [&]() {
  const auto input = seq({ character({ '1' }), character({ '2' }) });

  const transition_map expected({
    { CharacterSet().include('1'), character({ '2' }) }
  });

  AssertThat(rule_transitions(input), Equals(expected));
});
// With four elements, the expected remainder after the first character is a
// sequence of the other three.
it("handles long sequences", [&]() {
  const auto input = seq({
    character({ '1' }),
    character({ '2' }),
    character({ '3' }),
    character({ '4' })
  });

  const transition_map expected({
    {
      CharacterSet().include('1'),
      seq({ character({ '2' }), character({ '3' }), character({ '4' }) }),
    }
  });

  AssertThat(rule_transitions(input), Equals(expected));
});
// The optional head and the following sequence both start with '1', so a
// single transition on '1' is expected, leading to a choice between the two
// possible remainders.
it("handles sequences whose left sides can be blank", [&]() {
  const auto input = seq({
    choice({
      character({ '1' }),
      blank() }),
    seq({
      character({ '1' }),
      character({ '2' }) })
  });

  const transition_map expected({
    {
      CharacterSet().include('1'),
      choice({ seq({ character({ '1' }), character({ '2' }) }), character({ '2' }), }),
    }
  });

  AssertThat(rule_transitions(input), Equals(expected));
});
// Leading sets 'a'-'d' and 'c'-'f' overlap on 'c'-'d': three disjoint ranges
// are expected, with the overlap leading to a choice of both continuations.
it("handles choices between overlapping character sets", [&]() {
  const auto input = choice({
    seq({
      character({ 'a', 'b', 'c', 'd' }),
      sym("x") }),
    seq({
      character({ 'c', 'd', 'e', 'f' }),
      sym("y") }) });

  const transition_map expected({
    { CharacterSet().include('a', 'b'), sym("x") },
    { CharacterSet().include('c', 'd'), choice({ sym("x"), sym("y") }) },
    { CharacterSet().include('e', 'f'), sym("y") },
  });

  AssertThat(rule_transitions(input), Equals(expected));
});
// Mixes an excluding character rule (character({ '/' }, false)) with an
// including one ('\\'); the backslash is expected to get its own transition,
// separate from all other non-slash characters.
it("handles choices between whitelisted and blacklisted character sets", [&]() {
  const auto input = seq({
    choice({
      character({ '/' }, false),
      seq({
        character({ '\\' }),
        character({ '/' }) }) }),
    character({ '/' }) });

  const transition_map expected({
    { CharacterSet()
        .include_all()
        .exclude('/')
        .exclude('\\'),
      character({ '/' }) },
    { CharacterSet()
        .include('\\'),
      seq({
        choice({
          blank(),
          character({ '/' }) }),
        character({ '/' }) }) },
  });

  AssertThat(rule_transitions(input), Equals(expected));
});
// Checks both orderings of a choice where one leading set ('b'-'d') is
// contained in the other ('a'-'f'): the shared range leads to both
// continuations, the rest only to the wider branch's continuation.
it("handles choices between a subset and a superset of characters", [&]() {
  // Narrow branch listed first.
  const auto narrow_first = choice({
    seq({
      character({ 'b', 'c', 'd' }),
      sym("x") }),
    seq({
      character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
      sym("y") }) });
  const transition_map expected_narrow_first({
    { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) },
    { CharacterSet().include('a').include('e', 'f'), sym("y") },
  });
  AssertThat(rule_transitions(narrow_first), Equals(expected_narrow_first));

  // Wide branch listed first.
  const auto wide_first = choice({
    seq({
      character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
      sym("x") }),
    seq({
      character({ 'b', 'c', 'd' }),
      sym("y") }) });
  const transition_map expected_wide_first({
    { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) },
    { CharacterSet().include('a').include('e', 'f'), sym("x") },
  });
  AssertThat(rule_transitions(wide_first), Equals(expected_wide_first));
});
// blank() is expected to have no transitions.
it("handles blanks", [&]() {
  const transition_map expected({});
  AssertThat(rule_transitions(blank()), Equals(expected));
});
// Repeats are expected to transition on the first character of their content
// and then continue with the rest of the content followed by the same repeat.
it("handles repeats", [&]() {
  // Repeat of a two-character sequence: 'b' remains, then the repeat again.
  const rule_ptr pair_repeat = repeat(seq({ character({ 'a' }), character({ 'b' }) }));
  const transition_map expected_for_pair({
    {
      CharacterSet().include('a'),
      seq({
        character({ 'b' }),
        pair_repeat })
    }});
  AssertThat(rule_transitions(pair_repeat), Equals(expected_for_pair));

  // Repeat of a single character: the repeat itself is the expected remainder.
  const rule_ptr single_repeat = repeat(character({ 'a' }));
  const transition_map expected_for_single({
    { CharacterSet().include('a'), single_repeat }
  });
  AssertThat(rule_transitions(single_repeat), Equals(expected_for_single));
});
});
END_TEST