Implement CharacterSet intersections

This commit is contained in:
Max Brunsfeld 2014-02-08 15:26:10 -08:00
parent b01c672fca
commit e92ac719f4
3 changed files with 54 additions and 25 deletions

View file

@ -6,35 +6,37 @@ using namespace rules;
START_TEST
describe("character sets", []() {
describe("computing the complement", []() {
it("works for the set containing only the null character", []() {
char max_char = 255;
describe("computing the complement", [&]() {
it("works for the set containing only the null character", [&]() {
CharacterSet set1({ '\0' });
auto set2 = set1.complement();
AssertThat(set2, Equals(CharacterSet({
{ 1, -1 },
{ 1, max_char },
}, true)));
AssertThat(set2.complement(), Equals(set1));
});
it("works for single character sets", []() {
it("works for single character sets", [&]() {
CharacterSet set1({ 'b' });
auto set2 = set1.complement();
AssertThat(set2, Equals(CharacterSet({
{ 0, 'a' },
{ 'c', -1 },
{ 'c', max_char },
})));
AssertThat(set2.complement(), Equals(set1));
});
});
describe("computing unions", []() {
it("works for disjoint sets", []() {
describe("computing unions", [&]() {
it("works for disjoint sets", [&]() {
CharacterSet set({ {'a', 'z'} }, true);
set.add_set(CharacterSet({ {'A', 'Z'} }, true));
AssertThat(set, Equals(CharacterSet({ {'a', 'z'}, {'A', 'Z'}, })));
});
it("works for sets with adjacent ranges", []() {
it("works for sets with adjacent ranges", [&]() {
CharacterSet set({ {'a', 'r'} }, true);
set.add_set(CharacterSet({ {'s', 'z'} }, true));
AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true)));
@ -42,7 +44,7 @@ describe("character sets", []() {
set = CharacterSet({ 'c' });
auto c = set.complement();
set.add_set(c);
AssertThat(set, Equals(CharacterSet({ {0, -1} }, true)));
AssertThat(set, Equals(CharacterSet({ {0, max_char} }, true)));
});
it("works when the result becomes a continuous range", []() {
@ -51,10 +53,10 @@ describe("character sets", []() {
AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true)));
});
it("does nothing for the set of all characters", []() {
it("does nothing for the set of all characters", [&]() {
CharacterSet set({ 'a' });
set.add_set(set.complement());
AssertThat(set, Equals(CharacterSet({ {'\0', '\xff'} }, true)));
AssertThat(set, Equals(CharacterSet({ {'\0', max_char} }, true)));
});
});
@ -87,6 +89,26 @@ describe("character sets", []() {
AssertThat(set1, Equals(CharacterSet({ {'a', 'b'}, {'p','r'}, {'y','z'} })));
});
});
// Specs for CharacterSet::intersect: each case builds two range-based sets and
// compares the computed overlap against the expected set.
describe("computing intersections", []() {
    it("returns an empty set for disjoint sets", []() {
        CharacterSet lower({ {'a','d'} }, true);
        CharacterSet upper({ {'e','x'} }, true);
        auto overlap = lower.intersect(upper);
        // No shared characters, so the result equals a default-constructed set.
        AssertThat(overlap, Equals(CharacterSet()));
    });

    it("works for sets with a single overlapping range", []() {
        CharacterSet lower({ {'a','e'} }, true);
        CharacterSet upper({ {'c','x'} }, true);
        auto overlap = lower.intersect(upper);
        AssertThat(overlap, Equals(CharacterSet({ {'c', 'e'} }, true)));
    });

    it("works for sets with two overlapping ranges", []() {
        // The single range of `spanning` touches both ranges of `split`.
        CharacterSet split({ {'a','e'}, {'w','z'} }, true);
        CharacterSet spanning({ {'c','y'} }, true);
        auto overlap = split.intersect(spanning);
        AssertThat(overlap, Equals(CharacterSet({ {'c', 'e'}, {'w', 'y'} })));
    });
});
});
END_TEST

View file

@ -80,16 +80,9 @@ namespace tree_sitter {
}
// Returns the characters in [0, MAX_CHAR] that are not members of this set.
//
// The result starts as the single full range and has this set's ranges removed
// from it. This set is not modified (the method is const and works on a local).
// The hand-rolled range walk that used to sit in this body is dropped: it
// declared `result` a second time in the same scope and returned before the
// lines below could run.
CharacterSet CharacterSet::complement() const {
    CharacterSet result({ {0, MAX_CHAR} }, true);
    // Only the mutation of `result` matters here; any value remove_set hands
    // back is intentionally ignored.
    result.remove_set(*this);
    return result;
}
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
@ -130,7 +123,8 @@ namespace tree_sitter {
self->ranges = new_ranges;
}
void remove_range(CharacterSet *self, CharacterRange new_range) {
CharacterSet remove_range(CharacterSet *self, CharacterRange new_range) {
CharacterSet removed_set;
set<CharacterRange> new_ranges;
auto new_min = min_int(new_range);
auto new_max = max_int(new_range);
@ -141,19 +135,23 @@ namespace tree_sitter {
new_ranges.insert(range);
} else if (new_max <= max_int(range)) {
new_ranges.insert(CharacterRange(new_max + 1, range.max));
add_range(&removed_set, CharacterRange(range.min, new_max));
}
} else if (new_min <= max_int(range)) {
if (new_max < max_int(range)) {
new_ranges.insert(CharacterRange(range.min, new_min - 1));
new_ranges.insert(CharacterRange(new_max + 1, range.max));
add_range(&removed_set, new_range);
} else {
new_ranges.insert(CharacterRange(range.min, new_min - 1));
add_range(&removed_set, CharacterRange(new_min, range.max));
}
} else {
new_ranges.insert(range);
}
}
self->ranges = new_ranges;
return removed_set;
}
void CharacterSet::add_set(const CharacterSet &other) {
@ -162,10 +160,18 @@ namespace tree_sitter {
}
}
// Removes every range of `other` from this set, in place.
//
// Returns the union of the sets that remove_range reports for each of those
// ranges, i.e. the portion of this set that was taken out.
//
// remove_range must be invoked exactly once per range: it mutates this set, so
// a second call with the same range would find nothing left to remove and
// contribute nothing to the result.
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
    CharacterSet result;
    for (const auto &other_range : other.ranges) {
        result.add_set(remove_range(this, other_range));
    }
    return result;
}
// Returns the characters shared by this set and `set`. Neither operand is
// modified: the removal below runs on a scratch copy.
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
    // remove_set hands back what it stripped out of its receiver, so stripping
    // `set` from a copy of this set yields the overlap.
    CharacterSet scratch(*this);
    return scratch.remove_set(set);
}
void CharacterSet::accept(Visitor &visitor) const {

View file

@ -36,10 +36,11 @@ namespace tree_sitter {
CharacterSet(const std::set<CharacterRange> &ranges, bool);
CharacterSet complement() const;
CharacterSet intersect(const CharacterSet &) const;
std::pair<CharacterSet, bool> most_compact_representation() const;
void add_set(const CharacterSet &other);
// Removes the characters of `other` from this set in place and returns the
// removed portion as a new set. Callers that only need the mutation can
// ignore the return value.
CharacterSet remove_set(const CharacterSet &other);
bool operator==(const Rule& other) const;
size_t hash_code() const;