Implement CharacterSet intersections
This commit is contained in:
parent
b01c672fca
commit
e92ac719f4
3 changed files with 54 additions and 25 deletions
|
|
@ -6,35 +6,37 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("character sets", []() {
|
||||
describe("computing the complement", []() {
|
||||
it("works for the set containing only the null character", []() {
|
||||
char max_char = 255;
|
||||
|
||||
describe("computing the complement", [&]() {
|
||||
it("works for the set containing only the null character", [&]() {
|
||||
CharacterSet set1({ '\0' });
|
||||
auto set2 = set1.complement();
|
||||
AssertThat(set2, Equals(CharacterSet({
|
||||
{ 1, -1 },
|
||||
{ 1, max_char },
|
||||
}, true)));
|
||||
AssertThat(set2.complement(), Equals(set1));
|
||||
});
|
||||
|
||||
it("works for single character sets", []() {
|
||||
it("works for single character sets", [&]() {
|
||||
CharacterSet set1({ 'b' });
|
||||
auto set2 = set1.complement();
|
||||
AssertThat(set2, Equals(CharacterSet({
|
||||
{ 0, 'a' },
|
||||
{ 'c', -1 },
|
||||
{ 'c', max_char },
|
||||
})));
|
||||
AssertThat(set2.complement(), Equals(set1));
|
||||
});
|
||||
});
|
||||
|
||||
describe("computing unions", []() {
|
||||
it("works for disjoint sets", []() {
|
||||
describe("computing unions", [&]() {
|
||||
it("works for disjoint sets", [&]() {
|
||||
CharacterSet set({ {'a', 'z'} }, true);
|
||||
set.add_set(CharacterSet({ {'A', 'Z'} }, true));
|
||||
AssertThat(set, Equals(CharacterSet({ {'a', 'z'}, {'A', 'Z'}, })));
|
||||
});
|
||||
|
||||
it("works for sets with adjacent ranges", []() {
|
||||
it("works for sets with adjacent ranges", [&]() {
|
||||
CharacterSet set({ {'a', 'r'} }, true);
|
||||
set.add_set(CharacterSet({ {'s', 'z'} }, true));
|
||||
AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true)));
|
||||
|
|
@ -42,7 +44,7 @@ describe("character sets", []() {
|
|||
set = CharacterSet({ 'c' });
|
||||
auto c = set.complement();
|
||||
set.add_set(c);
|
||||
AssertThat(set, Equals(CharacterSet({ {0, -1} }, true)));
|
||||
AssertThat(set, Equals(CharacterSet({ {0, max_char} }, true)));
|
||||
});
|
||||
|
||||
it("works when the result becomes a continuous range", []() {
|
||||
|
|
@ -51,10 +53,10 @@ describe("character sets", []() {
|
|||
AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true)));
|
||||
});
|
||||
|
||||
it("does nothing for the set of all characters", []() {
|
||||
it("does nothing for the set of all characters", [&]() {
|
||||
CharacterSet set({ 'a' });
|
||||
set.add_set(set.complement());
|
||||
AssertThat(set, Equals(CharacterSet({ {'\0', '\xff'} }, true)));
|
||||
AssertThat(set, Equals(CharacterSet({ {'\0', max_char} }, true)));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -87,6 +89,26 @@ describe("character sets", []() {
|
|||
AssertThat(set1, Equals(CharacterSet({ {'a', 'b'}, {'p','r'}, {'y','z'} })));
|
||||
});
|
||||
});
|
||||
|
||||
describe("computing intersections", []() {
|
||||
it("returns an empty set for disjoint sets", []() {
|
||||
CharacterSet set1({ {'a','d'} }, true);
|
||||
CharacterSet set2({ {'e','x'} }, true);
|
||||
AssertThat(set1.intersect(set2), Equals(CharacterSet()));
|
||||
});
|
||||
|
||||
it("works for sets with a single overlapping range", []() {
|
||||
CharacterSet set1({ {'a','e'} }, true);
|
||||
CharacterSet set2({ {'c','x'} }, true);
|
||||
AssertThat(set1.intersect(set2), Equals(CharacterSet({ {'c', 'e'} }, true)));
|
||||
});
|
||||
|
||||
it("works for sets with two overlapping ranges", []() {
|
||||
CharacterSet set1({ {'a','e'}, {'w','z'} }, true);
|
||||
CharacterSet set2({ {'c','y'} }, true);
|
||||
AssertThat(set1.intersect(set2), Equals(CharacterSet({ {'c', 'e'}, {'w', 'y'} })));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -80,16 +80,9 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
CharacterSet CharacterSet::complement() const {
|
||||
set<CharacterRange> result;
|
||||
char current_char = 0;
|
||||
for (auto &range : ranges) {
|
||||
if (range.min != 0)
|
||||
result.insert(CharacterRange(current_char, range.min - 1));
|
||||
current_char = range.max + 1;
|
||||
}
|
||||
if (current_char != 0)
|
||||
result.insert(CharacterRange(current_char, MAX_CHAR));
|
||||
return CharacterSet(result);
|
||||
CharacterSet result({ {0, MAX_CHAR} }, true);
|
||||
result.remove_set(*this);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
|
||||
|
|
@ -130,7 +123,8 @@ namespace tree_sitter {
|
|||
self->ranges = new_ranges;
|
||||
}
|
||||
|
||||
void remove_range(CharacterSet *self, CharacterRange new_range) {
|
||||
CharacterSet remove_range(CharacterSet *self, CharacterRange new_range) {
|
||||
CharacterSet removed_set;
|
||||
set<CharacterRange> new_ranges;
|
||||
auto new_min = min_int(new_range);
|
||||
auto new_max = max_int(new_range);
|
||||
|
|
@ -141,19 +135,23 @@ namespace tree_sitter {
|
|||
new_ranges.insert(range);
|
||||
} else if (new_max <= max_int(range)) {
|
||||
new_ranges.insert(CharacterRange(new_max + 1, range.max));
|
||||
add_range(&removed_set, CharacterRange(range.min, new_max));
|
||||
}
|
||||
} else if (new_min <= max_int(range)) {
|
||||
if (new_max < max_int(range)) {
|
||||
new_ranges.insert(CharacterRange(range.min, new_min - 1));
|
||||
new_ranges.insert(CharacterRange(new_max + 1, range.max));
|
||||
add_range(&removed_set, new_range);
|
||||
} else {
|
||||
new_ranges.insert(CharacterRange(range.min, new_min - 1));
|
||||
add_range(&removed_set, CharacterRange(new_min, range.max));
|
||||
}
|
||||
} else {
|
||||
new_ranges.insert(range);
|
||||
}
|
||||
}
|
||||
self->ranges = new_ranges;
|
||||
return removed_set;
|
||||
}
|
||||
|
||||
void CharacterSet::add_set(const CharacterSet &other) {
|
||||
|
|
@ -162,10 +160,18 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
void CharacterSet::remove_set(const CharacterSet &other) {
|
||||
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
|
||||
CharacterSet result;
|
||||
for (auto &other_range : other.ranges) {
|
||||
remove_range(this, other_range);
|
||||
auto removed_set = remove_range(this, other_range);
|
||||
result.add_set(removed_set);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
|
||||
CharacterSet copy = *this;
|
||||
return copy.remove_set(set);
|
||||
}
|
||||
|
||||
void CharacterSet::accept(Visitor &visitor) const {
|
||||
|
|
|
|||
|
|
@ -36,10 +36,11 @@ namespace tree_sitter {
|
|||
CharacterSet(const std::set<CharacterRange> &ranges, bool);
|
||||
|
||||
CharacterSet complement() const;
|
||||
CharacterSet intersect(const CharacterSet &) const;
|
||||
std::pair<CharacterSet, bool> most_compact_representation() const;
|
||||
|
||||
void add_set(const CharacterSet &other);
|
||||
void remove_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue