From e92ac719f426aacaefcb6dd1f067b22af8f061fc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 8 Feb 2014 15:26:10 -0800 Subject: [PATCH] Implement CharacterSet intersections --- character_set_spec.cpp | 44 +++++++++++++++++++++------- src/compiler/rules/character_set.cpp | 32 ++++++++++++-------- src/compiler/rules/character_set.h | 3 +- 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/character_set_spec.cpp b/character_set_spec.cpp index 7822553a..772db771 100644 --- a/character_set_spec.cpp +++ b/character_set_spec.cpp @@ -6,35 +6,37 @@ using namespace rules; START_TEST describe("character sets", []() { - describe("computing the complement", []() { - it("works for the set containing only the null character", []() { + char max_char = 255; + + describe("computing the complement", [&]() { + it("works for the set containing only the null character", [&]() { CharacterSet set1({ '\0' }); auto set2 = set1.complement(); AssertThat(set2, Equals(CharacterSet({ - { 1, -1 }, + { 1, max_char }, }, true))); AssertThat(set2.complement(), Equals(set1)); }); - it("works for single character sets", []() { + it("works for single character sets", [&]() { CharacterSet set1({ 'b' }); auto set2 = set1.complement(); AssertThat(set2, Equals(CharacterSet({ { 0, 'a' }, - { 'c', -1 }, + { 'c', max_char }, }))); AssertThat(set2.complement(), Equals(set1)); }); }); - describe("computing unions", []() { - it("works for disjoint sets", []() { + describe("computing unions", [&]() { + it("works for disjoint sets", [&]() { CharacterSet set({ {'a', 'z'} }, true); set.add_set(CharacterSet({ {'A', 'Z'} }, true)); AssertThat(set, Equals(CharacterSet({ {'a', 'z'}, {'A', 'Z'}, }))); }); - it("works for sets with adjacent ranges", []() { + it("works for sets with adjacent ranges", [&]() { CharacterSet set({ {'a', 'r'} }, true); set.add_set(CharacterSet({ {'s', 'z'} }, true)); AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true))); @@ -42,7 +44,7 @@ describe("character sets", []() { set = CharacterSet({ 'c' }); auto c = set.complement(); set.add_set(c); - AssertThat(set, Equals(CharacterSet({ {0, -1} }, true))); + AssertThat(set, Equals(CharacterSet({ {0, max_char} }, true))); }); it("works when the result becomes a continuous range", []() { @@ -51,10 +53,10 @@ describe("character sets", []() { AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true))); }); - it("does nothing for the set of all characters", []() { + it("does nothing for the set of all characters", [&]() { CharacterSet set({ 'a' }); set.add_set(set.complement()); - AssertThat(set, Equals(CharacterSet({ {'\0', '\xff'} }, true))); + AssertThat(set, Equals(CharacterSet({ {'\0', max_char} }, true))); }); }); @@ -87,6 +89,26 @@ describe("character sets", []() { AssertThat(set1, Equals(CharacterSet({ {'a', 'b'}, {'p','r'}, {'y','z'} }))); }); }); + + describe("computing intersections", []() { + it("returns an empty set for disjoint sets", []() { + CharacterSet set1({ {'a','d'} }, true); + CharacterSet set2({ {'e','x'} }, true); + AssertThat(set1.intersect(set2), Equals(CharacterSet())); + }); + + it("works for sets with a single overlapping range", []() { + CharacterSet set1({ {'a','e'} }, true); + CharacterSet set2({ {'c','x'} }, true); + AssertThat(set1.intersect(set2), Equals(CharacterSet({ {'c', 'e'} }, true))); + }); + + it("works for sets with two overlapping ranges", []() { + CharacterSet set1({ {'a','e'}, {'w','z'} }, true); + CharacterSet set2({ {'c','y'} }, true); + AssertThat(set1.intersect(set2), Equals(CharacterSet({ {'c', 'e'}, {'w', 'y'} }))); + }); + }); }); END_TEST \ No newline at end of file diff --git a/src/compiler/rules/character_set.cpp b/src/compiler/rules/character_set.cpp index 0938e078..7db04b79 100644 --- a/src/compiler/rules/character_set.cpp +++ b/src/compiler/rules/character_set.cpp @@ -80,16 +80,9 @@ namespace tree_sitter { } CharacterSet CharacterSet::complement() const { - set result; - char current_char = 0; - for (auto &range : ranges) { - if (range.min != 0) - result.insert(CharacterRange(current_char, range.min - 1)); - current_char = range.max + 1; - } - if (current_char != 0) - result.insert(CharacterRange(current_char, MAX_CHAR)); - return CharacterSet(result); + CharacterSet result({ {0, MAX_CHAR} }, true); + result.remove_set(*this); + return result; } std::pair CharacterSet::most_compact_representation() const { @@ -130,7 +123,8 @@ namespace tree_sitter { self->ranges = new_ranges; } - void remove_range(CharacterSet *self, CharacterRange new_range) { + CharacterSet remove_range(CharacterSet *self, CharacterRange new_range) { + CharacterSet removed_set; set new_ranges; auto new_min = min_int(new_range); auto new_max = max_int(new_range); @@ -141,19 +135,23 @@ namespace tree_sitter { new_ranges.insert(range); } else if (new_max <= max_int(range)) { new_ranges.insert(CharacterRange(new_max + 1, range.max)); + add_range(&removed_set, CharacterRange(range.min, new_max)); } } else if (new_min <= max_int(range)) { if (new_max < max_int(range)) { new_ranges.insert(CharacterRange(range.min, new_min - 1)); new_ranges.insert(CharacterRange(new_max + 1, range.max)); + add_range(&removed_set, new_range); } else { new_ranges.insert(CharacterRange(range.min, new_min - 1)); + add_range(&removed_set, CharacterRange(new_min, range.max)); } } else { new_ranges.insert(range); } } self->ranges = new_ranges; + return removed_set; } void CharacterSet::add_set(const CharacterSet &other) { @@ -162,10 +160,18 @@ namespace tree_sitter { } } - void CharacterSet::remove_set(const CharacterSet &other) { + CharacterSet CharacterSet::remove_set(const CharacterSet &other) { + CharacterSet result; for (auto &other_range : other.ranges) { - remove_range(this, other_range); + auto removed_set = remove_range(this, other_range); + result.add_set(removed_set); } + return result; + } + + CharacterSet CharacterSet::intersect(const CharacterSet &set) const { + CharacterSet copy = *this; + return copy.remove_set(set); } void CharacterSet::accept(Visitor &visitor) const { diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index 89955369..5838e7eb 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -36,10 +36,11 @@ namespace tree_sitter { CharacterSet(const std::set &ranges, bool); CharacterSet complement() const; + CharacterSet intersect(const CharacterSet &) const; std::pair most_compact_representation() const; void add_set(const CharacterSet &other); - void remove_set(const CharacterSet &other); + CharacterSet remove_set(const CharacterSet &other); bool operator==(const Rule& other) const; size_t hash_code() const;