From 2a222adb7e7549f2a38a729e716a9b8d4f4ea1e0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 31 Mar 2014 18:47:18 -0700 Subject: [PATCH] Represent character sets with unsigned chars This is better for comparing character ranges, since there is a definite maximum character value. --- spec/compiler/rules/character_set_spec.cc | 2 +- src/compiler/rules/character_range.cc | 10 ++-- src/compiler/rules/character_range.h | 12 +++-- src/compiler/rules/character_set.cc | 64 ++++++++--------------- 4 files changed, 35 insertions(+), 53 deletions(-) diff --git a/spec/compiler/rules/character_set_spec.cc b/spec/compiler/rules/character_set_spec.cc index 1306e131..911d9e41 100644 --- a/spec/compiler/rules/character_set_spec.cc +++ b/spec/compiler/rules/character_set_spec.cc @@ -6,7 +6,7 @@ using namespace rules; START_TEST describe("character sets", []() { - char max_char = 255; + unsigned char max_char = 255; describe("computing the complement", [&]() { it("works for the set containing only the null character", [&]() { diff --git a/src/compiler/rules/character_range.cc b/src/compiler/rules/character_range.cc index 96e9d29f..f964a647 100644 --- a/src/compiler/rules/character_range.cc +++ b/src/compiler/rules/character_range.cc @@ -6,10 +6,10 @@ namespace tree_sitter { using std::string; namespace rules { - static const char MAX_CHAR = '\xff'; + static const unsigned char MAX_CHAR = -1; - CharacterRange::CharacterRange(char value) : min(value), max(value) {} - CharacterRange::CharacterRange(char min, char max) : min(min), max(max) {} + CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {} + CharacterRange::CharacterRange(unsigned char min, unsigned char max) : min(min), max(max) {} bool CharacterRange::operator==(const CharacterRange &other) const { return min == other.min && max == other.max; @@ -22,14 +22,14 @@ namespace tree_sitter { return false; } - string escape_character(char input) { + string escape_character(unsigned char input) { switch (input) { case '\0': return ""; case MAX_CHAR: return ""; default: - return string() + input; + return string() + char(input); } } diff --git a/src/compiler/rules/character_range.h b/src/compiler/rules/character_range.h index 22d9126e..b27d312e 100644 --- a/src/compiler/rules/character_range.h +++ b/src/compiler/rules/character_range.h @@ -7,11 +7,13 @@ namespace tree_sitter { namespace rules { struct CharacterRange { - char min; - char max; + unsigned char min; + unsigned char max; + // IMPLICIT_CONSTRUCTORS - CharacterRange(char value); - CharacterRange(char min, char max); + CharacterRange(unsigned char value); + CharacterRange(unsigned char min, unsigned char max); + bool operator==(const CharacterRange &other) const; bool operator<(const CharacterRange &others) const; std::string to_string() const; @@ -23,7 +25,7 @@ namespace std { template<> struct hash { size_t operator()(const tree_sitter::rules::CharacterRange &range) const { - return (hash()(range.min) ^ hash()(range.max)); + return (hash()(range.min) ^ hash()(range.max)); } }; } diff --git a/src/compiler/rules/character_set.cc b/src/compiler/rules/character_set.cc index 1558b671..ef0c0df4 100644 --- a/src/compiler/rules/character_set.cc +++ b/src/compiler/rules/character_set.cc @@ -11,15 +11,7 @@ using std::initializer_list; namespace tree_sitter { namespace rules { - static const char MAX_CHAR = '\xff'; - - int max_int(const CharacterRange &range) { - return range.max == MAX_CHAR ? 255 : static_cast(range.max); - } - - int min_int(const CharacterRange &range) { - return static_cast(range.min); - } + static const unsigned char MAX_CHAR = -1; CharacterSet::CharacterSet() : ranges({}) {} CharacterSet::CharacterSet(const set &ranges) : ranges(ranges) {} @@ -37,8 +29,8 @@ namespace tree_sitter { size_t CharacterSet::hash_code() const { size_t result = std::hash()(ranges.size()); for (auto &range : ranges) { - result ^= std::hash()(range.min); - result ^= std::hash()(range.max); + result ^= std::hash()(range.min); + result ^= std::hash()(range.max); } return result; } @@ -69,59 +61,47 @@ namespace tree_sitter { } } - void add_range(CharacterSet *self, CharacterRange new_range) { + void add_range(CharacterSet *self, CharacterRange addition) { set new_ranges; - for (auto range : self->ranges) { - auto new_min = min_int(new_range); - auto new_max = max_int(new_range); bool is_adjacent = false; - - if (min_int(range) < new_min) { - if (max_int(range) >= new_min - 1) { - is_adjacent = true; - new_range.min = range.min; - } + if (range.min < addition.min && range.max >= addition.min - 1) { + is_adjacent = true; + addition.min = range.min; } - if (max_int(range) > new_max) { - if (min_int(range) <= new_max + 1) { - is_adjacent = true; - new_range.max = range.max; - } + if (range.max > addition.max && range.min <= addition.max + 1) { + is_adjacent = true; + addition.max = range.max; } - if (!is_adjacent) { new_ranges.insert(range); } } - new_ranges.insert(new_range); + new_ranges.insert(addition); self->ranges = new_ranges; } CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) { CharacterSet removed_set; set new_ranges; - auto min_to_remove = min_int(range_to_remove); - auto max_to_remove = max_int(range_to_remove); - for (auto range : self->ranges) { - if (min_to_remove <= min_int(range)) { - if (max_to_remove < min_int(range)) { + if (range_to_remove.min <= range.min) { + if (range_to_remove.max < range.min) { new_ranges.insert(range); - } else if (max_to_remove < max_int(range)) { - new_ranges.insert(CharacterRange(max_to_remove + 1, range.max)); - add_range(&removed_set, CharacterRange(range.min, max_to_remove)); + } else if (range_to_remove.max < range.max) { + new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max)); + add_range(&removed_set, CharacterRange(range.min, range_to_remove.max)); } else { add_range(&removed_set, range); } - } else if (min_to_remove <= max_int(range)) { - if (max_to_remove < max_int(range)) { - new_ranges.insert(CharacterRange(range.min, min_to_remove - 1)); - new_ranges.insert(CharacterRange(max_to_remove + 1, range.max)); + } else if (range_to_remove.min <= range.max) { + if (range_to_remove.max < range.max) { + new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1)); + new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max)); add_range(&removed_set, range_to_remove); } else { - new_ranges.insert(CharacterRange(range.min, min_to_remove - 1)); - add_range(&removed_set, CharacterRange(min_to_remove, range.max)); + new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1)); + add_range(&removed_set, CharacterRange(range_to_remove.min, range.max)); } } else { new_ranges.insert(range);