Represent character sets with unsigned chars

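This is better for comparing character ranges: plain `char` may be signed,
so '\xff' could compare as negative and needed special-casing, whereas
`unsigned char` has a definite maximum character value (255).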
This commit is contained in:
Max Brunsfeld 2014-03-31 18:47:18 -07:00
parent 7824b3191b
commit 2a222adb7e
4 changed files with 35 additions and 53 deletions

View file

@ -6,7 +6,7 @@ using namespace rules;
START_TEST
describe("character sets", []() {
char max_char = 255;
unsigned char max_char = 255;
describe("computing the complement", [&]() {
it("works for the set containing only the null character", [&]() {

View file

@ -6,10 +6,10 @@ namespace tree_sitter {
using std::string;
namespace rules {
static const char MAX_CHAR = '\xff';
static const unsigned char MAX_CHAR = -1;
CharacterRange::CharacterRange(char value) : min(value), max(value) {}
CharacterRange::CharacterRange(char min, char max) : min(min), max(max) {}
CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {}
CharacterRange::CharacterRange(unsigned char min, unsigned char max) : min(min), max(max) {}
bool CharacterRange::operator==(const CharacterRange &other) const {
return min == other.min && max == other.max;
@ -22,14 +22,14 @@ namespace tree_sitter {
return false;
}
string escape_character(char input) {
string escape_character(unsigned char input) {
switch (input) {
case '\0':
return "<EOF>";
case MAX_CHAR:
return "<MAX>";
default:
return string() + input;
return string() + char(input);
}
}

View file

@ -7,11 +7,13 @@
namespace tree_sitter {
namespace rules {
struct CharacterRange {
char min;
char max;
unsigned char min;
unsigned char max;
// IMPLICIT_CONSTRUCTORS
CharacterRange(char value);
CharacterRange(char min, char max);
CharacterRange(unsigned char value);
CharacterRange(unsigned char min, unsigned char max);
bool operator==(const CharacterRange &other) const;
bool operator<(const CharacterRange &others) const;
std::string to_string() const;
@ -23,7 +25,7 @@ namespace std {
template<>
struct hash<tree_sitter::rules::CharacterRange> {
size_t operator()(const tree_sitter::rules::CharacterRange &range) const {
return (hash<char>()(range.min) ^ hash<char>()(range.max));
return (hash<unsigned char>()(range.min) ^ hash<unsigned char>()(range.max));
}
};
}

View file

@ -11,15 +11,7 @@ using std::initializer_list;
namespace tree_sitter {
namespace rules {
static const char MAX_CHAR = '\xff';
int max_int(const CharacterRange &range) {
return range.max == MAX_CHAR ? 255 : static_cast<int>(range.max);
}
int min_int(const CharacterRange &range) {
return static_cast<int>(range.min);
}
static const unsigned char MAX_CHAR = -1;
CharacterSet::CharacterSet() : ranges({}) {}
CharacterSet::CharacterSet(const set<CharacterRange> &ranges) : ranges(ranges) {}
@ -37,8 +29,8 @@ namespace tree_sitter {
size_t CharacterSet::hash_code() const {
size_t result = std::hash<size_t>()(ranges.size());
for (auto &range : ranges) {
result ^= std::hash<char>()(range.min);
result ^= std::hash<char>()(range.max);
result ^= std::hash<unsigned char>()(range.min);
result ^= std::hash<unsigned char>()(range.max);
}
return result;
}
@ -69,59 +61,47 @@ namespace tree_sitter {
}
}
void add_range(CharacterSet *self, CharacterRange new_range) {
void add_range(CharacterSet *self, CharacterRange addition) {
set<CharacterRange> new_ranges;
for (auto range : self->ranges) {
auto new_min = min_int(new_range);
auto new_max = max_int(new_range);
bool is_adjacent = false;
if (min_int(range) < new_min) {
if (max_int(range) >= new_min - 1) {
is_adjacent = true;
new_range.min = range.min;
}
if (range.min < addition.min && range.max >= addition.min - 1) {
is_adjacent = true;
addition.min = range.min;
}
if (max_int(range) > new_max) {
if (min_int(range) <= new_max + 1) {
is_adjacent = true;
new_range.max = range.max;
}
if (range.max > addition.max && range.min <= addition.max + 1) {
is_adjacent = true;
addition.max = range.max;
}
if (!is_adjacent) {
new_ranges.insert(range);
}
}
new_ranges.insert(new_range);
new_ranges.insert(addition);
self->ranges = new_ranges;
}
CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) {
CharacterSet removed_set;
set<CharacterRange> new_ranges;
auto min_to_remove = min_int(range_to_remove);
auto max_to_remove = max_int(range_to_remove);
for (auto range : self->ranges) {
if (min_to_remove <= min_int(range)) {
if (max_to_remove < min_int(range)) {
if (range_to_remove.min <= range.min) {
if (range_to_remove.max < range.min) {
new_ranges.insert(range);
} else if (max_to_remove < max_int(range)) {
new_ranges.insert(CharacterRange(max_to_remove + 1, range.max));
add_range(&removed_set, CharacterRange(range.min, max_to_remove));
} else if (range_to_remove.max < range.max) {
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, CharacterRange(range.min, range_to_remove.max));
} else {
add_range(&removed_set, range);
}
} else if (min_to_remove <= max_int(range)) {
if (max_to_remove < max_int(range)) {
new_ranges.insert(CharacterRange(range.min, min_to_remove - 1));
new_ranges.insert(CharacterRange(max_to_remove + 1, range.max));
} else if (range_to_remove.min <= range.max) {
if (range_to_remove.max < range.max) {
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, range_to_remove);
} else {
new_ranges.insert(CharacterRange(range.min, min_to_remove - 1));
add_range(&removed_set, CharacterRange(min_to_remove, range.max));
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
add_range(&removed_set, CharacterRange(range_to_remove.min, range.max));
}
} else {
new_ranges.insert(range);