Implement character set difference

This commit is contained in:
Max Brunsfeld 2014-02-07 12:57:35 -08:00
parent b94fa3ed35
commit df3397f02c
7 changed files with 200 additions and 139 deletions

View file

@ -162,14 +162,14 @@ namespace tree_sitter {
string lex_error_call(const unordered_set<rules::CharacterSet> &expected_inputs) {
rules::CharacterSet expected_set;
for (auto &rule : expected_inputs)
expected_set.union_with(rule);
expected_set.add_set(rule);
string result = "LEX_ERROR(" + to_string(expected_set.ranges.size()) + ", EXPECT({";
bool started = false;
for (auto &ranges : expected_set.ranges) {
for (auto &range : expected_set.ranges) {
if (started) result += ", ";
started = true;
result += "\"" + escape_string(ranges.to_string()) + "\"";
result += "\"" + escape_string(range.to_string()) + "\"";
}
result += "}));";
return result;

View file

@ -3,6 +3,7 @@
using std::string;
using std::hash;
using std::set;
using std::pair;
namespace tree_sitter {
namespace rules {
@ -36,23 +37,12 @@ namespace tree_sitter {
}
}
int CharacterRange::max_int() const {
return max == MAX_CHAR ? 255 : (int)max;
// Upper bound of `range` as an int. A `max` equal to MAX_CHAR is reported as 255;
// every other value is converted directly from the stored char.
int max_int(const CharacterRange &range) {
    if (range.max == MAX_CHAR) {
        return 255;
    }
    return static_cast<int>(range.max);
}
int CharacterRange::min_int() const {
return (int)min;
}
bool CharacterRange::is_adjacent(const CharacterRange &other) const {
return
(min_int() <= other.min_int() && max_int() >= (other.min_int() - 1)) ||
(min_int() <= (other.max_int() + 1) && max_int() >= other.max_int());
}
void CharacterRange::add_range(const CharacterRange &other) {
if (other.min < min) min = other.min;
if (other.max_int() > max_int()) max = other.max;
// Lower bound of `range`, converted from the stored char to an int.
int min_int(const CharacterRange &range) {
    const int lower_bound = static_cast<int>(range.min);
    return lower_bound;
}
string CharacterRange::to_string() const {
@ -101,7 +91,7 @@ namespace tree_sitter {
result.insert(CharacterRange(current_char, MAX_CHAR));
return CharacterSet(result);
}
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
auto first_range = *ranges.begin();
if (first_range.min == 0 && first_range.max > 0) {
@ -113,10 +103,26 @@ namespace tree_sitter {
// Rebuilds self->ranges with `new_range` merged in: an existing range that
// overlaps or directly borders `new_range` is folded into it (by widening
// new_range.min / new_range.max); other ranges are copied over unchanged.
// The statement that stores the widened `new_range` itself is not visible in
// this excerpt — presumably it sits between the loop and the final assignment.
void add_range(CharacterSet *self, CharacterRange new_range) {
set<CharacterRange> new_ranges;
for (auto range : self->ranges) {
if (range.is_adjacent(new_range)) {
new_range.add_range(range);
} else {
// Bounds are re-read on every iteration because new_range may have been
// widened by an earlier range.
auto new_min = min_int(new_range);
auto new_max = max_int(new_range);
bool is_adjacent = false;
// Existing range starts before new_range and reaches (or abuts) its lower
// end: extend new_range downwards.
if (min_int(range) < new_min) {
if (max_int(range) >= new_min - 1) {
is_adjacent = true;
new_range.min = range.min;
}
}
// Existing range ends after new_range and reaches (or abuts) its upper
// end: extend new_range upwards.
if (max_int(range) > new_max) {
if (min_int(range) <= new_max + 1) {
is_adjacent = true;
new_range.max = range.max;
}
}
// NOTE(review): a range lying entirely inside [new_min, new_max] satisfies
// neither test above, so it is copied as-is next to the range that covers
// it — confirm whether it should be absorbed instead.
if (!is_adjacent) {
new_ranges.insert(range);
}
}
@ -124,12 +130,44 @@ namespace tree_sitter {
self->ranges = new_ranges;
}
void CharacterSet::union_with(const CharacterSet &other) {
// Removes every character covered by `new_range` from `self`.
//
// Each existing range is compared with the removed interval
// [new_min, new_max] (both ends inclusive, as reported by min_int/max_int):
//   - no overlap            -> the range is kept unchanged;
//   - overlap               -> only the parts sticking out below new_min and/or
//                              above new_max are kept (zero, one or two pieces).
// The rebuilt set replaces self->ranges.
void remove_range(CharacterSet *self, CharacterRange new_range) {
    set<CharacterRange> new_ranges;
    const int new_min = min_int(new_range);
    const int new_max = max_int(new_range);
    for (const auto &range : self->ranges) {
        const int range_min = min_int(range);
        const int range_max = max_int(range);

        // Disjoint from the removed interval: nothing to cut.
        if (new_max < range_min || new_min > range_max) {
            new_ranges.insert(range);
            continue;
        }

        // Piece below the removed interval survives.
        if (range_min < new_min) {
            new_ranges.insert(CharacterRange(range.min, new_min - 1));
        }

        // Piece above the removed interval survives. The comparison is strict:
        // when new_max == range_max nothing is left on this side, and inserting
        // (new_max + 1, range.max) would create an inverted range.
        if (new_max < range_max) {
            new_ranges.insert(CharacterRange(new_max + 1, range.max));
        }
    }
    self->ranges = new_ranges;
}
// Adds all of `other` to this set by calling add_range once for each of
// other's ranges, in iteration order.
void CharacterSet::add_set(const CharacterSet &other) {
    for (auto it = other.ranges.begin(); it != other.ranges.end(); ++it) {
        add_range(this, *it);
    }
}
// Subtracts `other` from this set by calling remove_range once for each of
// other's ranges, in iteration order.
void CharacterSet::remove_set(const CharacterSet &other) {
    for (auto it = other.ranges.begin(); it != other.ranges.end(); ++it) {
        remove_range(this, *it);
    }
}
// Visitor hook: hands a pointer to this CharacterSet to visitor.visit().
void CharacterSet::accept(Visitor &visitor) const {
visitor.visit(this);
}

View file

@ -9,19 +9,10 @@ namespace tree_sitter {
// A contiguous run of characters from `min` to `max`. The set operations that
// build these (e.g. CharacterRange(removed_max + 1, range.max)) treat both ends
// as part of the range.
struct CharacterRange {
// First character of the range.
char min;
// Last character of the range.
char max;
// Single-argument form; presumably a one-character range — definition not
// visible here, TODO confirm.
CharacterRange(char);
// Two-argument form, called elsewhere as (min, max).
CharacterRange(char, char);
int max_int() const;
int min_int() const;
// Equality and ordering, declared so ranges can be compared and stored in a
// std::set; definitions are not visible here.
bool operator==(const CharacterRange &) const;
bool operator<(const CharacterRange &) const;
bool is_adjacent(const CharacterRange &) const;
void add_range(const CharacterRange &);
// String form of the range (used when listing expected inputs in lex errors).
std::string to_string() const;
};
}
@ -45,8 +36,10 @@ namespace tree_sitter {
CharacterSet(const std::set<CharacterRange> &ranges, bool);
CharacterSet complement() const;
void union_with(const CharacterSet &other);
std::pair<CharacterSet, bool> most_compact_representation() const;
void add_set(const CharacterSet &other);
void remove_set(const CharacterSet &other);
bool operator==(const Rule& other) const;
size_t hash_code() const;

View file

@ -75,7 +75,7 @@ namespace tree_sitter {
}
CharacterSet result;
while (has_more_input() && (peek() != ']'))
result.union_with(single_char());
result.add_set(single_char());
return is_affirmative ? result : result.complement();
}