2014-02-05 18:56:04 -08:00
|
|
|
#include "rules.h"
|
|
|
|
|
|
|
|
|
|
using std::string;
|
|
|
|
|
using std::hash;
|
|
|
|
|
using std::set;
|
2014-02-07 12:57:35 -08:00
|
|
|
using std::pair;
|
2014-02-05 18:56:04 -08:00
|
|
|
|
|
|
|
|
namespace tree_sitter {
|
|
|
|
|
namespace rules {
|
2014-02-06 09:12:03 -08:00
|
|
|
// Highest character value used as a range bound in this file: it is printed
// as "<MAX>" by escape_character() and mapped to 255 by max_int().
const char MAX_CHAR = '\xff';
|
2014-02-05 18:56:04 -08:00
|
|
|
|
|
|
|
|
// Builds a range holding exactly one character by forwarding to the
// two-bound constructor with both bounds set to `value`.
CharacterRange::CharacterRange(char value) : CharacterRange(value, value) {}

// Builds a range from an explicit lower and upper bound. The bounds are
// stored as given; their relative order is not checked here.
CharacterRange::CharacterRange(char min, char max) : min(min), max(max) {}
|
2014-02-05 18:56:04 -08:00
|
|
|
|
|
|
|
|
// Two ranges are equal when both their lower and upper bounds match.
bool CharacterRange::operator==(const CharacterRange &other) const {
    if (min != other.min)
        return false;
    return max == other.max;
}
|
|
|
|
|
|
|
|
|
|
// Orders ranges by lower bound first, then by upper bound when the lower
// bounds are equal.
bool CharacterRange::operator<(const CharacterRange &other) const {
    if (min != other.min)
        return min < other.min;
    return max < other.max;
}
|
|
|
|
|
|
|
|
|
|
// Returns a printable form of a single character: "<EOF>" for the NUL
// character, "<MAX>" for MAX_CHAR, and the character itself otherwise.
string escape_character(char input) {
    if (input == '\0')
        return "<EOF>";
    if (input == MAX_CHAR)
        return "<MAX>";
    return string(1, input);
}
|
|
|
|
|
|
2014-02-07 12:57:35 -08:00
|
|
|
int max_int(const CharacterRange &range) {
|
|
|
|
|
return range.max == MAX_CHAR ? 255 : (int)range.max;
|
2014-02-06 09:12:03 -08:00
|
|
|
}
|
|
|
|
|
|
2014-02-07 12:57:35 -08:00
|
|
|
int min_int(const CharacterRange &range) {
|
|
|
|
|
return (int)range.min;
|
2014-02-05 18:56:04 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Renders the range for display: "<ANY>" when it spans 0 through MAX_CHAR,
// a single escaped character when both bounds coincide, and
// "<min>-<max>" (each bound escaped) otherwise.
string CharacterRange::to_string() const {
    const bool covers_everything = (min == 0 && max == MAX_CHAR);
    if (covers_everything)
        return "<ANY>";
    if (min != max)
        return escape_character(min) + "-" + escape_character(max);
    return escape_character(min);
}
|
|
|
|
|
|
|
|
|
|
// Creates a set with no ranges.
CharacterSet::CharacterSet() : ranges() {}

// Creates a set made of exactly the given ranges.
CharacterSet::CharacterSet(const set<CharacterRange> &ranges) : ranges(ranges) {}

// Creates a set from the given ranges when `sign` is true; when `sign` is
// false the set holds the complement of those ranges instead.
CharacterSet::CharacterSet(const set<CharacterRange> &ranges, bool sign) {
    if (sign)
        this->ranges = ranges;
    else
        this->ranges = CharacterSet(ranges).complement().ranges;
}
|
|
|
|
|
|
|
|
|
|
// Compares this set with an arbitrary rule: equal only when the rule is
// itself a CharacterSet holding the same ranges.
bool CharacterSet::operator==(const Rule &rule) const {
    const auto *that = dynamic_cast<const CharacterSet *>(&rule);
    if (!that)
        return false;
    return ranges == that->ranges;
}
|
|
|
|
|
|
2014-02-10 18:38:01 -08:00
|
|
|
// Orders character sets by comparing their range collections.
bool CharacterSet::operator<(const CharacterSet &other) const {
    const set<CharacterRange> &mine = ranges;
    const set<CharacterRange> &theirs = other.ranges;
    return mine < theirs;
}
|
|
|
|
|
|
2014-02-05 18:56:04 -08:00
|
|
|
size_t CharacterSet::hash_code() const {
|
2014-02-15 16:12:16 -08:00
|
|
|
size_t result = std::hash<size_t>()(ranges.size());
|
|
|
|
|
for (auto &range : ranges) {
|
|
|
|
|
result ^= std::hash<char>()(range.min);
|
|
|
|
|
result ^= std::hash<char>()(range.max);
|
|
|
|
|
}
|
|
|
|
|
return result;
|
2014-02-05 18:56:04 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns a newly allocated shared copy of this character set.
rule_ptr CharacterSet::copy() const {
    rule_ptr duplicate = std::make_shared<CharacterSet>(*this);
    return duplicate;
}
|
|
|
|
|
|
|
|
|
|
// Renders the set as "#<char { r1 r2 ... }>", where each entry is the
// string form of one range, in the set's iteration order.
string CharacterSet::to_string() const {
    string text = "#<char {";
    for (auto it = ranges.begin(); it != ranges.end(); ++it) {
        text += " ";
        text += it->to_string();
    }
    text += " }>";
    return text;
}
|
|
|
|
|
|
|
|
|
|
// Returns the set of characters that are NOT in this set: start from the
// full range 0..MAX_CHAR and remove everything this set contains.
CharacterSet CharacterSet::complement() const {
    set<CharacterRange> every_char;
    every_char.insert(CharacterRange('\0', MAX_CHAR));

    CharacterSet universe(every_char, true);
    universe.remove_set(*this);
    return universe;
}
|
2014-02-07 12:57:35 -08:00
|
|
|
|
2014-02-05 18:56:04 -08:00
|
|
|
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
|
|
|
|
|
auto first_range = *ranges.begin();
|
|
|
|
|
if (first_range.min == 0 && first_range.max > 0) {
|
|
|
|
|
return { this->complement(), false };
|
|
|
|
|
} else {
|
|
|
|
|
return { *this, true };
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Adds `new_range` to `self`, merging it with every existing range that
// overlaps it or sits directly next to it.
//
// Existing ranges that are neither overlapping nor adjacent are kept as they
// are. Ranges that overlap or touch `new_range` are absorbed: `new_range`
// grows to cover them and they are dropped. This includes ranges that lie
// entirely inside `new_range` or share its lower bound; those used to be
// kept next to the new range, leaving overlapping entries in the set.
void add_range(CharacterSet *self, CharacterRange new_range) {
    set<CharacterRange> new_ranges;

    // Integer bounds of the (growing) merged range.
    int new_min = min_int(new_range);
    int new_max = max_int(new_range);

    for (const auto &range : self->ranges) {
        const int range_min = min_int(range);
        const int range_max = max_int(range);

        // Separated by at least one character on either side: keep untouched.
        const bool is_separate = (range_max < new_min - 1) || (range_min > new_max + 1);
        if (is_separate) {
            new_ranges.insert(range);
            continue;
        }

        // Overlapping or adjacent: widen the merged range where needed.
        if (range_min < new_min) {
            new_range.min = range.min;
            new_min = range_min;
        }
        if (range_max > new_max) {
            new_range.max = range.max;
            new_max = range_max;
        }
    }

    new_ranges.insert(new_range);
    self->ranges = new_ranges;
}
|
|
|
|
|
|
2014-02-08 15:26:10 -08:00
|
|
|
// Removes every character of `new_range` from `self` and returns the set of
// characters that were actually removed (the overlap between `new_range`
// and the previous contents of `self`).
//
// For each existing range:
//   - no overlap: the range is kept unchanged;
//   - overlap: the parts below and above `new_range` (if any) are kept, and
//     the overlapping part is added to the returned set.
//
// Two cases were handled incorrectly before. When `new_range` ended exactly
// at the end of an existing range, an inverted range (new_max + 1, range.max)
// was inserted. When `new_range` fully covered an existing range, that range
// was dropped from `self` but never reported in the returned set.
CharacterSet remove_range(CharacterSet *self, CharacterRange new_range) {
    CharacterSet removed_set;
    set<CharacterRange> new_ranges;
    const int new_min = min_int(new_range);
    const int new_max = max_int(new_range);

    for (const auto &range : self->ranges) {
        const int range_min = min_int(range);
        const int range_max = max_int(range);

        // Entirely before or after the removed span: nothing to cut.
        if (new_max < range_min || new_min > range_max) {
            new_ranges.insert(range);
            continue;
        }

        const bool keeps_lower_part = range_min < new_min;
        const bool keeps_upper_part = new_max < range_max;

        if (keeps_lower_part)
            new_ranges.insert(CharacterRange(range.min, static_cast<char>(new_min - 1)));
        if (keeps_upper_part)
            new_ranges.insert(CharacterRange(static_cast<char>(new_max + 1), range.max));

        // The overlap runs from the larger lower bound to the smaller upper bound.
        add_range(&removed_set, CharacterRange(
            keeps_lower_part ? new_range.min : range.min,
            keeps_upper_part ? new_range.max : range.max));
    }

    self->ranges = new_ranges;
    return removed_set;
}
|
|
|
|
|
|
2014-02-10 13:20:43 -08:00
|
|
|
bool CharacterSet::is_empty() const {
|
|
|
|
|
return ranges.empty();
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-07 12:57:35 -08:00
|
|
|
void CharacterSet::add_set(const CharacterSet &other) {
|
2014-02-05 18:56:04 -08:00
|
|
|
for (auto &other_range : other.ranges) {
|
|
|
|
|
add_range(this, other_range);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-08 15:26:10 -08:00
|
|
|
// Removes every range of `other` from this set and returns the union of
// what each individual remove_range() call reported as removed.
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
    CharacterSet removed;
    for (auto it = other.ranges.begin(); it != other.ranges.end(); ++it)
        removed.add_set(remove_range(this, *it));
    return removed;
}
|
|
|
|
|
|
|
|
|
|
// Returns the characters shared with `set` without modifying this object:
// the removal is performed on a copy, and the set of removed characters that
// remove_set() reports is the result.
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
    CharacterSet scratch(*this);
    CharacterSet shared = scratch.remove_set(set);
    return shared;
}
|
|
|
|
|
|
2014-02-05 18:56:04 -08:00
|
|
|
// Hands this character set to the visitor's visit() overload.
void CharacterSet::accept(Visitor &visitor) const {
    const CharacterSet *self = this;
    visitor.visit(self);
}
|
|
|
|
|
}
|
|
|
|
|
}
|