179 lines
4.6 KiB
C++
179 lines
4.6 KiB
C++
#include "compiler/rules/character_set.h"
|
|
|
|
using std::set;
|
|
using std::vector;
|
|
|
|
namespace tree_sitter {
|
|
namespace rules {
|
|
|
|
static void add_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
|
|
for (uint32_t c = min; c <= max; c++) {
|
|
characters->insert(c);
|
|
}
|
|
}
|
|
|
|
static void remove_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
|
|
for (uint32_t c = min; c <= max; c++) {
|
|
characters->erase(c);
|
|
}
|
|
}
|
|
|
|
static set<uint32_t> remove_chars(set<uint32_t> *left, const set<uint32_t> &right) {
|
|
set<uint32_t> result;
|
|
for (uint32_t c : right) {
|
|
if (left->erase(c)) {
|
|
result.insert(c);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static set<uint32_t> add_chars(set<uint32_t> *left, const set<uint32_t> &right) {
|
|
set<uint32_t> result;
|
|
for (uint32_t c : right) {
|
|
if (left->insert(c).second) {
|
|
result.insert(c);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
|
|
vector<CharacterRange> result;
|
|
for (uint32_t c : chars) {
|
|
auto size = result.size();
|
|
if (size >= 2 && result[size - 2].max == (c - 2)) {
|
|
result.pop_back();
|
|
result.back().max = c;
|
|
} else if (size >= 1) {
|
|
CharacterRange &last = result.back();
|
|
if (last.min < last.max && last.max == (c - 1)) {
|
|
last.max = c;
|
|
} else {
|
|
result.push_back(CharacterRange(c));
|
|
}
|
|
} else {
|
|
result.push_back(CharacterRange(c));
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
CharacterSet::CharacterSet() : includes_all(false) {}
|
|
|
|
CharacterSet::CharacterSet(const set<uint32_t> &chars) : included_chars(chars), includes_all(false) {}
|
|
|
|
bool CharacterSet::operator==(const CharacterSet &other) const {
|
|
return includes_all == other.includes_all &&
|
|
included_chars == other.included_chars &&
|
|
excluded_chars == other.excluded_chars;
|
|
}
|
|
|
|
bool CharacterSet::operator<(const CharacterSet &other) const {
|
|
if (!includes_all && other.includes_all)
|
|
return true;
|
|
if (includes_all && !other.includes_all)
|
|
return false;
|
|
if (included_chars < other.included_chars)
|
|
return true;
|
|
if (other.included_chars < included_chars)
|
|
return false;
|
|
return excluded_chars < other.excluded_chars;
|
|
}
|
|
|
|
CharacterSet &CharacterSet::include_all() {
|
|
includes_all = true;
|
|
included_chars = {};
|
|
excluded_chars = { 0 };
|
|
return *this;
|
|
}
|
|
|
|
CharacterSet &CharacterSet::include(uint32_t min, uint32_t max) {
|
|
if (includes_all)
|
|
remove_range(&excluded_chars, min, max);
|
|
else
|
|
add_range(&included_chars, min, max);
|
|
return *this;
|
|
}
|
|
|
|
CharacterSet &CharacterSet::exclude(uint32_t min, uint32_t max) {
|
|
if (includes_all)
|
|
add_range(&excluded_chars, min, max);
|
|
else
|
|
remove_range(&included_chars, min, max);
|
|
return *this;
|
|
}
|
|
|
|
CharacterSet &CharacterSet::include(uint32_t c) {
|
|
return include(c, c);
|
|
}
|
|
|
|
CharacterSet &CharacterSet::exclude(uint32_t c) {
|
|
return exclude(c, c);
|
|
}
|
|
|
|
bool CharacterSet::is_empty() const {
|
|
return !includes_all && included_chars.empty();
|
|
}
|
|
|
|
void CharacterSet::add_set(const CharacterSet &other) {
|
|
if (includes_all) {
|
|
if (other.includes_all) {
|
|
excluded_chars = remove_chars(&excluded_chars, other.excluded_chars);
|
|
} else {
|
|
remove_chars(&excluded_chars, other.included_chars);
|
|
}
|
|
} else {
|
|
if (other.includes_all) {
|
|
includes_all = true;
|
|
for (uint32_t c : other.excluded_chars)
|
|
if (!included_chars.count(c))
|
|
excluded_chars.insert(c);
|
|
included_chars.clear();
|
|
} else {
|
|
for (uint32_t c : other.included_chars)
|
|
included_chars.insert(c);
|
|
}
|
|
}
|
|
}
|
|
|
|
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
|
|
CharacterSet result;
|
|
if (includes_all) {
|
|
if (other.includes_all) {
|
|
result.includes_all = true;
|
|
result.excluded_chars = excluded_chars;
|
|
included_chars = add_chars(&result.excluded_chars, other.excluded_chars);
|
|
excluded_chars = {};
|
|
includes_all = false;
|
|
} else {
|
|
result.included_chars = add_chars(&excluded_chars, other.included_chars);
|
|
}
|
|
} else {
|
|
if (other.includes_all) {
|
|
result.included_chars = included_chars;
|
|
included_chars =
|
|
remove_chars(&result.included_chars, other.excluded_chars);
|
|
} else {
|
|
result.included_chars =
|
|
remove_chars(&included_chars, other.included_chars);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
bool CharacterSet::intersects(const CharacterSet &other) const {
|
|
CharacterSet copy(*this);
|
|
return !copy.remove_set(other).is_empty();
|
|
}
|
|
|
|
vector<CharacterRange> CharacterSet::included_ranges() const {
|
|
return consolidate_ranges(included_chars);
|
|
}
|
|
|
|
vector<CharacterRange> CharacterSet::excluded_ranges() const {
|
|
return consolidate_ranges(excluded_chars);
|
|
}
|
|
|
|
} // namespace rules
|
|
} // namespace tree_sitter
|