tree-sitter/src/compiler/rules/character_set.cc
2015-07-31 16:32:24 -07:00

198 lines
5.2 KiB
C++

#include "compiler/rules/character_set.h"
#include <string>
#include <utility>
#include <vector>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::hash;
using std::set;
using std::vector;
// Inserts every character in the inclusive range [min, max] into *characters.
// An inverted range (min > max) inserts nothing.
static void add_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
  if (min > max)
    return;
  // Test for the end of the range *before* incrementing: a `c <= max` loop
  // condition can never become false when max is the largest uint32_t value,
  // because the counter wraps around to 0 instead.
  for (uint32_t c = min;; c++) {
    characters->insert(c);
    if (c == max)
      break;
  }
}
// Removes every character in the inclusive range [min, max] from *characters.
// An inverted range (min > max) removes nothing.
static void remove_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
  // Guard the inverted case so the iterator pair below always forms a valid
  // range (lower_bound(min) must not come after upper_bound(max)).
  if (min > max)
    return;
  // Erase only the elements actually stored in the range instead of probing
  // each value in turn. This also avoids a counting loop whose `c <= max`
  // condition can never become false when max is the largest uint32_t value.
  characters->erase(characters->lower_bound(min), characters->upper_bound(max));
}
// Erases from *left every character that also appears in `right`.
// Returns the characters that were actually erased, i.e. those that were
// present in both sets before the call.
static set<uint32_t> remove_chars(set<uint32_t> *left,
                                  const set<uint32_t> &right) {
  set<uint32_t> removed;
  for (const uint32_t ch : right) {
    auto found = left->find(ch);
    if (found == left->end())
      continue;
    left->erase(found);
    removed.insert(ch);
  }
  return removed;
}
// Inserts into *left every character of `right`.
// Returns the characters that were newly inserted, i.e. those in `right`
// that *left did not already contain.
static set<uint32_t> add_chars(set<uint32_t> *left, const set<uint32_t> &right) {
  set<uint32_t> added;
  for (const uint32_t ch : right) {
    if (left->count(ch) != 0)
      continue;
    left->insert(ch);
    added.insert(ch);
  }
  return added;
}
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
vector<CharacterRange> result;
for (uint32_t c : chars) {
size_t size = result.size();
if (size >= 2 && result[size - 2].max == (c - 2)) {
result.pop_back();
result.back().max = c;
} else if (size >= 1) {
CharacterRange &last = result.back();
if (last.min < last.max && last.max == (c - 1))
last.max = c;
else
result.push_back(CharacterRange(c));
} else {
result.push_back(CharacterRange(c));
}
}
return result;
}
// Constructs an empty set: not in include-all mode, with no included and no
// excluded characters.
CharacterSet::CharacterSet()
    : includes_all(false), included_chars(), excluded_chars() {}
// Two rules are equal when the other rule is also a CharacterSet and all
// three fields (include-all flag, included and excluded characters) match.
bool CharacterSet::operator==(const Rule &rule) const {
  const CharacterSet *other = dynamic_cast<const CharacterSet *>(&rule);
  if (!other)
    return false;
  if (includes_all != other->includes_all)
    return false;
  if (!(included_chars == other->included_chars))
    return false;
  return excluded_chars == other->excluded_chars;
}
// Strict ordering over character sets, comparing field by field:
//   1. sets not in include-all mode sort before sets that are,
//   2. then by included characters,
//   3. then by excluded characters.
bool CharacterSet::operator<(const CharacterSet &other) const {
  // When the flags differ, this set is smaller exactly when the *other* one
  // is the include-all set.
  if (includes_all != other.includes_all)
    return other.includes_all;

  const bool included_less = included_chars < other.included_chars;
  if (included_less)
    return true;
  const bool included_greater = other.included_chars < included_chars;
  if (included_greater)
    return false;

  return excluded_chars < other.excluded_chars;
}
size_t CharacterSet::hash_code() const {
size_t result = hash<bool>()(includes_all);
result ^= hash<size_t>()(included_chars.size());
for (auto &c : included_chars)
result ^= hash<uint32_t>()(c);
result <<= 1;
result ^= hash<size_t>()(excluded_chars.size());
for (auto &c : excluded_chars)
result ^= hash<uint32_t>()(c);
return result;
}
// Returns a newly allocated, shared copy of this character set.
rule_ptr CharacterSet::copy() const {
  const CharacterSet &self = *this;
  return std::make_shared<CharacterSet>(self);
}
// Renders the set as an s-expression of the form
//   (char [include_all] [(include <ranges>...)] [(exclude <ranges>...)])
// where each optional part is emitted only when it applies.
string CharacterSet::to_string() const {
  string text("(char");

  if (includes_all)
    text += " include_all";

  if (!included_chars.empty()) {
    text += " (include";
    for (auto range : included_ranges()) {
      text += " ";
      text += range.to_string();
    }
    text += ")";
  }

  if (!excluded_chars.empty()) {
    text += " (exclude";
    for (auto range : excluded_ranges()) {
      text += " ";
      text += range.to_string();
    }
    text += ")";
  }

  text += ")";
  return text;
}
// Switches the set into include-all mode and resets both character lists.
// After the call the only excluded character is 0.
// NOTE(review): presumably character 0 is reserved (e.g. as an end-of-input
// marker) and therefore never matched -- confirm against the lexer.
// Returns *this so calls can be chained.
CharacterSet &CharacterSet::include_all() {
  includes_all = true;
  included_chars.clear();
  excluded_chars.clear();
  excluded_chars.insert(0u);
  return *this;
}
// Adds the inclusive range [min, max] to the set.
// In include-all mode this means dropping the range from the exclusions;
// otherwise the range is added to the explicit inclusions.
// Returns *this so calls can be chained.
CharacterSet &CharacterSet::include(uint32_t min, uint32_t max) {
  if (!includes_all) {
    add_range(&included_chars, min, max);
    return *this;
  }
  remove_range(&excluded_chars, min, max);
  return *this;
}
// Removes the inclusive range [min, max] from the set.
// In include-all mode this means adding the range to the exclusions;
// otherwise the range is dropped from the explicit inclusions.
// Returns *this so calls can be chained.
CharacterSet &CharacterSet::exclude(uint32_t min, uint32_t max) {
  if (!includes_all) {
    remove_range(&included_chars, min, max);
    return *this;
  }
  add_range(&excluded_chars, min, max);
  return *this;
}
// Adds the single character `c` to the set by treating it as the
// one-element range [c, c]. Returns *this so calls can be chained.
CharacterSet &CharacterSet::include(uint32_t c) {
  include(c, c);
  return *this;
}
// Removes the single character `c` from the set by treating it as the
// one-element range [c, c]. Returns *this so calls can be chained.
CharacterSet &CharacterSet::exclude(uint32_t c) {
  exclude(c, c);
  return *this;
}
// Reports whether the set is empty: it is not in include-all mode and has no
// explicitly included characters. The exclusion list is not consulted.
bool CharacterSet::is_empty() const {
  if (includes_all)
    return false;
  return included_chars.empty();
}
void CharacterSet::add_set(const CharacterSet &other) {
for (uint32_t c : other.included_chars)
included_chars.insert(c);
}
// Removes from this set every character it shares with `other` and returns
// the removed characters (the intersection of the two sets) as a new set.
//
// Each operand is either in include-all mode (everything except its
// `excluded_chars`) or in explicit mode (exactly its `included_chars`); the
// four combinations are handled one by one.
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
  CharacterSet removed;

  // Explicit minus explicit: erase the shared characters; the erased ones
  // are the intersection.
  if (!includes_all && !other.includes_all) {
    removed.included_chars =
        remove_chars(&included_chars, other.included_chars);
    return removed;
  }

  // Explicit minus include-all: the intersection is what this set includes
  // and `other` does not exclude; this set keeps only the characters that
  // `other` excludes.
  if (!includes_all) {
    removed.included_chars = included_chars;
    included_chars =
        remove_chars(&removed.included_chars, other.excluded_chars);
    return removed;
  }

  // Include-all minus explicit: exclude the other's characters here; the
  // ones that were not already excluded form the intersection.
  if (!other.includes_all) {
    removed.included_chars = add_chars(&excluded_chars, other.included_chars);
    return removed;
  }

  // Include-all minus include-all: the intersection is include-all with both
  // exclusion lists combined. This set shrinks to an explicit set holding
  // the characters that only `other` excluded.
  removed.includes_all = true;
  removed.excluded_chars = excluded_chars;
  included_chars = add_chars(&removed.excluded_chars, other.excluded_chars);
  excluded_chars = {};
  includes_all = false;
  return removed;
}
// Returns the explicitly included characters grouped into ranges, as
// produced by consolidate_ranges().
vector<CharacterRange> CharacterSet::included_ranges() const {
  const set<uint32_t> &chars = included_chars;
  return consolidate_ranges(chars);
}
// Returns the excluded characters grouped into ranges, as produced by
// consolidate_ranges().
vector<CharacterRange> CharacterSet::excluded_ranges() const {
  const set<uint32_t> &chars = excluded_chars;
  return consolidate_ranges(chars);
}
// Visitor entry point: hands this character set to the visitor's `visit`
// overload for CharacterSet pointers.
void CharacterSet::accept(Visitor *visitor) const {
  const CharacterSet *self = this;
  visitor->visit(self);
}
} // namespace rules
} // namespace tree_sitter