#include "compiler/rules/character_set.h"

#include <cstdint>
#include <set>
#include <vector>

using std::set;
using std::vector;

namespace tree_sitter {
namespace rules {

// NOTE(review): the container element types used below (`set<uint32_t>` and
// `vector<CharacterRange>`) are inferred from how the values are used in this
// file; confirm they match the declarations in character_set.h.

// Inserts every value in the inclusive range [min, max] into `*characters`.
// Does nothing when min > max.
static void add_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
  if (min > max) return;
  // The exit test is `c == max` rather than `c <= max` so that the loop still
  // terminates when max is the largest uint32_t value (where `c++` would wrap
  // around to 0 and `c <= max` would stay true forever).
  for (uint32_t c = min;; c++) {
    characters->insert(c);
    if (c == max) break;
  }
}

// Erases every value in the inclusive range [min, max] from `*characters`.
// Does nothing when min > max.
static void remove_range(set<uint32_t> *characters, uint32_t min,
                         uint32_t max) {
  // Guard required: with min > max the two bounds below could form an invalid
  // iterator range.
  if (min > max) return;
  // The set is ordered, so the elements to drop are contiguous; erase them in
  // one call instead of probing every value in the numeric range.
  characters->erase(characters->lower_bound(min),
                    characters->upper_bound(max));
}

// Erases from `*left` every value that is in `right`.
// Returns the values that were actually erased (those present in both sets).
static set<uint32_t> remove_chars(set<uint32_t> *left,
                                  const set<uint32_t> &right) {
  set<uint32_t> result;
  for (uint32_t c : right) {
    if (left->erase(c)) {
      result.insert(c);
    }
  }
  return result;
}

// Inserts into `*left` every value that is in `right`.
// Returns the values that were newly inserted (those in `right` that `*left`
// did not already contain).
static set<uint32_t> add_chars(set<uint32_t> *left,
                               const set<uint32_t> &right) {
  set<uint32_t> result;
  for (uint32_t c : right) {
    if (left->insert(c).second) {
      result.insert(c);
    }
  }
  return result;
}

// Converts an ordered set of values into a list of ranges.
//
// A run of three or more consecutive values becomes a single range. A run of
// exactly two consecutive values is left as two single-value ranges.
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
  vector<CharacterRange> result;
  for (uint32_t c : chars) {
    auto size = result.size();
    if (size >= 2 && result[size - 2].max == (c - 2)) {
      // The previous two entries end at c-2 and c-1, so `c` completes a run of
      // at least three: fold the last entry and `c` into the one before it.
      result.pop_back();
      result.back().max = c;
    } else if (size >= 1) {
      CharacterRange &last = result.back();
      if (last.min < last.max && last.max == (c - 1)) {
        // `last` already spans more than one value and `c` directly follows
        // it: extend it.
        last.max = c;
      } else {
        result.push_back(CharacterRange(c));
      }
    } else {
      result.push_back(CharacterRange(c));
    }
  }
  return result;
}

// Creates an empty set.
CharacterSet::CharacterSet() : includes_all(false) {}

// Creates a set containing exactly the given values.
CharacterSet::CharacterSet(const set<uint32_t> &chars)
    : included_chars(chars), includes_all(false) {}

// Two sets are equal when all three fields match.
bool CharacterSet::operator==(const CharacterSet &other) const {
  return includes_all == other.includes_all &&
         included_chars == other.included_chars &&
         excluded_chars == other.excluded_chars;
}

// Ordering: sets without `includes_all` sort before sets with it; ties are
// broken by `included_chars`, then by `excluded_chars`.
bool CharacterSet::operator<(const CharacterSet &other) const {
  if (!includes_all && other.includes_all) return true;
  if (includes_all && !other.includes_all) return false;
  if (included_chars < other.included_chars) return true;
  if (other.included_chars < included_chars) return false;
  return excluded_chars < other.excluded_chars;
}

// Switches the set to "everything except `excluded_chars`" mode, discarding
// any previously listed inclusions. The value 0 starts out excluded.
// NOTE(review): presumably 0 is reserved (e.g. as an end-of-input marker) --
// confirm against the callers.
CharacterSet &CharacterSet::include_all() {
  includes_all = true;
  included_chars = {};
  excluded_chars = { 0 };
  return *this;
}

// Adds the inclusive range [min, max] to the set. In `includes_all` mode this
// means dropping the range from the exclusion list.
CharacterSet &CharacterSet::include(uint32_t min, uint32_t max) {
  if (includes_all)
    remove_range(&excluded_chars, min, max);
  else
    add_range(&included_chars, min, max);
  return *this;
}

// Removes the inclusive range [min, max] from the set. In `includes_all` mode
// this means adding the range to the exclusion list.
CharacterSet &CharacterSet::exclude(uint32_t min, uint32_t max) {
  if (includes_all)
    add_range(&excluded_chars, min, max);
  else
    remove_range(&included_chars, min, max);
  return *this;
}

// Adds the single value `c` to the set.
CharacterSet &CharacterSet::include(uint32_t c) {
  return include(c, c);
}

// Removes the single value `c` from the set.
CharacterSet &CharacterSet::exclude(uint32_t c) {
  return exclude(c, c);
}

// True when the set is not in `includes_all` mode and lists no inclusions.
bool CharacterSet::is_empty() const {
  return !includes_all && included_chars.empty();
}

// Makes this set the union of itself and `other`.
void CharacterSet::add_set(const CharacterSet &other) {
  if (includes_all) {
    if (other.includes_all) {
      // Only values excluded by both sets remain excluded.
      excluded_chars = remove_chars(&excluded_chars, other.excluded_chars);
    } else {
      // Anything `other` lists explicitly is no longer excluded.
      remove_chars(&excluded_chars, other.included_chars);
    }
  } else {
    if (other.includes_all) {
      includes_all = true;
      // Adopt `other`'s exclusions, except for values this set already
      // contained explicitly.
      for (uint32_t c : other.excluded_chars)
        if (!included_chars.count(c))
          excluded_chars.insert(c);
      included_chars.clear();
    } else {
      for (uint32_t c : other.included_chars)
        included_chars.insert(c);
    }
  }
}

// Removes from this set every value that is also in `other`.
// Returns a set holding the values that were removed, i.e. the values the two
// sets had in common.
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
  CharacterSet result;
  if (includes_all) {
    if (other.includes_all) {
      // Common part: everything except what either set excludes.
      result.includes_all = true;
      result.excluded_chars = excluded_chars;
      // What is left of this set is exactly the values that `other` excludes
      // and this set did not, which is what add_chars reports as newly added.
      included_chars = add_chars(&result.excluded_chars, other.excluded_chars);
      excluded_chars = {};
      includes_all = false;
    } else {
      // Exclude `other`'s values; the ones not previously excluded were the
      // common part.
      result.included_chars = add_chars(&excluded_chars, other.included_chars);
    }
  } else {
    if (other.includes_all) {
      // Start from all of this set's values; the ones `other` excludes are
      // pulled back out of the result and stay in this set.
      result.included_chars = included_chars;
      included_chars =
          remove_chars(&result.included_chars, other.excluded_chars);
    } else {
      result.included_chars =
          remove_chars(&included_chars, other.included_chars);
    }
  }
  return result;
}

// True when this set and `other` have at least one value in common. Works on a
// copy, so neither set is modified.
bool CharacterSet::intersects(const CharacterSet &other) const {
  CharacterSet copy(*this);
  return !copy.remove_set(other).is_empty();
}

// The explicitly included values, grouped into ranges.
vector<CharacterRange> CharacterSet::included_ranges() const {
  return consolidate_ranges(included_chars);
}

// The explicitly excluded values, grouped into ranges.
vector<CharacterRange> CharacterSet::excluded_ranges() const {
  return consolidate_ranges(excluded_chars);
}

}  // namespace rules
}  // namespace tree_sitter