Allow Character rules to handle arbitrary character sets
This commit is contained in:
parent
bc1d115ee2
commit
7f62e752be
16 changed files with 322 additions and 309 deletions
|
|
@ -5,17 +5,64 @@ using std::hash;
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Character::Character(char value) : value(CharMatchSpecific(value)) {};
|
||||
Character::Character(CharClass value) : value(CharMatchClass(value)) {};
|
||||
Character::Character(char min, char max) : value(CharMatchRange(min, max)) {};
|
||||
// Builds a match for one specific character: tags the match as
// CharacterMatchTypeSpecific and stores the character in the union.
CharacterMatch::CharacterMatch(char character)
    : type(CharacterMatchTypeSpecific)
{
    value.character = character;
}
|
||||
// Builds a match for a whole character class: tags the match as
// CharacterMatchTypeClass and stores the class in the union.
CharacterMatch::CharacterMatch(CharacterClass klass)
    : type(CharacterMatchTypeClass)
{
    value.character_class = klass;
}
|
||||
// Builds a range match: `bounds.first` becomes the stored minimum character
// and `bounds.second` the stored maximum character.
CharacterMatch::CharacterMatch(std::pair<char, char> bounds)
    : type(CharacterMatchTypeRange)
{
    const char lower = bounds.first;
    const char upper = bounds.second;
    value.range.min_character = lower;
    value.range.max_character = upper;
}
|
||||
|
||||
// Rule holding a single specific-character match, with `sign` set to true.
Character::Character(char character)
    : matches({ CharacterMatch(character) }),
      sign(true)
{}
|
||||
// Rule holding a single character-class match, with `sign` set to true.
Character::Character(CharacterClass char_class)
    : matches({ CharacterMatch(char_class) }),
      sign(true)
{}
|
||||
// Rule holding a copy of the given list of matches together with the
// caller-supplied `sign` flag.
Character::Character(const std::vector<CharacterMatch> &matches, bool sign)
    : matches(matches),
      sign(sign)
{}
|
||||
|
||||
// Two matches are equal when they are of the same kind and carry the same
// payload for that kind. Only the union member selected by `type` is read.
bool CharacterMatch::operator==(const CharacterMatch &right) const {
    if (type != right.type)
        return false;

    switch (type) {
        case CharacterMatchTypeClass:
            return (value.character_class == right.value.character_class);
        case CharacterMatchTypeSpecific:
            return (value.character == right.value.character);
        case CharacterMatchTypeRange:
            return (value.range.min_character == right.value.range.min_character &&
                    value.range.max_character == right.value.range.max_character);
    }

    // `type` holds a value that is not one of the enumerators. Without this
    // return, control would flow off the end of a non-void function, which
    // is undefined behavior.
    return false;
}
|
||||
|
||||
// Renders this match as text:
//   class    -> "<digit>" or "<word>"
//   specific -> "<EOF>" for '\0', otherwise the character in single quotes
//   range    -> 'min'-'max'
string CharacterMatch::to_string() const {
    switch (type) {
        case CharacterMatchTypeClass:
            switch (value.character_class) {
                case CharClassDigit:
                    return "<digit>";
                case CharClassWord:
                    return "<word>";
            }
            // An unrecognized class must not fall through into the
            // specific-character case below, which would read the wrong
            // union member.
            return "<unknown-class>";

        case CharacterMatchTypeSpecific:
            return (value.character == '\0') ?
                "<EOF>" :
                string("'") + value.character + "'";

        case CharacterMatchTypeRange:
            return (string("'") +
                    value.range.min_character + "'-'" +
                    value.range.max_character + "'");
    }

    // Unrecognized match type: return something printable instead of
    // flowing off the end of a non-void function (undefined behavior).
    return "<unknown>";
}
|
||||
|
||||
|
||||
// A Character rule equals another rule only if that rule is also a Character
// with the same `sign` and the same matches in the same order.
bool Character::operator==(const Rule &rule) const {
    const Character *other = dynamic_cast<const Character *>(&rule);
    if (!other) return false;

    // `sign` is part of the rule's identity: two rules with identical match
    // lists but different signs must not compare equal.
    if (sign != other->sign) return false;

    // std::vector's operator== checks the sizes and then compares the
    // elements pairwise with CharacterMatch::operator==, which also avoids
    // the signed/unsigned index comparison of a hand-written loop.
    return matches == other->matches;
}
|
||||
|
||||
|
||||
size_t Character::hash_code() const {
|
||||
return typeid(this).hash_code() ^ hash<string>()(CharMatchToString(value));
|
||||
return typeid(this).hash_code() ^ hash<string>()(to_string());
|
||||
}
|
||||
|
||||
rule_ptr Character::copy() const {
|
||||
|
|
@ -23,7 +70,10 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
// Renders the rule as "#<char m1 m2 ...>", where each m is the textual form
// of one entry of `matches`, in order. An empty list renders as "#<char>".
string Character::to_string() const {
    // The earlier `return` based on the old `value` member was left in front
    // of this body, which made the loop below unreachable.
    string result("#<char");
    for (const auto &match : matches)
        result += " " + match.to_string();
    return result + ">";
}
|
||||
|
||||
void Character::accept(Visitor &visitor) const {
|
||||
|
|
|
|||
|
|
@ -2,15 +2,46 @@
|
|||
#define __tree_sitter__char__
|
||||
|
||||
#include "rule.h"
|
||||
#include "char_match.h"
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// Named character classes that a CharacterMatch can refer to.
enum CharacterClass {
    CharClassWord,
    CharClassDigit
};
|
||||
|
||||
// Discriminator telling which member of CharacterMatch's union is in use.
enum CharacterMatchType {
    CharacterMatchTypeSpecific,
    CharacterMatchTypeClass,
    CharacterMatchTypeRange,
};
|
||||
|
||||
struct CharacterMatch {
|
||||
CharacterMatchType type;
|
||||
union {
|
||||
CharacterClass character_class;
|
||||
char character;
|
||||
struct {
|
||||
char min_character;
|
||||
char max_character;
|
||||
} range;
|
||||
} value;
|
||||
|
||||
CharacterMatch(char);
|
||||
CharacterMatch(std::pair<char, char>);
|
||||
CharacterMatch(CharacterClass);
|
||||
bool operator==(const CharacterMatch &) const;
|
||||
std::string to_string() const;
|
||||
};
|
||||
|
||||
class Character : public Rule {
|
||||
public:
|
||||
Character(char character);
|
||||
Character(CharClass character_class);
|
||||
Character(CharacterClass character_class);
|
||||
Character(char min_character, char max_character);
|
||||
Character(const std::vector<CharacterMatch> &matches, bool sign);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
|
|
@ -18,9 +49,35 @@ namespace tree_sitter {
|
|||
std::string to_string() const;
|
||||
void accept(Visitor &visitor) const;
|
||||
|
||||
const CharMatch value;
|
||||
std::vector<CharacterMatch> matches;
|
||||
bool sign;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::CharacterMatch> {
|
||||
size_t operator()(const tree_sitter::rules::CharacterMatch &match) const {
|
||||
auto type = match.type;
|
||||
auto result = hash<short int>()(type);
|
||||
switch (type) {
|
||||
case tree_sitter::rules::CharacterMatchTypeClass:
|
||||
result ^= hash<short int>()(match.value.character_class);
|
||||
case tree_sitter::rules::CharacterMatchTypeRange:
|
||||
result ^= hash<char>()(match.value.range.min_character);
|
||||
result ^= hash<char>()(match.value.range.max_character);
|
||||
case tree_sitter::rules::CharacterMatchTypeSpecific:
|
||||
result ^= hash<char>()(match.value.character);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::Character> : hash<tree_sitter::rules::Rule> {};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -14,9 +14,13 @@ namespace tree_sitter {
|
|||
return make_shared<Character>(value);
|
||||
}
|
||||
|
||||
rule_ptr character(CharClass value) {
|
||||
// Creates a shared Character rule that matches the given character class.
rule_ptr character(CharacterClass value) {
    rule_ptr result = make_shared<Character>(value);
    return result;
}
|
||||
|
||||
// Creates a shared Character rule from a list of matches; `is_affirmative`
// is forwarded as the rule's `sign` constructor argument.
rule_ptr character(const std::vector<CharacterMatch> &matches, bool is_affirmative) {
    rule_ptr result = make_shared<Character>(matches, is_affirmative);
    return result;
}
|
||||
|
||||
rule_ptr choice(const initializer_list<rule_ptr> &rules) {
|
||||
rule_ptr result;
|
||||
|
|
|
|||
|
|
@ -16,8 +16,10 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
rule_ptr blank();
|
||||
rule_ptr character(char value);
|
||||
rule_ptr character(char min, char max);
|
||||
rule_ptr character(CharClass value);
|
||||
rule_ptr character(CharacterClass value);
|
||||
rule_ptr character(const std::vector<CharacterMatch> &matches);
|
||||
rule_ptr character(const std::vector<CharacterMatch> &matches, bool);
|
||||
|
||||
rule_ptr choice(const std::initializer_list<rule_ptr> &rules);
|
||||
rule_ptr pattern(const std::string &value);
|
||||
rule_ptr repeat(const rule_ptr content);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "rules.h"
|
||||
#include <unordered_map>
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue