Rename Character -> CharacterSet, CharacterMatch -> CharacterRange

This commit is contained in:
Max Brunsfeld 2014-02-03 13:05:51 -08:00
parent 716a4a4259
commit 8cce11a52a
16 changed files with 98 additions and 95 deletions

View file

@ -16,8 +16,8 @@ static unordered_set<Symbol> keys(const unordered_map<Symbol, parse_actions> &ma
return result;
}
static unordered_set<Character> keys(const unordered_map<Character, lex_actions> &map) {
unordered_set<Character> result;
static unordered_set<CharacterSet> keys(const unordered_map<CharacterSet, lex_actions> &map) {
unordered_set<CharacterSet> result;
for (auto pair : map) {
result.insert(pair.first);
}
@ -79,16 +79,16 @@ describe("building parse and lex tables", []() {
Symbol("left-paren"),
})));
AssertThat(keys(lex_state(0).actions), Equals(unordered_set<Character>({
Character('('),
Character(CharClassDigit),
Character(CharClassWord),
AssertThat(keys(lex_state(0).actions), Equals(unordered_set<CharacterSet>({
CharacterSet('('),
CharacterSet(CharClassDigit),
CharacterSet(CharClassWord),
})));
AssertThat(lex_state(0).expected_inputs(), Equals(unordered_set<Character>({
Character('('),
Character(CharClassDigit),
Character(CharClassWord),
AssertThat(lex_state(0).expected_inputs(), Equals(unordered_set<CharacterSet>({
CharacterSet('('),
CharacterSet(CharClassDigit),
CharacterSet(CharClassWord),
})));
});

View file

@ -59,7 +59,7 @@ describe("parsing pattern rules", []() {
Pattern rule("[12a-dA-D3]");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ '1', '2', CharacterMatch({'a', 'd'}), CharacterMatch({ 'A', 'D' }), '3' }, true)));
EqualsPointer(character({ '1', '2', CharacterRange({'a', 'd'}), CharacterRange({ 'A', 'D' }), '3' }, true)));
});
it("parses negated characters", []() {

View file

@ -7,6 +7,8 @@ using std::make_shared;
using std::shared_ptr;
namespace tree_sitter {
using rules::CharacterSet;
namespace build_tables {
template<typename T>
static std::set<T> merge_sets(const std::set<T> &left, const std::set<T> &right) {
@ -15,12 +17,12 @@ namespace tree_sitter {
return result;
}
transition_map<rules::Character, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar) {
transition_map<rules::Character, LexItemSet> result;
transition_map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar) {
transition_map<CharacterSet, LexItemSet> result;
for (LexItem item : item_set) {
transition_map<rules::Character, LexItemSet> item_transitions;
transition_map<CharacterSet, LexItemSet> item_transitions;
for (auto transition : rule_transitions(item.rule)) {
auto rule = dynamic_pointer_cast<const rules::Character>(transition.first);
auto rule = dynamic_pointer_cast<const CharacterSet>(transition.first);
if (rule.get()) {
auto new_item = LexItem(item.lhs, transition.second);
auto new_item_set = LexItemSet({ new_item });

View file

@ -8,7 +8,7 @@
namespace tree_sitter {
namespace build_tables {
transition_map<rules::Character, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar);
transition_map<rules::CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar);
transition_map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar);
}
}

View file

@ -46,7 +46,7 @@ namespace tree_sitter {
void add_advance_actions(const LexItemSet &item_set, size_t state_index) {
for (auto transition : char_transitions(item_set, grammar)) {
rules::Character rule = *transition.first;
rules::CharacterSet rule = *transition.first;
LexItemSet item_set = *transition.second;
size_t new_state_index = add_lex_state(item_set);
lex_table.add_action(state_index, rule, LexAction::Advance(new_state_index));

View file

@ -13,7 +13,7 @@ namespace tree_sitter {
public:
transition_map<Rule, Rule> value;
void visit(const Character *rule) {
void visit(const CharacterSet *rule) {
value = transition_map<Rule, Rule>({{ rule->copy(), blank() }});
}

View file

@ -101,28 +101,28 @@ namespace tree_sitter {
}
}
string condition_for_character_match(const rules::CharacterMatch &match) {
string condition_for_character_match(const rules::CharacterRange &match) {
string lookahead("LOOKAHEAD_CHAR()");
auto value = match.value;
switch (match.type) {
case rules::CharacterMatchTypeClass:
case rules::CharacterRangeTypeClass:
switch (value.character_class) {
case rules::CharClassDigit:
return string("isdigit(") + lookahead + ")";
case rules::CharClassWord:
return string("isalnum(") + lookahead + ")";
}
case rules::CharacterMatchTypeSpecific:
case rules::CharacterRangeTypeSpecific:
return lookahead + " == '" + character_code(value.character) + "'";
case rules::CharacterMatchTypeRange:
case rules::CharacterRangeTypeRange:
return string("'") + value.range.min_character + string("' <= ") + lookahead +
" && " + lookahead + " <= '" + value.range.max_character + "'";
}
}
string condition_for_character_rule(const rules::Character &rule) {
string condition_for_character_rule(const rules::CharacterSet &rule) {
vector<string> parts;
for (auto &match : rule.matches) {
for (auto &match : rule.ranges) {
parts.push_back("(" + condition_for_character_match(match) + ")");
}
string result = join(parts, " ||\n ");
@ -176,10 +176,10 @@ namespace tree_sitter {
return input;
}
string lex_error_call(const unordered_set<rules::Character> &expected_inputs) {
unordered_set<rules::CharacterMatch> expected_matches;
string lex_error_call(const unordered_set<rules::CharacterSet> &expected_inputs) {
unordered_set<rules::CharacterRange> expected_matches;
for (auto &rule : expected_inputs)
for (auto &match : rule.matches)
for (auto &match : rule.ranges)
expected_matches.insert(match);
string result = "LEX_ERROR(" + to_string(expected_matches.size()) + ", EXPECT({";
@ -193,7 +193,7 @@ namespace tree_sitter {
return result;
}
string code_for_lex_actions(const unordered_set<LexAction> &actions, const unordered_set<rules::Character> &expected_inputs) {
string code_for_lex_actions(const unordered_set<LexAction> &actions, const unordered_set<rules::CharacterSet> &expected_inputs) {
auto action = actions.begin();
if (action == actions.end()) {
return lex_error_call(expected_inputs);

View file

@ -5,6 +5,7 @@ using std::to_string;
using std::unordered_map;
using std::unordered_set;
using tree_sitter::rules::Symbol;
using tree_sitter::rules::CharacterSet;
namespace tree_sitter {
// Action
@ -44,8 +45,8 @@ namespace tree_sitter {
}
// State
unordered_set<rules::Character> LexState::expected_inputs() const {
unordered_set<rules::Character> result;
unordered_set<CharacterSet> LexState::expected_inputs() const {
unordered_set<CharacterSet> result;
for (auto pair : actions)
result.insert(pair.first);
return result;
@ -57,7 +58,7 @@ namespace tree_sitter {
return states.size() - 1;
}
void LexTable::add_action(size_t state_index, rules::Character match, LexAction action) {
void LexTable::add_action(size_t state_index, CharacterSet match, LexAction action) {
states[state_index].actions[match].insert(action);
}

View file

@ -45,15 +45,15 @@ namespace std {
namespace tree_sitter {
class LexState {
public:
std::unordered_map<rules::Character, std::unordered_set<LexAction>> actions;
std::unordered_map<rules::CharacterSet, std::unordered_set<LexAction>> actions;
std::unordered_set<LexAction> default_actions;
std::unordered_set<rules::Character> expected_inputs() const;
std::unordered_set<rules::CharacterSet> expected_inputs() const;
};
class LexTable {
public:
size_t add_state();
void add_action(size_t state_index, rules::Character rule, LexAction action);
void add_action(size_t state_index, rules::CharacterSet rule, LexAction action);
void add_default_action(size_t state_index, LexAction action);
std::vector<LexState> states;

View file

@ -5,79 +5,79 @@ using std::hash;
namespace tree_sitter {
namespace rules {
CharacterMatch::CharacterMatch(char character) : type(CharacterMatchTypeSpecific) { value.character = character; }
CharacterMatch::CharacterMatch(CharacterClass klass) : type(CharacterMatchTypeClass) { value.character_class = klass; }
CharacterMatch::CharacterMatch(const std::pair<char, char> bounds) : type(CharacterMatchTypeRange) {
CharacterRange::CharacterRange(char character) : type(CharacterRangeTypeSpecific) { value.character = character; }
CharacterRange::CharacterRange(CharacterClass klass) : type(CharacterRangeTypeClass) { value.character_class = klass; }
CharacterRange::CharacterRange(const std::pair<char, char> bounds) : type(CharacterRangeTypeRange) {
value.range.min_character = bounds.first;
value.range.max_character = bounds.second;
}
bool CharacterMatch::operator==(const CharacterMatch &right) const {
bool CharacterRange::operator==(const CharacterRange &right) const {
if (type != right.type)
return false;
switch (type) {
case CharacterMatchTypeClass:
case CharacterRangeTypeClass:
return (value.character_class == right.value.character_class);
case CharacterMatchTypeSpecific:
case CharacterRangeTypeSpecific:
return (value.character == right.value.character);
case CharacterMatchTypeRange:
case CharacterRangeTypeRange:
return (value.range.min_character == right.value.range.min_character &&
value.range.max_character == right.value.range.max_character);
}
}
string CharacterMatch::to_string() const {
string CharacterRange::to_string() const {
switch (type) {
case CharacterMatchTypeClass:
case CharacterRangeTypeClass:
switch (value.character_class) {
case CharClassDigit:
return "<digit>";
case CharClassWord:
return "<word>";
}
case CharacterMatchTypeSpecific:
case CharacterRangeTypeSpecific:
return (value.character == '\0') ?
"<EOF>" :
string("'") + value.character + "'";
case CharacterMatchTypeRange:
case CharacterRangeTypeRange:
return (string("'") +
value.range.min_character + "'-'" +
value.range.max_character + "'");
}
}
Character::Character(char character) : matches({ CharacterMatch(character) }), sign(true) {}
Character::Character(CharacterClass char_class) : matches({ CharacterMatch(char_class) }), sign(true) {}
Character::Character(const std::unordered_set<CharacterMatch> &matches, bool sign) : matches(matches), sign(sign) {}
CharacterSet::CharacterSet(char character) : ranges({ CharacterRange(character) }), sign(true) {}
CharacterSet::CharacterSet(CharacterClass char_class) : ranges({ CharacterRange(char_class) }), sign(true) {}
CharacterSet::CharacterSet(const std::unordered_set<CharacterRange> &ranges, bool sign) : ranges(ranges), sign(sign) {}
bool Character::operator==(const Rule &rule) const {
const Character *other = dynamic_cast<const Character *>(&rule);
bool CharacterSet::operator==(const Rule &rule) const {
const CharacterSet *other = dynamic_cast<const CharacterSet *>(&rule);
return other && this->operator==(*other);
}
bool Character::operator==(const Character &other) const {
bool CharacterSet::operator==(const CharacterSet &other) const {
if (other.sign != sign) return false;
if (other.matches != matches) return false;
if (other.ranges != ranges) return false;
return true;
}
size_t Character::hash_code() const {
size_t CharacterSet::hash_code() const {
return typeid(this).hash_code() ^ hash<string>()(to_string());
}
rule_ptr Character::copy() const {
return std::make_shared<Character>(*this);
rule_ptr CharacterSet::copy() const {
return std::make_shared<CharacterSet>(*this);
}
string Character::to_string() const {
string CharacterSet::to_string() const {
string prefix("#<char");
if (!sign) prefix += " (not)";
for (auto &match : matches)
prefix += " " + match.to_string();
for (auto &range : ranges)
prefix += " " + range.to_string();
return prefix + ">";
}
void Character::accept(Visitor &visitor) const {
void CharacterSet::accept(Visitor &visitor) const {
visitor.visit(this);
}
}

View file

@ -12,13 +12,13 @@ namespace tree_sitter {
} CharacterClass;
typedef enum {
CharacterMatchTypeSpecific,
CharacterMatchTypeClass,
CharacterMatchTypeRange,
} CharacterMatchType;
CharacterRangeTypeSpecific,
CharacterRangeTypeClass,
CharacterRangeTypeRange,
} CharacterRangeType;
struct CharacterMatch {
CharacterMatchType type;
struct CharacterRange {
CharacterRangeType type;
union {
CharacterClass character_class;
char character;
@ -28,10 +28,10 @@ namespace tree_sitter {
} range;
} value;
CharacterMatch(char);
CharacterMatch(const std::pair<char, char>);
CharacterMatch(CharacterClass);
bool operator==(const CharacterMatch &) const;
CharacterRange(char);
CharacterRange(const std::pair<char, char>);
CharacterRange(CharacterClass);
bool operator==(const CharacterRange &) const;
std::string to_string() const;
};
}
@ -39,17 +39,17 @@ namespace tree_sitter {
namespace std {
template<>
struct hash<tree_sitter::rules::CharacterMatch> {
size_t operator()(const tree_sitter::rules::CharacterMatch &match) const {
struct hash<tree_sitter::rules::CharacterRange> {
size_t operator()(const tree_sitter::rules::CharacterRange &match) const {
auto type = match.type;
auto result = hash<short int>()(type);
switch (type) {
case tree_sitter::rules::CharacterMatchTypeClass:
case tree_sitter::rules::CharacterRangeTypeClass:
result ^= hash<short int>()(match.value.character_class);
case tree_sitter::rules::CharacterMatchTypeRange:
case tree_sitter::rules::CharacterRangeTypeRange:
result ^= hash<char>()(match.value.range.min_character);
result ^= hash<char>()(match.value.range.max_character);
case tree_sitter::rules::CharacterMatchTypeSpecific:
case tree_sitter::rules::CharacterRangeTypeSpecific:
result ^= hash<char>()(match.value.character);
}
return result;
@ -60,21 +60,21 @@ namespace std {
namespace tree_sitter {
namespace rules {
class Character : public Rule {
class CharacterSet : public Rule {
public:
Character(char character);
Character(CharacterClass character_class);
Character(char min_character, char max_character);
Character(const std::unordered_set<CharacterMatch> &matches, bool sign);
CharacterSet(char character);
CharacterSet(CharacterClass character_class);
CharacterSet(char min_character, char max_character);
CharacterSet(const std::unordered_set<CharacterRange> &matches, bool sign);
bool operator==(const Rule& other) const;
bool operator==(const Character& other) const;
bool operator==(const CharacterSet& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor &visitor) const;
std::unordered_set<CharacterMatch> matches;
std::unordered_set<CharacterRange> ranges;
bool sign;
};
}
@ -82,7 +82,7 @@ namespace tree_sitter {
namespace std {
template<>
struct hash<tree_sitter::rules::Character> : hash<tree_sitter::rules::Rule> {};
struct hash<tree_sitter::rules::CharacterSet> : hash<tree_sitter::rules::Rule> {};
}
#endif

View file

@ -44,7 +44,7 @@ namespace tree_sitter {
next();
is_affirmative = false;
}
std::unordered_set<CharacterMatch> matches;
std::unordered_set<CharacterRange> matches;
while (has_more_input() && (peek() != ']'))
matches.insert(single_char());
return character(matches, is_affirmative);
@ -78,8 +78,8 @@ namespace tree_sitter {
return result;
}
CharacterMatch single_char() {
CharacterMatch value('\0');
CharacterRange single_char() {
CharacterRange value('\0');
switch (peek()) {
case '\\':
next();
@ -91,7 +91,7 @@ namespace tree_sitter {
next();
if (peek() == '-') {
next();
value = CharacterMatch({ first_char, peek() });
value = CharacterRange({ first_char, peek() });
next();
} else {
value = first_char;
@ -100,7 +100,7 @@ namespace tree_sitter {
return value;
}
CharacterMatch escaped_char(char value) {
CharacterRange escaped_char(char value) {
switch (value) {
case '\\':
case '(':

View file

@ -11,15 +11,15 @@ namespace tree_sitter {
}
rule_ptr character(char value) {
return make_shared<Character>(value);
return make_shared<CharacterSet>(value);
}
rule_ptr character(CharacterClass value) {
return make_shared<Character>(value);
return make_shared<CharacterSet>(value);
}
rule_ptr character(const std::unordered_set<CharacterMatch> &matches, bool is_affirmative) {
return make_shared<Character>(matches, is_affirmative);
rule_ptr character(const std::unordered_set<CharacterRange> &matches, bool is_affirmative) {
return make_shared<CharacterSet>(matches, is_affirmative);
}
rule_ptr choice(const initializer_list<rule_ptr> &rules) {

View file

@ -17,8 +17,8 @@ namespace tree_sitter {
rule_ptr blank();
rule_ptr character(char value);
rule_ptr character(CharacterClass value);
rule_ptr character(const std::unordered_set<CharacterMatch> &matches);
rule_ptr character(const std::unordered_set<CharacterMatch> &matches, bool);
rule_ptr character(const std::unordered_set<CharacterRange> &matches);
rule_ptr character(const std::unordered_set<CharacterRange> &matches, bool);
rule_ptr choice(const std::initializer_list<rule_ptr> &rules);
rule_ptr pattern(const std::string &value);

View file

@ -5,7 +5,7 @@ namespace tree_sitter {
void Visitor::default_visit(const Rule *rule) {};
void Visitor::visit(const Blank *rule) { default_visit(rule); }
void Visitor::visit(const Symbol *rule) { default_visit(rule); }
void Visitor::visit(const Character *rule) { default_visit(rule); }
void Visitor::visit(const CharacterSet *rule) { default_visit(rule); }
void Visitor::visit(const Choice *rule) { default_visit(rule); }
void Visitor::visit(const Repeat *rule) { default_visit(rule); }
void Visitor::visit(const Seq *rule) { default_visit(rule); }

View file

@ -10,7 +10,7 @@ namespace tree_sitter {
virtual void default_visit(const Rule *rule);
virtual void visit(const Blank *rule);
virtual void visit(const Symbol *rule);
virtual void visit(const Character *rule);
virtual void visit(const CharacterSet *rule);
virtual void visit(const Choice *rule);
virtual void visit(const Repeat *rule);
virtual void visit(const Seq *rule);