From 8cce11a52ad0c375bcd86ca51fe57f4a228d403a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 3 Feb 2014 13:05:51 -0800 Subject: [PATCH] Rename Character -> CharacterSet, CharacterMatch -> CharacterRange --- spec/compiler/build_tables/perform_spec.cpp | 20 ++++---- spec/compiler/rules/pattern_spec.cpp | 2 +- .../build_tables/item_set_transitions.cpp | 10 ++-- .../build_tables/item_set_transitions.h | 2 +- src/compiler/build_tables/perform.cpp | 2 +- .../build_tables/rule_transitions.cpp | 2 +- src/compiler/generate_code/c_code.cpp | 20 ++++---- src/compiler/lex_table.cpp | 7 +-- src/compiler/lex_table.h | 6 +-- src/compiler/rules/character.cpp | 50 +++++++++---------- src/compiler/rules/character.h | 46 ++++++++--------- src/compiler/rules/pattern.cpp | 10 ++-- src/compiler/rules/rules.cpp | 8 +-- src/compiler/rules/rules.h | 4 +- src/compiler/rules/visitor.cpp | 2 +- src/compiler/rules/visitor.h | 2 +- 16 files changed, 98 insertions(+), 95 deletions(-) diff --git a/spec/compiler/build_tables/perform_spec.cpp b/spec/compiler/build_tables/perform_spec.cpp index d45345de..e88c1863 100644 --- a/spec/compiler/build_tables/perform_spec.cpp +++ b/spec/compiler/build_tables/perform_spec.cpp @@ -16,8 +16,8 @@ static unordered_set keys(const unordered_map &ma return result; } -static unordered_set keys(const unordered_map &map) { - unordered_set result; +static unordered_set keys(const unordered_map &map) { + unordered_set result; for (auto pair : map) { result.insert(pair.first); } @@ -79,16 +79,16 @@ describe("building parse and lex tables", []() { Symbol("left-paren"), }))); - AssertThat(keys(lex_state(0).actions), Equals(unordered_set({ - Character('('), - Character(CharClassDigit), - Character(CharClassWord), + AssertThat(keys(lex_state(0).actions), Equals(unordered_set({ + CharacterSet('('), + CharacterSet(CharClassDigit), + CharacterSet(CharClassWord), }))); - AssertThat(lex_state(0).expected_inputs(), Equals(unordered_set({ - Character('('), - Character(CharClassDigit), - Character(CharClassWord), + AssertThat(lex_state(0).expected_inputs(), Equals(unordered_set({ + CharacterSet('('), + CharacterSet(CharClassDigit), + CharacterSet(CharClassWord), }))); }); diff --git a/spec/compiler/rules/pattern_spec.cpp b/spec/compiler/rules/pattern_spec.cpp index 98076cda..f8197e97 100644 --- a/spec/compiler/rules/pattern_spec.cpp +++ b/spec/compiler/rules/pattern_spec.cpp @@ -59,7 +59,7 @@ describe("parsing pattern rules", []() { Pattern rule("[12a-dA-D3]"); AssertThat( rule.to_rule_tree(), - EqualsPointer(character({ '1', '2', CharacterMatch({'a', 'd'}), CharacterMatch({ 'A', 'D' }), '3' }, true))); + EqualsPointer(character({ '1', '2', CharacterRange({'a', 'd'}), CharacterRange({ 'A', 'D' }), '3' }, true))); }); it("parses negated characters", []() { diff --git a/src/compiler/build_tables/item_set_transitions.cpp b/src/compiler/build_tables/item_set_transitions.cpp index ed26c382..57bb0cba 100644 --- a/src/compiler/build_tables/item_set_transitions.cpp +++ b/src/compiler/build_tables/item_set_transitions.cpp @@ -7,6 +7,8 @@ using std::make_shared; using std::shared_ptr; namespace tree_sitter { + using rules::CharacterSet; + namespace build_tables { template static std::set merge_sets(const std::set &left, const std::set &right) { @@ -15,12 +17,12 @@ namespace tree_sitter { return result; } - transition_map char_transitions(const LexItemSet &item_set, const Grammar &grammar) { - transition_map result; + transition_map char_transitions(const LexItemSet &item_set, const Grammar &grammar) { + transition_map result; for (LexItem item : item_set) { - transition_map item_transitions; + transition_map item_transitions; for (auto transition : rule_transitions(item.rule)) { - auto rule = dynamic_pointer_cast(transition.first); + auto rule = dynamic_pointer_cast(transition.first); if (rule.get()) { auto new_item = LexItem(item.lhs, transition.second); auto new_item_set = LexItemSet({ new_item }); diff --git a/src/compiler/build_tables/item_set_transitions.h b/src/compiler/build_tables/item_set_transitions.h index b6e175a8..479ed174 100644 --- a/src/compiler/build_tables/item_set_transitions.h +++ b/src/compiler/build_tables/item_set_transitions.h @@ -8,7 +8,7 @@ namespace tree_sitter { namespace build_tables { - transition_map char_transitions(const LexItemSet &item_set, const Grammar &grammar); + transition_map char_transitions(const LexItemSet &item_set, const Grammar &grammar); transition_map sym_transitions(const ParseItemSet &item_set, const Grammar &grammar); } } diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index 8e6d5fed..c6d5c10a 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -46,7 +46,7 @@ namespace tree_sitter { void add_advance_actions(const LexItemSet &item_set, size_t state_index) { for (auto transition : char_transitions(item_set, grammar)) { - rules::Character rule = *transition.first; + rules::CharacterSet rule = *transition.first; LexItemSet item_set = *transition.second; size_t new_state_index = add_lex_state(item_set); lex_table.add_action(state_index, rule, LexAction::Advance(new_state_index)); diff --git a/src/compiler/build_tables/rule_transitions.cpp b/src/compiler/build_tables/rule_transitions.cpp index f1f68647..622b32e9 100644 --- a/src/compiler/build_tables/rule_transitions.cpp +++ b/src/compiler/build_tables/rule_transitions.cpp @@ -13,7 +13,7 @@ namespace tree_sitter { public: transition_map value; - void visit(const Character *rule) { + void visit(const CharacterSet *rule) { value = transition_map({{ rule->copy(), blank() }}); } diff --git a/src/compiler/generate_code/c_code.cpp b/src/compiler/generate_code/c_code.cpp index 2d174a79..33e0e79a 100644 --- a/src/compiler/generate_code/c_code.cpp +++ b/src/compiler/generate_code/c_code.cpp @@ -101,28 +101,28 @@ namespace tree_sitter { } } - string condition_for_character_match(const rules::CharacterMatch &match) { + string condition_for_character_match(const rules::CharacterRange &match) { string lookahead("LOOKAHEAD_CHAR()"); auto value = match.value; switch (match.type) { - case rules::CharacterMatchTypeClass: + case rules::CharacterRangeTypeClass: switch (value.character_class) { case rules::CharClassDigit: return string("isdigit(") + lookahead + ")"; case rules::CharClassWord: return string("isalnum(") + lookahead + ")"; } - case rules::CharacterMatchTypeSpecific: + case rules::CharacterRangeTypeSpecific: return lookahead + " == '" + character_code(value.character) + "'"; - case rules::CharacterMatchTypeRange: + case rules::CharacterRangeTypeRange: return string("'") + value.range.min_character + string("' <= ") + lookahead + " && " + lookahead + " <= '" + value.range.max_character + "'"; } } - string condition_for_character_rule(const rules::Character &rule) { + string condition_for_character_rule(const rules::CharacterSet &rule) { vector parts; - for (auto &match : rule.matches) { + for (auto &match : rule.ranges) { parts.push_back("(" + condition_for_character_match(match) + ")"); } string result = join(parts, " ||\n "); @@ -176,10 +176,10 @@ namespace tree_sitter { return input; } - string lex_error_call(const unordered_set &expected_inputs) { - unordered_set expected_matches; + string lex_error_call(const unordered_set &expected_inputs) { + unordered_set expected_matches; for (auto &rule : expected_inputs) - for (auto &match : rule.matches) + for (auto &match : rule.ranges) expected_matches.insert(match); string result = "LEX_ERROR(" + to_string(expected_matches.size()) + ", EXPECT({"; @@ -193,7 +193,7 @@ namespace tree_sitter { return result; } - string code_for_lex_actions(const unordered_set &actions, const unordered_set &expected_inputs) { + string code_for_lex_actions(const unordered_set &actions, const unordered_set &expected_inputs) { auto action = actions.begin(); if (action == actions.end()) { return lex_error_call(expected_inputs); diff --git a/src/compiler/lex_table.cpp b/src/compiler/lex_table.cpp index 210e6642..8c495997 100644 --- a/src/compiler/lex_table.cpp +++ b/src/compiler/lex_table.cpp @@ -5,6 +5,7 @@ using std::to_string; using std::unordered_map; using std::unordered_set; using tree_sitter::rules::Symbol; +using tree_sitter::rules::CharacterSet; namespace tree_sitter { // Action @@ -44,8 +45,8 @@ namespace tree_sitter { } // State - unordered_set LexState::expected_inputs() const { - unordered_set result; + unordered_set LexState::expected_inputs() const { + unordered_set result; for (auto pair : actions) result.insert(pair.first); return result; @@ -57,7 +58,7 @@ namespace tree_sitter { return states.size() - 1; } - void LexTable::add_action(size_t state_index, rules::Character match, LexAction action) { + void LexTable::add_action(size_t state_index, CharacterSet match, LexAction action) { states[state_index].actions[match].insert(action); } diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 7350d23d..b60594c2 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -45,15 +45,15 @@ namespace std { namespace tree_sitter { class LexState { public: - std::unordered_map> actions; + std::unordered_map> actions; std::unordered_set default_actions; - std::unordered_set expected_inputs() const; + std::unordered_set expected_inputs() const; }; class LexTable { public: size_t add_state(); - void add_action(size_t state_index, rules::Character rule, LexAction action); + void add_action(size_t state_index, rules::CharacterSet rule, LexAction action); void add_default_action(size_t state_index, LexAction action); std::vector states; diff --git a/src/compiler/rules/character.cpp b/src/compiler/rules/character.cpp index f45996c2..42f7f08a 100644 --- a/src/compiler/rules/character.cpp +++ b/src/compiler/rules/character.cpp @@ -5,79 +5,79 @@ using std::hash; namespace tree_sitter { namespace rules { - CharacterMatch::CharacterMatch(char character) : type(CharacterMatchTypeSpecific) { value.character = character; } - CharacterMatch::CharacterMatch(CharacterClass klass) : type(CharacterMatchTypeClass) { value.character_class = klass; } - CharacterMatch::CharacterMatch(const std::pair bounds) : type(CharacterMatchTypeRange) { + CharacterRange::CharacterRange(char character) : type(CharacterRangeTypeSpecific) { value.character = character; } + CharacterRange::CharacterRange(CharacterClass klass) : type(CharacterRangeTypeClass) { value.character_class = klass; } + CharacterRange::CharacterRange(const std::pair bounds) : type(CharacterRangeTypeRange) { value.range.min_character = bounds.first; value.range.max_character = bounds.second; } - bool CharacterMatch::operator==(const CharacterMatch &right) const { + bool CharacterRange::operator==(const CharacterRange &right) const { if (type != right.type) return false; switch (type) { - case CharacterMatchTypeClass: + case CharacterRangeTypeClass: return (value.character_class == right.value.character_class); - case CharacterMatchTypeSpecific: + case CharacterRangeTypeSpecific: return (value.character == right.value.character); - case CharacterMatchTypeRange: + case CharacterRangeTypeRange: return (value.range.min_character == right.value.range.min_character && value.range.max_character == right.value.range.max_character); } } - string CharacterMatch::to_string() const { + string CharacterRange::to_string() const { switch (type) { - case CharacterMatchTypeClass: + case CharacterRangeTypeClass: switch (value.character_class) { case CharClassDigit: return ""; case CharClassWord: return ""; } - case CharacterMatchTypeSpecific: + case CharacterRangeTypeSpecific: return (value.character == '\0') ? "" : string("'") + value.character + "'"; - case CharacterMatchTypeRange: + case CharacterRangeTypeRange: return (string("'") + value.range.min_character + "'-'" + value.range.max_character + "'"); } } - Character::Character(char character) : matches({ CharacterMatch(character) }), sign(true) {} - Character::Character(CharacterClass char_class) : matches({ CharacterMatch(char_class) }), sign(true) {} - Character::Character(const std::unordered_set &matches, bool sign) : matches(matches), sign(sign) {} + CharacterSet::CharacterSet(char character) : ranges({ CharacterRange(character) }), sign(true) {} + CharacterSet::CharacterSet(CharacterClass char_class) : ranges({ CharacterRange(char_class) }), sign(true) {} + CharacterSet::CharacterSet(const std::unordered_set &ranges, bool sign) : ranges(ranges), sign(sign) {} - bool Character::operator==(const Rule &rule) const { - const Character *other = dynamic_cast(&rule); + bool CharacterSet::operator==(const Rule &rule) const { + const CharacterSet *other = dynamic_cast(&rule); return other && this->operator==(*other); } - bool Character::operator==(const Character &other) const { + bool CharacterSet::operator==(const CharacterSet &other) const { if (other.sign != sign) return false; - if (other.matches != matches) return false; + if (other.ranges != ranges) return false; return true; } - size_t Character::hash_code() const { + size_t CharacterSet::hash_code() const { return typeid(this).hash_code() ^ hash()(to_string()); } - rule_ptr Character::copy() const { - return std::make_shared(*this); + rule_ptr CharacterSet::copy() const { + return std::make_shared(*this); } - string Character::to_string() const { + string CharacterSet::to_string() const { string prefix("#"; } - void Character::accept(Visitor &visitor) const { + void CharacterSet::accept(Visitor &visitor) const { visitor.visit(this); } } diff --git a/src/compiler/rules/character.h b/src/compiler/rules/character.h index 83edcc4f..b12c28e2 100644 --- a/src/compiler/rules/character.h +++ b/src/compiler/rules/character.h @@ -12,13 +12,13 @@ namespace tree_sitter { } CharacterClass; typedef enum { - CharacterMatchTypeSpecific, - CharacterMatchTypeClass, - CharacterMatchTypeRange, - } CharacterMatchType; + CharacterRangeTypeSpecific, + CharacterRangeTypeClass, + CharacterRangeTypeRange, + } CharacterRangeType; - struct CharacterMatch { - CharacterMatchType type; + struct CharacterRange { + CharacterRangeType type; union { CharacterClass character_class; char character; @@ -28,10 +28,10 @@ namespace tree_sitter { } range; } value; - CharacterMatch(char); - CharacterMatch(const std::pair); - CharacterMatch(CharacterClass); - bool operator==(const CharacterMatch &) const; + CharacterRange(char); + CharacterRange(const std::pair); + CharacterRange(CharacterClass); + bool operator==(const CharacterRange &) const; std::string to_string() const; }; } @@ -39,17 +39,17 @@ namespace tree_sitter { namespace std { template<> - struct hash { - size_t operator()(const tree_sitter::rules::CharacterMatch &match) const { + struct hash { + size_t operator()(const tree_sitter::rules::CharacterRange &match) const { auto type = match.type; auto result = hash()(type); switch (type) { - case tree_sitter::rules::CharacterMatchTypeClass: + case tree_sitter::rules::CharacterRangeTypeClass: result ^= hash()(match.value.character_class); - case tree_sitter::rules::CharacterMatchTypeRange: + case tree_sitter::rules::CharacterRangeTypeRange: result ^= hash()(match.value.range.min_character); result ^= hash()(match.value.range.max_character); - case tree_sitter::rules::CharacterMatchTypeSpecific: + case tree_sitter::rules::CharacterRangeTypeSpecific: result ^= hash()(match.value.character); } return result; @@ -60,21 +60,21 @@ namespace std { namespace tree_sitter { namespace rules { - class Character : public Rule { + class CharacterSet : public Rule { public: - Character(char character); - Character(CharacterClass character_class); - Character(char min_character, char max_character); - Character(const std::unordered_set &matches, bool sign); + CharacterSet(char character); + CharacterSet(CharacterClass character_class); + CharacterSet(char min_character, char max_character); + CharacterSet(const std::unordered_set &matches, bool sign); bool operator==(const Rule& other) const; - bool operator==(const Character& other) const; + bool operator==(const CharacterSet& other) const; size_t hash_code() const; rule_ptr copy() const; std::string to_string() const; void accept(Visitor &visitor) const; - std::unordered_set matches; + std::unordered_set ranges; bool sign; }; } @@ -82,7 +82,7 @@ namespace tree_sitter { namespace std { template<> - struct hash : hash {}; + struct hash : hash {}; } #endif diff --git a/src/compiler/rules/pattern.cpp b/src/compiler/rules/pattern.cpp index ec0f7162..38988fb0 100644 --- a/src/compiler/rules/pattern.cpp +++ b/src/compiler/rules/pattern.cpp @@ -44,7 +44,7 @@ namespace tree_sitter { next(); is_affirmative = false; } - std::unordered_set matches; + std::unordered_set matches; while (has_more_input() && (peek() != ']')) matches.insert(single_char()); return character(matches, is_affirmative); @@ -78,8 +78,8 @@ namespace tree_sitter { return result; } - CharacterMatch single_char() { - CharacterMatch value('\0'); + CharacterRange single_char() { + CharacterRange value('\0'); switch (peek()) { case '\\': next(); @@ -91,7 +91,7 @@ namespace tree_sitter { next(); if (peek() == '-') { next(); - value = CharacterMatch({ first_char, peek() }); + value = CharacterRange({ first_char, peek() }); next(); } else { value = first_char; @@ -100,7 +100,7 @@ namespace tree_sitter { return value; } - CharacterMatch escaped_char(char value) { + CharacterRange escaped_char(char value) { switch (value) { case '\\': case '(': diff --git a/src/compiler/rules/rules.cpp b/src/compiler/rules/rules.cpp index 603bfb86..57ae749f 100644 --- a/src/compiler/rules/rules.cpp +++ b/src/compiler/rules/rules.cpp @@ -11,15 +11,15 @@ namespace tree_sitter { } rule_ptr character(char value) { - return make_shared(value); + return make_shared(value); } rule_ptr character(CharacterClass value) { - return make_shared(value); + return make_shared(value); } - rule_ptr character(const std::unordered_set &matches, bool is_affirmative) { - return make_shared(matches, is_affirmative); + rule_ptr character(const std::unordered_set &matches, bool is_affirmative) { + return make_shared(matches, is_affirmative); } rule_ptr choice(const initializer_list &rules) { diff --git a/src/compiler/rules/rules.h b/src/compiler/rules/rules.h index d3080645..80d2aa75 100644 --- a/src/compiler/rules/rules.h +++ b/src/compiler/rules/rules.h @@ -17,8 +17,8 @@ namespace tree_sitter { rule_ptr blank(); rule_ptr character(char value); rule_ptr character(CharacterClass value); - rule_ptr character(const std::unordered_set &matches); - rule_ptr character(const std::unordered_set &matches, bool); + rule_ptr character(const std::unordered_set &matches); + rule_ptr character(const std::unordered_set &matches, bool); rule_ptr choice(const std::initializer_list &rules); rule_ptr pattern(const std::string &value); diff --git a/src/compiler/rules/visitor.cpp b/src/compiler/rules/visitor.cpp index ec2d04bb..49b48d9d 100644 --- a/src/compiler/rules/visitor.cpp +++ b/src/compiler/rules/visitor.cpp @@ -5,7 +5,7 @@ namespace tree_sitter { void Visitor::default_visit(const Rule *rule) {}; void Visitor::visit(const Blank *rule) { default_visit(rule); } void Visitor::visit(const Symbol *rule) { default_visit(rule); } - void Visitor::visit(const Character *rule) { default_visit(rule); } + void Visitor::visit(const CharacterSet *rule) { default_visit(rule); } void Visitor::visit(const Choice *rule) { default_visit(rule); } void Visitor::visit(const Repeat *rule) { default_visit(rule); } void Visitor::visit(const Seq *rule) { default_visit(rule); } diff --git a/src/compiler/rules/visitor.h b/src/compiler/rules/visitor.h index ae95aac3..de23e56c 100644 --- a/src/compiler/rules/visitor.h +++ b/src/compiler/rules/visitor.h @@ -10,7 +10,7 @@ namespace tree_sitter { virtual void default_visit(const Rule *rule); virtual void visit(const Blank *rule); virtual void visit(const Symbol *rule); - virtual void visit(const Character *rule); + virtual void visit(const CharacterSet *rule); virtual void visit(const Choice *rule); virtual void visit(const Repeat *rule); virtual void visit(const Seq *rule);