From 9667b3fd6cdfd27c2b4c08a754ab8182df98fb8b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 21 Dec 2013 23:53:26 -0800 Subject: [PATCH] Refactor classes representing individual characters & character classes --- TreeSitter.xcodeproj/project.pbxproj | 32 +++++------ spec/compiler/lr/item_set_spec.cpp | 4 +- spec/compiler/rules/pattern_spec.cpp | 4 +- spec/compiler/rules/rules_spec.cpp | 2 +- src/compiler/char_match.cpp | 62 ++++++++++++++++++++++ src/compiler/char_match.h | 39 ++++++++++++++ src/compiler/rules/char.cpp | 23 -------- src/compiler/rules/char_class.cpp | 28 ---------- src/compiler/rules/char_class.h | 25 --------- src/compiler/rules/character.cpp | 25 +++++++++ src/compiler/rules/{char.h => character.h} | 9 ++-- src/compiler/rules/pattern.cpp | 4 +- src/compiler/rules/rules.cpp | 10 ++-- src/compiler/rules/rules.h | 6 +-- src/compiler/rules/transitions.cpp | 8 +-- src/compiler/rules/visitor.cpp | 3 +- src/compiler/rules/visitor.h | 3 +- 17 files changed, 167 insertions(+), 120 deletions(-) create mode 100644 src/compiler/char_match.cpp create mode 100644 src/compiler/char_match.h delete mode 100644 src/compiler/rules/char.cpp delete mode 100644 src/compiler/rules/char_class.cpp delete mode 100644 src/compiler/rules/char_class.h create mode 100644 src/compiler/rules/character.cpp rename src/compiler/rules/{char.h => character.h} (55%) diff --git a/TreeSitter.xcodeproj/project.pbxproj b/TreeSitter.xcodeproj/project.pbxproj index 60b79ce3..da97714c 100644 --- a/TreeSitter.xcodeproj/project.pbxproj +++ b/TreeSitter.xcodeproj/project.pbxproj @@ -7,7 +7,7 @@ objects = { /* Begin PBXBuildFile section */ - 12130605182C348F00FCF928 /* char.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130603182C348F00FCF928 /* char.cpp */; }; + 12130605182C348F00FCF928 /* character.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130603182C348F00FCF928 /* character.cpp */; }; 1213060B182C389100FCF928 /* symbol.cpp in Sources */ = {isa = PBXBuildFile; 
fileRef = 12130609182C389100FCF928 /* symbol.cpp */; }; 1213060E182C398300FCF928 /* choice.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1213060C182C398300FCF928 /* choice.cpp */; }; 12130611182C3A1100FCF928 /* blank.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1213060F182C3A1100FCF928 /* blank.cpp */; }; @@ -21,7 +21,6 @@ 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; 129D242C183EB1EB00FE9F71 /* parse_table_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 129D242A183EB1EB00FE9F71 /* parse_table_builder.cpp */; }; 12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; }; - 12F8BE8E183C79B2006CCF99 /* char_class.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F8BE8C183C79B2006CCF99 /* char_class.cpp */; }; 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; }; 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; }; 12FD4061185E68470041A84E /* c_code.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD405F185E68470041A84E /* c_code.cpp */; }; @@ -31,7 +30,6 @@ 12FD40B6185EEB5E0041A84E /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; 12FD40B8185EEB5E0041A84E /* item.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130619182C84DF00FCF928 /* item.cpp */; }; 12FD40B9185EEB5E0041A84E /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130615182C3D2900FCF928 /* string.cpp */; }; - 12FD40BA185EEB5E0041A84E /* char_class.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F8BE8C183C79B2006CCF99 /* char_class.cpp */; }; 12FD40BB185EEB5E0041A84E /* blank.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1213060F182C3A1100FCF928 /* blank.cpp 
*/; }; 12FD40BD185EEB5E0041A84E /* choice.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1213060C182C398300FCF928 /* choice.cpp */; }; 12FD40BF185EEB5E0041A84E /* c_code.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD405F185E68470041A84E /* c_code.cpp */; }; @@ -40,7 +38,7 @@ 12FD40C3185EEB5E0041A84E /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; }; 12FD40C6185EEB5E0041A84E /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; }; 12FD40C7185EEB5E0041A84E /* item_set.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130620182C85D300FCF928 /* item_set.cpp */; }; - 12FD40C8185EEB5E0041A84E /* char.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130603182C348F00FCF928 /* char.cpp */; }; + 12FD40C8185EEB5E0041A84E /* character.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130603182C348F00FCF928 /* character.cpp */; }; 12FD40C9185EEB5E0041A84E /* symbol.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130609182C389100FCF928 /* symbol.cpp */; }; 12FD40CA185EEB5E0041A84E /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; }; 12FD40CB185EEB5E0041A84E /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; }; @@ -59,6 +57,8 @@ 12FD40EE186641430041A84E /* rules.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E818641FB70041A84E /* rules.cpp */; }; 12FD40EF186641510041A84E /* transitions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E0186245FE0041A84E /* transitions.cpp */; }; 12FD40F01866415D0041A84E /* visitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E618639B910041A84E /* visitor.cpp */; }; + 12FD40F3186641C00041A84E /* char_match.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40F1186641C00041A84E /* char_match.cpp */; }; + 12FD40F4186641C00041A84E /* char_match.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 12FD40F1186641C00041A84E /* char_match.cpp */; }; 27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; }; /* End PBXBuildFile section */ @@ -84,8 +84,8 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ - 12130603182C348F00FCF928 /* char.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = char.cpp; sourceTree = ""; }; - 12130604182C348F00FCF928 /* char.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = char.h; sourceTree = ""; }; + 12130603182C348F00FCF928 /* character.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = character.cpp; sourceTree = ""; }; + 12130604182C348F00FCF928 /* character.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = character.h; sourceTree = ""; }; 12130607182C374800FCF928 /* rule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rule.h; sourceTree = ""; }; 12130609182C389100FCF928 /* symbol.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = symbol.cpp; sourceTree = ""; }; 1213060A182C389100FCF928 /* symbol.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = symbol.h; sourceTree = ""; }; @@ -120,8 +120,6 @@ 12D136A3183678A2005F3369 /* repeat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = repeat.h; sourceTree = ""; }; 12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; name = specs; path = compiler_specs; sourceTree = BUILT_PRODUCTS_DIR; }; 12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rules.h; path = 
src/compiler/rules/rules.h; sourceTree = SOURCE_ROOT; }; - 12F8BE8C183C79B2006CCF99 /* char_class.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = char_class.cpp; sourceTree = ""; }; - 12F8BE8D183C79B2006CCF99 /* char_class.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = char_class.h; sourceTree = ""; }; 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/compiler/spec_helper.cpp; sourceTree = SOURCE_ROOT; }; 12F9A64D182DD5FD00FAF50C /* spec_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = spec_helper.h; path = spec/compiler/spec_helper.h; sourceTree = SOURCE_ROOT; }; 12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = grammar.cpp; sourceTree = ""; }; @@ -140,6 +138,8 @@ 12FD40E41862B3530041A84E /* visitor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = visitor.h; sourceTree = ""; }; 12FD40E618639B910041A84E /* visitor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = visitor.cpp; sourceTree = ""; }; 12FD40E818641FB70041A84E /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = ""; }; + 12FD40F1186641C00041A84E /* char_match.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = char_match.cpp; sourceTree = ""; }; + 12FD40F2186641C00041A84E /* char_match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = char_match.h; sourceTree = ""; }; 27A340F3EEB184C040521323 /* pattern.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = 
pattern.cpp; sourceTree = ""; }; 27A3438C4FA59A3882E8493B /* pattern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pattern.h; sourceTree = ""; }; /* End PBXFileReference section */ @@ -167,10 +167,8 @@ children = ( 1213060F182C3A1100FCF928 /* blank.cpp */, 12130610182C3A1100FCF928 /* blank.h */, - 12130603182C348F00FCF928 /* char.cpp */, - 12130604182C348F00FCF928 /* char.h */, - 12F8BE8C183C79B2006CCF99 /* char_class.cpp */, - 12F8BE8D183C79B2006CCF99 /* char_class.h */, + 12130603182C348F00FCF928 /* character.cpp */, + 12130604182C348F00FCF928 /* character.h */, 1213060C182C398300FCF928 /* choice.cpp */, 1213060D182C398300FCF928 /* choice.h */, 27A340F3EEB184C040521323 /* pattern.cpp */, @@ -306,6 +304,8 @@ 12130618182C84B700FCF928 /* lr */, 12130602182C344400FCF928 /* rules */, 12C344421822F27700B07BE3 /* transition_map.h */, + 12FD40F1186641C00041A84E /* char_match.cpp */, + 12FD40F2186641C00041A84E /* char_match.h */, ); path = compiler; sourceTree = ""; @@ -433,7 +433,6 @@ 12FD40D9185FEEDF0041A84E /* pattern_spec.cpp in Sources */, 12130617182C3D2900FCF928 /* string.cpp in Sources */, 12FD40E2186245FE0041A84E /* transitions.cpp in Sources */, - 12F8BE8E183C79B2006CCF99 /* char_class.cpp in Sources */, 12130611182C3A1100FCF928 /* blank.cpp in Sources */, 1213060E182C398300FCF928 /* choice.cpp in Sources */, 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */, @@ -446,9 +445,10 @@ 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */, 12FD40D6185FEEDB0041A84E /* parse_table_builder_spec.cpp in Sources */, 12D136A4183678A2005F3369 /* repeat.cpp in Sources */, + 12FD40F3186641C00041A84E /* char_match.cpp in Sources */, 12FD40E718639B910041A84E /* visitor.cpp in Sources */, 12130622182C85D300FCF928 /* item_set.cpp in Sources */, - 12130605182C348F00FCF928 /* char.cpp in Sources */, + 12130605182C348F00FCF928 /* character.cpp in Sources */, 1213060B182C389100FCF928 /* symbol.cpp in Sources */, 
1251209B1830145300C9B56A /* rule.cpp in Sources */, 27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */, @@ -466,8 +466,8 @@ 12FD40B8185EEB5E0041A84E /* item.cpp in Sources */, 12FD40B9185EEB5E0041A84E /* string.cpp in Sources */, 12FD40EF186641510041A84E /* transitions.cpp in Sources */, - 12FD40BA185EEB5E0041A84E /* char_class.cpp in Sources */, 12FD40BB185EEB5E0041A84E /* blank.cpp in Sources */, + 12FD40F4186641C00041A84E /* char_match.cpp in Sources */, 12FD40BD185EEB5E0041A84E /* choice.cpp in Sources */, 12FD40DF1860064C0041A84E /* tree.c in Sources */, 12FD40BF185EEB5E0041A84E /* c_code.cpp in Sources */, @@ -479,7 +479,7 @@ 12FD40C3185EEB5E0041A84E /* grammar.cpp in Sources */, 12FD40C6185EEB5E0041A84E /* repeat.cpp in Sources */, 12FD40C7185EEB5E0041A84E /* item_set.cpp in Sources */, - 12FD40C8185EEB5E0041A84E /* char.cpp in Sources */, + 12FD40C8185EEB5E0041A84E /* character.cpp in Sources */, 12FD40C9185EEB5E0041A84E /* symbol.cpp in Sources */, 12FD40CA185EEB5E0041A84E /* rule.cpp in Sources */, 12FD40EE186641430041A84E /* rules.cpp in Sources */, diff --git a/spec/compiler/lr/item_set_spec.cpp b/spec/compiler/lr/item_set_spec.cpp index 027cd9f5..8eaaec9f 100644 --- a/spec/compiler/lr/item_set_spec.cpp +++ b/spec/compiler/lr/item_set_spec.cpp @@ -47,8 +47,8 @@ Describe(item_sets) { AssertThat( set.char_transitions(grammar), Equals(transition_map({ - { char_class(CharClassTypeWord), item_set({ Item("variable", choice({ repeat(char_class(CharClassTypeWord)), blank() }), 1) }) }, - { char_class(CharClassTypeDigit), item_set({ Item("number", choice({ repeat(char_class(CharClassTypeDigit)), blank() }), 1) }) }, + { character(CharClassWord), item_set({ Item("variable", choice({ repeat(character(CharClassWord)), blank() }), 1) }) }, + { character(CharClassDigit), item_set({ Item("number", choice({ repeat(character(CharClassDigit)), blank() }), 1) }) }, { character('('), item_set({ Item("left_paren", blank(), 1) }) } }))); } diff --git 
a/spec/compiler/rules/pattern_spec.cpp b/spec/compiler/rules/pattern_spec.cpp
index 5c84f5b1..34ffcc0d 100644
--- a/spec/compiler/rules/pattern_spec.cpp
+++ b/spec/compiler/rules/pattern_spec.cpp
@@ -21,9 +21,9 @@ Describe(pattern_rules) {
         AssertThat(
             rule.to_rule_tree()->to_string(),
             Equals(seq({
-                char_class(CharClassTypeWord),
+                character(CharClassWord),
                 character('-'),
-                char_class(CharClassTypeDigit)
+                character(CharClassDigit)
             })->to_string()));
     };
 
diff --git a/spec/compiler/rules/rules_spec.cpp b/spec/compiler/rules/rules_spec.cpp
index f9d6607d..37f05ff8 100644
--- a/spec/compiler/rules/rules_spec.cpp
+++ b/spec/compiler/rules/rules_spec.cpp
@@ -44,7 +44,7 @@ Describe(Rules) {
     }
 
     It(handles_character_classes) {
-        auto rule = rules::char_class(rules::CharClassTypeDigit);
+        auto rule = rules::character(CharClassDigit);
         AssertThat(
             rules::transitions(rule),
             Equals(transition_map({
diff --git a/src/compiler/char_match.cpp b/src/compiler/char_match.cpp
new file mode 100644
index 00000000..fac4af4a
--- /dev/null
+++ b/src/compiler/char_match.cpp
@@ -0,0 +1,83 @@
+#include "char_match.h"
+
+#include <ostream>
+
+using std::string;
+
+namespace tree_sitter {
+    // Builds a match for exactly one character.
+    CharMatch CharMatchSpecific(char value) {
+        CharMatch result = {};
+        result.type = CharMatchTypeSpecific;
+        result.value.character = value;
+        return result;
+    }
+
+    // Builds a match for one of the predefined character classes.
+    CharMatch CharMatchClass(CharClass value) {
+        CharMatch result = {};
+        result.type = CharMatchTypeClass;
+        result.value.character_class = value;
+        return result;
+    }
+
+    // Builds a range match that records min and max as its two bounds.
+    CharMatch CharMatchRange(char min, char max) {
+        CharMatch result = {};
+        result.type = CharMatchTypeRange;
+        result.value.range.min_character = min;
+        result.value.range.max_character = max;
+        return result;
+    }
+
+    // Renders a match as text. Characters are appended by value; building a
+    // std::string from the address of a lone char would read past it looking
+    // for a terminating NUL.
+    string CharMatchToString(CharMatch match) {
+        switch (match.type) {
+            case CharMatchTypeClass:
+                switch (match.value.character_class) {
+                    case CharClassDigit:
+                        return "<digit>";
+                    case CharClassWord:
+                        return "<word>";
+                }
+                // Unknown class: stop here instead of falling through to the
+                // single-character case and reading the wrong union member.
+                return string();
+            case CharMatchTypeSpecific:
+                return string("'") + match.value.character + "'";
+            case CharMatchTypeRange:
+                return (
+                    string("'") +
+                    match.value.range.min_character + "-" +
+                    match.value.range.max_character + "'");
+        }
+        // Only reachable when match.type holds a value outside the enum.
+        return string();
+    }
+
+    // Two matches are equal when they have the same type and the same payload
+    // for that type; union members belonging to other types are never read.
+    bool operator==(const CharMatch &left, const CharMatch &right) {
+        if (left.type != right.type)
+            return false;
+        switch (left.type) {
+            case CharMatchTypeClass:
+                return (left.value.character_class == right.value.character_class);
+            case CharMatchTypeSpecific:
+                return (left.value.character == right.value.character);
+            case CharMatchTypeRange:
+                return (
+                    left.value.range.min_character == right.value.range.min_character &&
+                    left.value.range.max_character == right.value.range.max_character);
+        }
+        // Only reachable when both types hold the same out-of-range value.
+        return false;
+    }
+
+    // Streams the textual form produced by CharMatchToString.
+    std::ostream& operator<<(std::ostream& stream, const CharMatch &match) {
+        return stream << CharMatchToString(match);
+    }
+}
diff --git a/src/compiler/char_match.h b/src/compiler/char_match.h
new file mode 100644
index 00000000..db03b125
--- /dev/null
+++ b/src/compiler/char_match.h
@@ -0,0 +1,44 @@
+#ifndef __TreeSitter__char_match__
+#define __TreeSitter__char_match__
+
+#include <iosfwd>
+#include <string>
+
+namespace tree_sitter {
+    // Selects which member of CharMatch::value is meaningful.
+    typedef enum {
+        CharMatchTypeSpecific,
+        CharMatchTypeClass,
+        CharMatchTypeRange,
+    } CharMatchType;
+
+    // Predefined character classes.
+    typedef enum {
+        CharClassWord,
+        CharClassDigit
+    } CharClass;
+
+    // Tagged union describing what a character rule matches: read only the
+    // member of value that corresponds to type.
+    struct CharMatch {
+        CharMatchType type;
+        union {
+            CharClass character_class;
+            char character;
+            struct {
+                char min_character;
+                char max_character;
+            } range;
+        } value;
+    };
+
+    CharMatch CharMatchSpecific(char);
+    CharMatch CharMatchClass(CharClass);
+    CharMatch CharMatchRange(char, char);
+    std::string CharMatchToString(CharMatch);
+
+    bool operator==(const CharMatch &, const CharMatch &);
+    std::ostream& operator<<(std::ostream& stream, const CharMatch &rule);
+}
+
+#endif
diff --git a/src/compiler/rules/char.cpp b/src/compiler/rules/char.cpp
deleted file mode 100644
index ee4543f3..00000000
--- a/src/compiler/rules/char.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "rules.h"
-#include
"transition_map.h" - -using std::string; - -namespace tree_sitter { - namespace rules { - Char::Char(char value) : value(value) {}; - - bool Char::operator==(const Rule &rule) const { - const Char *other = dynamic_cast(&rule); - return other && (other->value == value); - } - - string Char::to_string() const { - return string("'") + value + "'"; - } - - void Char::accept(Visitor &visitor) const { - visitor.visit(this); - } - } -} diff --git a/src/compiler/rules/char_class.cpp b/src/compiler/rules/char_class.cpp deleted file mode 100644 index e06a42f0..00000000 --- a/src/compiler/rules/char_class.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "rules.h" -#include "transition_map.h" - -using std::string; - -namespace tree_sitter { - namespace rules { - CharClass::CharClass(CharClassType value) : value(value) {}; - - bool CharClass::operator==(const Rule &rule) const { - const CharClass *other = dynamic_cast(&rule); - return other && (other->value == value); - } - - string CharClass::to_string() const { - switch (value) { - case CharClassTypeDigit: - return ""; - case CharClassTypeWord: - return ""; - } - } - - void CharClass::accept(Visitor &visitor) const { - visitor.visit(this); - } - } -} diff --git a/src/compiler/rules/char_class.h b/src/compiler/rules/char_class.h deleted file mode 100644 index bf0e266a..00000000 --- a/src/compiler/rules/char_class.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __tree_sitter__char_class__ -#define __tree_sitter__char_class__ - -#include "rule.h" - -namespace tree_sitter { - namespace rules { - typedef enum { - CharClassTypeWord, - CharClassTypeDigit - } CharClassType; - - class CharClass : public Rule { - public: - CharClass(CharClassType type); - bool operator==(const Rule& other) const; - std::string to_string() const; - void accept(Visitor &visitor) const; - - const CharClassType value; - }; - } -} - -#endif diff --git a/src/compiler/rules/character.cpp b/src/compiler/rules/character.cpp new file mode 100644 index 00000000..1eeeee9f 
--- /dev/null
+++ b/src/compiler/rules/character.cpp
@@ -0,0 +1,29 @@
+#include "rules.h"
+#include "transition_map.h"
+
+using std::string;
+
+namespace tree_sitter {
+    namespace rules {
+        // Each constructor stores the CharMatch built by the matching factory.
+        Character::Character(char value) : value(CharMatchSpecific(value)) {}
+        Character::Character(CharClass value) : value(CharMatchClass(value)) {}
+        Character::Character(char min, char max) : value(CharMatchRange(min, max)) {}
+
+        // Equal only to another Character rule holding an equal CharMatch; the
+        // dynamic_cast yields null for any other kind of rule.
+        bool Character::operator==(const Rule &rule) const {
+            const Character *other = dynamic_cast<const Character *>(&rule);
+            return other && (other->value == value);
+        }
+
+        string Character::to_string() const {
+            return CharMatchToString(value);
+        }
+
+        // Visitor hook: passes this rule to visitor.visit.
+        void Character::accept(Visitor &visitor) const {
+            visitor.visit(this);
+        }
+    }
+}
diff --git a/src/compiler/rules/char.h b/src/compiler/rules/character.h
similarity index 55%
rename from src/compiler/rules/char.h
rename to src/compiler/rules/character.h
index cee5b3c8..26213bfc 100644
--- a/src/compiler/rules/char.h
+++ b/src/compiler/rules/character.h
@@ -2,17 +2,20 @@
 #define __tree_sitter__char__
 
 #include "rule.h"
+#include "char_match.h"
 
 namespace tree_sitter {
     namespace rules {
-        class Char : public Rule {
+        class Character : public Rule {
         public:
-            Char(char value);
+            Character(char character);
+            Character(CharClass character_class);
+            Character(char min_character, char max_character);
             bool operator==(const Rule& other) const;
             std::string to_string() const;
             void accept(Visitor &visitor) const;
 
-            const char value;
+            const CharMatch value;
         };
     }
 }
diff --git a/src/compiler/rules/pattern.cpp b/src/compiler/rules/pattern.cpp
index 82f4dcb7..970f7a7e 100644
--- a/src/compiler/rules/pattern.cpp
+++ b/src/compiler/rules/pattern.cpp
@@ -71,9 +71,9 @@ namespace tree_sitter {
                     case ')':
                         return character(value);
                     case 'w':
-                        return char_class(CharClassTypeWord);
+                        return character(CharClassWord);
                     case 'd':
-                        return char_class(CharClassTypeDigit);
+                        return character(CharClassDigit);
                     default:
                         error("unrecognized escape sequence");
return rule_ptr(); diff --git a/src/compiler/rules/rules.cpp b/src/compiler/rules/rules.cpp index 94c827c5..a1f95433 100644 --- a/src/compiler/rules/rules.cpp +++ b/src/compiler/rules/rules.cpp @@ -11,13 +11,13 @@ namespace tree_sitter { } rule_ptr character(char value) { - return make_shared(value); + return make_shared(value); } - - rule_ptr char_class(CharClassType type) { - return make_shared(type); + + rule_ptr character(CharClass value) { + return make_shared(value); } - + rule_ptr choice(const initializer_list &rules) { rule_ptr result; for (auto rule : rules) diff --git a/src/compiler/rules/rules.h b/src/compiler/rules/rules.h index 87a3956f..46732977 100644 --- a/src/compiler/rules/rules.h +++ b/src/compiler/rules/rules.h @@ -8,8 +8,7 @@ #include "seq.h" #include "string.h" #include "pattern.h" -#include "char.h" -#include "char_class.h" +#include "character.h" #include "repeat.h" #include "visitor.h" @@ -17,7 +16,8 @@ namespace tree_sitter { namespace rules { rule_ptr blank(); rule_ptr character(char value); - rule_ptr char_class(CharClassType value); + rule_ptr character(char min, char max); + rule_ptr character(CharClass value); rule_ptr choice(const std::initializer_list &rules); rule_ptr pattern(const std::string &value); rule_ptr repeat(const rule_ptr content); diff --git a/src/compiler/rules/transitions.cpp b/src/compiler/rules/transitions.cpp index b779772d..ab33b464 100644 --- a/src/compiler/rules/transitions.cpp +++ b/src/compiler/rules/transitions.cpp @@ -11,12 +11,8 @@ namespace tree_sitter { value = transition_map(); } - void visit(const CharClass *rule) { - value = transition_map({{ char_class(rule->value), blank() }}); - } - - void visit(const Char *rule) { - value = transition_map({{ character(rule->value), blank() }}); + void visit(const Character *rule) { + value = transition_map({{ std::make_shared(*rule), blank() }}); } void visit(const Symbol *rule) { diff --git a/src/compiler/rules/visitor.cpp b/src/compiler/rules/visitor.cpp index 
65b025b1..357ff76f 100644 --- a/src/compiler/rules/visitor.cpp +++ b/src/compiler/rules/visitor.cpp @@ -4,8 +4,7 @@ namespace tree_sitter { namespace rules { void Visitor::visit(const Blank *rule) {} void Visitor::visit(const Symbol *rule) {} - void Visitor::visit(const Char *rule) {} - void Visitor::visit(const CharClass *rule) {} + void Visitor::visit(const Character *rule) {} void Visitor::visit(const Choice *rule) {} void Visitor::visit(const Repeat *rule) {} void Visitor::visit(const Seq *rule) {} diff --git a/src/compiler/rules/visitor.h b/src/compiler/rules/visitor.h index 679328e6..e11c9451 100644 --- a/src/compiler/rules/visitor.h +++ b/src/compiler/rules/visitor.h @@ -9,8 +9,7 @@ namespace tree_sitter { public: virtual void visit(const Blank *rule); virtual void visit(const Symbol *rule); - virtual void visit(const Char *rule); - virtual void visit(const CharClass *rule); + virtual void visit(const Character *rule); virtual void visit(const Choice *rule); virtual void visit(const Repeat *rule); virtual void visit(const Seq *rule);