diff --git a/TreeSitter.xcodeproj/project.pbxproj b/TreeSitter.xcodeproj/project.pbxproj index c9f56a57..4f752797 100644 --- a/TreeSitter.xcodeproj/project.pbxproj +++ b/TreeSitter.xcodeproj/project.pbxproj @@ -25,6 +25,7 @@ 12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; }; 12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; }; 12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; }; + 12F8BE8E183C79B2006CCF99 /* char_class.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F8BE8C183C79B2006CCF99 /* char_class.cpp */; }; 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; }; 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; }; 27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; }; @@ -148,6 +149,8 @@ 12D136A3183678A2005F3369 /* repeat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = repeat.h; sourceTree = ""; }; 12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; }; 12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = ""; }; + 12F8BE8C183C79B2006CCF99 /* char_class.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = char_class.cpp; sourceTree = ""; }; + 12F8BE8D183C79B2006CCF99 /* char_class.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = char_class.h; sourceTree = ""; }; 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; }; 12F9A64D182DD5FD00FAF50C /* spec_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = spec_helper.h; path = spec/spec_helper.h; sourceTree = SOURCE_ROOT; }; 12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = grammar.cpp; sourceTree = ""; }; @@ -188,6 +191,8 @@ 12130616182C3D2900FCF928 /* string.h */, 12130609182C389100FCF928 /* symbol.cpp */, 1213060A182C389100FCF928 /* symbol.h */, + 12F8BE8C183C79B2006CCF99 /* char_class.cpp */, + 12F8BE8D183C79B2006CCF99 /* char_class.h */, ); path = rules; sourceTree = ""; @@ -537,6 +542,7 @@ 1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */, 1213061B182C84DF00FCF928 /* item.cpp in Sources */, 12130617182C3D2900FCF928 /* string.cpp in Sources */, + 12F8BE8E183C79B2006CCF99 /* char_class.cpp in Sources */, 12130611182C3A1100FCF928 /* blank.cpp in Sources */, 12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */, 1213060E182C398300FCF928 /* choice.cpp in Sources */, diff --git a/spec/lr/item_set_spec.cpp b/spec/lr/item_set_spec.cpp index bce8fb8b..aab22014 100644 --- a/spec/lr/item_set_spec.cpp +++ b/spec/lr/item_set_spec.cpp @@ -1,23 +1,56 @@ #include "spec_helper.h" #include "../test_grammars/arithmetic.h" +#include using namespace tree_sitter::lr; +using namespace tree_sitter::rules; -Describe(item_sets) { +static item_set_ptr item_set(const std::initializer_list &items) { + return item_set_ptr(new ItemSet(items)); +} + +Describe_Only(item_sets) { Grammar grammar = test_grammars::arithmetic(); It(computes_the_closure_of_an_item_set_under_symbol_expansion) { - Item item = Item::at_beginning_of_rule("term", grammar); - ItemSet item_set = ItemSet({ item }).closure_in_grammar(grammar); + Item item = Item::at_beginning_of_rule("expression", grammar); + ItemSet set = ItemSet(item, grammar); AssertThat( - item_set, - EqualsContainer(ItemSet({ - Item("term", grammar.rule("term"), 0), - Item("factor", grammar.rule("factor"), 0), - Item("variable", grammar.rule("variable"), 0), - Item("number", grammar.rule("number"), 0), - Item("left_paren", grammar.rule("left_paren"), 0), - }))); + set, + EqualsContainer(ItemSet({ + Item("expression", grammar.rule("expression"), 0), + Item("term", grammar.rule("term"), 0), + Item("factor", grammar.rule("factor"), 0), + Item("variable", grammar.rule("variable"), 0), + Item("number", grammar.rule("number"), 0), + Item("left_paren", grammar.rule("left_paren"), 0), + }))); + } + + It(computes_transitions) { + Item item = Item::at_beginning_of_rule("factor", grammar); + ItemSet set = ItemSet(item, grammar); + + AssertThat( + set.sym_transitions(grammar), + EqualsContainer(TransitionMap({ + { sym("variable"), item_set({ Item("factor", blank(), 1) }) }, + { sym("number"), item_set({ Item("factor", blank(), 1) }) }, + { sym("left_paren"), std::make_shared(Item("factor", seq({ sym("expression"), sym("right_paren") }), 1), grammar) }, + }), TransitionMap::elements_equal)); + } + + It(computes_character_transitions) { + Item item = Item::at_beginning_of_rule("factor", grammar); + ItemSet set = ItemSet(item, grammar); + + AssertThat( + set.char_transitions(grammar), + EqualsContainer(TransitionMap({ + { char_class(CharClassTypeWord), item_set({ Item("variable", choice({ repeat(char_class(CharClassTypeWord)), blank() }), 1) }) }, + { char_class(CharClassTypeDigit), item_set({ Item("number", choice({ repeat(char_class(CharClassTypeDigit)), blank() }), 1) }) }, + { character('('), item_set({ Item("left_paren", blank(), 1) }) } + }), TransitionMap::elements_equal)); } }; diff --git a/spec/rules/pattern_spec.cpp b/spec/rules/pattern_spec.cpp index 427cc6e2..05a73fce 100644 --- a/spec/rules/pattern_spec.cpp +++ b/spec/rules/pattern_spec.cpp @@ -15,7 +15,18 @@ Describe(pattern_rules) { character('c') })->to_string())); }; - + + It(parses_character_classes) { + pattern_ptr rule = pattern("\\w-\\d"); + AssertThat( + rule->to_rule_tree()->to_string(), + Equals(seq({ + char_class(CharClassTypeWord), + character('-'), + char_class(CharClassTypeDigit) + })->to_string())); + }; + It(parses_choices) { pattern_ptr rule = pattern("ab|cd|ef"); AssertThat( diff --git a/spec/rules/rules_spec.cpp b/spec/rules/rules_spec.cpp index 021c8a47..e6147cd8 100644 --- a/spec/rules/rules_spec.cpp +++ b/spec/rules/rules_spec.cpp @@ -42,6 +42,15 @@ Describe(Rules) { }))); } + It(handles_character_classes) { + auto rule = rules::char_class(rules::CharClassTypeDigit); + AssertThat( + rule->transitions(), + EqualsTransitionMap(TransitionMap({ + { rule, rules::blank() } + }))); + } + It(handles_choices) { AssertThat( rules::choice({ symbol1, symbol2 })->transitions(), @@ -117,6 +126,18 @@ Describe(Rules) { }) }) }}))); + + repeat = rules::repeat(rules::str("a")); + AssertThat( + repeat->transitions(), + EqualsTransitionMap(TransitionMap({ + { + rules::character('a'), + rules::choice({ + repeat, + rules::blank() + }) + }}))); } }; }; diff --git a/spec/spec_helper.cpp b/spec/spec_helper.cpp index 89ed6281..0624777b 100644 --- a/spec/spec_helper.cpp +++ b/spec/spec_helper.cpp @@ -1,6 +1,6 @@ #include "spec_helper.h" -EqualsContainerConstraint EqualsTransitionMap(const rule_tmap &expected) { - return EqualsContainer(expected, rule_tmap::elements_equal); +EqualsContainerConstraint, rule_tmap_comparator> EqualsTransitionMap(const TransitionMap &expected) { + return EqualsContainer(expected, TransitionMap::elements_equal); } diff --git a/spec/spec_helper.h b/spec/spec_helper.h index 9556f172..e15f544a 100644 --- a/spec/spec_helper.h +++ b/spec/spec_helper.h @@ -14,7 +14,6 @@ using namespace std; using namespace igloo; // Assertion helpers for transition maps -typedef TransitionMap rule_tmap; typedef bool (* rule_tmap_comparator)(const std::pair &, const std::pair &); EqualsContainerConstraint, rule_tmap_comparator> EqualsTransitionMap(const TransitionMap &expected); diff --git a/src/grammar.cpp b/src/grammar.cpp index 6e4440e9..40c4ba3d 100644 --- a/src/grammar.cpp +++ b/src/grammar.cpp @@ -5,7 +5,7 @@ namespace tree_sitter { rules(rules), start_rule_name(rules.begin()->first) {} - const rules::rule_ptr Grammar::rule(const std::string &name) { + const rules::rule_ptr Grammar::rule(const std::string &name) const { auto iter = rules.find(name); return (iter == rules.end()) ? rules::rule_ptr(nullptr) : diff --git a/src/grammar.h b/src/grammar.h index b9ffbe95..db2bda8b 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -11,7 +11,7 @@ namespace tree_sitter { public: Grammar(const rule_map_init_list &rules); - const rules::rule_ptr rule(const std::string &); + const rules::rule_ptr rule(const std::string &) const; const std::string start_rule_name; private: diff --git a/src/lr/item.cpp b/src/lr/item.cpp index 997221bb..208ee6dc 100644 --- a/src/lr/item.cpp +++ b/src/lr/item.cpp @@ -10,13 +10,13 @@ namespace tree_sitter { rule(rule), consumed_sym_count(consumed_sym_count) {}; - Item Item::at_beginning_of_rule(const std::string &rule_name, Grammar &grammar) { + Item Item::at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar) { return Item(rule_name, grammar.rule(rule_name), 0); } TransitionMap Item::transitions() const { return rule->transitions().map([&](rules::rule_ptr to_rule) { - return item_ptr(new Item(rule_name, to_rule, consumed_sym_count + 1)); + return std::make_shared(rule_name, to_rule, consumed_sym_count + 1); }); }; @@ -24,16 +24,15 @@ namespace tree_sitter { vector result; for (auto pair : rule->transitions()) { shared_ptr sym = dynamic_pointer_cast(pair.first); - if (sym != nullptr) result.push_back(sym); + if (sym) result.push_back(sym); } return result; } bool Item::operator==(const Item &other) const { - return ( - other.rule_name == rule_name && - other.rule == rule && - other.consumed_sym_count == consumed_sym_count); + bool rule_names_eq = other.rule_name == rule_name; + bool rules_eq = (*other.rule == *rule); + return rule_names_eq && rules_eq; } std::ostream& operator<<(ostream &stream, const Item &item) { diff --git a/src/lr/item.h b/src/lr/item.h index 601bf57b..7563191c 100644 --- a/src/lr/item.h +++ b/src/lr/item.h @@ -12,7 +12,7 @@ namespace tree_sitter { class Item { public: Item(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count); - static Item at_beginning_of_rule(const std::string &rule_name, Grammar &grammar); + static Item at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar); TransitionMap transitions() const; std::vector next_symbols() const; diff --git a/src/lr/item_set.cpp b/src/lr/item_set.cpp index f5f806d1..c40e530f 100644 --- a/src/lr/item_set.cpp +++ b/src/lr/item_set.cpp @@ -8,16 +8,12 @@ namespace tree_sitter { namespace lr { ItemSet::ItemSet(const vector &items) : contents(items) {} ItemSet::ItemSet(const initializer_list &items) : contents(items) {} - - TransitionMap ItemSet::transitions() const { - return TransitionMap(); - } - - bool vector_contains(vector items, lr::Item item) { + + static bool vector_contains(vector items, lr::Item item) { return (std::find(items.begin(), items.end(), item) != items.end()); } - void add_item(vector &vector, const Item &item, Grammar &grammar) { + static void add_item(vector &vector, const Item &item, const Grammar &grammar) { if (!vector_contains(vector, item)) { vector.push_back(item); for (rules::sym_ptr rule : item.next_symbols()) { @@ -26,12 +22,51 @@ namespace tree_sitter { } } } + + static vector closure_in_grammar(const Item &item, const Grammar &grammar) { + vector result; + add_item(result, item, grammar); + return result; + } + + ItemSet::ItemSet(const Item &item, const Grammar &grammar) : contents(closure_in_grammar(item, grammar)) {} + + TransitionMap ItemSet::char_transitions(const Grammar &grammar) const { + auto result = TransitionMap(); + for (auto item : *this) { + auto new_set = item.transitions() + .where([&](const rules::rule_ptr &on_rule) -> bool { + return typeid(*on_rule) != typeid(rules::Symbol); + }) + .map([&](const item_ptr &item) -> item_set_ptr { + return std::make_shared(*item, grammar); + }); + result.merge(new_set, [&](const item_set_ptr left, const item_set_ptr right) -> item_set_ptr { + return left; + }); + } + return result; + } - ItemSet ItemSet::closure_in_grammar(Grammar &grammar) const { - vector items; - for (Item item : *this) - add_item(items, item, grammar); - return ItemSet(items); + TransitionMap ItemSet::sym_transitions(const Grammar &grammar) const { + auto result = TransitionMap(); + for (auto item : *this) { + auto new_set = item.transitions() + .where([&](const rules::rule_ptr &on_rule) -> bool { + return typeid(*on_rule) == typeid(rules::Symbol); + }) + .map([&](const item_ptr &item) -> item_set_ptr { + return std::make_shared(*item, grammar); + }); + result.merge(new_set, [&](const item_set_ptr left, const item_set_ptr right) -> item_set_ptr { + return left; + }); + } + return result; + } + + bool ItemSet::operator==(const tree_sitter::lr::ItemSet &other) const { + return contents == other.contents; } #pragma mark - container diff --git a/src/lr/item_set.h b/src/lr/item_set.h index 2151a957..89914669 100644 --- a/src/lr/item_set.h +++ b/src/lr/item_set.h @@ -6,10 +6,14 @@ namespace tree_sitter { namespace lr { + class ItemSet; + typedef std::shared_ptr item_set_ptr; + class ItemSet { public: ItemSet(const std::vector &items); ItemSet(const std::initializer_list &items); + ItemSet(const Item &item, const Grammar &grammar); typedef Item value_type; typedef std::vector::const_iterator const_iterator; @@ -17,12 +21,14 @@ namespace tree_sitter { const_iterator end() const; size_t size() const; - ItemSet closure_in_grammar(Grammar &grammar) const; - TransitionMap transitions() const; + TransitionMap sym_transitions(const Grammar &grammar) const; + TransitionMap char_transitions(const Grammar &grammar) const; + bool operator==(const ItemSet &other) const; const std::vector contents; }; + typedef std::shared_ptr item_set_ptr; std::ostream& operator<<(std::ostream &stream, const ItemSet &item_set); } } diff --git a/src/rules.h b/src/rules.h index 604ae2cf..4f923682 100644 --- a/src/rules.h +++ b/src/rules.h @@ -9,6 +9,7 @@ #include "string.h" #include "pattern.h" #include "char.h" +#include "char_class.h" #include "repeat.h" #endif diff --git a/src/rules/blank.cpp b/src/rules/blank.cpp index 72ecd157..b306aa21 100644 --- a/src/rules/blank.cpp +++ b/src/rules/blank.cpp @@ -14,7 +14,7 @@ namespace tree_sitter { } bool Blank::operator==(const Rule &rule) const { - return dynamic_cast(&rule) != NULL; + return dynamic_cast(&rule) != nullptr; } std::string Blank::to_string() const { diff --git a/src/rules/char.cpp b/src/rules/char.cpp index 6fba0f24..6c693c90 100644 --- a/src/rules/char.cpp +++ b/src/rules/char.cpp @@ -8,7 +8,7 @@ namespace tree_sitter { namespace rules { Char::Char(char value) : value(value) {}; - char_ptr character(char value) { + rule_ptr character(char value) { return std::make_shared(value); } diff --git a/src/rules/char.h b/src/rules/char.h index 48f3ec52..61a2aed9 100644 --- a/src/rules/char.h +++ b/src/rules/char.h @@ -15,8 +15,7 @@ namespace tree_sitter { const char value; }; - typedef std::shared_ptr char_ptr; - char_ptr character(char value); + rule_ptr character(char value); } } diff --git a/src/rules/char_class.cpp b/src/rules/char_class.cpp new file mode 100644 index 00000000..8b7174ac --- /dev/null +++ b/src/rules/char_class.cpp @@ -0,0 +1,33 @@ +#include "char_class.h" +#include "blank.h" +#include "transition_map.h" + +using namespace std; + +namespace tree_sitter { + namespace rules { + CharClass::CharClass(CharClassType value) : value(value) {}; + + rule_ptr char_class(CharClassType type) { + return std::make_shared(type); + } + + TransitionMap CharClass::transitions() const { + return TransitionMap({{ char_class(value), blank() }}); + } + + bool CharClass::operator==(const Rule &rule) const { + const CharClass *other = dynamic_cast(&rule); + return other && (other->value == value); + } + + string CharClass::to_string() const { + switch (value) { + case CharClassTypeDigit: + return ""; + case CharClassTypeWord: + return ""; + } + } + } +} diff --git a/src/rules/char_class.h b/src/rules/char_class.h new file mode 100644 index 00000000..55200c89 --- /dev/null +++ b/src/rules/char_class.h @@ -0,0 +1,27 @@ +#ifndef __tree_sitter__char_class__ +#define __tree_sitter__char_class__ + +#include "rule.h" + +namespace tree_sitter { + namespace rules { + typedef enum { + CharClassTypeWord, + CharClassTypeDigit + } CharClassType; + + class CharClass : public Rule { + public: + CharClass(CharClassType type); + TransitionMap transitions() const; + bool operator==(const Rule& other) const; + std::string to_string() const; + private: + const CharClassType value; + }; + + rule_ptr char_class(CharClassType value); + } +} + +#endif diff --git a/src/rules/choice.cpp b/src/rules/choice.cpp index 7cf57b07..ae37e0aa 100644 --- a/src/rules/choice.cpp +++ b/src/rules/choice.cpp @@ -6,7 +6,10 @@ namespace tree_sitter { Choice::Choice(rule_ptr left, rule_ptr right) : left(left), right(right) {}; rule_ptr choice(const std::initializer_list &rules) { - return build_binary_rule_tree(rules); + rule_ptr result; + for (auto rule : rules) + result = result.get() ? std::make_shared(result, rule) : rule; + return result; } TransitionMap Choice::transitions() const { diff --git a/src/rules/pattern.cpp b/src/rules/pattern.cpp index c6fad832..6ecc18ba 100644 --- a/src/rules/pattern.cpp +++ b/src/rules/pattern.cpp @@ -56,7 +56,7 @@ namespace tree_sitter { break; case '\\': next(); - result = character(peek()); + result = escaped_char(peek()); next(); break; default: @@ -67,6 +67,21 @@ namespace tree_sitter { return result; } + rule_ptr escaped_char(char value) { + switch (value) { + case '(': + case ')': + return character(value); + case 'w': + return char_class(CharClassTypeWord); + case 'd': + return char_class(CharClassTypeDigit); + default: + error("unrecognized escape sequence"); + return rule_ptr(); + } + } + void next() { position++; } @@ -103,7 +118,8 @@ namespace tree_sitter { } bool Pattern::operator ==(tree_sitter::rules::Rule const &other) const { - return false; + auto pattern = dynamic_cast(&other); + return pattern && (pattern->value == value); } std::string Pattern::to_string() const { diff --git a/src/rules/rule.h b/src/rules/rule.h index 6a5cb1b9..24201870 100644 --- a/src/rules/rule.h +++ b/src/rules/rule.h @@ -21,14 +21,6 @@ namespace tree_sitter { std::ostream& operator<<(std::ostream& stream, const Rule &rule); std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule); - - template - rule_ptr build_binary_rule_tree(const std::initializer_list &rules) { - rule_ptr result; - for (auto rule : rules) - result = result.get() ? std::make_shared(result, rule) : rule; - return result; - } } } diff --git a/src/rules/seq.cpp b/src/rules/seq.cpp index 173d1fc8..0bfa64af 100644 --- a/src/rules/seq.cpp +++ b/src/rules/seq.cpp @@ -7,7 +7,12 @@ namespace tree_sitter { Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}; rule_ptr seq(const std::initializer_list &rules) { - return build_binary_rule_tree(rules); + rule_ptr result; + for (auto rule : rules) + result = (result.get() && typeid(*result) != typeid(Blank)) ? + std::make_shared(result, rule) : + rule; + return result; } TransitionMap Seq::transitions() const { diff --git a/src/transition_map.h b/src/transition_map.h index a033a6d8..594a3291 100644 --- a/src/transition_map.h +++ b/src/transition_map.h @@ -60,6 +60,15 @@ namespace tree_sitter { } } + + TransitionMap where(std::function filter_fn) { + TransitionMap result; + for (pair_type pair : *this) + if (filter_fn(pair.first)) + result.add(pair.first, pair.second); + return result; + } + template TransitionMap map(std::function(mapped_ptr)> map_fn) { TransitionMap result;