From 92e73a9e7062774847899475e5ba8b2cad597f0e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 15 Nov 2013 08:46:45 -0800 Subject: [PATCH] Add repeat rules --- TreeSitter.xcodeproj/project.pbxproj | 8 +++++++- spec/rules/rules_spec.cpp | 19 +++++++++++++++++- spec/spec_helper.h | 4 ++-- src/rules.h | 1 + src/rules/pattern.cpp | 20 +++++++++++++------ src/rules/repeat.cpp | 30 ++++++++++++++++++++++++++++ src/rules/repeat.h | 22 ++++++++++++++++++++ src/transition_map.h | 4 ++-- 8 files changed, 96 insertions(+), 12 deletions(-) create mode 100644 src/rules/repeat.cpp create mode 100644 src/rules/repeat.h diff --git a/TreeSitter.xcodeproj/project.pbxproj b/TreeSitter.xcodeproj/project.pbxproj index ff911297..c9f56a57 100644 --- a/TreeSitter.xcodeproj/project.pbxproj +++ b/TreeSitter.xcodeproj/project.pbxproj @@ -24,6 +24,7 @@ 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; 12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; }; 12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; }; + 12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; }; 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; }; 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; }; 27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; }; @@ -143,6 +144,8 @@ 12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path = spec/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; }; 12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = ""; }; 12D136A0183570F5005F3369 /* pattern_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pattern_spec.cpp; path = spec/rules/pattern_spec.cpp; sourceTree = SOURCE_ROOT; }; + 12D136A2183678A2005F3369 /* repeat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = repeat.cpp; sourceTree = ""; }; + 12D136A3183678A2005F3369 /* repeat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = repeat.h; sourceTree = ""; }; 12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; }; 12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = ""; }; 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; }; @@ -175,6 +178,9 @@ 1213060D182C398300FCF928 /* choice.h */, 27A340F3EEB184C040521323 /* pattern.cpp */, 27A3438C4FA59A3882E8493B /* pattern.h */, + 12D136A2183678A2005F3369 /* repeat.cpp */, + 12D136A3183678A2005F3369 /* repeat.h */, + 1251209A1830145300C9B56A /* rule.cpp */, 12130607182C374800FCF928 /* rule.h */, 12130612182C3A1700FCF928 /* seq.cpp */, 12130613182C3A1700FCF928 /* seq.h */, @@ -182,7 +188,6 @@ 12130616182C3D2900FCF928 /* string.h */, 12130609182C389100FCF928 /* symbol.cpp */, 1213060A182C389100FCF928 /* symbol.h */, - 1251209A1830145300C9B56A /* rule.cpp */, ); path = rules; sourceTree = ""; @@ -541,6 +546,7 @@ 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */, 12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */, 1213061F182C857100FCF928 /* item_set_spec.cpp in Sources */, + 12D136A4183678A2005F3369 /* repeat.cpp in Sources */, 12130622182C85D300FCF928 /* item_set.cpp in Sources */, 12130605182C348F00FCF928 /* char.cpp in Sources */, 1213060B182C389100FCF928 /* symbol.cpp in Sources */, diff --git a/spec/rules/rules_spec.cpp b/spec/rules/rules_spec.cpp index 7c47e777..021c8a47 100644 --- a/spec/rules/rules_spec.cpp +++ b/spec/rules/rules_spec.cpp @@ -101,5 +101,22 @@ Describe(Rules) { { rules::character('b'), rules::blank() } }))); } -}; + + It(handles_repeats) { + rules::rule_ptr repeat = rules::repeat(rules::str("ab")); + AssertThat( + repeat->transitions(), + EqualsTransitionMap(TransitionMap({ + { + rules::character('a'), + rules::seq({ + rules::character('b'), + rules::choice({ + repeat, + rules::blank() + }) + }) + }}))); + } + }; }; diff --git a/spec/spec_helper.h b/spec/spec_helper.h index fda96d3a..9556f172 100644 --- a/spec/spec_helper.h +++ b/spec/spec_helper.h @@ -15,7 +15,7 @@ using namespace igloo; // Assertion helpers for transition maps typedef TransitionMap rule_tmap; -typedef bool (* rule_tmap_comparator)(const rule_tmap::pair_type &, const rule_tmap::pair_type &); -EqualsContainerConstraint EqualsTransitionMap(const rule_tmap &expected); +typedef bool (* rule_tmap_comparator)(const std::pair &, const std::pair &); +EqualsContainerConstraint, rule_tmap_comparator> EqualsTransitionMap(const TransitionMap &expected); #endif diff --git a/src/rules.h b/src/rules.h index f6bc83e6..604ae2cf 100644 --- a/src/rules.h +++ b/src/rules.h @@ -9,5 +9,6 @@ #include "string.h" #include "pattern.h" #include "char.h" +#include "repeat.h" #endif diff --git a/src/rules/pattern.cpp b/src/rules/pattern.cpp index 9e26db1a..3f345988 100644 --- a/src/rules/pattern.cpp +++ b/src/rules/pattern.cpp @@ -41,21 +41,25 @@ namespace tree_sitter { case '(': next(); result = rule(); - if (peek() == ')') { + if (peek() != ')') + error("mismatched parens"); + else next(); - return result; - } else { - throw std::string("Invalid regex pattern: ") + input; - } + break; + case ')': + error("mismatched parens"); break; case '\\': next(); + result = character(peek()); + next(); + break; default: result = character(peek()); next(); - return result; break; } + return result; } void next() { @@ -70,6 +74,10 @@ namespace tree_sitter { return position < length; } + void error(const char *message) { + throw std::string("Invalid regex pattern '") + input + "': " + message; + } + const std::string input; const size_t length; int position; diff --git a/src/rules/repeat.cpp b/src/rules/repeat.cpp new file mode 100644 index 00000000..f4e61275 --- /dev/null +++ b/src/rules/repeat.cpp @@ -0,0 +1,30 @@ +#include "blank.h" +#include "seq.h" +#include "choice.h" +#include "repeat.h" +#include "transition_map.h" + +namespace tree_sitter { + namespace rules { + Repeat::Repeat(const rule_ptr content) : content(content) {} + + rule_ptr repeat(const rule_ptr content) { + return std::make_shared(content); + } + + TransitionMap Repeat::transitions() const { + return content->transitions().map([&](const rule_ptr &value) -> rule_ptr { + return seq({ value, choice({ repeat(content), blank() }) }); + }); + } + + bool Repeat::operator==(const Rule &rule) const { + const Repeat *other = dynamic_cast(&rule); + return other && (*other->content == *content); + } + + std::string Repeat::to_string() const { + return std::string("(repeat ") + content->to_string() + ")"; + } + } +} diff --git a/src/rules/repeat.h b/src/rules/repeat.h new file mode 100644 index 00000000..9557464e --- /dev/null +++ b/src/rules/repeat.h @@ -0,0 +1,22 @@ +#ifndef __tree_sitter__repeat__ +#define __tree_sitter__repeat__ + +#include "rule.h" + +namespace tree_sitter { + namespace rules { + class Repeat : public Rule { + public: + Repeat(rule_ptr content); + TransitionMap transitions() const; + bool operator==(const Rule& other) const; + std::string to_string() const; + private: + const rule_ptr content; + }; + + rule_ptr repeat(const rule_ptr content); + } +} + +#endif diff --git a/src/transition_map.h b/src/transition_map.h index 516ed585..a033a6d8 100644 --- a/src/transition_map.h +++ b/src/transition_map.h @@ -9,18 +9,18 @@ namespace tree_sitter { template class TransitionMap { - public: typedef std::shared_ptr rule_ptr; typedef std::shared_ptr mapped_ptr; typedef std::pair pair_type; typedef std::vector contents_type; + public: + static bool elements_equal(const pair_type &left, const pair_type &right) { return (*left.first == *right.first) && (*left.second == *right.second); } TransitionMap() : contents(contents_type()) {}; - TransitionMap(std::initializer_list> pairs) : contents(pairs) {}; typedef typename contents_type::const_iterator const_iterator;