From 040ec86000a1810c2424cf3cd5259ea21717fcd9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Nov 2013 12:55:02 -0800 Subject: [PATCH] Move shared rule pointer factories into individual rule files --- TreeSitter.xcodeproj/project.pbxproj | 6 ++-- src/rules.cpp | 41 ---------------------------- src/rules.h | 14 ---------- src/rules/Pattern.cpp | 5 +++- src/rules/Pattern.h | 4 ++- src/rules/blank.cpp | 4 +++ src/rules/blank.h | 3 ++ src/rules/char.cpp | 6 +++- src/rules/char.h | 3 ++ src/rules/choice.cpp | 6 +++- src/rules/choice.h | 2 ++ src/rules/rule.h | 9 ++++++ src/rules/seq.cpp | 6 +++- src/rules/seq.h | 2 ++ src/rules/string.cpp | 15 +++++----- src/rules/string.h | 4 ++- src/rules/symbol.cpp | 7 +++-- src/rules/symbol.h | 5 ++-- todo.md | 36 ++++++++++++++++++++++++ 19 files changed, 102 insertions(+), 76 deletions(-) delete mode 100644 src/rules.cpp create mode 100644 todo.md diff --git a/TreeSitter.xcodeproj/project.pbxproj b/TreeSitter.xcodeproj/project.pbxproj index 13dd410e..ae91c5a9 100644 --- a/TreeSitter.xcodeproj/project.pbxproj +++ b/TreeSitter.xcodeproj/project.pbxproj @@ -20,7 +20,6 @@ 1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492EA181E200B008E9BDA /* rules_spec.cpp */; }; 12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12512092182F307C00C9B56A /* parse_table_spec.cpp */; }; 1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; }; - 1251209D18303CFB00C9B56A /* rules.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209C18303CFB00C9B56A /* rules.cpp */; }; 125120A018307DEC00C9B56A /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209E18307DEC00C9B56A /* parse_table.cpp */; }; 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; 12D1369D18328C5A005F3369 /* 
item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; }; @@ -135,13 +134,13 @@ 121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/rules_spec.cpp; sourceTree = SOURCE_ROOT; }; 12512092182F307C00C9B56A /* parse_table_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_spec.cpp; path = spec/lr/parse_table_spec.cpp; sourceTree = SOURCE_ROOT; }; 1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = ""; }; - 1251209C18303CFB00C9B56A /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = ""; }; 1251209E18307DEC00C9B56A /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = ""; }; 1251209F18307DEC00C9B56A /* parse_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table.h; sourceTree = ""; }; 125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/test_grammars/arithmetic.h; sourceTree = SOURCE_ROOT; }; 125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/test_grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; }; 12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transition_map.h; sourceTree = ""; }; 12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path 
= spec/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; }; + 12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = ""; }; 12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; }; 12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = ""; }; 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; }; @@ -422,6 +421,7 @@ 12E716F9181D010E0051A649 = { isa = PBXGroup; children = ( + 12D1369E18342088005F3369 /* todo.md */, 12E71701181D01890051A649 /* src */, 12E71796181D02A80051A649 /* spec */, 12E71795181D02A80051A649 /* Products */, @@ -435,7 +435,6 @@ 12F9A650182DD6BC00FAF50C /* grammar.h */, 12130618182C84B700FCF928 /* lr */, 12130602182C344400FCF928 /* rules */, - 1251209C18303CFB00C9B56A /* rules.cpp */, 12E71852181D081C0051A649 /* rules.h */, 12C344421822F27700B07BE3 /* transition_map.h */, ); @@ -519,7 +518,6 @@ 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */, 1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */, 1213061B182C84DF00FCF928 /* item.cpp in Sources */, - 1251209D18303CFB00C9B56A /* rules.cpp in Sources */, 12130617182C3D2900FCF928 /* string.cpp in Sources */, 12130611182C3A1100FCF928 /* blank.cpp in Sources */, 12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */, diff --git a/src/rules.cpp b/src/rules.cpp deleted file mode 100644 index 14e9749c..00000000 --- a/src/rules.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "rules.h" - -namespace tree_sitter { - namespace rules { - rule_ptr blank() { - return rule_ptr(new Blank()); - } - - rule_ptr sym(const std::string &name) { - return 
rule_ptr(new Symbol(name)); - } - - rule_ptr character(char value) { - return rule_ptr(new Char(value)); - } - - rule_ptr str(const std::string &value) { - return rule_ptr(new String(value)); - } - - rule_ptr pattern(const std::string &value) { - return rule_ptr(new Pattern(value)); - } - - template - rule_ptr build_binary_tree(const std::initializer_list &rules) { - rule_ptr result(nullptr); - for (auto it = rules.end() - 1; it >= rules.begin(); --it) - result = result.get() ? rule_ptr(new RuleClass(*it, result)) : *it; - return result; - } - - rule_ptr seq(const std::initializer_list &rules) { - return build_binary_tree(rules); - } - - rule_ptr choice(const std::initializer_list &rules) { - return build_binary_tree(rules); - } - } -} \ No newline at end of file diff --git a/src/rules.h b/src/rules.h index 99d69d78..f6bc83e6 100644 --- a/src/rules.h +++ b/src/rules.h @@ -10,18 +10,4 @@ #include "pattern.h" #include "char.h" -namespace tree_sitter { - namespace rules { - rule_ptr blank(); - rule_ptr sym(const std::string &name); - rule_ptr character(char value); - rule_ptr str(const std::string &value); - rule_ptr pattern(const std::string &value); - rule_ptr seq(const std::initializer_list &rules); - rule_ptr choice(const std::initializer_list &rules); - - typedef std::shared_ptr sym_ptr; - } -} - #endif diff --git a/src/rules/Pattern.cpp b/src/rules/Pattern.cpp index cd1e52b7..3bdd97f5 100644 --- a/src/rules/Pattern.cpp +++ b/src/rules/Pattern.cpp @@ -4,7 +4,10 @@ namespace tree_sitter { namespace rules { Pattern::Pattern(const std::string &string) : value(string) {}; - Pattern::Pattern(const char *string) : value(string) {}; + + pattern_ptr pattern(const std::string &value) { + return std::make_shared(value); + } TransitionMap Pattern::transitions() const { return tree_sitter::TransitionMap(); diff --git a/src/rules/Pattern.h b/src/rules/Pattern.h index 81a33780..d92254ca 100644 --- a/src/rules/Pattern.h +++ b/src/rules/Pattern.h @@ -7,7 +7,6 @@ namespace 
tree_sitter { namespace rules { class Pattern : public Rule { public: - Pattern(const char *string); Pattern(const std::string &string); TransitionMap transitions() const; bool operator==(const Rule& other) const; @@ -15,6 +14,9 @@ namespace tree_sitter { private: const std::string value; }; + + typedef std::shared_ptr pattern_ptr; + pattern_ptr pattern(const std::string &value); } } diff --git a/src/rules/blank.cpp b/src/rules/blank.cpp index 6ba3dcfd..72ecd157 100644 --- a/src/rules/blank.cpp +++ b/src/rules/blank.cpp @@ -5,6 +5,10 @@ namespace tree_sitter { namespace rules { Blank::Blank() {} + blank_ptr blank() { + return std::make_shared(); + } + TransitionMap Blank::transitions() const { return TransitionMap(); } diff --git a/src/rules/blank.h b/src/rules/blank.h index d1aad48f..99f1a577 100644 --- a/src/rules/blank.h +++ b/src/rules/blank.h @@ -12,6 +12,9 @@ namespace tree_sitter { bool operator==(const Rule& other) const; std::string to_string() const; }; + + typedef std::shared_ptr blank_ptr; + blank_ptr blank(); } } diff --git a/src/rules/char.cpp b/src/rules/char.cpp index a7e87870..c8d86f3c 100644 --- a/src/rules/char.cpp +++ b/src/rules/char.cpp @@ -7,9 +7,13 @@ using namespace std; namespace tree_sitter { namespace rules { Char::Char(char value) : value(value) {}; + + char_ptr character(char value) { + return std::make_shared(value); + } TransitionMap Char::transitions() const { - return TransitionMap({ rule_ptr(new Char(value)) }, { rule_ptr(new Blank()) }); + return TransitionMap({ character(value) }, { blank() }); } bool Char::operator==(const Rule &rule) const { diff --git a/src/rules/char.h b/src/rules/char.h index efb0e8e3..48f3ec52 100644 --- a/src/rules/char.h +++ b/src/rules/char.h @@ -14,6 +14,9 @@ namespace tree_sitter { private: const char value; }; + + typedef std::shared_ptr char_ptr; + char_ptr character(char value); } } diff --git a/src/rules/choice.cpp b/src/rules/choice.cpp index 4af430b6..91fda915 100644 --- a/src/rules/choice.cpp 
+++ b/src/rules/choice.cpp @@ -5,10 +5,14 @@ namespace tree_sitter { namespace rules { Choice::Choice(rule_ptr left, rule_ptr right) : left(left), right(right) {}; + rule_ptr choice(const std::initializer_list &rules) { + return build_binary_rule_tree(rules); + } + TransitionMap Choice::transitions() const { auto result = left->transitions(); result.merge(right->transitions(), [&](rule_ptr left, rule_ptr right) -> rule_ptr { - return rule_ptr(new Choice(left, right)); + return choice({ left, right }); }); return result; } diff --git a/src/rules/choice.h b/src/rules/choice.h index ca121c93..f612cdfc 100644 --- a/src/rules/choice.h +++ b/src/rules/choice.h @@ -15,6 +15,8 @@ namespace tree_sitter { const rule_ptr left; const rule_ptr right; }; + + rule_ptr choice(const std::initializer_list &rules); } } diff --git a/src/rules/rule.h b/src/rules/rule.h index 5a4f4e99..2628e6d1 100644 --- a/src/rules/rule.h +++ b/src/rules/rule.h @@ -15,7 +15,16 @@ namespace tree_sitter { }; typedef std::shared_ptr rule_ptr; + std::ostream& operator<<(std::ostream& stream, const Rule &rule); + + template + rule_ptr build_binary_rule_tree(const std::initializer_list &rules) { + rule_ptr result(nullptr); + for (auto it = rules.end() - 1; it >= rules.begin(); --it) + result = result.get() ? 
std::make_shared(*it, result) : *it; + return result; + } } } diff --git a/src/rules/seq.cpp b/src/rules/seq.cpp index 2285c3d4..70a8fa4a 100644 --- a/src/rules/seq.cpp +++ b/src/rules/seq.cpp @@ -6,12 +6,16 @@ namespace tree_sitter { namespace rules { Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}; + rule_ptr seq(const std::initializer_list &rules) { + return build_binary_rule_tree(rules); + } + TransitionMap Seq::transitions() const { return left->transitions().map([&](rule_ptr left_rule) -> rule_ptr { if (typeid(*left_rule) == typeid(Blank)) return right; else - return rule_ptr(new Seq(left_rule, right)); + return seq({ left_rule, right }); }); } diff --git a/src/rules/seq.h b/src/rules/seq.h index bb7f8da5..bb28eaf9 100644 --- a/src/rules/seq.h +++ b/src/rules/seq.h @@ -15,6 +15,8 @@ namespace tree_sitter { const rule_ptr left; const rule_ptr right; }; + + rule_ptr seq(const std::initializer_list &rules); } } diff --git a/src/rules/string.cpp b/src/rules/string.cpp index 32d5cdba..aae1f9c9 100644 --- a/src/rules/string.cpp +++ b/src/rules/string.cpp @@ -7,10 +7,14 @@ namespace tree_sitter { namespace rules { String::String(std::string value) : value(value) {}; + string_ptr str(const std::string &value) { + return std::make_shared(value); + } + TransitionMap String::transitions() const { - auto result = rule_ptr(new Char(value[0])); + rule_ptr result = character(value[0]); for (int i = 1; i < value.length(); i++) - result = rule_ptr(new Seq(result, rule_ptr(new Char(value[i])))); + result = seq({ result, character(value[i]) }); return result->transitions(); } @@ -18,13 +22,10 @@ namespace tree_sitter { const String *other = dynamic_cast(&rule); return (other != NULL) && (other->value == value); } - - String * String::copy() const { - return new String(value); - } - + std::string String::to_string() const { return std::string("(string '") + value + "')"; } + } } \ No newline at end of file diff --git a/src/rules/string.h 
b/src/rules/string.h index e32fe616..0a18fda9 100644 --- a/src/rules/string.h +++ b/src/rules/string.h @@ -9,12 +9,14 @@ namespace tree_sitter { public: String(std::string value); TransitionMap transitions() const; - String * copy() const; bool operator==(const Rule& other) const; std::string to_string() const; private: const std::string value; }; + + typedef std::shared_ptr string_ptr; + string_ptr str(const std::string &value); } } diff --git a/src/rules/symbol.cpp b/src/rules/symbol.cpp index de341e24..f9e59046 100644 --- a/src/rules/symbol.cpp +++ b/src/rules/symbol.cpp @@ -5,10 +5,13 @@ namespace tree_sitter { namespace rules { Symbol::Symbol(const std::string &name) : name(name) {}; - Symbol::Symbol(const char *name) : name(name) {}; + + sym_ptr sym(const std::string &name) { + return std::make_shared(name); + } TransitionMap Symbol::transitions() const { - return TransitionMap({ rule_ptr(new Symbol(name)) }, { rule_ptr(new Blank()) }); + return TransitionMap({ sym(name) }, { blank() }); } bool Symbol::operator==(const Rule &rule) const { diff --git a/src/rules/symbol.h b/src/rules/symbol.h index e597d2a1..bb3b429e 100644 --- a/src/rules/symbol.h +++ b/src/rules/symbol.h @@ -8,13 +8,14 @@ namespace tree_sitter { class Symbol : public Rule { public: Symbol(const std::string &name); - Symbol(const char *name); TransitionMap transitions() const; - Symbol * copy() const; bool operator==(const Rule& other) const; std::string to_string() const; const std::string name; }; + + typedef std::shared_ptr sym_ptr; + sym_ptr sym(const std::string &name); } } diff --git a/todo.md b/todo.md new file mode 100644 index 00000000..5fdd224c --- /dev/null +++ b/todo.md @@ -0,0 +1,36 @@ +TODO +==== + +# complete the list of rule types + +- add repeat rules +- parse regex rules into trees of choices, sequences, repeats + +# generate lexers for sets of terminal rules (can be mix of throwaway and meaningful) + +Introduce ParseTable type which contains a vector of ParseStates. 
A ParseState contains a +TransitionMap of ParseActions. For a lexer, a ParseAction can be one of: + - Accept(symbol) + - Advance(state index) + +Then generate a C function for a ParseTable + +# generate parsers from sets of non-terminal rules + +For a Parser, the ParseActions can be any of: + - Accept(symbol) + - Shift(symbol) + - Reduce(symbol, number of child symbols) + +# normalize grammars + +- add concept of throwaway-terminals (tokens that won't appear in constructed AST) +- classify rules as non-terminals or terminals +- extract strings and regexes from non-terminal rules into their own throwaway-terminals, + in order to separate lexing from parsing + +After this, a grammar will have these fields: +- non-terminal rules +- terminal rules +- throwaway terminal rules +