From 30315a78d2b7e17dfa6e1e2cb0db9ea5209819dc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 28 Dec 2013 23:26:20 -0800 Subject: [PATCH] Put rule transitions in LR namespace --- TreeSitter.xcodeproj/project.pbxproj | 24 +++- spec/compiler/lr/item_spec.cpp | 19 ++- spec/compiler/lr/transitions_spec.cpp | 127 +++++++++++++++++++++ spec/compiler/rules/rules_spec.cpp | 119 ------------------- src/compiler/{ => grammar}/grammar.cpp | 5 +- src/compiler/{ => grammar}/grammar.h | 5 +- src/compiler/lr/item.cpp | 4 +- src/compiler/{rules => lr}/transitions.cpp | 8 +- src/compiler/{rules => lr}/transitions.h | 4 +- src/compiler/rules/rule.cpp | 6 +- 10 files changed, 177 insertions(+), 144 deletions(-) create mode 100644 spec/compiler/lr/transitions_spec.cpp rename src/compiler/{ => grammar}/grammar.cpp (83%) rename src/compiler/{ => grammar}/grammar.h (83%) rename src/compiler/{rules => lr}/transitions.cpp (90%) rename src/compiler/{rules => lr}/transitions.h (59%) diff --git a/TreeSitter.xcodeproj/project.pbxproj b/TreeSitter.xcodeproj/project.pbxproj index c1210840..fb15bffa 100644 --- a/TreeSitter.xcodeproj/project.pbxproj +++ b/TreeSitter.xcodeproj/project.pbxproj @@ -21,6 +21,7 @@ 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; 129D242C183EB1EB00FE9F71 /* table_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 129D242A183EB1EB00FE9F71 /* table_builder.cpp */; }; 12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; }; + 12ED72A7186FC8220089229B /* transitions_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12ED72A6186FC8220089229B /* transitions_spec.cpp */; }; 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; }; 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; }; 12FD4061185E68470041A84E /* c_code.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD405F185E68470041A84E /* c_code.cpp */; }; @@ -122,10 +123,11 @@ 12D136A3183678A2005F3369 /* repeat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = repeat.h; sourceTree = ""; }; 12E71794181D02A80051A649 /* compiler_specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = compiler_specs; sourceTree = BUILT_PRODUCTS_DIR; }; 12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rules.h; path = src/compiler/rules/rules.h; sourceTree = SOURCE_ROOT; }; + 12ED72A6186FC8220089229B /* transitions_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = transitions_spec.cpp; path = spec/compiler/lr/transitions_spec.cpp; sourceTree = SOURCE_ROOT; }; 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/compiler/spec_helper.cpp; sourceTree = SOURCE_ROOT; }; 12F9A64D182DD5FD00FAF50C /* spec_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = spec_helper.h; path = spec/compiler/spec_helper.h; sourceTree = SOURCE_ROOT; }; - 12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = grammar.cpp; sourceTree = ""; }; - 12F9A650182DD6BC00FAF50C /* grammar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = grammar.h; sourceTree = ""; }; + 12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = grammar.cpp; path = src/compiler/grammar/grammar.cpp; sourceTree = SOURCE_ROOT; }; + 12F9A650182DD6BC00FAF50C /* grammar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = grammar.h; path = src/compiler/grammar/grammar.h; sourceTree = SOURCE_ROOT; }; 12FD405F185E68470041A84E /* c_code.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = c_code.cpp; path = src/compiler/code_gen/c_code.cpp; sourceTree = SOURCE_ROOT; }; 12FD4060185E68470041A84E /* c_code.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = c_code.h; path = src/compiler/code_gen/c_code.h; sourceTree = SOURCE_ROOT; }; 12FD4063185E75290041A84E /* generate_parsers.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = generate_parsers.cpp; path = spec/compiler/generate_parsers.cpp; sourceTree = SOURCE_ROOT; }; @@ -135,8 +137,8 @@ 12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = arithmetic_spec.cpp; sourceTree = ""; }; 12FD40DC185FF12C0041A84E /* parser.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = ""; }; 12FD40DE1860064C0041A84E /* tree.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = tree.c; sourceTree = ""; }; - 12FD40E0186245FE0041A84E /* transitions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = transitions.cpp; path = ../rules/transitions.cpp; sourceTree = ""; }; - 12FD40E1186245FE0041A84E /* transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = transitions.h; path = ../rules/transitions.h; sourceTree = ""; }; + 12FD40E0186245FE0041A84E /* transitions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = transitions.cpp; sourceTree = ""; }; + 12FD40E1186245FE0041A84E /* transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transitions.h; sourceTree = ""; }; 12FD40E41862B3530041A84E /* visitor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = visitor.h; sourceTree = ""; }; 12FD40E618639B910041A84E /* visitor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = visitor.cpp; sourceTree = ""; }; 12FD40E818641FB70041A84E /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = ""; }; @@ -217,6 +219,7 @@ 1213061C182C854F00FCF928 /* lr */ = { isa = PBXGroup; children = ( + 12ED72A6186FC8220089229B /* transitions_spec.cpp */, 1213061D182C857100FCF928 /* item_set_spec.cpp */, 12512092182F307C00C9B56A /* table_builder_spec.cpp */, 12D1369C18328C5A005F3369 /* item_spec.cpp */, @@ -284,6 +287,15 @@ path = spec; sourceTree = ""; }; + 12ED72A5186FC6D90089229B /* grammar */ = { + isa = PBXGroup; + children = ( + 12F9A64F182DD6BC00FAF50C /* grammar.cpp */, + 12F9A650182DD6BC00FAF50C /* grammar.h */, + ); + path = grammar; + sourceTree = ""; + }; 12FD4062185E74DF0041A84E /* parsers */ = { isa = PBXGroup; children = ( @@ -304,9 +316,8 @@ 12FD40AC185EE4C00041A84E /* compiler */ = { isa = PBXGroup; children = ( + 12ED72A5186FC6D90089229B /* grammar */, 12FD4067185E8AF40041A84E /* code_gen */, - 12F9A64F182DD6BC00FAF50C /* grammar.cpp */, - 12F9A650182DD6BC00FAF50C /* grammar.h */, 12130618182C84B700FCF928 /* lr */, 12130602182C344400FCF928 /* rules */, 12FD40F9186F4EBE0041A84E /* util */, @@ -462,6 +473,7 @@ 12D136A4183678A2005F3369 /* repeat.cpp in Sources */, 12FD40F3186641C00041A84E /* char_match.cpp in Sources */, 12FD40E718639B910041A84E /* visitor.cpp in Sources */, + 12ED72A7186FC8220089229B /* transitions_spec.cpp in Sources */, 12130622182C85D300FCF928 /* item_set.cpp in Sources */, 12130605182C348F00FCF928 /* character.cpp in Sources */, 1213060B182C389100FCF928 /* symbol.cpp in Sources */, diff --git a/spec/compiler/lr/item_spec.cpp b/spec/compiler/lr/item_spec.cpp index fe9d183d..903950ce 100644 --- a/spec/compiler/lr/item_spec.cpp +++ b/spec/compiler/lr/item_spec.cpp @@ -5,14 +5,27 @@ using namespace tree_sitter::lr; START_TEST describe("items", []() { - Grammar grammar = test_grammars::arithmetic(); - - describe("transitions", [&]() { + describe("construction", [&]() { it("finds the item at the start of a rule", [&]() { + Grammar grammar = test_grammars::arithmetic(); Item item = Item::at_beginning_of_rule("expression", grammar); AssertThat(item, Equals(Item("expression", grammar.rule("expression"), 0))); }); }); + + describe("transitions", [&]() { + it("computes the possible advancements", [&]() { + auto char1 = rules::character('a'); + auto char2 = rules::character('b'); + Item item = Item("my-rule", rules::seq({ char1, char2 }), 2); + + AssertThat( + item.transitions(), + Equals(transition_map({ + { char1, make_shared("my-rule", char2, 3) } + }))); + }); + }); }); END_TEST \ No newline at end of file diff --git a/spec/compiler/lr/transitions_spec.cpp b/spec/compiler/lr/transitions_spec.cpp new file mode 100644 index 00000000..560f6029 --- /dev/null +++ b/spec/compiler/lr/transitions_spec.cpp @@ -0,0 +1,127 @@ +#include "spec_helper.h" +#include "transitions.h" + +START_TEST + +describe("rule transitions", []() { + rules::rule_ptr symbol1 = rules::sym("1"); + rules::rule_ptr symbol2 = rules::sym("2"); + rules::rule_ptr symbol3 = rules::sym("3"); + rules::rule_ptr symbol4 = rules::sym("4"); + rules::rule_ptr char1 = rules::character('a'); + + it("handles symbols", [&]() { + AssertThat( + lr::transitions(symbol1), + Equals(transition_map({ + { symbol1, rules::blank() } + }))); + }); + + it("handles characters", [&]() { + AssertThat( + lr::transitions(char1), + Equals(transition_map({ + { char1, rules::blank() } + }))); + }); + + it("handles character classes", [&]() { + auto rule = rules::character(CharClassDigit); + AssertThat( + lr::transitions(rule), + Equals(transition_map({ + { rule, rules::blank() } + }))); + }); + + it("handles choices", [&]() { + AssertThat( + lr::transitions(rules::choice({ symbol1, symbol2 })), + Equals(transition_map({ + { symbol1, rules::blank() }, + { symbol2, rules::blank() } + }))); + }); + + it("handles sequences", [&]() { + AssertThat( + lr::transitions(rules::seq({ symbol1, symbol2 })), + Equals(transition_map({ + { symbol1, symbol2 } + }))); + }); + + it("handles_long_sequences", [&]() { + AssertThat( + lr::transitions(rules::seq({ + symbol1, + symbol2, + symbol3, + symbol4 + })), + Equals(transition_map({ + { symbol1, rules::seq({ symbol2, symbol3, symbol4 }) } + }))); + }); + + it("handles choices with common starting symbols", [&]() { + AssertThat( + lr::transitions( + rules::choice({ + rules::seq({ symbol1, symbol2 }), + rules::seq({ symbol1, symbol3 }) })), + Equals(transition_map({ + { symbol1, rules::choice({ symbol2, symbol3 }) } + }))); + }); + + it("handles strings", [&]() { + AssertThat( + lr::transitions(rules::str("bad")), + Equals(transition_map({ + { rules::character('b'), rules::seq({ rules::character('a'), rules::character('d') }) + } + }))); + }); + + it("handles patterns", [&]() { + AssertThat( + lr::transitions(rules::pattern("a|b")), + Equals(transition_map({ + { rules::character('a'), rules::blank() }, + { rules::character('b'), rules::blank() } + }))); + }); + + it("handles repeats", [&]() { + rules::rule_ptr repeat = rules::repeat(rules::str("ab")); + AssertThat( + lr::transitions(repeat), + Equals(transition_map({ + { + rules::character('a'), + rules::seq({ + rules::character('b'), + rules::choice({ + repeat, + rules::blank() + }) + }) + }}))); + + repeat = rules::repeat(rules::str("a")); + AssertThat( + lr::transitions(repeat), + Equals(transition_map({ + { + rules::character('a'), + rules::choice({ + repeat, + rules::blank() + }) + }}))); + }); +}); + +END_TEST diff --git a/spec/compiler/rules/rules_spec.cpp b/spec/compiler/rules/rules_spec.cpp index 4424b1c3..f2c460dd 100644 --- a/spec/compiler/rules/rules_spec.cpp +++ b/spec/compiler/rules/rules_spec.cpp @@ -7,8 +7,6 @@ describe("Rules", []() { rules::rule_ptr symbol1 = rules::sym("1"); rules::rule_ptr symbol2 = rules::sym("2"); rules::rule_ptr symbol3 = rules::sym("3"); - rules::rule_ptr symbol4 = rules::sym("4"); - rules::rule_ptr char1 = rules::character('a'); describe("construction", [&]() { it("constructs binary trees", [&]() { @@ -23,123 +21,6 @@ describe("Rules", []() { rules::choice({ rules::choice({ symbol1, symbol2 }), symbol3 }))); }); }); - - describe("transitions", [&]() { - it("handles symbols", [&]() { - AssertThat( - rules::transitions(symbol1), - Equals(transition_map({ - { symbol1, rules::blank() } - }))); - }); - - it("handles characters", [&]() { - AssertThat( - rules::transitions(char1), - Equals(transition_map({ - { char1, rules::blank() } - }))); - }); - - it("handles character classes", [&]() { - auto rule = rules::character(CharClassDigit); - AssertThat( - rules::transitions(rule), - Equals(transition_map({ - { rule, rules::blank() } - }))); - }); - - it("handles choices", [&]() { - AssertThat( - rules::transitions(rules::choice({ symbol1, symbol2 })), - Equals(transition_map({ - { symbol1, rules::blank() }, - { symbol2, rules::blank() } - }))); - }); - - it("handles sequences", [&]() { - AssertThat( - rules::transitions(rules::seq({ symbol1, symbol2 })), - Equals(transition_map({ - { symbol1, symbol2 } - }))); - }); - - it("handles_long_sequences", [&]() { - AssertThat( - rules::transitions(rules::seq({ - symbol1, - symbol2, - symbol3, - symbol4 - })), - Equals(transition_map({ - { symbol1, rules::seq({ symbol2, symbol3, symbol4 }) } - }))); - }); - - it("handles choices with common starting symbols", [&]() { - AssertThat( - rules::transitions( - rules::choice({ - rules::seq({ symbol1, symbol2 }), - rules::seq({ symbol1, symbol3 }) })), - Equals(transition_map({ - { symbol1, rules::choice({ symbol2, symbol3 }) } - }))); - }); - - it("handles strings", [&]() { - AssertThat( - rules::transitions(rules::str("bad")), - Equals(transition_map({ - { - rules::character('b'), - rules::seq({ rules::character('a'), rules::character('d') }) - } - }))); - }); - - it("handles patterns", [&]() { - AssertThat( - rules::transitions(rules::pattern("a|b")), - Equals(transition_map({ - { rules::character('a'), rules::blank() }, - { rules::character('b'), rules::blank() } - }))); - }); - - it("handles repeats", [&]() { - rules::rule_ptr repeat = rules::repeat(rules::str("ab")); - AssertThat( - rules::transitions(repeat), - Equals(transition_map({ - { - rules::character('a'), - rules::seq({ - rules::character('b'), - rules::choice({ - repeat, - rules::blank() - }) - }) - }}))); - - repeat = rules::repeat(rules::str("a")); - AssertThat( - rules::transitions(repeat), - Equals(transition_map({ - { - rules::character('a'), - rules::choice({ - repeat, - rules::blank() - }) - }}))); - }); - }); }); END_TEST diff --git a/src/compiler/grammar.cpp b/src/compiler/grammar/grammar.cpp similarity index 83% rename from src/compiler/grammar.cpp rename to src/compiler/grammar/grammar.cpp index a168c1fe..4ea2705e 100644 --- a/src/compiler/grammar.cpp +++ b/src/compiler/grammar/grammar.cpp @@ -3,7 +3,7 @@ using namespace std; namespace tree_sitter { - Grammar::Grammar(const rule_map_init_list &rules) : + Grammar::Grammar(const std::initializer_list> &rules) : rules(rules), start_rule_name(rules.begin()->first) {} @@ -21,5 +21,4 @@ namespace tree_sitter { } return result; } - -} \ No newline at end of file +} diff --git a/src/compiler/grammar.h b/src/compiler/grammar/grammar.h similarity index 83% rename from src/compiler/grammar.h rename to src/compiler/grammar/grammar.h index 069523a3..cad8eecb 100644 --- a/src/compiler/grammar.h +++ b/src/compiler/grammar/grammar.h @@ -7,15 +7,14 @@ namespace tree_sitter { class Grammar { - typedef std::unordered_map rule_map; typedef std::initializer_list> rule_map_init_list; - const rule_map rules; - public: Grammar(const rule_map_init_list &rules); const rules::rule_ptr rule(const std::string &) const; const std::string start_rule_name; std::vector rule_names() const; + + const std::unordered_map rules; }; } diff --git a/src/compiler/lr/item.cpp b/src/compiler/lr/item.cpp index 317aed76..21721873 100644 --- a/src/compiler/lr/item.cpp +++ b/src/compiler/lr/item.cpp @@ -18,14 +18,14 @@ namespace tree_sitter { } transition_map Item::transitions() const { - return rules::transitions(rule).map([&](rules::rule_ptr to_rule) -> item_ptr { + return lr::transitions(rule).map([&](rules::rule_ptr to_rule) -> item_ptr { return std::make_shared(rule_name, to_rule, consumed_sym_count + 1); }); }; vector Item::next_symbols() const { vector result; - for (auto pair : rules::transitions(rule)) { + for (auto pair : lr::transitions(rule)) { shared_ptr sym = dynamic_pointer_cast(pair.first); if (sym) result.push_back(*sym); } diff --git a/src/compiler/rules/transitions.cpp b/src/compiler/lr/transitions.cpp similarity index 90% rename from src/compiler/rules/transitions.cpp rename to src/compiler/lr/transitions.cpp index ab33b464..c095fe13 100644 --- a/src/compiler/rules/transitions.cpp +++ b/src/compiler/lr/transitions.cpp @@ -1,9 +1,11 @@ #include "transitions.h" #include "rules.h" +using namespace tree_sitter::rules; + namespace tree_sitter { - namespace rules { - class TransitionsVisitor : public Visitor { + namespace lr { + class TransitionsVisitor : public rules::Visitor { public: transition_map value; @@ -16,7 +18,7 @@ namespace tree_sitter { } void visit(const Symbol *rule) { - value = transition_map({{ sym(rule->name), blank() }}); + value = transition_map({{ std::make_shared(*rule), blank() }}); } void visit(const Choice *rule) { diff --git a/src/compiler/rules/transitions.h b/src/compiler/lr/transitions.h similarity index 59% rename from src/compiler/rules/transitions.h rename to src/compiler/lr/transitions.h index 153fcc54..eb9c6834 100644 --- a/src/compiler/rules/transitions.h +++ b/src/compiler/lr/transitions.h @@ -5,8 +5,8 @@ #include "transition_map.h" namespace tree_sitter { - namespace rules { - transition_map transitions(const rule_ptr &rule); + namespace lr { + transition_map transitions(const rules::rule_ptr &rule); } } diff --git a/src/compiler/rules/rule.cpp b/src/compiler/rules/rule.cpp index cb12a64c..9185345f 100644 --- a/src/compiler/rules/rule.cpp +++ b/src/compiler/rules/rule.cpp @@ -11,10 +11,10 @@ namespace tree_sitter { } ostream& operator<<(ostream& stream, const rule_ptr &rule) { - if (rule.get() == nullptr) - stream << string(""); + if (rule.get()) + stream << *rule; else - stream << rule->to_string(); + stream << string("#"); return stream; } }