diff --git a/TreeSitter.xcodeproj/project.pbxproj b/TreeSitter.xcodeproj/project.pbxproj index 367b4085..13dd410e 100644 --- a/TreeSitter.xcodeproj/project.pbxproj +++ b/TreeSitter.xcodeproj/project.pbxproj @@ -21,6 +21,9 @@ 12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12512092182F307C00C9B56A /* parse_table_spec.cpp */; }; 1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; }; 1251209D18303CFB00C9B56A /* rules.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209C18303CFB00C9B56A /* rules.cpp */; }; + 125120A018307DEC00C9B56A /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209E18307DEC00C9B56A /* parse_table.cpp */; }; + 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; + 12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; }; 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; }; 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; }; 27A343CA69E17E0F9EBEDF1C /* Pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* Pattern.cpp */; }; @@ -133,7 +136,12 @@ 12512092182F307C00C9B56A /* parse_table_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_spec.cpp; path = spec/lr/parse_table_spec.cpp; sourceTree = SOURCE_ROOT; }; 1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = ""; }; 1251209C18303CFB00C9B56A /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = ""; }; + 1251209E18307DEC00C9B56A /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = ""; }; + 1251209F18307DEC00C9B56A /* parse_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table.h; sourceTree = ""; }; + 125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/test_grammars/arithmetic.h; sourceTree = SOURCE_ROOT; }; + 125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/test_grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; }; 12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transition_map.h; sourceTree = ""; }; + 12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path = spec/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; }; 12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; }; 12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = ""; }; 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; }; @@ -185,6 +193,8 @@ 1213061A182C84DF00FCF928 /* item.h */, 12130620182C85D300FCF928 /* item_set.cpp */, 12130621182C85D300FCF928 /* item_set.h */, + 1251209E18307DEC00C9B56A /* parse_table.cpp */, + 1251209F18307DEC00C9B56A /* parse_table.h */, ); path = lr; sourceTree = ""; @@ -194,6 +204,7 @@ children = ( 1213061D182C857100FCF928 /* item_set_spec.cpp */, 12512092182F307C00C9B56A /* parse_table_spec.cpp */, + 12D1369C18328C5A005F3369 /* item_spec.cpp */, ); name = lr; path = spec/lr; @@ -398,6 +409,16 @@ path = collections; sourceTree = ""; }; + 125120A118307FCA00C9B56A /* test_grammars */ = { + isa = PBXGroup; + children = ( + 125120A218307FFD00C9B56A /* arithmetic.h */, + 125120A3183083BD00C9B56A /* arithmetic.cpp */, + ); + name = test_grammars; + path = spec/test_grammars; + sourceTree = ""; + }; 12E716F9181D010E0051A649 = { isa = PBXGroup; children = ( @@ -432,6 +453,7 @@ 12E71796181D02A80051A649 /* spec */ = { isa = PBXGroup; children = ( + 125120A118307FCA00C9B56A /* test_grammars */, 1214925C181E200B008E9BDA /* externals */, 1213061C182C854F00FCF928 /* lr */, 121492E9181E200B008E9BDA /* main.cpp */, @@ -494,13 +516,16 @@ buildActionMask = 2147483647; files = ( 12130614182C3A1700FCF928 /* seq.cpp in Sources */, + 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */, 1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */, 1213061B182C84DF00FCF928 /* item.cpp in Sources */, 1251209D18303CFB00C9B56A /* rules.cpp in Sources */, 12130617182C3D2900FCF928 /* string.cpp in Sources */, 12130611182C3A1100FCF928 /* blank.cpp in Sources */, + 12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */, 1213060E182C398300FCF928 /* choice.cpp in Sources */, 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */, + 125120A018307DEC00C9B56A /* parse_table.cpp in Sources */, 1214930E181E200B008E9BDA /* main.cpp in Sources */, 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */, 12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */, diff --git a/spec/lr/item_set_spec.cpp b/spec/lr/item_set_spec.cpp index 4c512aca..b6e0f79c 100644 --- a/spec/lr/item_set_spec.cpp +++ b/spec/lr/item_set_spec.cpp @@ -1,21 +1,23 @@ #include "spec_helper.h" +#include "../test_grammars/arithmetic.h" + +using namespace tree_sitter::lr; Describe(item_sets) { - Describe(transitions) { - Grammar grammar = Grammar({ - "one", - "two" - }, { - rules::sym("one"), - rules::sym("two") - }); + Grammar grammar = test_grammars::arithmetic(); + + It(computes_the_closure_of_an_item_set_under_symbol_expansion) { + Item item = Item::at_beginning_of_rule("term", grammar); + ItemSet item_set = ItemSet({ item }).closure_in_grammar(grammar); - rules::rule_ptr rule = grammar.rules[string("one")]; - lr::Item item = lr::Item(string("one"), rule, 0); - - It(works) { - lr::ItemSet item_set = lr::ItemSet(item, grammar); - item_set.transitions(); - } - }; + AssertThat( + item_set, + EqualsContainer(ItemSet({ + Item("term", grammar.rules["term"], 0), + Item("factor", grammar.rules["factor"], 0), + Item("variable", grammar.rules["variable"], 0), + Item("number", grammar.rules["number"], 0), + Item("left_paren", grammar.rules["left_paren"], 0), + }))); + } }; diff --git a/spec/lr/item_spec.cpp b/spec/lr/item_spec.cpp new file mode 100644 index 00000000..43af20f0 --- /dev/null +++ b/spec/lr/item_spec.cpp @@ -0,0 +1,15 @@ +#include "spec_helper.h" +#include "../test_grammars/arithmetic.h" + +using namespace tree_sitter::lr; + +Describe(items) { + Describe(transitions) { + Grammar grammar = test_grammars::arithmetic(); + + It(finds_the_item_at_the_start_of_a_rule) { + Item item = Item::at_beginning_of_rule("expression", grammar); + AssertThat(item, Equals(Item("expression", grammar.rules["expression"], 0))); + } + }; +}; diff --git a/spec/lr/parse_table_spec.cpp b/spec/lr/parse_table_spec.cpp index e7232422..4d022806 100644 --- a/spec/lr/parse_table_spec.cpp +++ b/spec/lr/parse_table_spec.cpp @@ -1,43 +1,7 @@ #include "spec_helper.h" + Describe(parse_table_construction) { - Grammar grammar = Grammar( - { - "expression", - "term", - "factor", - "number", - "variable", - "plus", - "times", - "left_paren", - "right_paren" - }, { - rules::choice({ - rules::seq({ - rules::sym("term"), - rules::sym("plus"), - rules::sym("term") }), - rules::sym("term") }), - rules::choice({ - rules::seq({ - rules::sym("factor"), - rules::sym("times"), - rules::sym("factor") }), - rules::sym("factor") }), - rules::choice({ - rules::sym("variable"), - rules::sym("number"), - rules::seq({ - rules::sym("left_paren"), - rules::sym("expression"), - rules::sym("right_paren") }) }), - rules::pattern("\\d+"), - rules::pattern("\\w+"), - rules::str("+"), - rules::str("*"), - rules::str("("), - rules::str(")") - } - ); + Describe(the_starting_state) { + }; }; diff --git a/spec/rules_spec.cpp b/spec/rules_spec.cpp index 979359ae..ddf36c82 100644 --- a/spec/rules_spec.cpp +++ b/spec/rules_spec.cpp @@ -11,11 +11,11 @@ Describe(Rules) { It(constructs_binary_trees) { AssertThat( rules::seq({ symbol1, symbol2, symbol3 })->to_string(), - Equals(std::string("(seq 1 (seq 2 3))"))); + Equals(std::string("(seq (sym '1') (seq (sym '2') (sym '3')))"))); AssertThat( rules::choice({ symbol1, symbol2, symbol3 })->to_string(), - Equals(std::string("(choice 1 (choice 2 3))"))); + Equals(std::string("(choice (sym '1') (choice (sym '2') (sym '3')))"))); } }; diff --git a/spec/spec_helper.cpp b/spec/spec_helper.cpp index 1a262dd5..89ed6281 100644 --- a/spec/spec_helper.cpp +++ b/spec/spec_helper.cpp @@ -1,5 +1,6 @@ #include "spec_helper.h" + EqualsContainerConstraint EqualsTransitionMap(const rule_tmap &expected) { return EqualsContainer(expected, rule_tmap::elements_equal); } diff --git a/spec/spec_helper.h b/spec/spec_helper.h index c0005b41..fda96d3a 100644 --- a/spec/spec_helper.h +++ b/spec/spec_helper.h @@ -3,14 +3,15 @@ #include "igloo/igloo_alt.h" #include "transition_map.h" -#include "rule.h" + +#include "rules.h" #include "item.h" #include "item_set.h" #include "grammar.h" -using namespace igloo; using namespace tree_sitter; using namespace std; +using namespace igloo; // Assertion helpers for transition maps typedef TransitionMap rule_tmap; diff --git a/spec/test_grammars/arithmetic.cpp b/spec/test_grammars/arithmetic.cpp new file mode 100644 index 00000000..78330a3a --- /dev/null +++ b/spec/test_grammars/arithmetic.cpp @@ -0,0 +1,47 @@ +#include "arithmetic.h" +#include "rules.h" + +using namespace tree_sitter; +using namespace tree_sitter::rules; + +namespace test_grammars { + Grammar arithmetic() { + return Grammar({ + "expression", + "term", + "factor", + "number", + "variable", + "plus", + "times", + "left_paren", + "right_paren" + }, { + choice({ + seq({ + sym("term"), + sym("plus"), + sym("term") }), + sym("term") }), + choice({ + seq({ + sym("factor"), + sym("times"), + sym("factor") }), + sym("factor") }), + choice({ + sym("variable"), + sym("number"), + seq({ + sym("left_paren"), + sym("expression"), + sym("right_paren") }) }), + pattern("\\d+"), + pattern("\\w+"), + str("+"), + str("*"), + str("("), + str(")") + }); + } +} diff --git a/spec/test_grammars/arithmetic.h b/spec/test_grammars/arithmetic.h new file mode 100644 index 00000000..5031e4f3 --- /dev/null +++ b/spec/test_grammars/arithmetic.h @@ -0,0 +1,10 @@ +#ifndef TreeSitter_arithmetic_h +#define TreeSitter_arithmetic_h + +#include "grammar.h" + +namespace test_grammars { + tree_sitter::Grammar arithmetic(); +} + +#endif diff --git a/src/grammar.cpp b/src/grammar.cpp index 2f543318..7948834b 100644 --- a/src/grammar.cpp +++ b/src/grammar.cpp @@ -6,15 +6,21 @@ namespace tree_sitter { Grammar::Grammar(const rule_map &rules, const std::string &start_rule_name) : rules(rules), start_rule_name(start_rule_name) {}; - - Grammar::Grammar(const initializer_list &rule_names, - const initializer_list &rule_vals) { - rules = rule_map(); + + std::unordered_map build_rule_map(const initializer_list &rule_names, + const initializer_list &rule_vals) { + std::unordered_map result; auto rule_name_i = rule_names.begin(); auto rule_i = rule_vals.begin(); - start_rule_name = *rule_name_i; - for (; rule_i != rule_vals.end(); rule_i++ && rule_name_i++) { - rules[*rule_name_i] = *rule_i; + while (rule_i != rule_vals.end()) { + result[*rule_name_i] = *rule_i; + rule_i++; + rule_name_i++; } + return result; } + + Grammar::Grammar(const initializer_list &names, const initializer_list &values) : + rules(build_rule_map(names, values)), + start_rule_name(*names.begin()) {} } \ No newline at end of file diff --git a/src/grammar.h b/src/grammar.h index fabde4e8..8b28f093 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -12,7 +12,7 @@ namespace tree_sitter { Grammar(const std::initializer_list &rule_names, const std::initializer_list &rules); rule_map rules; - std::string start_rule_name; + const std::string start_rule_name; }; } diff --git a/src/lr/item.cpp b/src/lr/item.cpp index 71166050..99d5414c 100644 --- a/src/lr/item.cpp +++ b/src/lr/item.cpp @@ -1,4 +1,5 @@ #include "item.h" +#include "grammar.h" using namespace std; @@ -9,16 +10,22 @@ namespace tree_sitter { rule(rule), consumed_sym_count(consumed_sym_count) {}; + Item Item::at_beginning_of_rule(const std::string &rule_name, Grammar &grammar) { + return Item(rule_name, grammar.rules[rule_name], 0); + } + TransitionMap Item::transitions() const { return rule->transitions().map([&](rules::rule_ptr to_rule) { return item_ptr(new Item(rule_name, to_rule, consumed_sym_count + 1)); }); }; - vector Item::next_symbols() const { - vector result; - for (auto pair : rule->transitions()) - result.push_back(pair.second); + vector Item::next_symbols() const { + vector result; + for (auto pair : rule->transitions()) { + shared_ptr sym = dynamic_pointer_cast(pair.first); + if (sym != nullptr) result.push_back(sym); + } return result; } diff --git a/src/lr/item.h b/src/lr/item.h index f2a1196d..601bf57b 100644 --- a/src/lr/item.h +++ b/src/lr/item.h @@ -6,12 +6,16 @@ #include "transition_map.h" namespace tree_sitter { + class Grammar; + namespace lr { class Item { public: Item(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count); + static Item at_beginning_of_rule(const std::string &rule_name, Grammar &grammar); + TransitionMap transitions() const; - std::vector next_symbols() const; + std::vector next_symbols() const; bool operator==(const Item &other) const; const std::string rule_name; @@ -19,7 +23,7 @@ namespace tree_sitter { const int consumed_sym_count; }; - typedef std::shared_ptr item_ptr; + typedef std::shared_ptr item_ptr; std::ostream& operator<<(std::ostream &stream, const Item &item); } } diff --git a/src/lr/item_set.cpp b/src/lr/item_set.cpp index edbf6776..4b4412da 100644 --- a/src/lr/item_set.cpp +++ b/src/lr/item_set.cpp @@ -1,13 +1,62 @@ #include "item_set.h" +#include + +using namespace std; + namespace tree_sitter { namespace lr { - ItemSet::ItemSet(const Item &item, const Grammar &grammar) { - - } + ItemSet::ItemSet(const vector &items) : contents(items) {} + ItemSet::ItemSet(const initializer_list &items) : contents(items) {} TransitionMap ItemSet::transitions() const { return TransitionMap(); } + + bool vector_contains(vector items, lr::Item item) { + return (std::find(items.begin(), items.end(), item) != items.end()); + } + + void add_item(vector &vector, const Item &item, Grammar &grammar) { + if (!vector_contains(vector, item)) { + vector.push_back(item); + for (rules::sym_ptr rule : item.next_symbols()) { + Item next_item = Item::at_beginning_of_rule(rule->name, grammar); + add_item(vector, next_item, grammar); + } + } + } + + ItemSet ItemSet::closure_in_grammar(Grammar &grammar) const { + vector items; + for (Item item : *this) + add_item(items, item, grammar); + return ItemSet(items); + } + +#pragma mark - container + ItemSet::const_iterator ItemSet::begin() const { + return contents.begin(); + } + + ItemSet::const_iterator ItemSet::end() const { + return contents.end(); + } + + size_t ItemSet::size() const { + return contents.size(); + } + +#pragma mark - printing + ostream& operator<<(ostream &stream, const ItemSet &item_set) { + stream << string("(item_set "); + for (Item item : item_set) { + stream << item; + stream << string(" "); + } + stream << string(")"); + return stream; + } + } -} \ No newline at end of file +} diff --git a/src/lr/item_set.h b/src/lr/item_set.h index fa469cef..2151a957 100644 --- a/src/lr/item_set.h +++ b/src/lr/item_set.h @@ -8,9 +8,22 @@ namespace tree_sitter { namespace lr { class ItemSet { public: - ItemSet(const Item &item, const Grammar &grammar); + ItemSet(const std::vector &items); + ItemSet(const std::initializer_list &items); + + typedef Item value_type; + typedef std::vector::const_iterator const_iterator; + const_iterator begin() const; + const_iterator end() const; + size_t size() const; + + ItemSet closure_in_grammar(Grammar &grammar) const; TransitionMap transitions() const; + + const std::vector contents; }; + + std::ostream& operator<<(std::ostream &stream, const ItemSet &item_set); } } diff --git a/src/lr/parse_table.cpp b/src/lr/parse_table.cpp new file mode 100644 index 00000000..305a3132 --- /dev/null +++ b/src/lr/parse_table.cpp @@ -0,0 +1 @@ +#include "parse_table.h" diff --git a/src/lr/parse_table.h b/src/lr/parse_table.h new file mode 100644 index 00000000..f95e19cd --- /dev/null +++ b/src/lr/parse_table.h @@ -0,0 +1,13 @@ +#ifndef __TreeSitter__parse_table__ +#define __TreeSitter__parse_table__ + +namespace tree_sitter { + namespace lr { + class ParseTable { + public: + ParseTable(); + }; + } +} + +#endif diff --git a/src/rules.cpp b/src/rules.cpp index 8f2df603..14e9749c 100644 --- a/src/rules.cpp +++ b/src/rules.cpp @@ -1,11 +1,4 @@ #include "rules.h" -#include "blank.h" -#include "symbol.h" -#include "choice.h" -#include "seq.h" -#include "string.h" -#include "pattern.h" -#include "char.h" namespace tree_sitter { namespace rules { diff --git a/src/rules.h b/src/rules.h index ac8feb24..99d69d78 100644 --- a/src/rules.h +++ b/src/rules.h @@ -1,7 +1,14 @@ #ifndef __TreeSitter__rules__ #define __TreeSitter__rules__ -#include "rules/rule.h" +#include "rule.h" +#include "blank.h" +#include "symbol.h" +#include "choice.h" +#include "seq.h" +#include "string.h" +#include "pattern.h" +#include "char.h" namespace tree_sitter { namespace rules { @@ -12,6 +19,8 @@ namespace tree_sitter { rule_ptr pattern(const std::string &value); rule_ptr seq(const std::initializer_list &rules); rule_ptr choice(const std::initializer_list &rules); + + typedef std::shared_ptr sym_ptr; } } diff --git a/src/rules/string.cpp b/src/rules/string.cpp index 80157b21..32d5cdba 100644 --- a/src/rules/string.cpp +++ b/src/rules/string.cpp @@ -24,7 +24,7 @@ namespace tree_sitter { } std::string String::to_string() const { - return value; + return std::string("(string '") + value + "')"; } } } \ No newline at end of file diff --git a/src/rules/symbol.cpp b/src/rules/symbol.cpp index d3b83fae..de341e24 100644 --- a/src/rules/symbol.cpp +++ b/src/rules/symbol.cpp @@ -17,7 +17,7 @@ namespace tree_sitter { } std::string Symbol::to_string() const { - return name; + return std::string("(sym '") + name + "')"; } } } \ No newline at end of file diff --git a/src/rules/symbol.h b/src/rules/symbol.h index d767e39f..e597d2a1 100644 --- a/src/rules/symbol.h +++ b/src/rules/symbol.h @@ -13,8 +13,7 @@ namespace tree_sitter { Symbol * copy() const; bool operator==(const Rule& other) const; std::string to_string() const; - private: - std::string name; + const std::string name; }; } } diff --git a/src/transition_map.h b/src/transition_map.h index 1b3badd0..7bd65cec 100644 --- a/src/transition_map.h +++ b/src/transition_map.h @@ -92,7 +92,8 @@ namespace tree_sitter { bool started = false; for (auto pair : map) { if (started) stream << std::string(", "); - stream << (pair.first->to_string() + " => " + pair.second->to_string()); + stream << pair.first->to_string() << std::string(" => "); + stream << *pair.second; started = true; } stream << std::string("]");