diff --git a/TreeSitter.xcodeproj/project.pbxproj b/TreeSitter.xcodeproj/project.pbxproj index 4f752797..ba8ea3a2 100644 --- a/TreeSitter.xcodeproj/project.pbxproj +++ b/TreeSitter.xcodeproj/project.pbxproj @@ -18,10 +18,11 @@ 12130622182C85D300FCF928 /* item_set.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130620182C85D300FCF928 /* item_set.cpp */; }; 1214930E181E200B008E9BDA /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492E9181E200B008E9BDA /* main.cpp */; }; 1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492EA181E200B008E9BDA /* rules_spec.cpp */; }; - 12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12512092182F307C00C9B56A /* parse_table_spec.cpp */; }; + 12512093182F307C00C9B56A /* parse_table_builder_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12512092182F307C00C9B56A /* parse_table_builder_spec.cpp */; }; 1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; }; 125120A018307DEC00C9B56A /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209E18307DEC00C9B56A /* parse_table.cpp */; }; 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; + 129D242C183EB1EB00FE9F71 /* parse_table_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 129D242A183EB1EB00FE9F71 /* parse_table_builder.cpp */; }; 12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; }; 12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; }; 12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; }; @@ -135,12 +136,14 @@ 121492C6181E200B008E9BDA /* igloo_framework.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = igloo_framework.h; sourceTree = ""; }; 121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; }; 121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; }; - 12512092182F307C00C9B56A /* parse_table_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_spec.cpp; path = spec/lr/parse_table_spec.cpp; sourceTree = SOURCE_ROOT; }; + 12512092182F307C00C9B56A /* parse_table_builder_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_builder_spec.cpp; path = spec/lr/parse_table_builder_spec.cpp; sourceTree = SOURCE_ROOT; }; 1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = ""; }; 1251209E18307DEC00C9B56A /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = ""; }; 1251209F18307DEC00C9B56A /* parse_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table.h; sourceTree = ""; }; 125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/test_grammars/arithmetic.h; sourceTree = SOURCE_ROOT; }; 125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/test_grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; }; + 129D242A183EB1EB00FE9F71 /* parse_table_builder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table_builder.cpp; sourceTree = ""; }; + 129D242B183EB1EB00FE9F71 /* parse_table_builder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table_builder.h; sourceTree = ""; }; 12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transition_map.h; sourceTree = ""; }; 12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path = spec/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; }; 12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = ""; }; @@ -206,6 +209,8 @@ 12130621182C85D300FCF928 /* item_set.h */, 1251209E18307DEC00C9B56A /* parse_table.cpp */, 1251209F18307DEC00C9B56A /* parse_table.h */, + 129D242A183EB1EB00FE9F71 /* parse_table_builder.cpp */, + 129D242B183EB1EB00FE9F71 /* parse_table_builder.h */, ); path = lr; sourceTree = ""; @@ -214,7 +219,7 @@ isa = PBXGroup; children = ( 1213061D182C857100FCF928 /* item_set_spec.cpp */, - 12512092182F307C00C9B56A /* parse_table_spec.cpp */, + 12512092182F307C00C9B56A /* parse_table_builder_spec.cpp */, 12D1369C18328C5A005F3369 /* item_spec.cpp */, ); name = lr; @@ -537,6 +542,7 @@ buildActionMask = 2147483647; files = ( 12130614182C3A1700FCF928 /* seq.cpp in Sources */, + 129D242C183EB1EB00FE9F71 /* parse_table_builder.cpp in Sources */, 12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */, 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */, 1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */, @@ -550,7 +556,7 @@ 125120A018307DEC00C9B56A /* parse_table.cpp in Sources */, 1214930E181E200B008E9BDA /* main.cpp in Sources */, 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */, - 12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */, + 12512093182F307C00C9B56A /* parse_table_builder_spec.cpp in Sources */, 1213061F182C857100FCF928 /* item_set_spec.cpp in Sources */, 12D136A4183678A2005F3369 /* repeat.cpp in Sources */, 12130622182C85D300FCF928 /* item_set.cpp in Sources */, diff --git a/spec/lr/item_set_spec.cpp b/spec/lr/item_set_spec.cpp index aab22014..dcd39e56 100644 --- a/spec/lr/item_set_spec.cpp +++ b/spec/lr/item_set_spec.cpp @@ -9,7 +9,7 @@ static item_set_ptr item_set(const std::initializer_list &items) { return item_set_ptr(new ItemSet(items)); } -Describe_Only(item_sets) { +Describe(item_sets) { Grammar grammar = test_grammars::arithmetic(); It(computes_the_closure_of_an_item_set_under_symbol_expansion) { @@ -53,4 +53,13 @@ Describe_Only(item_sets) { { character('('), item_set({ Item("left_paren", blank(), 1) }) } }), TransitionMap::elements_equal)); } + + It(can_be_hashed) { + ItemSet set1 = ItemSet(Item::at_beginning_of_rule("factor", grammar), grammar); + ItemSet set2 = ItemSet(Item::at_beginning_of_rule("factor", grammar), grammar); + AssertThat(std::hash()(set1), Equals(std::hash()(set2))); + + ItemSet set3 = ItemSet(Item::at_beginning_of_rule("term", grammar), grammar); + AssertThat(std::hash()(set1), !Equals(std::hash()(set3))); + } }; diff --git a/spec/lr/parse_table_builder_spec.cpp b/spec/lr/parse_table_builder_spec.cpp new file mode 100644 index 00000000..44e1362e --- /dev/null +++ b/spec/lr/parse_table_builder_spec.cpp @@ -0,0 +1,44 @@ +#include "spec_helper.h" +#include "../test_grammars/arithmetic.h" +#include "parse_table_builder.h" +#include "parse_table.h" + +using namespace tree_sitter::lr; + +typedef std::unordered_set actions; + +Describe(ParseTableBuilder_test) { + Grammar grammar = test_grammars::arithmetic(); + ParseTable table = ParseTableBuilder::build_table(grammar); + + It(has_the_right_starting_state) { + AssertThat(table.actions_for(0), Equals(unordered_map({ + { "expression", actions({ ParseAction::Shift(1) }) }, + { "term", actions({ ParseAction::Shift(2) }) }, + { "factor", actions({ ParseAction::Shift(5) }) }, + { "variable", actions({ ParseAction::Shift(8) }) }, + { "number", actions({ ParseAction::Shift(8) }) }, + { "left_paren", actions({ ParseAction::Shift(9) }) } + }))); + } + + It(accepts_when_the_start_symbol_is_reduced) { + AssertThat(table.actions_for(1), Equals(unordered_map({ + { ParseTable::END_OF_INPUT, actions({ ParseAction::Accept() }) } + }))); + } + + It(has_the_right_next_states) { + AssertThat(table.actions_for(2), Equals(unordered_map({ + { "plus", actions({ ParseAction::Shift(3) }) }, + }))); + + AssertThat(table.actions_for(3), Equals(unordered_map({ + { "variable", actions({ ParseAction::Shift(8) }) }, + { "factor", actions({ ParseAction::Shift(5) }) }, + { "left_paren", actions({ ParseAction::Shift(9) }) }, + { "number", actions({ ParseAction::Shift(8) }) }, + { "term", actions({ ParseAction::Shift(4) }) }, + }))); + } +}; diff --git a/spec/lr/parse_table_spec.cpp b/spec/lr/parse_table_spec.cpp deleted file mode 100644 index f7749826..00000000 --- a/spec/lr/parse_table_spec.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include "spec_helper.h" -#include "../test_grammars/arithmetic.h" - -using namespace tree_sitter::lr; - -Describe(build_parse_tables) { - Describe(lexing_tables) { - - }; -}; diff --git a/spec/spec_helper.cpp b/spec/spec_helper.cpp index 0624777b..2020d65d 100644 --- a/spec/spec_helper.cpp +++ b/spec/spec_helper.cpp @@ -4,3 +4,26 @@ EqualsContainerConstraint, rule_tmap_comparator> EqualsTransitionMap(const TransitionMap &expected) { return EqualsContainer(expected, TransitionMap::elements_equal); } + +namespace tree_sitter { + namespace lr { + std::ostream& operator<<(std::ostream &stream, const unordered_map> &map) { + stream << string("{"); + bool started = false; + for (auto pair : map) { + if (started) stream << string(", "); + stream << string("{") << pair.first << string(", ["); + bool started_set = false; + for (ParseAction action : pair.second) { + if (started_set) stream << ", "; + stream << action; + started_set = true; + } + stream << string("]}"); + started = true; + } + stream << string("}"); + return stream; + } + } +} diff --git a/spec/spec_helper.h b/spec/spec_helper.h index e15f544a..fce2e83a 100644 --- a/spec/spec_helper.h +++ b/spec/spec_helper.h @@ -8,6 +8,8 @@ #include "item.h" #include "item_set.h" #include "grammar.h" +#include "parse_table.h" +#include "parse_table_builder.h" using namespace tree_sitter; using namespace std; @@ -17,4 +19,10 @@ using namespace igloo; typedef bool (* rule_tmap_comparator)(const std::pair &, const std::pair &); EqualsContainerConstraint, rule_tmap_comparator> EqualsTransitionMap(const TransitionMap &expected); +namespace tree_sitter { + namespace lr { + std::ostream& operator<<(std::ostream &stream, const unordered_map> &map); + } +} + #endif diff --git a/src/grammar.cpp b/src/grammar.cpp index 40c4ba3d..a168c1fe 100644 --- a/src/grammar.cpp +++ b/src/grammar.cpp @@ -1,5 +1,7 @@ #include "grammar.h" +using namespace std; + namespace tree_sitter { Grammar::Grammar(const rule_map_init_list &rules) : rules(rules), @@ -11,4 +13,13 @@ namespace tree_sitter { rules::rule_ptr(nullptr) : iter->second; } + + vector Grammar::rule_names() const { + vector result; + for (auto pair : rules) { + result.push_back(pair.first); + } + return result; + } + } \ No newline at end of file diff --git a/src/grammar.h b/src/grammar.h index db2bda8b..069523a3 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -2,20 +2,20 @@ #define __TreeSitter__grammar__ #include +#include #include "rules.h" namespace tree_sitter { class Grammar { typedef std::unordered_map rule_map; typedef std::initializer_list> rule_map_init_list; + const rule_map rules; public: Grammar(const rule_map_init_list &rules); const rules::rule_ptr rule(const std::string &) const; const std::string start_rule_name; - - private: - const rule_map rules; + std::vector rule_names() const; }; } diff --git a/src/lr/item.cpp b/src/lr/item.cpp index 208ee6dc..75b53618 100644 --- a/src/lr/item.cpp +++ b/src/lr/item.cpp @@ -1,6 +1,8 @@ #include "item.h" #include "grammar.h" +#include + using namespace std; namespace tree_sitter { @@ -35,6 +37,10 @@ namespace tree_sitter { return rule_names_eq && rules_eq; } + bool Item::is_done() const { + return *rule == rules::Blank(); + } + std::ostream& operator<<(ostream &stream, const Item &item) { stream << string("(item '") << @@ -47,4 +53,3 @@ namespace tree_sitter { } } - diff --git a/src/lr/item.h b/src/lr/item.h index 7563191c..fc389357 100644 --- a/src/lr/item.h +++ b/src/lr/item.h @@ -17,6 +17,7 @@ namespace tree_sitter { TransitionMap transitions() const; std::vector next_symbols() const; bool operator==(const Item &other) const; + bool is_done() const; const std::string rule_name; const rules::rule_ptr rule; @@ -25,8 +26,20 @@ namespace tree_sitter { typedef std::shared_ptr item_ptr; std::ostream& operator<<(std::ostream &stream, const Item &item); + } } +namespace std { + template<> + struct hash { + size_t operator()(const tree_sitter::lr::Item &item) { + return + hash()(item.rule_name) ^ + hash()(*item.rule) ^ + hash()(item.consumed_sym_count); + } + }; +} #endif diff --git a/src/lr/item_set.h b/src/lr/item_set.h index 89914669..c049f47b 100644 --- a/src/lr/item_set.h +++ b/src/lr/item_set.h @@ -10,6 +10,7 @@ namespace tree_sitter { typedef std::shared_ptr item_set_ptr; class ItemSet { + const std::vector contents; public: ItemSet(const std::vector &items); ItemSet(const std::initializer_list &items); @@ -25,7 +26,6 @@ namespace tree_sitter { TransitionMap char_transitions(const Grammar &grammar) const; bool operator==(const ItemSet &other) const; - const std::vector contents; }; typedef std::shared_ptr item_set_ptr; @@ -33,4 +33,16 @@ namespace tree_sitter { } } +namespace std { + template<> + struct hash { + size_t operator()(const tree_sitter::lr::ItemSet &item_set) const { + size_t result = hash()(item_set.size()); + for (auto item : item_set) + result ^= hash()(item); + return result; + } + }; +} + #endif diff --git a/src/lr/parse_table.cpp b/src/lr/parse_table.cpp index 305a3132..ceb3aba7 100644 --- a/src/lr/parse_table.cpp +++ b/src/lr/parse_table.cpp @@ -1 +1,102 @@ #include "parse_table.h" + +using namespace std; + +namespace tree_sitter { + namespace lr { + // Action + ParseAction::ParseAction() : + type(ParseActionTypeError), + state_index(-1), + symbol_name(""), + child_symbol_count(-1) {}; + + ParseAction::ParseAction(ParseActionType type, size_t state_index, string symbol_name, size_t child_symbol_count) : + type(type), + state_index(state_index), + symbol_name(symbol_name), + child_symbol_count(child_symbol_count) {}; + + ParseAction ParseAction::Error() { + return ParseAction(ParseActionTypeError, -1, "", -1); + } + + ParseAction ParseAction::Accept() { + return ParseAction(ParseActionTypeAccept, -1, "", -1); + } + + ParseAction ParseAction::Shift(size_t state_index) { + return ParseAction(ParseActionTypeShift, state_index, "", -1); + } + + ParseAction ParseAction::Reduce(std::string symbol_name, size_t child_symbol_count) { + return ParseAction(ParseActionTypeReduce, -1, symbol_name, child_symbol_count); + } + + bool ParseAction::operator==(const ParseAction &other) const { + bool types_eq = type == other.type; + bool state_indices_eq = state_index == other.state_index; + bool symbol_ids_eq = symbol_name == other.symbol_name; + bool child_symbol_counts_eq = child_symbol_count == other.child_symbol_count; + return types_eq && state_indices_eq && symbol_ids_eq && child_symbol_counts_eq; + } + + ostream& operator<<(ostream &stream, const ParseAction &action) { + switch (action.type) { + case ParseActionTypeAccept: + return stream << string("accept"); + case ParseActionTypeAdvance: + return stream << (string("(advance ") + to_string(action.state_index) + ")"); + case ParseActionTypeShift: + return stream << (string("(shift ") + to_string(action.state_index) + ")"); + case ParseActionTypeReduce: + return stream << (string("(reduce ") + action.symbol_name + ")"); + case ParseActionTypeError: + return stream << string("error"); + } + } + + // State + ParseState::ParseState() : actions(unordered_map>()) {} + + // Table + unordered_map get_symbol_id_map(const vector &names) { + unordered_map result; + size_t i = 0; + for (string name : names) { + result[name] = i; + i++; + } + return result; + } + + ParseTable::ParseTable(vector symbol_names) : + symbol_ids(get_symbol_id_map(symbol_names)), + symbol_names(symbol_names), + states(vector()) {}; + + ParseState ParseTable::get_state(size_t index) const { + return states[index]; + } + + size_t ParseTable::add_state() { + states.push_back(ParseState()); + return states.size() - 1; + } + + void ParseTable::add_action(size_t state_index, string sym_name, ParseAction action) { + states[state_index].actions[sym_name].insert(action); + } + + ParseState ParseTable::starting_state() const { + return states[0]; + } + + unordered_map> ParseTable::actions_for(size_t state_index) const { + return states[state_index].actions; + } + + const string ParseTable::START = "__START__"; + const string ParseTable::END_OF_INPUT = "__END__"; + } +} \ No newline at end of file diff --git a/src/lr/parse_table.h b/src/lr/parse_table.h index f95e19cd..4677b19e 100644 --- a/src/lr/parse_table.h +++ b/src/lr/parse_table.h @@ -1,13 +1,78 @@ #ifndef __TreeSitter__parse_table__ #define __TreeSitter__parse_table__ +#include +#include +#include +#include "rule.h" + namespace tree_sitter { namespace lr { - class ParseTable { + typedef enum { + ParseActionTypeAccept, + ParseActionTypeAdvance, + ParseActionTypeError, + ParseActionTypeShift, + ParseActionTypeReduce, + } ParseActionType; + + class ParseAction { public: - ParseTable(); + ParseAction(); + ParseAction(ParseActionType type, size_t state_index, std::string symbol_name, size_t child_symbol_count); + bool operator==(const ParseAction &action) const; + static ParseAction Accept(); + static ParseAction Advance(size_t state_index); + static ParseAction Error(); + static ParseAction Shift(size_t state_index); + static ParseAction Reduce(std::string symbol_name, size_t child_symbol_count); + + ParseActionType type; + size_t child_symbol_count; + std::string symbol_name; + size_t state_index; + }; + + std::ostream& operator<<(std::ostream &stream, const ParseAction &item); + + class ParseState { + public: + ParseState(); + std::unordered_map> actions; + }; + + class ParseTable { + std::vector states; + public: + const std::unordered_map symbol_ids; + const std::vector symbol_names; + + ParseTable(std::vector rule_names); + + ParseState starting_state() const; + ParseState get_state(size_t index) const; + ParseAction action_for(size_t state_index, std::string symbol_name) const; + std::unordered_map> actions_for(size_t state_index) const; + size_t add_state(); + void add_action(size_t state_index, std::string symbol_name, ParseAction action); + + static const std::string START; + static const std::string END_OF_INPUT; }; } } +namespace std { + template<> + struct hash { + size_t operator()(const tree_sitter::lr::ParseAction &action) const { + return ( + hash()(action.type) ^ + hash()(action.symbol_name) ^ + hash()(action.state_index) ^ + hash()(action.child_symbol_count)); + } + }; +} + #endif diff --git a/src/lr/parse_table_builder.cpp b/src/lr/parse_table_builder.cpp new file mode 100644 index 00000000..d45726e2 --- /dev/null +++ b/src/lr/parse_table_builder.cpp @@ -0,0 +1,59 @@ +#include "parse_table_builder.h" +#include "item_set.h" +#include "rules.h" + +using namespace std; + +namespace tree_sitter { + namespace lr { + static int NOT_FOUND = -1; + + ParseTable ParseTableBuilder::build_table(const tree_sitter::Grammar &grammar) { + auto builder = ParseTableBuilder(grammar); + builder.build(); + return builder.table; + } + + ParseTableBuilder::ParseTableBuilder(const Grammar &grammar) : + grammar(grammar), + table(ParseTable(grammar.rule_names())), + state_indices(unordered_map()) {}; + + void ParseTableBuilder::build() { + auto item = Item(ParseTable::START, rules::sym(grammar.start_rule_name), 0); + auto item_set = std::make_shared(item, grammar); + add_item_set(item_set); + } + + size_t ParseTableBuilder::add_item_set(const shared_ptr item_set) { + auto state_index = state_index_for_item_set(*item_set); + if (state_index == NOT_FOUND) { + state_index = table.add_state(); + state_indices[std::hash()(*item_set)] = state_index; + + for (auto transition : item_set->sym_transitions(grammar)) { + rules::sym_ptr symbol = static_pointer_cast(transition.first); + size_t new_state_index = add_item_set(transition.second); + table.add_action(state_index, symbol->name, ParseAction::Shift(new_state_index)); + } + + for (Item item : *item_set) { + if (item.is_done()) { + if (item.rule_name == ParseTable::START) { + table.add_action(state_index, ParseTable::END_OF_INPUT, ParseAction::Accept()); + } else { + for (string rule_name : table.symbol_names) + table.add_action(state_index, rule_name, ParseAction::Reduce(item.rule_name, item.consumed_sym_count)); + } + } + } + } + return state_index; + } + + long ParseTableBuilder::state_index_for_item_set(const ItemSet &item_set) const { + auto entry = state_indices.find(std::hash()(item_set)); + return (entry == state_indices.end()) ? NOT_FOUND : entry->second; + } + } +} \ No newline at end of file diff --git a/src/lr/parse_table_builder.h b/src/lr/parse_table_builder.h new file mode 100644 index 00000000..a2d24ca9 --- /dev/null +++ b/src/lr/parse_table_builder.h @@ -0,0 +1,34 @@ +#ifndef __TreeSitter__parse_table_builder__ +#define __TreeSitter__parse_table_builder__ + +#include +#include "grammar.h" +#include "item_set.h" +#include "parse_table.h" + +namespace tree_sitter { + namespace lr { + class ItemSet; + + struct ItemSetPointerHasher { + std::size_t operator()(const std::shared_ptr &item_set) const { + return std::hash()(*item_set); + } + }; + + class ParseTableBuilder { + const Grammar grammar; + ParseTable table; + std::unordered_map state_indices; + + size_t add_item_set(const std::shared_ptr item_set); + long state_index_for_item_set(const ItemSet &item_set) const; + public: + ParseTableBuilder(const Grammar &grammar); + static ParseTable build_table(const Grammar &grammar); + void build(); + }; + } +} + +#endif diff --git a/src/rules/rule.cpp b/src/rules/rule.cpp index 5599bef5..90361113 100644 --- a/src/rules/rule.cpp +++ b/src/rules/rule.cpp @@ -1,9 +1,10 @@ #include "rule.h" +#include namespace tree_sitter { namespace rules { - bool Rule::operator==(const rule_ptr other) const { - return true; + size_t Rule::hash_code() const { + return std::hash()(to_string()); } std::ostream& operator<<(std::ostream& stream, const Rule &rule) diff --git a/src/rules/rule.h b/src/rules/rule.h index 24201870..cbb386b5 100644 --- a/src/rules/rule.h +++ b/src/rules/rule.h @@ -15,7 +15,7 @@ namespace tree_sitter { virtual TransitionMap transitions() const = 0; virtual bool operator==(const Rule& other) const = 0; virtual std::string to_string() const = 0; - bool operator==(const rule_ptr other) const; + virtual size_t hash_code() const; }; @@ -24,5 +24,13 @@ namespace tree_sitter { } } +namespace std { + template<> + struct hash { + size_t operator()(const tree_sitter::rules::Rule &rule) { + return rule.hash_code(); + } + }; +} #endif \ No newline at end of file diff --git a/src/transition_map.h b/src/transition_map.h index 594a3291..93d94bfa 100644 --- a/src/transition_map.h +++ b/src/transition_map.h @@ -15,7 +15,6 @@ namespace tree_sitter { typedef std::vector contents_type; public: - static bool elements_equal(const pair_type &left, const pair_type &right) { return (*left.first == *right.first) && (*left.second == *right.second); } @@ -72,8 +71,10 @@ namespace tree_sitter { template TransitionMap map(std::function(mapped_ptr)> map_fn) { TransitionMap result; - for (pair_type pair : *this) - result.add(pair.first, map_fn(pair.second)); + for (pair_type pair : *this) { + auto new_value = map_fn(pair.second); + result.add(pair.first, new_value); + } return result; } diff --git a/todo.md b/todo.md index 5fdd224c..7ca5a701 100644 --- a/todo.md +++ b/todo.md @@ -19,7 +19,7 @@ Then generate a C function for a ParseTable For a Parser, the ParseActions can be any of: - Accept(symbol) - - Shift(symbol) + - Shift(state_index) - Reduce(symbol, number of child symbols) # normalize grammars