From d015d57a530e60c7be911cc0974da59d22eae530 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 13 Jan 2014 18:47:57 -0800 Subject: [PATCH] Remove ItemSet class; just use a set --- spec/fixtures/parsers/arithmetic.c | 118 +++++++++--------- src/compiler/build_tables/item.cpp | 4 + src/compiler/build_tables/item.h | 17 ++- src/compiler/build_tables/item_set.cpp | 34 ----- src/compiler/build_tables/item_set.h | 42 ------- .../build_tables/item_set_closure.cpp | 18 +-- src/compiler/build_tables/item_set_closure.h | 4 +- .../build_tables/item_set_transitions.h | 2 +- src/compiler/build_tables/next_symbols.h | 2 +- src/compiler/build_tables/perform.cpp | 8 +- tree_sitter.xcodeproj/project.pbxproj | 6 - 11 files changed, 93 insertions(+), 162 deletions(-) delete mode 100644 src/compiler/build_tables/item_set.cpp delete mode 100644 src/compiler/build_tables/item_set.h diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index e299c64e..c194458f 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -57,17 +57,17 @@ static void ts_lex(TSParser *parser) { case 6: ACCEPT_TOKEN(ts_symbol_4); case 7: - if (LOOKAHEAD_CHAR() == '*') - ADVANCE(8); - LEX_ERROR(1, EXPECT({"'*'"})); - case 8: - ACCEPT_TOKEN(ts_symbol_3); - case 9: if (LOOKAHEAD_CHAR() == ')') - ADVANCE(10); + ADVANCE(8); LEX_ERROR(1, EXPECT({"')'"})); - case 10: + case 8: ACCEPT_TOKEN(ts_symbol_2); + case 9: + if (LOOKAHEAD_CHAR() == '*') + ADVANCE(10); + LEX_ERROR(1, EXPECT({"'*'"})); + case 10: + ACCEPT_TOKEN(ts_symbol_3); default: LEX_PANIC(); } @@ -80,20 +80,20 @@ static TSParseResult ts_parse(const char *input) { case 0: SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { - case ts_symbol_variable: - SHIFT(8); case ts_symbol_factor: - SHIFT(5); - case ts_symbol_number: - SHIFT(8); - case ts_symbol_1: SHIFT(9); + case ts_symbol_variable: + SHIFT(5); case ts_symbol_term: SHIFT(2); + case ts_symbol_1: + SHIFT(6); + case ts_symbol_number: + SHIFT(5); case ts_symbol_expression: SHIFT(1); default: - PARSE_ERROR(6, EXPECT({"expression", "term", "1", "number", "factor", "variable"})); + PARSE_ERROR(6, EXPECT({"expression", "number", "1", "term", "variable", "factor"})); } case 1: SET_LEX_STATE(4); @@ -114,18 +114,18 @@ static TSParseResult ts_parse(const char *input) { case 3: SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { - case ts_symbol_variable: - SHIFT(8); case ts_symbol_factor: + SHIFT(9); + case ts_symbol_variable: SHIFT(5); case ts_symbol_1: - SHIFT(9); + SHIFT(6); case ts_symbol_number: - SHIFT(8); + SHIFT(5); case ts_symbol_term: SHIFT(4); default: - PARSE_ERROR(5, EXPECT({"term", "number", "1", "factor", "variable"})); + PARSE_ERROR(5, EXPECT({"term", "number", "1", "variable", "factor"})); } case 4: SET_LEX_STATE(4); @@ -134,70 +134,64 @@ static TSParseResult ts_parse(const char *input) { REDUCE(ts_symbol_expression, 3); } case 5: - SET_LEX_STATE(7); - switch (LOOKAHEAD_SYM()) { - case ts_symbol_3: - SHIFT(6); - default: - REDUCE(ts_symbol_term, 1); - } - case 6: - SET_LEX_STATE(0); - switch (LOOKAHEAD_SYM()) { - case ts_symbol_1: - SHIFT(9); - case ts_symbol_number: - SHIFT(8); - case ts_symbol_variable: - SHIFT(8); - case ts_symbol_factor: - SHIFT(7); - default: - PARSE_ERROR(4, EXPECT({"factor", "variable", "number", "1"})); - } - case 7: - SET_LEX_STATE(4); - switch (LOOKAHEAD_SYM()) { - default: - REDUCE(ts_symbol_term, 3); - } - case 8: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { default: REDUCE(ts_symbol_factor, 1); } - case 9: + case 6: SET_LEX_STATE(0); switch (LOOKAHEAD_SYM()) { - case ts_symbol_variable: - SHIFT(8); case ts_symbol_factor: - SHIFT(5); - case ts_symbol_number: - SHIFT(8); - case ts_symbol_1: SHIFT(9); + case ts_symbol_expression: + SHIFT(7); case ts_symbol_term: SHIFT(2); - case ts_symbol_expression: - SHIFT(10); default: - PARSE_ERROR(6, EXPECT({"expression", "term", "1", "number", "factor", "variable"})); + PARSE_ERROR(3, EXPECT({"term", "expression", "factor"})); } - case 10: - SET_LEX_STATE(9); + case 7: + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_symbol_2: - SHIFT(11); + SHIFT(8); default: PARSE_ERROR(1, EXPECT({"2"})); } + case 8: + SET_LEX_STATE(4); + switch (LOOKAHEAD_SYM()) { + default: + REDUCE(ts_symbol_factor, 3); + } + case 9: + SET_LEX_STATE(9); + switch (LOOKAHEAD_SYM()) { + case ts_symbol_3: + SHIFT(10); + default: + REDUCE(ts_symbol_term, 1); + } + case 10: + SET_LEX_STATE(0); + switch (LOOKAHEAD_SYM()) { + case ts_symbol_factor: + SHIFT(11); + case ts_symbol_1: + SHIFT(6); + case ts_symbol_number: + SHIFT(5); + case ts_symbol_variable: + SHIFT(5); + default: + PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"})); + } case 11: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { default: - REDUCE(ts_symbol_factor, 3); + REDUCE(ts_symbol_term, 3); } default: PARSE_PANIC(); diff --git a/src/compiler/build_tables/item.cpp b/src/compiler/build_tables/item.cpp index 8ce8013f..e13a8651 100644 --- a/src/compiler/build_tables/item.cpp +++ b/src/compiler/build_tables/item.cpp @@ -33,6 +33,10 @@ namespace tree_sitter { return rule_names_eq && rules_eq && consumed_sym_counts_eq; } + bool Item::operator<(const Item &other) const { + return rule_name < other.rule_name; + } + bool Item::is_done() const { for (auto pair : rule_transitions(rule)) if (*pair.first == rules::Blank()) diff --git a/src/compiler/build_tables/item.h b/src/compiler/build_tables/item.h index 8a300850..6f45f293 100644 --- a/src/compiler/build_tables/item.h +++ b/src/compiler/build_tables/item.h @@ -4,6 +4,7 @@ #include #include "rule.h" #include +#include namespace tree_sitter { class Grammar; @@ -18,6 +19,7 @@ namespace tree_sitter { static Item at_beginning_of_token(const std::string &rule_name, const Grammar &grammar); bool operator==(const Item &other) const; + bool operator<(const Item &other) const; bool is_done() const; int next_sym_count() const; @@ -26,6 +28,8 @@ namespace tree_sitter { const int consumed_sym_count; }; + typedef std::set ItemSet; + std::ostream& operator<<(std::ostream &stream, const Item &item); } } @@ -33,13 +37,24 @@ namespace tree_sitter { namespace std { template<> struct hash { - size_t operator()(const tree_sitter::build_tables::Item &item) { + size_t operator()(const tree_sitter::build_tables::Item &item) const { return hash()(item.rule_name) ^ hash()(*item.rule) ^ hash()(item.consumed_sym_count); } }; + + template<> + struct hash { + size_t operator()(const tree_sitter::build_tables::ItemSet &item_set) const { + size_t result = hash()(item_set.size()); + for (auto item : item_set) + result ^= hash()(item); + return result; + } + }; } + #endif diff --git a/src/compiler/build_tables/item_set.cpp b/src/compiler/build_tables/item_set.cpp deleted file mode 100644 index 91db6ceb..00000000 --- a/src/compiler/build_tables/item_set.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "item_set.h" - -using std::vector; -using std::ostream; -using std::string; - -namespace tree_sitter { - namespace build_tables { - ItemSet::ItemSet(const vector &items) : contents(items) {} - - bool ItemSet::operator==(const tree_sitter::build_tables::ItemSet &other) const { - return contents == other.contents; - } - - ItemSet::const_iterator ItemSet::begin() const { - return contents.begin(); - } - - ItemSet::const_iterator ItemSet::end() const { - return contents.end(); - } - - size_t ItemSet::size() const { - return contents.size(); - } - - ostream& operator<<(ostream &stream, const ItemSet &item_set) { - stream << string("#"); - } - } -} diff --git a/src/compiler/build_tables/item_set.h b/src/compiler/build_tables/item_set.h deleted file mode 100644 index 3f446c3f..00000000 --- a/src/compiler/build_tables/item_set.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef __TreeSitter__item_set__ -#define __TreeSitter__item_set__ - -#include "item.h" -#include "rule.h" -#include -#include - -namespace tree_sitter { - namespace build_tables { - class ItemSet; - - class ItemSet { - const std::vector contents; - public: - ItemSet(const std::vector &items); - bool operator==(const ItemSet &other) const; - - typedef Item value_type; - typedef std::vector::const_iterator const_iterator; - const_iterator begin() const; - const_iterator end() const; - size_t size() const; - }; - - std::ostream& operator<<(std::ostream &stream, const ItemSet &item_set); - } -} - -namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::build_tables::ItemSet &item_set) const { - size_t result = hash()(item_set.size()); - for (auto item : item_set) - result ^= hash()(item); - return result; - } - }; -} - -#endif diff --git a/src/compiler/build_tables/item_set_closure.cpp b/src/compiler/build_tables/item_set_closure.cpp index c05ed6b9..0ccb1900 100644 --- a/src/compiler/build_tables/item_set_closure.cpp +++ b/src/compiler/build_tables/item_set_closure.cpp @@ -8,25 +8,25 @@ using std::vector; namespace tree_sitter { namespace build_tables { - static bool vector_contains(vector items, build_tables::Item item) { + static bool contains(ItemSet items, Item item) { return (std::find(items.begin(), items.end(), item) != items.end()); } - static void add_item(vector &vector, const Item &item, const Grammar &grammar) { - if (!vector_contains(vector, item)) { - vector.push_back(item); + static void add_item(ItemSet &item_set, const Item &item, const Grammar &grammar) { + if (!contains(item_set, item)) { + item_set.insert(item); for (rules::Symbol rule : next_non_terminals(item, grammar)) { Item next_item = Item::at_beginning_of_rule(rule.name, grammar); - add_item(vector, next_item, grammar); + add_item(item_set, next_item, grammar); } } } - ItemSet item_set_closure(const ItemSet &item_set, const Grammar &grammar) { - vector items; + const ItemSet item_set_closure(const ItemSet &item_set, const Grammar &grammar) { + ItemSet result; for (Item item : item_set) - add_item(items, item, grammar); - return ItemSet(items); + add_item(result, item, grammar); + return result; } } } \ No newline at end of file diff --git a/src/compiler/build_tables/item_set_closure.h b/src/compiler/build_tables/item_set_closure.h index 350e06f4..9c03840c 100644 --- a/src/compiler/build_tables/item_set_closure.h +++ b/src/compiler/build_tables/item_set_closure.h @@ -1,13 +1,13 @@ #ifndef __tree_sitter__close_item_set__ #define __tree_sitter__close_item_set__ -#include "item_set.h" +#include "item.h" namespace tree_sitter { class Grammar; namespace build_tables { - ItemSet item_set_closure(const ItemSet &item_set, const Grammar &grammar); + const ItemSet item_set_closure(const ItemSet &item_set, const Grammar &grammar); } } diff --git a/src/compiler/build_tables/item_set_transitions.h b/src/compiler/build_tables/item_set_transitions.h index f9fcbadc..e23404c5 100644 --- a/src/compiler/build_tables/item_set_transitions.h +++ b/src/compiler/build_tables/item_set_transitions.h @@ -4,7 +4,7 @@ #include "character.h" #include "symbol.h" #include "transition_map.h" -#include "item_set.h" +#include "item.h" namespace tree_sitter { namespace build_tables { diff --git a/src/compiler/build_tables/next_symbols.h b/src/compiler/build_tables/next_symbols.h index 6818b525..dbf51922 100644 --- a/src/compiler/build_tables/next_symbols.h +++ b/src/compiler/build_tables/next_symbols.h @@ -1,7 +1,7 @@ #ifndef __tree_sitter__first_terminal__ #define __tree_sitter__first_terminal__ -#include "item_set.h" +#include "item.h" #include "symbol.h" #include diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index 00e8f4f1..32815c87 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -1,5 +1,5 @@ #include "./perform.h" -#include "item_set.h" +#include "item.h" #include "item_set_closure.h" #include "next_symbols.h" #include "item_set_transitions.h" @@ -83,10 +83,10 @@ namespace tree_sitter { } ItemSet lex_item_set_for_parse_item_set(const ItemSet &parse_item_set) { - vector items; + ItemSet result; for (rules::Symbol symbol : next_terminals(parse_item_set, grammar)) - items.push_back(Item::at_beginning_of_token(symbol.name, lex_grammar)); - return ItemSet(items); + result.insert(Item::at_beginning_of_token(symbol.name, lex_grammar)); + return result; } size_t add_parse_state(const ItemSet &item_set) { diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index 56187eb1..f901d9e3 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -27,7 +27,6 @@ 12EDCF9A1881FCD9005A7A07 /* search_for_symbols.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF931881FCCA005A7A07 /* search_for_symbols.cpp */; }; 12EDCFAF18820387005A7A07 /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF9D18820116005A7A07 /* parse_table.cpp */; }; 12EDCFB018820392005A7A07 /* item.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCFA218820137005A7A07 /* item.cpp */; }; - 12EDCFB118820395005A7A07 /* item_set.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCFA018820137005A7A07 /* item_set.cpp */; }; 12EDCFB21882039A005A7A07 /* perform.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCFA418820137005A7A07 /* perform.cpp */; }; 12EDCFB31882039A005A7A07 /* rule_transitions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCFA618820137005A7A07 /* rule_transitions.cpp */; }; 12EDCFB418820519005A7A07 /* compile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCFAC18820181005A7A07 /* compile.cpp */; }; @@ -117,8 +116,6 @@ 12EDCF9C18820116005A7A07 /* lex_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lex_table.h; sourceTree = ""; }; 12EDCF9D18820116005A7A07 /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = ""; }; 12EDCF9E18820116005A7A07 /* parse_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table.h; sourceTree = ""; }; - 12EDCFA018820137005A7A07 /* item_set.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_set.cpp; path = src/compiler/build_tables/item_set.cpp; sourceTree = SOURCE_ROOT; }; - 12EDCFA118820137005A7A07 /* item_set.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = item_set.h; path = src/compiler/build_tables/item_set.h; sourceTree = SOURCE_ROOT; }; 12EDCFA218820137005A7A07 /* item.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item.cpp; path = src/compiler/build_tables/item.cpp; sourceTree = SOURCE_ROOT; }; 12EDCFA318820137005A7A07 /* item.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = item.h; path = src/compiler/build_tables/item.h; sourceTree = SOURCE_ROOT; }; 12EDCFA418820137005A7A07 /* perform.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = perform.cpp; path = src/compiler/build_tables/perform.cpp; sourceTree = SOURCE_ROOT; }; @@ -209,8 +206,6 @@ children = ( 12EDCFA218820137005A7A07 /* item.cpp */, 12EDCFA318820137005A7A07 /* item.h */, - 12EDCFA018820137005A7A07 /* item_set.cpp */, - 12EDCFA118820137005A7A07 /* item_set.h */, 12EDCFBE18820880005A7A07 /* item_set_closure.cpp */, 12EDCFBF18820880005A7A07 /* item_set_closure.h */, 12EDCFC118820A70005A7A07 /* item_set_transitions.cpp */, @@ -502,7 +497,6 @@ 12EDCFB418820519005A7A07 /* compile.cpp in Sources */, 12BC470718830BC5005AC502 /* next_symbols_spec.cpp in Sources */, 1213060B182C389100FCF928 /* symbol.cpp in Sources */, - 12EDCFB118820395005A7A07 /* item_set.cpp in Sources */, 1251209B1830145300C9B56A /* rule.cpp in Sources */, 27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */, );