Remove ordering of parse and lex items

Storing parse and lex items in an unordered_map and unordered_set, instead of the ordered map and set, speeds up grammar compilation significantly. The items' operator< compared the output of the rules' .to_string method to compute the order, which was not a meaningful comparison and was probably slow.
2014-03-26 13:13:20 -07:00 · 2014-03-26 13:13:20 -07:00 · 05e16a8c46
commit 05e16a8c46
parent 4454925b5a
5 changed files with 700 additions and 657 deletions
--- a/examples/parsers/javascript.c
+++ b/examples/parsers/javascript.c
--- a/src/compiler/build_tables/build_tables.cc
+++ b/src/compiler/build_tables/build_tables.cc
@ -1,7 +1,7 @@
 #include "compiler/build_tables/build_tables.h"
 #include <string>
 #include <utility>
-#include <map>
+#include <unordered_map>
 #include "compiler/prepared_grammar.h"
 #include "compiler/rules/built_in_symbols.h"
 #include "compiler/build_tables/item.h"
@ -12,7 +12,7 @@
 namespace tree_sitter {
    using std::pair;
    using std::string;
-    using std::map;
+    using std::unordered_map;
    using std::make_shared;
    using rules::Symbol;
    using rules::CharacterSet;
@ -23,8 +23,8 @@ namespace tree_sitter {
        class TableBuilder {
            const PreparedGrammar grammar;
            const PreparedGrammar lex_grammar;
-            map<const ParseItemSet, ParseStateId> parse_state_ids;
-            map<const LexItemSet, LexStateId> lex_state_ids;
+            unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
+            unordered_map<const LexItemSet, LexStateId> lex_state_ids;
            ParseTable parse_table;
            LexTable lex_table;

--- a/src/compiler/build_tables/item.cc
+++ b/src/compiler/build_tables/item.cc
@ -41,24 +41,6 @@ namespace tree_sitter {
            string(">");
        }

-        bool LexItem::operator<(const LexItem &other) const {
-            if (lhs < other.lhs) return true;
-            if (other.lhs < lhs) return false;
-            if (rule->to_string() < other.rule->to_string()) return true;
-            return false;
-        }
-
-        bool ParseItem::operator<(const ParseItem &other) const {
-            if (lhs < other.lhs) return true;
-            if (other.lhs < lhs) return false;
-            if (rule->to_string() < other.rule->to_string()) return true;
-            if (rule->to_string() > other.rule->to_string()) return false;
-            if (consumed_symbols < other.consumed_symbols) return true;
-            if (consumed_symbols > other.consumed_symbols) return false;
-            if (lookahead_sym < other.lookahead_sym) return true;
-            return false;
-        }
-
        LexItem::LexItem(const Symbol &lhs, const rule_ptr rule) : Item(lhs, rule) {}

        bool LexItem::operator==(const LexItem &other) const {
--- a/src/compiler/build_tables/item.h
+++ b/src/compiler/build_tables/item.h
@ -1,7 +1,7 @@
 #ifndef COMPILER_BUILD_TABLES_ITEM_H_
 #define COMPILER_BUILD_TABLES_ITEM_H_

-#include <set>
+#include <unordered_set>
 #include <string>
 #include <vector>
 #include "compiler/rules/symbol.h"
@ -22,7 +22,6 @@ namespace tree_sitter {
        class LexItem : public Item {
        public:
            LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule);
-            bool operator<(const LexItem &other) const;
            bool operator==(const LexItem &other) const;
        };

@ -32,15 +31,14 @@ namespace tree_sitter {
                      const rules::rule_ptr rule,
                      const std::vector<bool> &consumed_symbols,
                      const rules::Symbol &lookahead_sym);
-            bool operator<(const ParseItem &other) const;
            bool operator==(const ParseItem &other) const;

            const std::vector<bool> consumed_symbols;
            const rules::Symbol lookahead_sym;
        };

-        typedef std::set<ParseItem> ParseItemSet;
-        typedef std::set<LexItem> LexItemSet;
+        typedef std::unordered_set<ParseItem> ParseItemSet;
+        typedef std::unordered_set<LexItem> LexItemSet;

        std::ostream& operator<<(std::ostream &stream, const LexItem &item);
        std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
@ -69,8 +67,8 @@ namespace std {
    };

    template<typename T>
-    struct hash<const set<T>> {
-        size_t operator()(const set<T> &set) const {
+    struct hash<const unordered_set<T>> {
+        size_t operator()(const unordered_set<T> &set) const {
            size_t result = hash<size_t>()(set.size());
            for (auto item : set)
                result ^= hash<T>()(item);
--- a/src/compiler/build_tables/item_set_transitions.cc
+++ b/src/compiler/build_tables/item_set_transitions.cc
@ -1,12 +1,12 @@
 #include "compiler/build_tables/item_set_transitions.h"
-#include <set>
+#include <unordered_set>
 #include "compiler/build_tables/item_set_closure.h"
 #include "compiler/build_tables/rule_transitions.h"
 #include "compiler/build_tables/merge_transitions.h"

 namespace tree_sitter {
    using std::map;
-    using std::set;
+    using std::unordered_set;
    using rules::CharacterSet;
    using rules::Symbol;

@ -35,8 +35,8 @@ namespace tree_sitter {
        }

        template<typename T>
-        static set<T> merge_sets(const set<T> &left, const set<T> &right) {
-            set<T> result = left;
+        static unordered_set<T> merge_sets(const unordered_set<T> &left, const unordered_set<T> &right) {
+            unordered_set<T> result = left;
            result.insert(right.begin(), right.end());
            return result;
        }