Remove ordering of parse and lex items

Storing parse and lex items in an unordered_map and unordered_set
speeds up grammar compilation significantly. The ordering
method on items used the rules' .to_string method to
compute the order, which was not a meaningful comparison
and was probably slow.
This commit is contained in:
Max Brunsfeld 2014-03-26 13:13:20 -07:00
parent 4454925b5a
commit 05e16a8c46
5 changed files with 700 additions and 657 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
#include "compiler/build_tables/build_tables.h"
#include <string>
#include <utility>
#include <map>
#include <unordered_map>
#include "compiler/prepared_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/build_tables/item.h"
@ -12,7 +12,7 @@
namespace tree_sitter {
using std::pair;
using std::string;
using std::map;
using std::unordered_map;
using std::make_shared;
using rules::Symbol;
using rules::CharacterSet;
@ -23,8 +23,8 @@ namespace tree_sitter {
class TableBuilder {
const PreparedGrammar grammar;
const PreparedGrammar lex_grammar;
map<const ParseItemSet, ParseStateId> parse_state_ids;
map<const LexItemSet, LexStateId> lex_state_ids;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
ParseTable parse_table;
LexTable lex_table;

View file

@ -41,24 +41,6 @@ namespace tree_sitter {
string(">");
}
// Less-than comparison for lex items: the left-hand symbols are compared
// first; only when neither is less than the other does the comparison fall
// back to the text returned by each rule's to_string().
bool LexItem::operator<(const LexItem &other) const {
    if (lhs < other.lhs)
        return true;
    const bool lhs_equivalent = !(other.lhs < lhs);
    return lhs_equivalent && rule->to_string() < other.rule->to_string();
}
// Less-than comparison for parse items. Fields are compared in sequence and
// the first one that differs decides the result:
//   1. the left-hand symbol,
//   2. the text returned by each rule's to_string(),
//   3. the consumed-symbol flags,
//   4. the lookahead symbol.
bool ParseItem::operator<(const ParseItem &other) const {
    if (lhs < other.lhs) return true;
    if (other.lhs < lhs) return false;

    // Render each rule once and reuse the result for both directions of the
    // comparison, instead of calling to_string() again for the second check.
    const auto &own_text = rule->to_string();
    const auto &other_text = other.rule->to_string();
    if (own_text < other_text) return true;
    if (own_text > other_text) return false;

    if (consumed_symbols < other.consumed_symbols) return true;
    if (consumed_symbols > other.consumed_symbols) return false;

    return lookahead_sym < other.lookahead_sym;
}
LexItem::LexItem(const Symbol &lhs, const rule_ptr rule) : Item(lhs, rule) {}
bool LexItem::operator==(const LexItem &other) const {

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_BUILD_TABLES_ITEM_H_
#define COMPILER_BUILD_TABLES_ITEM_H_
#include <set>
#include <unordered_set>
#include <string>
#include <vector>
#include "compiler/rules/symbol.h"
@ -22,7 +22,6 @@ namespace tree_sitter {
class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule);
bool operator<(const LexItem &other) const;
bool operator==(const LexItem &other) const;
};
@ -32,15 +31,14 @@ namespace tree_sitter {
const rules::rule_ptr rule,
const std::vector<bool> &consumed_symbols,
const rules::Symbol &lookahead_sym);
bool operator<(const ParseItem &other) const;
bool operator==(const ParseItem &other) const;
const std::vector<bool> consumed_symbols;
const rules::Symbol lookahead_sym;
};
typedef std::set<ParseItem> ParseItemSet;
typedef std::set<LexItem> LexItemSet;
typedef std::unordered_set<ParseItem> ParseItemSet;
typedef std::unordered_set<LexItem> LexItemSet;
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
@ -69,8 +67,8 @@ namespace std {
};
template<typename T>
struct hash<const set<T>> {
size_t operator()(const set<T> &set) const {
struct hash<const unordered_set<T>> {
size_t operator()(const unordered_set<T> &set) const {
size_t result = hash<size_t>()(set.size());
for (auto item : set)
result ^= hash<T>()(item);

View file

@ -1,12 +1,12 @@
#include "compiler/build_tables/item_set_transitions.h"
#include <set>
#include <unordered_set>
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/merge_transitions.h"
namespace tree_sitter {
using std::map;
using std::set;
using std::unordered_set;
using rules::CharacterSet;
using rules::Symbol;
@ -35,8 +35,8 @@ namespace tree_sitter {
}
template<typename T>
static set<T> merge_sets(const set<T> &left, const set<T> &right) {
set<T> result = left;
static unordered_set<T> merge_sets(const unordered_set<T> &left, const unordered_set<T> &right) {
unordered_set<T> result = left;
result.insert(right.begin(), right.end());
return result;
}