From 42d37656ea55877dce0f3e877f8251366441fcec Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Nov 2016 17:51:52 -0800 Subject: [PATCH 1/3] Optimize remove_duplicate_parse_states method Signed-off-by: Nathan Sobo --- .../build_tables/build_parse_table.cc | 87 ++++++++++++++++++- src/compiler/parse_table.cc | 22 +++++ src/compiler/parse_table.h | 38 ++++++++ src/compiler/util/hash_combine.h | 18 ++++ 4 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 src/compiler/util/hash_combine.h diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index a8d38973..805ab9ef 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -172,9 +172,13 @@ class ParseTableBuilder { new_action->state_index = add_parse_state(next_item_set); } } else { - parse_table.set_nonterminal_action(state_id, symbol.index, add_parse_state(next_item_set)); + ParseStateId next_state = add_parse_state(next_item_set); + parse_table.set_nonterminal_action(state_id, symbol.index, next_state); } } + + ParseState &state = parse_table.states[state_id]; + state.compute_shift_actions_signature(); } void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { @@ -250,7 +254,86 @@ class ParseTableBuilder { } void remove_duplicate_parse_states() { - remove_duplicate_states(&parse_table); + map> state_indices_by_signature; + + for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) { + ParseState &state = parse_table.states[i]; + state_indices_by_signature[state.shift_actions_signature].insert(i); + } + + set deleted_states; + + while (true) { + std::map state_replacements; + + for (auto &pair : state_indices_by_signature) { + auto &state_group = pair.second; + + for (ParseStateId i : state_group) { + for (ParseStateId j : state_group) { + if (j == i) break; + if (!state_replacements.count(j) && parse_table.merge_state(j, i)) { + state_replacements.insert({ i, j }); + deleted_states.insert(i); + break; + } + } + } + } + + if (state_replacements.empty()) break; + + for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) { + ParseState &state = parse_table.states[i]; + bool did_update_state = false; + + if (state_replacements.count(i)) { + auto &old_group = state_indices_by_signature[state.shift_actions_signature]; + old_group.erase(i); + } else { + state.each_referenced_state([&state_replacements, &did_update_state](int64_t *state_index) { + auto new_replacement = state_replacements.find(*state_index); + if (new_replacement != state_replacements.end()) { + *state_index = new_replacement->second; + did_update_state = true; + } + }); + + if (did_update_state) { + auto &old_group = state_indices_by_signature[state.shift_actions_signature]; + old_group.erase(i); + state.compute_shift_actions_signature(); + state_indices_by_signature[state.shift_actions_signature].insert(i); + } + } + } + } + + vector new_state_ids(parse_table.states.size()); + size_t deleted_state_count = 0; + auto deleted_state_iter = deleted_states.begin(); + for (size_t i = 0; i < new_state_ids.size(); i++) { + while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) { + deleted_state_count++; + deleted_state_iter++; + } + new_state_ids[i] = i - deleted_state_count; + } + + ParseStateId original_state_index = 0; + auto iter = parse_table.states.begin(); + while (iter != parse_table.states.end()) { + if (deleted_states.count(original_state_index)) { + iter = parse_table.states.erase(iter); + } else { + ParseState &state = *iter; + state.each_referenced_state([&new_state_ids](int64_t *state_index) { + *state_index = new_state_ids[*state_index]; + }); + ++iter; + } + original_state_index++; + } } ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead, diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 944036a6..ffb64f43 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -1,9 +1,11 @@ #include "compiler/parse_table.h" #include #include "compiler/precedence_range.h" +#include "compiler/util/hash_combine.h" namespace tree_sitter { +using std::hash; using std::string; using std::ostream; using std::to_string; @@ -11,6 +13,7 @@ using std::set; using std::vector; using std::function; using rules::Symbol; +using util::hash_combine; ParseAction::ParseAction(ParseActionType type, ParseStateId state_index, Symbol symbol, size_t consumed_symbol_count, @@ -150,6 +153,25 @@ void ParseState::each_referenced_state(function fn) { fn(&entry.second); } +void ParseState::compute_shift_actions_signature() { + shift_actions_signature = 0; + for (const auto &pair : nonterminal_entries) { + rules::Symbol::Index lookahead = pair.first; + ParseStateId next_state = pair.second; + hash_combine(&shift_actions_signature, lookahead); + hash_combine(&shift_actions_signature, next_state); + } + + for (const auto &pair : terminal_entries) { + rules::Symbol::Index lookahead = pair.first; + const ParseTableEntry &entry = pair.second; + if (entry.actions.back().type == ParseActionTypeShift) { + hash_combine(&shift_actions_signature, lookahead); + hash_combine(&shift_actions_signature, entry); + } + } +} + bool ParseState::operator==(const ParseState &other) const { return terminal_entries == other.terminal_entries && nonterminal_entries == other.nonterminal_entries; diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 5f660ecd..473eec42 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -5,6 +5,7 @@ #include #include #include +#include "compiler/util/hash_combine.h" #include "compiler/lex_table.h" #include "compiler/rules/symbol.h" #include "compiler/rules/metadata.h" @@ -47,6 +48,7 @@ class ParseAction { rules::Symbol symbol; ParseStateId state_index; size_t consumed_symbol_count; + PrecedenceRange precedence_range; rules::Associativity associativity; const Production *production; @@ -74,10 +76,12 @@ class ParseState { bool merge(const ParseState &); void each_referenced_state(std::function); bool has_shift_action() const; + void compute_shift_actions_signature(); std::map terminal_entries; std::map nonterminal_entries; LexStateId lex_state_id; + size_t shift_actions_signature; }; struct ParseTableSymbolMetadata { @@ -102,4 +106,38 @@ class ParseTable { } // namespace tree_sitter +namespace std { + +using tree_sitter::util::hash_combine; + +template <> +struct hash { + size_t operator()(const tree_sitter::ParseAction &action) const { + size_t result = 0; + hash_combine(&result, action.type); + hash_combine(&result, action.extra); + hash_combine(&result, action.fragile); + hash_combine(&result, action.symbol); + hash_combine(&result, action.state_index); + hash_combine(&result, action.consumed_symbol_count); + return result; + } +}; + +template <> +struct hash { + size_t operator()(const tree_sitter::ParseTableEntry &entry) const { + size_t result = 0; + hash_combine(&result, entry.actions.size()); + for (const tree_sitter::ParseAction &action : entry.actions) { + hash_combine(&result, action); + } + hash_combine(&result, entry.reusable); + hash_combine(&result, entry.depends_on_lookahead); + return result; + } +}; + +} + #endif // COMPILER_PARSE_TABLE_H_ diff --git a/src/compiler/util/hash_combine.h b/src/compiler/util/hash_combine.h new file mode 100644 index 00000000..f8272277 --- /dev/null +++ b/src/compiler/util/hash_combine.h @@ -0,0 +1,18 @@ +#ifndef COMPILER_UTIL_HASH_COMBINE_H_ +#define COMPILER_UTIL_HASH_COMBINE_H_ + +#include + +namespace tree_sitter { +namespace util { + +template +inline void hash_combine(std::size_t *seed, const T &new_value) { + std::hash hasher; + *seed ^= hasher(new_value) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); +} + +} // namespace util +} // namespace tree_sitter + +#endif // COMPILER_UTIL_HASH_COMBINE_H_ From 6cfd009503eead98b1ce4e3721a2ca6a5f28b720 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Nov 2016 10:21:30 -0800 Subject: [PATCH 2/3] Compute parse state group signature based on the item set --- .../build_tables/build_parse_table.cc | 25 ++++--------- src/compiler/build_tables/parse_item.cc | 21 +++++++++++ src/compiler/build_tables/parse_item.h | 2 ++ src/compiler/parse_table.cc | 21 ----------- src/compiler/parse_table.h | 36 ------------------- 5 files changed, 29 insertions(+), 76 deletions(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 805ab9ef..ed08d727 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -146,8 +146,8 @@ class ParseTableBuilder { auto pair = parse_state_ids.find(item_set); if (pair == parse_state_ids.end()) { ParseStateId state_id = parse_table.add_state(); - parse_state_ids[item_set] = state_id; + parse_table.states[state_id].shift_actions_signature = item_set.unfinished_item_signature(); item_sets_to_process.push_back({ std::move(item_set), state_id }); return state_id; } else { @@ -176,9 +176,6 @@ class ParseTableBuilder { parse_table.set_nonterminal_action(state_id, symbol.index, next_state); } } - - ParseState &state = parse_table.states[state_id]; - state.compute_shift_actions_signature(); } void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { @@ -285,26 +282,16 @@ class ParseTableBuilder { for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) { ParseState &state = parse_table.states[i]; - bool did_update_state = false; if (state_replacements.count(i)) { - auto &old_group = state_indices_by_signature[state.shift_actions_signature]; - old_group.erase(i); + state_indices_by_signature[state.shift_actions_signature].erase(i); } else { - state.each_referenced_state([&state_replacements, &did_update_state](int64_t *state_index) { - auto new_replacement = state_replacements.find(*state_index); - if (new_replacement != state_replacements.end()) { - *state_index = new_replacement->second; - did_update_state = true; + state.each_referenced_state([&state_replacements](int64_t *state_index) { + auto replacement = state_replacements.find(*state_index); + if (replacement != state_replacements.end()) { + *state_index = replacement->second; } }); - - if (did_update_state) { - auto &old_group = state_indices_by_signature[state.shift_actions_signature]; - old_group.erase(i); - state.compute_shift_actions_signature(); - state_indices_by_signature[state.shift_actions_signature].insert(i); - } } } } diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 5054e578..c71ddbdf 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -2,6 +2,7 @@ #include #include "compiler/syntax_grammar.h" #include "compiler/rules/built_in_symbols.h" +#include "compiler/util/hash_combine.h" namespace tree_sitter { namespace build_tables { @@ -12,6 +13,9 @@ using std::string; using std::to_string; using std::hash; using rules::Symbol; +using util::hash_combine; + +ParseItem::ParseItem() : variable_index(-1), production(nullptr), step_index(0) {} ParseItem::ParseItem(const Symbol &lhs, const Production &production, unsigned int step_index) @@ -108,6 +112,23 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const { return result; } +size_t ParseItemSet::unfinished_item_signature() const { + size_t result = 0; + ParseItem previous_item; + for (auto &pair : entries) { + const ParseItem &item = pair.first; + if (item.step_index < item.production->size()) { + if (item.variable_index != previous_item.variable_index && + item.step_index != previous_item.step_index) { + hash_combine(&result, item.variable_index); + hash_combine(&result, item.step_index); + previous_item = item; + } + } + } + return result; +} + ParseItemSet::TransitionMap ParseItemSet::transitions() const { ParseItemSet::TransitionMap result; for (const auto &pair : entries) { diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index 404b67c3..90260260 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -14,6 +14,7 @@ namespace build_tables { class ParseItem { public: + ParseItem(); ParseItem(const rules::Symbol &, const Production &, unsigned int); struct CompletionStatus { @@ -54,6 +55,7 @@ class ParseItemSet { TransitionMap transitions() const; bool operator==(const ParseItemSet &) const; void add(const ParseItemSet &); + size_t unfinished_item_signature() const; std::map entries; }; diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index ffb64f43..a70b7a91 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -1,7 +1,6 @@ #include "compiler/parse_table.h" #include #include "compiler/precedence_range.h" -#include "compiler/util/hash_combine.h" namespace tree_sitter { @@ -13,7 +12,6 @@ using std::set; using std::vector; using std::function; using rules::Symbol; -using util::hash_combine; ParseAction::ParseAction(ParseActionType type, ParseStateId state_index, Symbol symbol, size_t consumed_symbol_count, @@ -153,25 +151,6 @@ void ParseState::each_referenced_state(function fn) { fn(&entry.second); } -void ParseState::compute_shift_actions_signature() { - shift_actions_signature = 0; - for (const auto &pair : nonterminal_entries) { - rules::Symbol::Index lookahead = pair.first; - ParseStateId next_state = pair.second; - hash_combine(&shift_actions_signature, lookahead); - hash_combine(&shift_actions_signature, next_state); - } - - for (const auto &pair : terminal_entries) { - rules::Symbol::Index lookahead = pair.first; - const ParseTableEntry &entry = pair.second; - if (entry.actions.back().type == ParseActionTypeShift) { - hash_combine(&shift_actions_signature, lookahead); - hash_combine(&shift_actions_signature, entry); - } - } -} - bool ParseState::operator==(const ParseState &other) const { return terminal_entries == other.terminal_entries && nonterminal_entries == other.nonterminal_entries; diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 473eec42..81142f75 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -5,7 +5,6 @@ #include #include #include -#include "compiler/util/hash_combine.h" #include "compiler/lex_table.h" #include "compiler/rules/symbol.h" #include "compiler/rules/metadata.h" @@ -76,7 +75,6 @@ class ParseState { bool merge(const ParseState &); void each_referenced_state(std::function); bool has_shift_action() const; - void compute_shift_actions_signature(); std::map terminal_entries; std::map nonterminal_entries; @@ -106,38 +104,4 @@ class ParseTable { } // namespace tree_sitter -namespace std { - -using tree_sitter::util::hash_combine; - -template <> -struct hash { - size_t operator()(const tree_sitter::ParseAction &action) const { - size_t result = 0; - hash_combine(&result, action.type); - hash_combine(&result, action.extra); - hash_combine(&result, action.fragile); - hash_combine(&result, action.symbol); - hash_combine(&result, action.state_index); - hash_combine(&result, action.consumed_symbol_count); - return result; - } -}; - -template <> -struct hash { - size_t operator()(const tree_sitter::ParseTableEntry &entry) const { - size_t result = 0; - hash_combine(&result, entry.actions.size()); - for (const tree_sitter::ParseAction &action : entry.actions) { - hash_combine(&result, action); - } - hash_combine(&result, entry.reusable); - hash_combine(&result, entry.depends_on_lookahead); - return result; - } -}; - -} - #endif // COMPILER_PARSE_TABLE_H_ From 6935f1d26f3057c3a0319f67011ca7e3a47ba6bc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Nov 2016 11:46:22 -0800 Subject: [PATCH 3/3] Use hash_combine everywhere --- src/compiler/build_tables/build_lex_table.cc | 2 +- .../build_tables/build_parse_table.cc | 2 +- src/compiler/build_tables/lex_item.cc | 39 ++++++++----- src/compiler/build_tables/lex_item.h | 36 ++++++++---- src/compiler/build_tables/parse_item.cc | 56 +++++++++++-------- src/compiler/build_tables/parse_item.h | 19 ++++--- src/compiler/parse_table.cc | 1 - src/compiler/rules/character_set.cc | 19 ++++--- src/compiler/rules/choice.cc | 7 ++- src/compiler/rules/metadata.cc | 10 ++-- src/compiler/rules/symbol.cc | 8 ++- src/compiler/util/hash_combine.h | 6 ++ 12 files changed, 132 insertions(+), 73 deletions(-) diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 94100349..d55e1c94 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -44,7 +44,7 @@ class LexTableBuilder { const LexicalGrammar lex_grammar; vector separator_rules; LexConflictManager conflict_manager; - unordered_map lex_state_ids; + unordered_map lex_state_ids; public: LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index ed08d727..17fbdea6 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -36,7 +36,7 @@ class ParseTableBuilder { const LexicalGrammar lexical_grammar; ParseConflictManager conflict_manager; unordered_map recovery_states; - unordered_map parse_state_ids; + unordered_map parse_state_ids; vector> item_sets_to_process; ParseTable parse_table; set conflicts; diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index 72c3b44d..5272f3bd 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -8,11 +8,11 @@ #include "compiler/rules/symbol.h" #include "compiler/rules/repeat.h" #include "compiler/rules/visitor.h" +#include "compiler/util/hash_combine.h" namespace tree_sitter { namespace build_tables { -using std::hash; using std::map; using std::string; using std::unordered_set; @@ -69,20 +69,9 @@ LexItem::CompletionStatus LexItem::completion_status() const { return GetCompletionStatus().apply(rule); } -size_t LexItem::Hash::operator()(const LexItem &item) const { - return hash()(item.lhs) ^ hash()(item.rule); -} - -size_t LexItemSet::Hash::operator()(const LexItemSet &item_set) const { - size_t result = hash()(item_set.entries.size()); - for (const auto &item : item_set.entries) - result ^= LexItem::Hash()(item); - return result; -} - LexItemSet::LexItemSet() {} -LexItemSet::LexItemSet(const unordered_set &entries) +LexItemSet::LexItemSet(const unordered_set &entries) : entries(entries) {} bool LexItemSet::operator==(const LexItemSet &other) const { @@ -103,3 +92,27 @@ bool LexItemSet::Transition::operator==(const LexItemSet::Transition &other) con } // namespace build_tables } // namespace tree_sitter + +namespace std { + +using tree_sitter::util::hash_combine; +using tree_sitter::util::symmetric_hash_combine; +using tree_sitter::build_tables::LexItem; +using tree_sitter::build_tables::LexItemSet; + +size_t hash::operator()(const LexItem &item) const { + size_t result = 0; + hash_combine(&result, item.lhs.index); + hash_combine(&result, item.rule); + return result; +} + +size_t hash::operator()(const LexItemSet &item_set) const { + size_t result = 0; + hash_combine(&result, item_set.entries.size()); + for (const auto &item : item_set.entries) + symmetric_hash_combine(&result, item); + return result; +} + +} // namespace std diff --git a/src/compiler/build_tables/lex_item.h b/src/compiler/build_tables/lex_item.h index 80cd1e6b..4c45f80d 100644 --- a/src/compiler/build_tables/lex_item.h +++ b/src/compiler/build_tables/lex_item.h @@ -22,10 +22,6 @@ class LexItem { bool is_string; }; - struct Hash { - size_t operator()(const LexItem &) const; - }; - bool operator==(const LexItem &other) const; CompletionStatus completion_status() const; @@ -33,23 +29,34 @@ class LexItem { rule_ptr rule; }; +} // namespace build_tables +} // namespace tree_sitter + +namespace std { + +template <> +struct hash { + size_t operator()(const tree_sitter::build_tables::LexItem &) const; +}; + +} // namespace std + +namespace tree_sitter { +namespace build_tables { + class LexItemSet { public: LexItemSet(); - explicit LexItemSet(const std::unordered_set &); + explicit LexItemSet(const std::unordered_set &); bool operator==(const LexItemSet &) const; - struct Hash { - size_t operator()(const LexItemSet &) const; - }; - struct Transition; typedef std::map TransitionMap; TransitionMap transitions() const; - std::unordered_set entries; + std::unordered_set entries; }; struct LexItemSet::Transition { @@ -63,4 +70,13 @@ struct LexItemSet::Transition { } // namespace build_tables } // namespace tree_sitter +namespace std { + +template <> +struct hash { + size_t operator()(const tree_sitter::build_tables::LexItemSet &) const; +}; + +} // namespace std + #endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_ diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index c71ddbdf..d691998f 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -11,7 +11,6 @@ using std::map; using std::pair; using std::string; using std::to_string; -using std::hash; using rules::Symbol; using util::hash_combine; @@ -82,13 +81,6 @@ rules::Associativity ParseItem::associativity() const { return production->at(step_index).associativity; } -size_t ParseItem::Hash::operator()(const ParseItem &item) const { - size_t result = hash()(item.variable_index); - result ^= hash()(item.step_index); - result ^= hash()(static_cast(item.production)); - return result; -} - ParseItemSet::ParseItemSet() {} ParseItemSet::ParseItemSet(const map &entries) @@ -98,20 +90,6 @@ bool ParseItemSet::operator==(const ParseItemSet &other) const { return entries == other.entries; } -size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const { - size_t result = hash()(item_set.entries.size()); - for (auto &pair : item_set.entries) { - const ParseItem &item = pair.first; - result ^= ParseItem::Hash()(item); - - const LookaheadSet &lookahead_set = pair.second; - result ^= hash()(lookahead_set.entries->size()); - for (Symbol::Index index : *pair.second.entries) - result ^= hash()(index); - } - return result; -} - size_t ParseItemSet::unfinished_item_signature() const { size_t result = 0; ParseItem previous_item; @@ -156,3 +134,37 @@ void ParseItemSet::add(const ParseItemSet &other) { } // namespace build_tables } // namespace tree_sitter + +namespace std { + +using tree_sitter::build_tables::ParseItem; +using tree_sitter::build_tables::ParseItemSet; +using tree_sitter::util::hash_combine; + +template <> +struct hash { + size_t operator()(const ParseItem &item) const { + size_t result = 0; + hash_combine(&result, item.variable_index); + hash_combine(&result, item.step_index); + hash_combine(&result, item.production); + return result; + } +}; + +size_t hash::operator()(const ParseItemSet &item_set) const { + size_t result = 0; + hash_combine(&result, item_set.entries.size()); + for (auto &pair : item_set.entries) { + const ParseItem &item = pair.first; + const auto &lookahead_set = pair.second; + + hash_combine(&result, item); + hash_combine(&result, lookahead_set.entries->size()); + for (auto index : *pair.second.entries) + hash_combine(&result, index); + } + return result; +} + +} // namespace std diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index 90260260..b8a78480 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -23,10 +23,6 @@ class ParseItem { rules::Associativity associativity; }; - struct Hash { - size_t operator()(const ParseItem &) const; - }; - bool operator==(const ParseItem &other) const; bool operator<(const ParseItem &other) const; rules::Symbol lhs() const; @@ -48,10 +44,6 @@ class ParseItemSet { typedef std::map> TransitionMap; - struct Hash { - size_t operator()(const ParseItemSet &) const; - }; - TransitionMap transitions() const; bool operator==(const ParseItemSet &) const; void add(const ParseItemSet &); @@ -63,4 +55,15 @@ class ParseItemSet { } // namespace build_tables } // namespace tree_sitter +namespace std { + +using tree_sitter::build_tables::ParseItemSet; + +template <> +struct hash { + size_t operator()(const ParseItemSet &item_set) const; +}; + +} // namespace std + #endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_ diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index a70b7a91..944036a6 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -4,7 +4,6 @@ namespace tree_sitter { -using std::hash; using std::string; using std::ostream; using std::to_string; diff --git a/src/compiler/rules/character_set.cc b/src/compiler/rules/character_set.cc index 9c273575..cff3ab58 100644 --- a/src/compiler/rules/character_set.cc +++ b/src/compiler/rules/character_set.cc @@ -3,14 +3,15 @@ #include #include #include "compiler/rules/visitor.h" +#include "compiler/util/hash_combine.h" namespace tree_sitter { namespace rules { using std::string; -using std::hash; using std::set; using std::vector; +using util::hash_combine; static void add_range(set *characters, uint32_t min, uint32_t max) { for (uint32_t c = min; c <= max; c++) @@ -83,14 +84,14 @@ bool CharacterSet::operator<(const CharacterSet &other) const { } size_t CharacterSet::hash_code() const { - size_t result = hash()(includes_all); - result ^= hash()(included_chars.size()); - for (auto &c : included_chars) - result ^= hash()(c); - result <<= 1; - result ^= hash()(excluded_chars.size()); - for (auto &c : excluded_chars) - result ^= hash()(c); + size_t result = 0; + hash_combine(&result, includes_all); + hash_combine(&result, included_chars.size()); + for (uint32_t c : included_chars) + hash_combine(&result, c); + hash_combine(&result, excluded_chars.size()); + for (uint32_t c : excluded_chars) + hash_combine(&result, c); return result; } diff --git a/src/compiler/rules/choice.cc b/src/compiler/rules/choice.cc index 975adbef..c793aed1 100644 --- a/src/compiler/rules/choice.cc +++ b/src/compiler/rules/choice.cc @@ -2,6 +2,7 @@ #include #include #include "compiler/rules/visitor.h" +#include "compiler/util/hash_combine.h" namespace tree_sitter { namespace rules { @@ -10,6 +11,7 @@ using std::string; using std::make_shared; using std::vector; using std::set; +using util::symmetric_hash_combine; Choice::Choice(const vector &elements) : elements(elements) {} @@ -50,9 +52,10 @@ bool Choice::operator==(const Rule &rule) const { } size_t Choice::hash_code() const { - size_t result = std::hash()(elements.size()); + size_t result = 0; + symmetric_hash_combine(&result, elements.size()); for (const auto &element : elements) - result ^= element->hash_code(); + symmetric_hash_combine(&result, element); return result; } diff --git a/src/compiler/rules/metadata.cc b/src/compiler/rules/metadata.cc index a2512861..97fd0fed 100644 --- a/src/compiler/rules/metadata.cc +++ b/src/compiler/rules/metadata.cc @@ -3,14 +3,15 @@ #include #include "compiler/rules/visitor.h" #include "compiler/rules/blank.h" +#include "compiler/util/hash_combine.h" namespace tree_sitter { namespace rules { -using std::hash; using std::make_shared; using std::map; using std::pair; +using util::hash_combine; Metadata::Metadata(rule_ptr rule, map values) : rule(rule), value(values) {} @@ -25,10 +26,11 @@ bool Metadata::operator==(const Rule &rule) const { } size_t Metadata::hash_code() const { - size_t result = hash()(value.size()); + size_t result = 0; + hash_combine(&result, value.size()); for (auto &pair : value) { - result ^= hash()(pair.first); - result ^= hash()(pair.second); + hash_combine(&result, pair.first); + hash_combine(&result, pair.second); } return result; } diff --git a/src/compiler/rules/symbol.cc b/src/compiler/rules/symbol.cc index 96c4bd60..f85b09c7 100644 --- a/src/compiler/rules/symbol.cc +++ b/src/compiler/rules/symbol.cc @@ -2,13 +2,14 @@ #include #include #include "compiler/rules/visitor.h" +#include "compiler/util/hash_combine.h" namespace tree_sitter { namespace rules { using std::string; using std::to_string; -using std::hash; +using util::hash_combine; Symbol::Symbol(Symbol::Index index) : index(index), is_token(false) {} @@ -24,7 +25,10 @@ bool Symbol::operator==(const Rule &rule) const { } size_t Symbol::hash_code() const { - return hash()(index) ^ hash()(is_token); + size_t result = 0; + hash_combine(&result, index); + hash_combine(&result, is_token); + return result; } rule_ptr Symbol::copy() const { diff --git a/src/compiler/util/hash_combine.h b/src/compiler/util/hash_combine.h index f8272277..9cc3ad17 100644 --- a/src/compiler/util/hash_combine.h +++ b/src/compiler/util/hash_combine.h @@ -12,6 +12,12 @@ inline void hash_combine(std::size_t *seed, const T &new_value) { *seed ^= hasher(new_value) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); } +template +inline void symmetric_hash_combine(std::size_t *seed, const T &new_value) { + std::hash hasher; + *seed ^= hasher(new_value); +} + } // namespace util } // namespace tree_sitter