From 42d37656ea55877dce0f3e877f8251366441fcec Mon Sep 17 00:00:00 2001
From: Max Brunsfeld
Date: Tue, 15 Nov 2016 17:51:52 -0800
Subject: [PATCH] Optimize remove_duplicate_parse_states method

Signed-off-by: Nathan Sobo
---
Review notes (git am drops everything between the `---` line above and the
first `diff --git` line, so this section is not part of the commit message):

- Line breaks and indentation were rebuilt from a whitespace-collapsed copy
  of this patch; every hunk was re-checked against its `@@` line counts.
- On added lines, template arguments and the <functional> include had been
  lost in that copy. They are restored from how the values are used in the
  surrounding code. Context lines, removed lines and hunk headers are left
  exactly as found, including bare `#include` directives whose header names
  could not be recovered, so they may need fixing before the patch applies.
- The one added line in the ParseAction hunk is treated as a blank line; the
  hunk's counts leave room for six context lines.
- compute_shift_actions_signature() now skips entries with no actions
  instead of calling back() on an empty vector.
- The hash_combine using-declaration was moved out of namespace std (which
  may only receive specializations) into the two operator() bodies.
- The post-image blob ids on the `index` lines predate the two code changes
  above.

 .../build_tables/build_parse_table.cc         | 87 ++++++++++++++++++-
 src/compiler/parse_table.cc                   | 22 +++++
 src/compiler/parse_table.h                    | 38 ++++++++
 src/compiler/util/hash_combine.h              | 18 ++++
 4 files changed, 163 insertions(+), 2 deletions(-)
 create mode 100644 src/compiler/util/hash_combine.h

diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc
index a8d38973..805ab9ef 100644
--- a/src/compiler/build_tables/build_parse_table.cc
+++ b/src/compiler/build_tables/build_parse_table.cc
@@ -172,9 +172,13 @@ class ParseTableBuilder {
             new_action->state_index = add_parse_state(next_item_set);
           }
         } else {
-          parse_table.set_nonterminal_action(state_id, symbol.index, add_parse_state(next_item_set));
+          ParseStateId next_state = add_parse_state(next_item_set);
+          parse_table.set_nonterminal_action(state_id, symbol.index, next_state);
         }
       }
+
+    ParseState &state = parse_table.states[state_id];
+    state.compute_shift_actions_signature();  // bucket key used by remove_duplicate_parse_states
   }
 
   void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
@@ -250,7 +254,86 @@ class ParseTableBuilder {
   }
 
   void remove_duplicate_parse_states() {
-    remove_duplicate_states(&parse_table);
+    map<size_t, set<ParseStateId>> state_indices_by_signature;  // signature -> state ids
+
+    for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
+      ParseState &state = parse_table.states[i];
+      state_indices_by_signature[state.shift_actions_signature].insert(i);
+    }
+
+    set<ParseStateId> deleted_states;  // merged-away ids; erased after the loop below
+
+    while (true) {  // repeat until a full round merges nothing
+      std::map<ParseStateId, ParseStateId> state_replacements;  // merged id -> surviving id
+
+      for (auto &pair : state_indices_by_signature) {
+        auto &state_group = pair.second;
+
+        for (ParseStateId i : state_group) {
+          for (ParseStateId j : state_group) {
+            if (j == i) break;  // ordered set: only lower-numbered candidates are tried
+            if (!state_replacements.count(j) && parse_table.merge_state(j, i)) {
+              state_replacements.insert({ i, j });
+              deleted_states.insert(i);
+              break;
+            }
+          }
+        }
+      }
+
+      if (state_replacements.empty()) break;
+
+      for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
+        ParseState &state = parse_table.states[i];
+        bool did_update_state = false;
+
+        if (state_replacements.count(i)) {
+          auto &old_group = state_indices_by_signature[state.shift_actions_signature];
+          old_group.erase(i);
+        } else {
+          state.each_referenced_state([&state_replacements, &did_update_state](int64_t *state_index) {
+            auto new_replacement = state_replacements.find(*state_index);
+            if (new_replacement != state_replacements.end()) {
+              *state_index = new_replacement->second;
+              did_update_state = true;
+            }
+          });
+
+          if (did_update_state) {  // references changed, so re-bucket under the new signature
+            auto &old_group = state_indices_by_signature[state.shift_actions_signature];
+            old_group.erase(i);
+            state.compute_shift_actions_signature();
+            state_indices_by_signature[state.shift_actions_signature].insert(i);
+          }
+        }
+      }
+    }
+
+    vector<ParseStateId> new_state_ids(parse_table.states.size());  // old id -> compacted id
+    size_t deleted_state_count = 0;
+    auto deleted_state_iter = deleted_states.begin();
+    for (size_t i = 0; i < new_state_ids.size(); i++) {
+      while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) {
+        deleted_state_count++;
+        deleted_state_iter++;
+      }
+      new_state_ids[i] = i - deleted_state_count;
+    }
+
+    ParseStateId original_state_index = 0;
+    auto iter = parse_table.states.begin();
+    while (iter != parse_table.states.end()) {
+      if (deleted_states.count(original_state_index)) {
+        iter = parse_table.states.erase(iter);
+      } else {
+        ParseState &state = *iter;
+        state.each_referenced_state([&new_state_ids](int64_t *state_index) {
+          *state_index = new_state_ids[*state_index];
+        });
+        ++iter;
+      }
+      original_state_index++;
+    }
   }
 
   ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead,
diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc
index 944036a6..ffb64f43 100644
--- a/src/compiler/parse_table.cc
+++ b/src/compiler/parse_table.cc
@@ -1,9 +1,11 @@
 #include "compiler/parse_table.h"
 #include
 #include "compiler/precedence_range.h"
+#include "compiler/util/hash_combine.h"
 
 namespace tree_sitter {
 
+using std::hash;
 using std::string;
 using std::ostream;
 using std::to_string;
@@ -11,6 +13,7 @@ using std::set;
 using std::vector;
 using std::function;
 using rules::Symbol;
+using util::hash_combine;
 
 ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
                          Symbol symbol, size_t consumed_symbol_count,
@@ -150,6 +153,25 @@ void ParseState::each_referenced_state(function fn) {
     fn(&entry.second);
 }
 
+void ParseState::compute_shift_actions_signature() {
+  shift_actions_signature = 0;
+  for (const auto &pair : nonterminal_entries) {
+    rules::Symbol::Index lookahead = pair.first;
+    ParseStateId next_state = pair.second;
+    hash_combine(&shift_actions_signature, lookahead);
+    hash_combine(&shift_actions_signature, next_state);
+  }
+
+  for (const auto &pair : terminal_entries) {
+    rules::Symbol::Index lookahead = pair.first;
+    const ParseTableEntry &entry = pair.second;
+    if (!entry.actions.empty() && entry.actions.back().type == ParseActionTypeShift) {
+      hash_combine(&shift_actions_signature, lookahead);
+      hash_combine(&shift_actions_signature, entry);
+    }
+  }
+}
+
 bool ParseState::operator==(const ParseState &other) const {
   return terminal_entries == other.terminal_entries &&
          nonterminal_entries == other.nonterminal_entries;
diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h
index 5f660ecd..473eec42 100644
--- a/src/compiler/parse_table.h
+++ b/src/compiler/parse_table.h
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include "compiler/util/hash_combine.h"
 #include "compiler/lex_table.h"
 #include "compiler/rules/symbol.h"
 #include "compiler/rules/metadata.h"
@@ -47,6 +48,7 @@ class ParseAction {
   rules::Symbol symbol;
   ParseStateId state_index;
   size_t consumed_symbol_count;
+
   PrecedenceRange precedence_range;
   rules::Associativity associativity;
   const Production *production;
@@ -74,10 +76,12 @@ class ParseState {
   bool merge(const ParseState &);
   void each_referenced_state(std::function);
   bool has_shift_action() const;
+  void compute_shift_actions_signature();
 
   std::map terminal_entries;
   std::map nonterminal_entries;
   LexStateId lex_state_id;
+  size_t shift_actions_signature;  // set by compute_shift_actions_signature()
 };
 
 struct ParseTableSymbolMetadata {
@@ -102,4 +106,38 @@ class ParseTable {
 
 }  // namespace tree_sitter
 
+namespace std {
+
+template <>
+struct hash<tree_sitter::ParseAction> {
+  size_t operator()(const tree_sitter::ParseAction &action) const {
+    using tree_sitter::util::hash_combine;
+    size_t result = 0;
+    hash_combine(&result, action.type);
+    hash_combine(&result, action.extra);
+    hash_combine(&result, action.fragile);
+    hash_combine(&result, action.symbol);
+    hash_combine(&result, action.state_index);
+    hash_combine(&result, action.consumed_symbol_count);
+    return result;
+  }
+};
+
+template <>
+struct hash<tree_sitter::ParseTableEntry> {
+  size_t operator()(const tree_sitter::ParseTableEntry &entry) const {
+    using tree_sitter::util::hash_combine;
+    size_t result = 0;
+    hash_combine(&result, entry.actions.size());
+    for (const tree_sitter::ParseAction &action : entry.actions) {
+      hash_combine(&result, action);
+    }
+    hash_combine(&result, entry.reusable);
+    hash_combine(&result, entry.depends_on_lookahead);
+    return result;
+  }
+};
+
+}
+
 #endif  // COMPILER_PARSE_TABLE_H_
diff --git a/src/compiler/util/hash_combine.h b/src/compiler/util/hash_combine.h
new file mode 100644
index 00000000..f8272277
--- /dev/null
+++ b/src/compiler/util/hash_combine.h
@@ -0,0 +1,18 @@
+#ifndef COMPILER_UTIL_HASH_COMBINE_H_
+#define COMPILER_UTIL_HASH_COMBINE_H_
+
+#include <functional>
+
+namespace tree_sitter {
+namespace util {
+
+template <typename T>
+inline void hash_combine(std::size_t *seed, const T &new_value) {
+  std::hash<T> hasher;
+  *seed ^= hasher(new_value) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
+}
+
+}  // namespace util
+}  // namespace tree_sitter
+
+#endif  // COMPILER_UTIL_HASH_COMBINE_H_