Optimize remove_duplicate_parse_states method

Signed-off-by: Nathan Sobo <nathan@github.com>
This commit is contained in:
Max Brunsfeld 2016-11-15 17:51:52 -08:00 committed by Nathan Sobo
parent e7217f1bac
commit 42d37656ea
4 changed files with 163 additions and 2 deletions

View file

@ -172,9 +172,13 @@ class ParseTableBuilder {
new_action->state_index = add_parse_state(next_item_set);
}
} else {
parse_table.set_nonterminal_action(state_id, symbol.index, add_parse_state(next_item_set));
ParseStateId next_state = add_parse_state(next_item_set);
parse_table.set_nonterminal_action(state_id, symbol.index, next_state);
}
}
ParseState &state = parse_table.states[state_id];
state.compute_shift_actions_signature();
}
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
@ -250,7 +254,86 @@ class ParseTableBuilder {
}
// Merges parse states that can be combined and compacts the state table.
//
// States are first bucketed by their `shift_actions_signature`; only states
// that share a bucket are compared against each other. Merging is repeated
// until a full pass produces no new replacement, then the merged-away states
// are erased and every remaining state reference is renumbered.
void remove_duplicate_parse_states() {
// NOTE(review): this generic pass runs before the signature-bucketed pass
// below. It looks like it may be the pre-optimization implementation left in
// place — confirm whether it is still required.
remove_duplicate_states<ParseTable>(&parse_table);
// Bucket every state index under that state's current signature.
map<size_t, set<ParseStateId>> state_indices_by_signature;
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
ParseState &state = parse_table.states[i];
state_indices_by_signature[state.shift_actions_signature].insert(i);
}
// Ids of all states merged into another state, accumulated across passes.
set<ParseStateId> deleted_states;
while (true) {
// Replacements found during this pass only: replaced id -> surviving id.
std::map<ParseStateId, ParseStateId> state_replacements;
for (auto &pair : state_indices_by_signature) {
auto &state_group = pair.second;
for (ParseStateId i : state_group) {
for (ParseStateId j : state_group) {
// The group is an ordered set, so stopping at `i` limits the candidates
// to ids that precede `i` in the group.
if (j == i) break;
// Skip candidates that were themselves replaced in this pass.
// presumably merge_state(j, i) folds state i into state j and returns
// whether that succeeded — confirm against ParseTable::merge_state.
if (!state_replacements.count(j) && parse_table.merge_state(j, i)) {
state_replacements.insert({ i, j });
deleted_states.insert(i);
break;
}
}
}
}
// No merge happened in this pass: nothing further can change.
if (state_replacements.empty()) break;
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
ParseState &state = parse_table.states[i];
bool did_update_state = false;
if (state_replacements.count(i)) {
// A replaced state leaves its bucket so it is not considered again.
auto &old_group = state_indices_by_signature[state.shift_actions_signature];
old_group.erase(i);
} else {
// Redirect any reference to a replaced state to its replacement.
state.each_referenced_state([&state_replacements, &did_update_state](int64_t *state_index) {
auto new_replacement = state_replacements.find(*state_index);
if (new_replacement != state_replacements.end()) {
*state_index = new_replacement->second;
did_update_state = true;
}
});
// The references changed, so move the state from the bucket of its old
// signature to the bucket of its recomputed signature.
if (did_update_state) {
auto &old_group = state_indices_by_signature[state.shift_actions_signature];
old_group.erase(i);
state.compute_shift_actions_signature();
state_indices_by_signature[state.shift_actions_signature].insert(i);
}
}
}
}
// new_state_ids[i] is i minus the number of deleted states with an id below i,
// i.e. the index state i will have once the deleted states are erased.
vector<ParseStateId> new_state_ids(parse_table.states.size());
size_t deleted_state_count = 0;
auto deleted_state_iter = deleted_states.begin();
for (size_t i = 0; i < new_state_ids.size(); i++) {
while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) {
deleted_state_count++;
deleted_state_iter++;
}
new_state_ids[i] = i - deleted_state_count;
}
// `original_state_index` advances on both branches so it keeps tracking the
// pre-erasure index of the element `iter` points at.
ParseStateId original_state_index = 0;
auto iter = parse_table.states.begin();
while (iter != parse_table.states.end()) {
if (deleted_states.count(original_state_index)) {
iter = parse_table.states.erase(iter);
} else {
// Surviving state: renumber every state id it refers to.
ParseState &state = *iter;
state.each_referenced_state([&new_state_ids](int64_t *state_index) {
*state_index = new_state_ids[*state_index];
});
++iter;
}
original_state_index++;
}
}
ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead,

View file

@ -1,9 +1,11 @@
#include "compiler/parse_table.h"
#include <string>
#include "compiler/precedence_range.h"
#include "compiler/util/hash_combine.h"
namespace tree_sitter {
using std::hash;
using std::string;
using std::ostream;
using std::to_string;
@ -11,6 +13,7 @@ using std::set;
using std::vector;
using std::function;
using rules::Symbol;
using util::hash_combine;
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
Symbol symbol, size_t consumed_symbol_count,
@ -150,6 +153,25 @@ void ParseState::each_referenced_state(function<void(ParseStateId *)> fn) {
fn(&entry.second);
}
// Recomputes `shift_actions_signature` from scratch.
//
// The signature is a hash over:
//   - every nonterminal entry (lookahead symbol index and target state), and
//   - every terminal entry whose last action is a shift (lookahead symbol
//     index and the whole entry).
// Terminal entries whose last action is not a shift do not contribute, and
// neither do terminal entries that hold no actions at all.
void ParseState::compute_shift_actions_signature() {
  shift_actions_signature = 0;

  for (const auto &pair : nonterminal_entries) {
    rules::Symbol::Index lookahead = pair.first;
    ParseStateId next_state = pair.second;
    hash_combine(&shift_actions_signature, lookahead);
    hash_combine(&shift_actions_signature, next_state);
  }

  for (const auto &pair : terminal_entries) {
    rules::Symbol::Index lookahead = pair.first;
    const ParseTableEntry &entry = pair.second;

    // Calling back() on an empty container is undefined behavior; an entry
    // without actions has no shift action to contribute.
    if (entry.actions.empty()) continue;

    if (entry.actions.back().type == ParseActionTypeShift) {
      hash_combine(&shift_actions_signature, lookahead);
      hash_combine(&shift_actions_signature, entry);
    }
  }
}
bool ParseState::operator==(const ParseState &other) const {
return terminal_entries == other.terminal_entries &&
nonterminal_entries == other.nonterminal_entries;

View file

@ -5,6 +5,7 @@
#include <set>
#include <utility>
#include <vector>
#include "compiler/util/hash_combine.h"
#include "compiler/lex_table.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
@ -47,6 +48,7 @@ class ParseAction {
rules::Symbol symbol;
ParseStateId state_index;
size_t consumed_symbol_count;
PrecedenceRange precedence_range;
rules::Associativity associativity;
const Production *production;
@ -74,10 +76,12 @@ class ParseState {
bool merge(const ParseState &);
void each_referenced_state(std::function<void(ParseStateId *)>);
bool has_shift_action() const;
void compute_shift_actions_signature();
std::map<rules::Symbol::Index, ParseTableEntry> terminal_entries;
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
LexStateId lex_state_id;
size_t shift_actions_signature;
};
struct ParseTableSymbolMetadata {
@ -102,4 +106,38 @@ class ParseTable {
} // namespace tree_sitter
namespace std {
using tree_sitter::util::hash_combine;
template <>
struct hash<tree_sitter::ParseAction> {
size_t operator()(const tree_sitter::ParseAction &action) const {
size_t result = 0;
hash_combine<int>(&result, action.type);
hash_combine(&result, action.extra);
hash_combine(&result, action.fragile);
hash_combine(&result, action.symbol);
hash_combine(&result, action.state_index);
hash_combine(&result, action.consumed_symbol_count);
return result;
}
};
template <>
struct hash<tree_sitter::ParseTableEntry> {
size_t operator()(const tree_sitter::ParseTableEntry &entry) const {
size_t result = 0;
hash_combine(&result, entry.actions.size());
for (const tree_sitter::ParseAction &action : entry.actions) {
hash_combine(&result, action);
}
hash_combine(&result, entry.reusable);
hash_combine(&result, entry.depends_on_lookahead);
return result;
}
};
}
#endif // COMPILER_PARSE_TABLE_H_

View file

@ -0,0 +1,18 @@
#ifndef COMPILER_UTIL_HASH_COMBINE_H_
#define COMPILER_UTIL_HASH_COMBINE_H_
#include <functional>
namespace tree_sitter {
namespace util {
// Folds the std::hash of `new_value` into the running hash stored in `*seed`.
//
// The previous seed is XOR-ed with the sum of the value's hash, the constant
// 0x9e3779b9, the seed shifted left by 6 and the seed shifted right by 2, so
// the result depends on the order in which values are combined.
template <class T>
inline void hash_combine(std::size_t *seed, const T &new_value) {
  const std::size_t previous = *seed;
  const std::size_t mixed =
      std::hash<T>()(new_value) + 0x9e3779b9 + (previous << 6) + (previous >> 2);
  *seed = previous ^ mixed;
}
} // namespace util
} // namespace tree_sitter
#endif // COMPILER_UTIL_HASH_COMBINE_H_