Optimize remove_duplicate_parse_states method
Signed-off-by: Nathan Sobo <nathan@github.com>
This commit is contained in:
parent
e7217f1bac
commit
42d37656ea
4 changed files with 163 additions and 2 deletions
|
|
@ -172,9 +172,13 @@ class ParseTableBuilder {
|
|||
new_action->state_index = add_parse_state(next_item_set);
|
||||
}
|
||||
} else {
|
||||
parse_table.set_nonterminal_action(state_id, symbol.index, add_parse_state(next_item_set));
|
||||
ParseStateId next_state = add_parse_state(next_item_set);
|
||||
parse_table.set_nonterminal_action(state_id, symbol.index, next_state);
|
||||
}
|
||||
}
|
||||
|
||||
ParseState &state = parse_table.states[state_id];
|
||||
state.compute_shift_actions_signature();
|
||||
}
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
|
|
@ -250,7 +254,86 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void remove_duplicate_parse_states() {
|
||||
remove_duplicate_states<ParseTable>(&parse_table);
|
||||
map<size_t, set<ParseStateId>> state_indices_by_signature;
|
||||
|
||||
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
|
||||
ParseState &state = parse_table.states[i];
|
||||
state_indices_by_signature[state.shift_actions_signature].insert(i);
|
||||
}
|
||||
|
||||
set<ParseStateId> deleted_states;
|
||||
|
||||
while (true) {
|
||||
std::map<ParseStateId, ParseStateId> state_replacements;
|
||||
|
||||
for (auto &pair : state_indices_by_signature) {
|
||||
auto &state_group = pair.second;
|
||||
|
||||
for (ParseStateId i : state_group) {
|
||||
for (ParseStateId j : state_group) {
|
||||
if (j == i) break;
|
||||
if (!state_replacements.count(j) && parse_table.merge_state(j, i)) {
|
||||
state_replacements.insert({ i, j });
|
||||
deleted_states.insert(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (state_replacements.empty()) break;
|
||||
|
||||
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
|
||||
ParseState &state = parse_table.states[i];
|
||||
bool did_update_state = false;
|
||||
|
||||
if (state_replacements.count(i)) {
|
||||
auto &old_group = state_indices_by_signature[state.shift_actions_signature];
|
||||
old_group.erase(i);
|
||||
} else {
|
||||
state.each_referenced_state([&state_replacements, &did_update_state](int64_t *state_index) {
|
||||
auto new_replacement = state_replacements.find(*state_index);
|
||||
if (new_replacement != state_replacements.end()) {
|
||||
*state_index = new_replacement->second;
|
||||
did_update_state = true;
|
||||
}
|
||||
});
|
||||
|
||||
if (did_update_state) {
|
||||
auto &old_group = state_indices_by_signature[state.shift_actions_signature];
|
||||
old_group.erase(i);
|
||||
state.compute_shift_actions_signature();
|
||||
state_indices_by_signature[state.shift_actions_signature].insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vector<ParseStateId> new_state_ids(parse_table.states.size());
|
||||
size_t deleted_state_count = 0;
|
||||
auto deleted_state_iter = deleted_states.begin();
|
||||
for (size_t i = 0; i < new_state_ids.size(); i++) {
|
||||
while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) {
|
||||
deleted_state_count++;
|
||||
deleted_state_iter++;
|
||||
}
|
||||
new_state_ids[i] = i - deleted_state_count;
|
||||
}
|
||||
|
||||
ParseStateId original_state_index = 0;
|
||||
auto iter = parse_table.states.begin();
|
||||
while (iter != parse_table.states.end()) {
|
||||
if (deleted_states.count(original_state_index)) {
|
||||
iter = parse_table.states.erase(iter);
|
||||
} else {
|
||||
ParseState &state = *iter;
|
||||
state.each_referenced_state([&new_state_ids](int64_t *state_index) {
|
||||
*state_index = new_state_ids[*state_index];
|
||||
});
|
||||
++iter;
|
||||
}
|
||||
original_state_index++;
|
||||
}
|
||||
}
|
||||
|
||||
ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead,
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
#include "compiler/parse_table.h"
|
||||
#include <string>
|
||||
#include "compiler/precedence_range.h"
|
||||
#include "compiler/util/hash_combine.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::hash;
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::to_string;
|
||||
|
|
@ -11,6 +13,7 @@ using std::set;
|
|||
using std::vector;
|
||||
using std::function;
|
||||
using rules::Symbol;
|
||||
using util::hash_combine;
|
||||
|
||||
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
Symbol symbol, size_t consumed_symbol_count,
|
||||
|
|
@ -150,6 +153,25 @@ void ParseState::each_referenced_state(function<void(ParseStateId *)> fn) {
|
|||
fn(&entry.second);
|
||||
}
|
||||
|
||||
void ParseState::compute_shift_actions_signature() {
|
||||
shift_actions_signature = 0;
|
||||
for (const auto &pair : nonterminal_entries) {
|
||||
rules::Symbol::Index lookahead = pair.first;
|
||||
ParseStateId next_state = pair.second;
|
||||
hash_combine(&shift_actions_signature, lookahead);
|
||||
hash_combine(&shift_actions_signature, next_state);
|
||||
}
|
||||
|
||||
for (const auto &pair : terminal_entries) {
|
||||
rules::Symbol::Index lookahead = pair.first;
|
||||
const ParseTableEntry &entry = pair.second;
|
||||
if (entry.actions.back().type == ParseActionTypeShift) {
|
||||
hash_combine(&shift_actions_signature, lookahead);
|
||||
hash_combine(&shift_actions_signature, entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool ParseState::operator==(const ParseState &other) const {
|
||||
return terminal_entries == other.terminal_entries &&
|
||||
nonterminal_entries == other.nonterminal_entries;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/util/hash_combine.h"
|
||||
#include "compiler/lex_table.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
|
@ -47,6 +48,7 @@ class ParseAction {
|
|||
rules::Symbol symbol;
|
||||
ParseStateId state_index;
|
||||
size_t consumed_symbol_count;
|
||||
|
||||
PrecedenceRange precedence_range;
|
||||
rules::Associativity associativity;
|
||||
const Production *production;
|
||||
|
|
@ -74,10 +76,12 @@ class ParseState {
|
|||
bool merge(const ParseState &);
|
||||
void each_referenced_state(std::function<void(ParseStateId *)>);
|
||||
bool has_shift_action() const;
|
||||
void compute_shift_actions_signature();
|
||||
|
||||
std::map<rules::Symbol::Index, ParseTableEntry> terminal_entries;
|
||||
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
|
||||
LexStateId lex_state_id;
|
||||
size_t shift_actions_signature;
|
||||
};
|
||||
|
||||
struct ParseTableSymbolMetadata {
|
||||
|
|
@ -102,4 +106,38 @@ class ParseTable {
|
|||
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
|
||||
using tree_sitter::util::hash_combine;
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::ParseAction> {
|
||||
size_t operator()(const tree_sitter::ParseAction &action) const {
|
||||
size_t result = 0;
|
||||
hash_combine<int>(&result, action.type);
|
||||
hash_combine(&result, action.extra);
|
||||
hash_combine(&result, action.fragile);
|
||||
hash_combine(&result, action.symbol);
|
||||
hash_combine(&result, action.state_index);
|
||||
hash_combine(&result, action.consumed_symbol_count);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::ParseTableEntry> {
|
||||
size_t operator()(const tree_sitter::ParseTableEntry &entry) const {
|
||||
size_t result = 0;
|
||||
hash_combine(&result, entry.actions.size());
|
||||
for (const tree_sitter::ParseAction &action : entry.actions) {
|
||||
hash_combine(&result, action);
|
||||
}
|
||||
hash_combine(&result, entry.reusable);
|
||||
hash_combine(&result, entry.depends_on_lookahead);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // COMPILER_PARSE_TABLE_H_
|
||||
|
|
|
|||
18
src/compiler/util/hash_combine.h
Normal file
18
src/compiler/util/hash_combine.h
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
#ifndef COMPILER_UTIL_HASH_COMBINE_H_
|
||||
#define COMPILER_UTIL_HASH_COMBINE_H_
|
||||
|
||||
#include <functional>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace util {
|
||||
|
||||
// Folds the std::hash of `new_value` into the running hash stored at `*seed`.
// The result depends on the previous seed, so combining the same values in a
// different order generally yields a different hash.
template <class T>
inline void hash_combine(std::size_t *seed, const T &new_value) {
  const std::size_t previous = *seed;
  const std::size_t value_hash = std::hash<T>()(new_value);
  const std::size_t mixed =
    value_hash + 0x9e3779b9 + (previous << 6) + (previous >> 2);
  *seed = previous ^ mixed;
}
|
||||
|
||||
} // namespace util
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_UTIL_HASH_COMBINE_H_
|
||||
Loading…
Add table
Add a link
Reference in a new issue