Compress parse table by allowing reductions w/ unexpected lookaheads

This commit is contained in:
Max Brunsfeld 2016-06-29 09:54:08 -07:00
parent 9c37c06ec7
commit 0e2bbbd7ee
7 changed files with 76 additions and 12 deletions

View file

@ -147,7 +147,7 @@ class LexTableBuilder {
}
auto replacements =
remove_duplicate_states<LexState, AdvanceAction>(&lex_table.states);
remove_duplicate_states<LexTable, AdvanceAction>(&lex_table);
for (ParseState &parse_state : parse_table->states) {
auto replacement = replacements.find(parse_state.lex_state_id);

View file

@ -70,6 +70,11 @@ class ParseTableBuilder {
if (error.type != TSCompileErrorTypeNone)
return { parse_table, error };
for (const ParseState &state : parse_table.states)
for (const auto &pair1 : state.entries)
for (const auto &pair2 : state.entries)
parse_table.symbols[pair1.first].compatible_symbols.insert(pair2.first);
build_error_parse_state();
allow_any_conflict = true;
@ -134,8 +139,7 @@ class ParseTableBuilder {
const ParseItemSet &item_set = recovery_states[symbol];
if (!item_set.entries.empty()) {
ParseStateId state = add_parse_state(item_set);
error_state->entries[symbol].actions.push_back(
ParseAction::Recover(state));
error_state->entries[symbol].actions.push_back(ParseAction::Recover(state));
}
}
@ -268,7 +272,7 @@ class ParseTableBuilder {
}
void remove_duplicate_parse_states() {
remove_duplicate_states<ParseState, ParseAction>(&parse_table.states);
remove_duplicate_states<ParseTable, ParseAction>(&parse_table);
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,

View file

@ -7,15 +7,15 @@
namespace tree_sitter {
namespace build_tables {
template <typename StateType, typename ActionType>
std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states) {
template <typename TableType, typename ActionType>
std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
std::map<size_t, size_t> replacements;
while (true) {
std::map<size_t, size_t> duplicates;
for (size_t i = 0, size = states->size(); i < size; i++)
for (size_t i = 0, size = table->states.size(); i < size; i++)
for (size_t j = 0; j < i; j++)
if (states->at(i) == states->at(j)) {
if (!duplicates.count(j) && table->merge_state(j, i)) {
duplicates.insert({ i, j });
break;
}
@ -24,7 +24,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
break;
std::map<size_t, size_t> new_replacements;
for (size_t i = 0, size = states->size(); i < size; i++) {
for (size_t i = 0, size = table->states.size(); i < size; i++) {
size_t new_state_index = i;
auto duplicate = duplicates.find(i);
if (duplicate != duplicates.end())
@ -45,7 +45,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
replacement.second = new_state_index;
}
for (StateType &state : *states)
for (auto &state : table->states)
state.each_advance_action([&new_replacements](ActionType *action) {
auto new_replacement = new_replacements.find(action->state_index);
if (new_replacement != new_replacements.end())
@ -53,7 +53,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
});
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
states->erase(states->begin() + i->first);
table->states.erase(table->states.begin() + i->first);
}
return replacements;

View file

@ -71,4 +71,8 @@ LexState &LexTable::state(LexStateId id) {
return states[id];
}
bool LexTable::merge_state(size_t i, size_t j) {
return states[i] == states[j];
}
} // namespace tree_sitter

View file

@ -22,7 +22,7 @@ struct AdvanceAction {
AdvanceAction();
AdvanceAction(size_t, PrecedenceRange, bool);
bool operator==(const AdvanceAction &action) const;
bool operator==(const AdvanceAction &other) const;
size_t state_index;
PrecedenceRange precedence_range;
@ -66,6 +66,8 @@ class LexTable {
LexStateId add_state();
LexState &state(LexStateId state_id);
std::vector<LexState> states;
// Returns true when states[i] and states[j] compare equal, i.e. when state j
// can be treated as a duplicate of state i. Does not modify either state.
bool merge_state(size_t i, size_t j);
};
} // namespace tree_sitter

View file

@ -190,4 +190,51 @@ ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol,
return *state.entries[symbol].actions.rbegin();
}
// Returns true when `state` holds, under any symbol at all, a table entry
// that compares equal to `entry`. The symbol keys are ignored; only the
// mapped entries are compared.
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
  bool found = false;
  for (const auto &symbol_and_entry : state.entries) {
    if (symbol_and_entry.second == entry) {
      found = true;
      break;
    }
  }
  return found;
}
bool ParseTable::merge_state(size_t i, size_t j) {
ParseState &state = states[i];
ParseState &other = states[j];
for (auto &entry : state.entries) {
const Symbol &symbol = entry.first;
const vector<ParseAction> &actions = entry.second.actions;
const auto &other_entry = other.entries.find(symbol);
if (other_entry == other.entries.end()) {
if (actions.back().type != ParseActionTypeReduce)
return false;
if (!has_entry(other, entry.second))
return false;
} else if (entry.second != other_entry->second) {
return false;
}
}
set<Symbol> symbols_to_merge;
for (auto &entry : other.entries) {
const Symbol &symbol = entry.first;
const vector<ParseAction> &actions = entry.second.actions;
if (!state.entries.count(symbol)) {
if (actions.back().type != ParseActionTypeReduce)
return false;
if (!has_entry(state, entry.second))
return false;
symbols_to_merge.insert(symbol);
}
}
for (const Symbol &symbol : symbols_to_merge)
state.entries[symbol] = other.entries.find(symbol)->second;
return true;
}
} // namespace tree_sitter

View file

@ -61,6 +61,10 @@ struct ParseTableEntry {
ParseTableEntry();
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
bool operator==(const ParseTableEntry &other) const;
// Inequality is defined as the exact negation of operator==.
inline bool operator!=(const ParseTableEntry &other) const {
  const bool equal = (*this == other);
  return !equal;
}
};
class ParseState {
@ -68,6 +72,7 @@ class ParseState {
ParseState();
std::set<rules::Symbol> expected_inputs() const;
bool operator==(const ParseState &) const;
bool merge(const ParseState &);
void each_advance_action(std::function<void(ParseAction *)>);
std::map<rules::Symbol, ParseTableEntry> entries;
@ -77,6 +82,7 @@ class ParseState {
struct ParseTableSymbolMetadata {
bool extra;
bool structural;
std::set<rules::Symbol> compatible_symbols;
};
class ParseTable {
@ -87,6 +93,7 @@ class ParseTable {
ParseAction action);
ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction action);
// Tries to merge states[j] into states[i]. Returns false, leaving both states
// unchanged, when the states disagree on a shared symbol or when a symbol
// present in only one state maps to an entry that does not end in a reduce
// action or has no equal entry in the other state. Returns true after copying
// the entries only states[j] had into states[i]; states[j] is not removed.
bool merge_state(size_t i, size_t j);
std::vector<ParseState> states;
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;