Compress parse table by allowing reductions w/ unexpected lookaheads
This commit is contained in:
parent
9c37c06ec7
commit
0e2bbbd7ee
7 changed files with 76 additions and 12 deletions
|
|
@ -147,7 +147,7 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
auto replacements =
|
||||
remove_duplicate_states<LexState, AdvanceAction>(&lex_table.states);
|
||||
remove_duplicate_states<LexTable, AdvanceAction>(&lex_table);
|
||||
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
auto replacement = replacements.find(parse_state.lex_state_id);
|
||||
|
|
|
|||
|
|
@ -70,6 +70,11 @@ class ParseTableBuilder {
|
|||
if (error.type != TSCompileErrorTypeNone)
|
||||
return { parse_table, error };
|
||||
|
||||
for (const ParseState &state : parse_table.states)
|
||||
for (const auto &pair1 : state.entries)
|
||||
for (const auto &pair2 : state.entries)
|
||||
parse_table.symbols[pair1.first].compatible_symbols.insert(pair2.first);
|
||||
|
||||
build_error_parse_state();
|
||||
|
||||
allow_any_conflict = true;
|
||||
|
|
@ -134,8 +139,7 @@ class ParseTableBuilder {
|
|||
const ParseItemSet &item_set = recovery_states[symbol];
|
||||
if (!item_set.entries.empty()) {
|
||||
ParseStateId state = add_parse_state(item_set);
|
||||
error_state->entries[symbol].actions.push_back(
|
||||
ParseAction::Recover(state));
|
||||
error_state->entries[symbol].actions.push_back(ParseAction::Recover(state));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -268,7 +272,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void remove_duplicate_parse_states() {
|
||||
remove_duplicate_states<ParseState, ParseAction>(&parse_table.states);
|
||||
remove_duplicate_states<ParseTable, ParseAction>(&parse_table);
|
||||
}
|
||||
|
||||
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
|
||||
|
|
|
|||
|
|
@ -7,15 +7,15 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
template <typename StateType, typename ActionType>
|
||||
std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states) {
|
||||
template <typename TableType, typename ActionType>
|
||||
std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
|
||||
std::map<size_t, size_t> replacements;
|
||||
|
||||
while (true) {
|
||||
std::map<size_t, size_t> duplicates;
|
||||
for (size_t i = 0, size = states->size(); i < size; i++)
|
||||
for (size_t i = 0, size = table->states.size(); i < size; i++)
|
||||
for (size_t j = 0; j < i; j++)
|
||||
if (states->at(i) == states->at(j)) {
|
||||
if (!duplicates.count(j) && table->merge_state(j, i)) {
|
||||
duplicates.insert({ i, j });
|
||||
break;
|
||||
}
|
||||
|
|
@ -24,7 +24,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
|
|||
break;
|
||||
|
||||
std::map<size_t, size_t> new_replacements;
|
||||
for (size_t i = 0, size = states->size(); i < size; i++) {
|
||||
for (size_t i = 0, size = table->states.size(); i < size; i++) {
|
||||
size_t new_state_index = i;
|
||||
auto duplicate = duplicates.find(i);
|
||||
if (duplicate != duplicates.end())
|
||||
|
|
@ -45,7 +45,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
|
|||
replacement.second = new_state_index;
|
||||
}
|
||||
|
||||
for (StateType &state : *states)
|
||||
for (auto &state : table->states)
|
||||
state.each_advance_action([&new_replacements](ActionType *action) {
|
||||
auto new_replacement = new_replacements.find(action->state_index);
|
||||
if (new_replacement != new_replacements.end())
|
||||
|
|
@ -53,7 +53,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
|
|||
});
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
|
||||
states->erase(states->begin() + i->first);
|
||||
table->states.erase(table->states.begin() + i->first);
|
||||
}
|
||||
|
||||
return replacements;
|
||||
|
|
|
|||
|
|
@ -71,4 +71,8 @@ LexState &LexTable::state(LexStateId id) {
|
|||
return states[id];
|
||||
}
|
||||
|
||||
bool LexTable::merge_state(size_t i, size_t j) {
|
||||
return states[i] == states[j];
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ struct AdvanceAction {
|
|||
AdvanceAction();
|
||||
AdvanceAction(size_t, PrecedenceRange, bool);
|
||||
|
||||
bool operator==(const AdvanceAction &action) const;
|
||||
bool operator==(const AdvanceAction &other) const;
|
||||
|
||||
size_t state_index;
|
||||
PrecedenceRange precedence_range;
|
||||
|
|
@ -66,6 +66,8 @@ class LexTable {
|
|||
LexStateId add_state();
|
||||
LexState &state(LexStateId state_id);
|
||||
std::vector<LexState> states;
|
||||
|
||||
bool merge_state(size_t i, size_t j);
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -190,4 +190,51 @@ ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol,
|
|||
return *state.entries[symbol].actions.rbegin();
|
||||
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.entries)
|
||||
if (pair.second == entry)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ParseTable::merge_state(size_t i, size_t j) {
|
||||
ParseState &state = states[i];
|
||||
ParseState &other = states[j];
|
||||
|
||||
for (auto &entry : state.entries) {
|
||||
const Symbol &symbol = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
const auto &other_entry = other.entries.find(symbol);
|
||||
if (other_entry == other.entries.end()) {
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(other, entry.second))
|
||||
return false;
|
||||
} else if (entry.second != other_entry->second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
set<Symbol> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.entries) {
|
||||
const Symbol &symbol = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
if (!state.entries.count(symbol)) {
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(symbol);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : symbols_to_merge)
|
||||
state.entries[symbol] = other.entries.find(symbol)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -61,6 +61,10 @@ struct ParseTableEntry {
|
|||
ParseTableEntry();
|
||||
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
|
||||
bool operator==(const ParseTableEntry &other) const;
|
||||
|
||||
// Inequality is the exact negation of operator== on the same operands.
inline bool operator!=(const ParseTableEntry &other) const {
  return !(*this == other);
}
|
||||
};
|
||||
|
||||
class ParseState {
|
||||
|
|
@ -68,6 +72,7 @@ class ParseState {
|
|||
ParseState();
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
bool operator==(const ParseState &) const;
|
||||
bool merge(const ParseState &);
|
||||
void each_advance_action(std::function<void(ParseAction *)>);
|
||||
|
||||
std::map<rules::Symbol, ParseTableEntry> entries;
|
||||
|
|
@ -77,6 +82,7 @@ class ParseState {
|
|||
struct ParseTableSymbolMetadata {
|
||||
bool extra;
|
||||
bool structural;
|
||||
std::set<rules::Symbol> compatible_symbols;
|
||||
};
|
||||
|
||||
class ParseTable {
|
||||
|
|
@ -87,6 +93,7 @@ class ParseTable {
|
|||
ParseAction action);
|
||||
ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol,
|
||||
ParseAction action);
|
||||
bool merge_state(size_t i, size_t j);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue