From 0e2bbbd7ee300c0357fd4d2b03958f4ddafc971f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jun 2016 09:54:08 -0700 Subject: [PATCH] Compress parse table by allowing reductions w/ unexpected lookaheads --- src/compiler/build_tables/build_lex_table.cc | 2 +- .../build_tables/build_parse_table.cc | 10 ++-- .../build_tables/remove_duplicate_states.h | 14 +++--- src/compiler/lex_table.cc | 4 ++ src/compiler/lex_table.h | 4 +- src/compiler/parse_table.cc | 47 +++++++++++++++++++ src/compiler/parse_table.h | 7 +++ 7 files changed, 76 insertions(+), 12 deletions(-) diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 93de59f0..56de23cf 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -147,7 +147,7 @@ class LexTableBuilder { } auto replacements = - remove_duplicate_states(&lex_table.states); + remove_duplicate_states(&lex_table); for (ParseState &parse_state : parse_table->states) { auto replacement = replacements.find(parse_state.lex_state_id); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 3ef8e6ea..d027e1e8 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -70,6 +70,11 @@ class ParseTableBuilder { if (error.type != TSCompileErrorTypeNone) return { parse_table, error }; + for (const ParseState &state : parse_table.states) + for (const auto &pair1 : state.entries) + for (const auto &pair2 : state.entries) + parse_table.symbols[pair1.first].compatible_symbols.insert(pair2.first); + build_error_parse_state(); allow_any_conflict = true; @@ -134,8 +139,7 @@ class ParseTableBuilder { const ParseItemSet &item_set = recovery_states[symbol]; if (!item_set.entries.empty()) { ParseStateId state = add_parse_state(item_set); - error_state->entries[symbol].actions.push_back( - ParseAction::Recover(state)); + error_state->entries[symbol].actions.push_back(ParseAction::Recover(state)); } } @@ -268,7 +272,7 @@ class ParseTableBuilder { } void remove_duplicate_parse_states() { - remove_duplicate_states(&parse_table.states); + remove_duplicate_states(&parse_table); } ParseAction *add_action(ParseStateId state_id, Symbol lookahead, diff --git a/src/compiler/build_tables/remove_duplicate_states.h b/src/compiler/build_tables/remove_duplicate_states.h index b70bb351..601737a5 100644 --- a/src/compiler/build_tables/remove_duplicate_states.h +++ b/src/compiler/build_tables/remove_duplicate_states.h @@ -7,15 +7,15 @@ namespace tree_sitter { namespace build_tables { -template -std::map remove_duplicate_states(std::vector *states) { +template +std::map remove_duplicate_states(TableType *table) { std::map replacements; while (true) { std::map duplicates; - for (size_t i = 0, size = states->size(); i < size; i++) + for (size_t i = 0, size = table->states.size(); i < size; i++) for (size_t j = 0; j < i; j++) - if (states->at(i) == states->at(j)) { + if (!duplicates.count(j) && table->merge_state(j, i)) { duplicates.insert({ i, j }); break; } @@ -24,7 +24,7 @@ std::map remove_duplicate_states(std::vector *states) break; std::map new_replacements; - for (size_t i = 0, size = states->size(); i < size; i++) { + for (size_t i = 0, size = table->states.size(); i < size; i++) { size_t new_state_index = i; auto duplicate = duplicates.find(i); if (duplicate != duplicates.end()) @@ -45,7 +45,7 @@ std::map remove_duplicate_states(std::vector *states) replacement.second = new_state_index; } - for (StateType &state : *states) + for (auto &state : table->states) state.each_advance_action([&new_replacements](ActionType *action) { auto new_replacement = new_replacements.find(action->state_index); if (new_replacement != new_replacements.end()) @@ -53,7 +53,7 @@ std::map remove_duplicate_states(std::vector *states) }); for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) - states->erase(states->begin() + i->first); + table->states.erase(table->states.begin() + i->first); } return replacements; diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index 946fd712..852586e5 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -71,4 +71,8 @@ LexState &LexTable::state(LexStateId id) { return states[id]; } +bool LexTable::merge_state(size_t i, size_t j) { + return states[i] == states[j]; +} + } // namespace tree_sitter diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index f5f8b4ce..d508e9da 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -22,7 +22,7 @@ struct AdvanceAction { AdvanceAction(); AdvanceAction(size_t, PrecedenceRange, bool); - bool operator==(const AdvanceAction &action) const; + bool operator==(const AdvanceAction &other) const; size_t state_index; PrecedenceRange precedence_range; @@ -66,6 +66,8 @@ class LexTable { LexStateId add_state(); LexState &state(LexStateId state_id); std::vector states; + + bool merge_state(size_t i, size_t j); }; } // namespace tree_sitter diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index cd2a549a..6efdac28 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -190,4 +190,51 @@ ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol, return *state.entries[symbol].actions.rbegin(); } +static bool has_entry(const ParseState &state, const ParseTableEntry &entry) { + for (const auto &pair : state.entries) + if (pair.second == entry) + return true; + return false; +} + +bool ParseTable::merge_state(size_t i, size_t j) { + ParseState &state = states[i]; + ParseState &other = states[j]; + + for (auto &entry : state.entries) { + const Symbol &symbol = entry.first; + const vector &actions = entry.second.actions; + + const auto &other_entry = other.entries.find(symbol); + if (other_entry == other.entries.end()) { + if (actions.back().type != ParseActionTypeReduce) + return false; + if (!has_entry(other, entry.second)) + return false; + } else if (entry.second != other_entry->second) { + return false; + } + } + + set symbols_to_merge; + + for (auto &entry : other.entries) { + const Symbol &symbol = entry.first; + const vector &actions = entry.second.actions; + + if (!state.entries.count(symbol)) { + if (actions.back().type != ParseActionTypeReduce) + return false; + if (!has_entry(state, entry.second)) + return false; + symbols_to_merge.insert(symbol); + } + } + + for (const Symbol &symbol : symbols_to_merge) + state.entries[symbol] = other.entries.find(symbol)->second; + + return true; +} + } // namespace tree_sitter diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 4ce13bd5..4ffcb273 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -61,6 +61,10 @@ struct ParseTableEntry { ParseTableEntry(); ParseTableEntry(const std::vector &, bool, bool); bool operator==(const ParseTableEntry &other) const; + + inline bool operator!=(const ParseTableEntry &other) const { + return !operator==(other); + } }; class ParseState { @@ -68,6 +72,7 @@ class ParseState { ParseState(); std::set expected_inputs() const; bool operator==(const ParseState &) const; + bool merge(const ParseState &); void each_advance_action(std::function); std::map entries; @@ -77,6 +82,7 @@ class ParseState { struct ParseTableSymbolMetadata { bool extra; bool structural; + std::set compatible_symbols; }; class ParseTable { @@ -87,6 +93,7 @@ class ParseTable { ParseAction action); ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action); + bool merge_state(size_t i, size_t j); std::vector states; std::map symbols;