Restructure parse state merging logic

* Remove remnants of templatized remove_duplicate_states function
* Rename recovery_tokens function to get_compatible_tokens and augment it
  also compute pairs of tokens which could potentially be incompatible
This commit is contained in:
Max Brunsfeld 2017-02-26 12:23:35 -08:00
parent 8d3b72e1d9
commit 3c8e6f9987
13 changed files with 274 additions and 252 deletions

View file

@ -6,14 +6,13 @@
#include <unordered_map>
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/build_tables/remove_duplicate_states.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/parse_item_set_builder.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/build_tables/recovery_tokens.h"
#include "compiler/build_tables/compatible_tokens.h"
namespace tree_sitter {
namespace build_tables {
@ -41,6 +40,7 @@ class ParseTableBuilder {
set<string> conflicts;
ParseItemSetBuilder item_set_builder;
set<const Production *> fragile_productions;
CompatibleTokensResult compatible_tokens;
bool allow_any_conflict;
public:
@ -49,6 +49,7 @@ class ParseTableBuilder {
: grammar(grammar),
lexical_grammar(lex_grammar),
item_set_builder(grammar, lex_grammar),
compatible_tokens(get_compatible_tokens(lex_grammar)),
allow_any_conflict(false) {}
pair<ParseTable, CompileError> build() {
@ -74,7 +75,7 @@ class ParseTableBuilder {
if (error.type != TSCompileErrorTypeNone)
return { parse_table, error };
parse_table.mergeable_symbols = recovery_tokens(lexical_grammar);
parse_table.mergeable_symbols = compatible_tokens.recovery_tokens;
build_error_parse_state();
@ -302,7 +303,7 @@ class ParseTableBuilder {
set<ParseStateId> deleted_states;
while (true) {
std::map<ParseStateId, ParseStateId> state_replacements;
map<ParseStateId, ParseStateId> state_replacements;
for (auto &pair : state_indices_by_signature) {
auto &state_group = pair.second;
@ -310,7 +311,7 @@ class ParseTableBuilder {
for (ParseStateId i : state_group) {
for (ParseStateId j : state_group) {
if (j == i) break;
if (!state_replacements.count(j) && parse_table.merge_state(j, i)) {
if (!state_replacements.count(j) && merge_parse_state(j, i)) {
state_replacements.insert({ i, j });
deleted_states.insert(i);
break;
@ -364,6 +365,60 @@ class ParseTableBuilder {
}
}
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
for (const auto &pair : state.terminal_entries)
if (pair.second == entry)
return true;
return false;
}
bool merge_parse_state(size_t i, size_t j) {
ParseState &state = parse_table.states[i];
ParseState &other = parse_table.states[j];
if (state.nonterminal_entries != other.nonterminal_entries)
return false;
for (auto &entry : state.terminal_entries) {
Symbol lookahead = entry.first;
const vector<ParseAction> &actions = entry.second.actions;
const auto &other_entry = other.terminal_entries.find(lookahead);
if (other_entry == other.terminal_entries.end()) {
if (compatible_tokens.recovery_tokens.count(lookahead) == 0 && !lookahead.is_built_in())
return false;
if (actions.back().type != ParseActionTypeReduce)
return false;
if (!has_entry(other, entry.second))
return false;
} else if (entry.second != other_entry->second) {
return false;
}
}
set<Symbol> symbols_to_merge;
for (auto &entry : other.terminal_entries) {
Symbol lookahead = entry.first;
const vector<ParseAction> &actions = entry.second.actions;
if (!state.terminal_entries.count(lookahead)) {
if (compatible_tokens.recovery_tokens.count(lookahead) == 0 && !lookahead.is_built_in())
return false;
if (actions.back().type != ParseActionTypeReduce)
return false;
if (!has_entry(state, entry.second))
return false;
symbols_to_merge.insert(lookahead);
}
}
for (const Symbol &lookahead : symbols_to_merge)
state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
return true;
}
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
Symbol lookahead) {
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];