Restructure parse state merging logic
* Remove remnants of templatized remove_duplicate_states function * Rename recovery_tokens function to get_compatible_tokens and augment it also compute pairs of tokens which could potentially be incompatible
This commit is contained in:
parent
8d3b72e1d9
commit
3c8e6f9987
13 changed files with 274 additions and 252 deletions
|
|
@ -6,14 +6,13 @@
|
|||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/build_tables/compatible_tokens.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -41,6 +40,7 @@ class ParseTableBuilder {
|
|||
set<string> conflicts;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
set<const Production *> fragile_productions;
|
||||
CompatibleTokensResult compatible_tokens;
|
||||
bool allow_any_conflict;
|
||||
|
||||
public:
|
||||
|
|
@ -49,6 +49,7 @@ class ParseTableBuilder {
|
|||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar),
|
||||
item_set_builder(grammar, lex_grammar),
|
||||
compatible_tokens(get_compatible_tokens(lex_grammar)),
|
||||
allow_any_conflict(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
|
|
@ -74,7 +75,7 @@ class ParseTableBuilder {
|
|||
if (error.type != TSCompileErrorTypeNone)
|
||||
return { parse_table, error };
|
||||
|
||||
parse_table.mergeable_symbols = recovery_tokens(lexical_grammar);
|
||||
parse_table.mergeable_symbols = compatible_tokens.recovery_tokens;
|
||||
|
||||
build_error_parse_state();
|
||||
|
||||
|
|
@ -302,7 +303,7 @@ class ParseTableBuilder {
|
|||
set<ParseStateId> deleted_states;
|
||||
|
||||
while (true) {
|
||||
std::map<ParseStateId, ParseStateId> state_replacements;
|
||||
map<ParseStateId, ParseStateId> state_replacements;
|
||||
|
||||
for (auto &pair : state_indices_by_signature) {
|
||||
auto &state_group = pair.second;
|
||||
|
|
@ -310,7 +311,7 @@ class ParseTableBuilder {
|
|||
for (ParseStateId i : state_group) {
|
||||
for (ParseStateId j : state_group) {
|
||||
if (j == i) break;
|
||||
if (!state_replacements.count(j) && parse_table.merge_state(j, i)) {
|
||||
if (!state_replacements.count(j) && merge_parse_state(j, i)) {
|
||||
state_replacements.insert({ i, j });
|
||||
deleted_states.insert(i);
|
||||
break;
|
||||
|
|
@ -364,6 +365,60 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.terminal_entries)
|
||||
if (pair.second == entry)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool merge_parse_state(size_t i, size_t j) {
|
||||
ParseState &state = parse_table.states[i];
|
||||
ParseState &other = parse_table.states[j];
|
||||
|
||||
if (state.nonterminal_entries != other.nonterminal_entries)
|
||||
return false;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (compatible_tokens.recovery_tokens.count(lookahead) == 0 && !lookahead.is_built_in())
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(other, entry.second))
|
||||
return false;
|
||||
} else if (entry.second != other_entry->second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
set<Symbol> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (compatible_tokens.recovery_tokens.count(lookahead) == 0 && !lookahead.is_built_in())
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &lookahead : symbols_to_merge)
|
||||
state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
|
||||
Symbol lookahead) {
|
||||
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue