Improve parse conflict reporting

This commit is contained in:
Max Brunsfeld 2015-10-14 17:35:47 -07:00
parent 3d0253f9b8
commit 02e549202f
3 changed files with 101 additions and 87 deletions

View file

@ -202,13 +202,8 @@ class ParseTableBuilder {
}
case ConflictTypeUnresolved: {
auto old_goal_syms = goal_symbols(item_set, old_action, lookahead);
auto new_goal_syms = goal_symbols(item_set, new_action, lookahead);
if (has_expected_conflict(old_goal_syms, new_goal_syms))
if (handle_unresolved_conflict(item_set, lookahead))
return &parse_table.add_action(state_id, lookahead, new_action);
else
conflicts.insert(conflict_description(
lookahead, old_action, old_goal_syms, new_action, new_goal_syms));
break;
}
}
@ -220,14 +215,100 @@ class ParseTableBuilder {
return { action.symbol, action.production_id };
}
bool has_expected_conflict(set<Symbol> symbols1, const set<Symbol> &symbols2) {
symbols1.insert(symbols2.begin(), symbols2.end());
for (const auto &conflicting_symbols : grammar.expected_conflicts)
if (symbols1 == conflicting_symbols)
bool handle_unresolved_conflict(const ParseItemSet &item_set,
const Symbol &lookahead) {
set<Symbol> involved_symbols;
set<ParseItem> reduce_items;
set<ParseItem> core_shift_items;
set<ParseItem> other_shift_items;
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_set = pair.second;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size()) {
if (lookahead_set.contains(lookahead)) {
involved_symbols.insert(item.lhs());
reduce_items.insert(item);
}
} else {
Symbol next_symbol = production[item.step_index].symbol;
if (item.step_index > 0) {
set<Symbol> first_set = get_first_set(next_symbol);
if (first_set.find(lookahead) != first_set.end()) {
involved_symbols.insert(item.lhs());
core_shift_items.insert(item);
}
} else if (next_symbol == lookahead) {
other_shift_items.insert(item);
}
}
}
for (const auto &conflict_set : grammar.expected_conflicts)
if (involved_symbols == conflict_set)
return true;
string description = "Lookahead symbol: " + symbol_name(lookahead) + "\n";
if (!reduce_items.empty()) {
description += "Reduce items:\n";
for (const ParseItem &item : reduce_items)
description += " " + item_string(item) + "\n";
}
if (!core_shift_items.empty()) {
description += "Core shift items:\n";
for (const ParseItem &item : core_shift_items)
description += " " + item_string(item) + "\n";
}
if (!other_shift_items.empty()) {
description += "Other shift items:\n";
for (const ParseItem &item : other_shift_items)
description += " " + item_string(item) + "\n";
}
conflicts.insert(description);
return false;
}
// Renders a parse item as "<lhs> -> <step> <step> ...", inserting a bullet
// marker in front of the step at the item's `step_index`, or at the very end
// when the index equals the number of steps.
string item_string(const ParseItem &item) const {
  const Production &production =
    grammar.productions(item.lhs())[item.production_index];

  string result = symbol_name(item.lhs()) + " ->";
  for (size_t i = 0; i < production.size(); i++) {
    if (i == item.step_index)
      result += " \u2022";
    result += " " + symbol_name(production[i].symbol);
  }

  // Position is past the last step: the marker goes at the end.
  if (production.size() == item.step_index)
    result += " \u2022";

  return result;
}
// Collects `start_symbol` together with every symbol reachable from it by
// repeatedly following the first step of each non-empty production.
set<Symbol> get_first_set(const Symbol &start_symbol) {
  set<Symbol> result;
  vector<Symbol> pending;
  pending.push_back(start_symbol);

  while (!pending.empty()) {
    Symbol current = pending.back();
    pending.pop_back();

    // Skip symbols that were already expanded.
    if (!result.insert(current).second)
      continue;

    for (const Production &production : grammar.productions(current)) {
      if (production.empty())
        continue;
      pending.push_back(production[0].symbol);
    }
  }

  return result;
}
PrecedenceRange precedence_values_for_item_set(const ParseItemSet &item_set) {
PrecedenceRange result;
for (const auto &pair : item_set.entries) {
@ -240,45 +321,6 @@ class ParseTableBuilder {
return result;
}
// Returns the symbols that `action` works towards in `item_set`.
// For a shift, these are the left-hand sides of all items whose next step is
// `lookahead_sym`; for a reduce, it is the action's own symbol. Any other
// action type yields an empty set.
set<Symbol> goal_symbols(const ParseItemSet &item_set,
                         const ParseAction &action,
                         const Symbol &lookahead_sym) {
  set<Symbol> result;

  if (action.type == ParseActionTypeShift) {
    for (const auto &entry : item_set.entries) {
      const ParseItem &item = entry.first;
      const Production &production =
        grammar.productions(item.lhs())[item.production_index];

      // Finished items have no next step to shift.
      if (item.step_index >= production.size())
        continue;

      if (production[item.step_index].symbol == lookahead_sym)
        result.insert(item.lhs());
    }
  } else if (action.type == ParseActionTypeReduce) {
    result.insert(action.symbol);
  }

  return result;
}
// Builds a human-readable report of a conflict: the lookahead symbol followed
// by one bullet line for each of the two competing actions.
string conflict_description(const Symbol &lookahead,
                            const ParseAction &old_action,
                            const set<Symbol> &old_goal_symbols,
                            const ParseAction &new_action,
                            const set<Symbol> &new_goal_symbols) const {
  string text = "Lookahead: ";
  text += symbol_name(lookahead);
  text += "\n";
  text += "Possible Actions:\n";

  text += "* ";
  text += action_description(old_action, old_goal_symbols);
  text += "\n";

  text += "* ";
  text += action_description(new_action, new_goal_symbols);

  return text;
}
string symbol_name(const rules::Symbol &symbol) const {
if (symbol.is_built_in()) {
if (symbol == rules::ERROR())
@ -288,48 +330,15 @@ class ParseTableBuilder {
else
return "";
} else if (symbol.is_token) {
return lexical_grammar.variables[symbol.index].name;
const Variable &variable = lexical_grammar.variables[symbol.index];
if (variable.type == VariableTypeNamed)
return variable.name;
else
return "'" + variable.name + "'";
} else {
return grammar.variables[symbol.index].name;
}
}
// Describes a single parse action for conflict messages.
// A reduce is printed as "Reduce <steps...> -> <symbol>", a shift as
// "Shift <goal>, <goal>, ...". Other action types contribute no prefix.
// The action's precedence (or precedence range) is always appended.
string action_description(const ParseAction &action,
                          const set<Symbol> &goal_symbols) const {
  string result;

  if (action.type == ParseActionTypeReduce) {
    result += "Reduce";
    for (const ProductionStep &step :
         grammar.productions(action.symbol)[action.production_id])
      result += " " + symbol_name(step.symbol);
    result += " -> " + symbol_name(action.symbol);
  } else if (action.type == ParseActionTypeShift) {
    result += "Shift ";
    // Comma-separate the goal symbols.
    for (auto it = goal_symbols.begin(); it != goal_symbols.end(); ++it) {
      if (it != goal_symbols.begin())
        result += ", ";
      result += symbol_name(*it);
    }
  }

  // A range collapsed to a single value is printed as one precedence.
  const auto &range = action.precedence_range;
  if (range.min == range.max) {
    result += " (Precedence " + to_string(range.min) + ")";
  } else {
    result += " (Precedences " + to_string(range.min) + ", " +
              to_string(range.max) + ")";
  }

  return result;
}
};
pair<ParseTable, const GrammarError *> build_parse_table(

View file

@ -23,6 +23,10 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const {
return *entries == *other.entries;
}
// Returns true when `symbol` is a member of this lookahead set.
//
// `entries` is allocated on demand (insert_all creates it when it is still
// null), so a set that has never received a symbol may hold a null pointer.
// Such a set contains nothing; report false rather than dereferencing null.
bool LookaheadSet::contains(const Symbol &symbol) const {
  if (!entries.get())
    return false;
  return entries->find(symbol) != entries->end();
}
bool LookaheadSet::insert_all(const LookaheadSet &other) {
if (!entries.get())
entries = make_shared<set<Symbol>>();

View file

@ -15,6 +15,7 @@ class LookaheadSet {
bool empty() const;
bool operator==(const LookaheadSet &) const;
bool contains(const rules::Symbol &) const;
bool insert_all(const LookaheadSet &);
bool insert(const rules::Symbol &);