Record parse conflicts when compiling grammars

TODO: duplicate conflicts still need to be removed.
This commit is contained in:
Max Brunsfeld 2014-04-08 18:47:42 -07:00
parent 3982b73ed6
commit f71d7bae16
14 changed files with 246 additions and 167 deletions

View file

@ -32,11 +32,17 @@ namespace tree_sitter {
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (auto transition : sym_transitions(item_set, grammar)) {
Symbol symbol = transition.first;
ParseItemSet item_set = transition.second;
ParseStateId new_state_id = add_parse_state(item_set);
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id));
for (auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &item_set = transition.second;
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(symbol);
if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0))) {
ParseStateId new_state_id = add_parse_state(item_set);
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id));
}
}
}
@ -61,8 +67,8 @@ namespace tree_sitter {
if (item.is_done()) {
auto current_action = lex_table.state(state_id).default_action;
auto new_action = LexAction::Accept(item.lhs);
auto action = conflict_manager.resolve_lex_action(current_action, new_action);
lex_table.add_default_action(state_id, action);
if (conflict_manager.resolve_lex_action(current_action, new_action))
lex_table.add_default_action(state_id, new_action);
}
}
}
@ -73,7 +79,11 @@ namespace tree_sitter {
ParseAction action = (item.lhs == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbol_count);
parse_table.add_action(state_id, item.lookahead_sym, action);
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(item.lookahead_sym);
if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action))
parse_table.add_action(state_id, item.lookahead_sym, action);
}
}
}
@ -145,10 +155,12 @@ namespace tree_sitter {
}
public:
TableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
TableBuilder(const PreparedGrammar &grammar,
const PreparedGrammar &lex_grammar,
const map<Symbol, string> &rule_names) :
grammar(grammar),
lex_grammar(lex_grammar),
conflict_manager(ConflictManager(grammar, lex_grammar))
conflict_manager(ConflictManager(grammar, lex_grammar, rule_names))
{}
void build() {
@ -159,17 +171,21 @@ namespace tree_sitter {
add_error_lex_state();
}
vector<Conflict> conflicts;
const vector<Conflict> & conflicts() {
return conflict_manager.conflicts();
};
ParseTable parse_table;
LexTable lex_table;
};
pair<pair<ParseTable, LexTable>, vector<Conflict>>
build_tables(const PreparedGrammar &grammar,
const PreparedGrammar &lex_grammar) {
TableBuilder builder(grammar, lex_grammar);
const PreparedGrammar &lex_grammar,
const map<Symbol, string> &rule_names) {
TableBuilder builder(grammar, lex_grammar, rule_names);
builder.build();
return { { builder.parse_table, builder.lex_table }, builder.conflicts };
return { { builder.parse_table, builder.lex_table }, builder.conflicts() };
}
}
}

View file

@ -12,7 +12,9 @@ namespace tree_sitter {
namespace build_tables {
std::pair<std::pair<ParseTable, LexTable>, std::vector<Conflict>>
build_tables(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
build_tables(const PreparedGrammar &grammar,
const PreparedGrammar &lex_grammar,
const std::map<rules::Symbol, std::string> &rule_names);
}
}

View file

@ -0,0 +1,100 @@
#include "compiler/build_tables/conflict_manager.h"
#include <vector>
#include <map>
#include <string>
namespace tree_sitter {
namespace build_tables {
using rules::Symbol;
using std::vector;
using std::string;
using std::map;
// Builds the human-readable description of a parse action that is embedded
// in conflict messages.
//
// Reduce actions are described with the name registered for their symbol in
// `rule_names`; when the symbol has no entry, the raw symbol name is used
// with an "ERROR " prefix instead.
string message_for_action(const ParseAction &action, const map<Symbol, string> &rule_names) {
    switch (action.type) {
        case ParseActionTypeShift:
            return "shift";
        case ParseActionTypeReduce: {
            auto entry = rule_names.find(action.symbol);
            if (entry != rule_names.end())
                return "reduce " + entry->second;
            return "ERROR " + action.symbol.name;
        }
        case ParseActionTypeAccept:
            return "accept";
        case ParseActionTypeError:
            return "error";
    }

    // Reached only if `action.type` holds a value outside the cases above.
    // Without this return, control would flow off the end of a non-void
    // function, which is undefined behavior. No `default:` label is used so
    // that compilers can still warn about unhandled enumerators.
    return "unknown";
}
// Appends a conflict of the form "<symbol>: <left action> / <right action>"
// to the list returned by conflicts().
//
// The lookahead symbol is described by its entry in `rule_names`. If the
// symbol has no entry, its raw name is used instead of dereferencing the
// end iterator returned by find().
void ConflictManager::record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right) {
    auto entry = rule_names.find(symbol);
    const string symbol_label = (entry != rule_names.end()) ? entry->second : symbol.name;
    conflicts_.push_back(Conflict(symbol_label + ": " +
                                  message_for_action(left, rule_names) +
                                  " / " +
                                  message_for_action(right, rule_names)));
}
// Constructs a conflict manager for one table-building run.
//
// parse_grammar: grammar consulted via index_of() when two reduce actions compete.
// lex_grammar:   grammar consulted via index_of() when two accept lex actions compete.
// rule_names:    symbol-to-name map used when formatting recorded conflicts.
//
// Each argument initializes the member of the same name.
ConflictManager::ConflictManager(const PreparedGrammar &parse_grammar,
const PreparedGrammar &lex_grammar,
const map<Symbol, string> &rule_names) :
parse_grammar(parse_grammar),
lex_grammar(lex_grammar),
rule_names(rule_names)
{}
// Decides whether `new_action` should take the place of `old_action` for the
// lookahead `symbol`. Returns true when the new action wins, false when the
// existing action is kept. Shift/shift, shift/reduce and reduce/reduce
// pairings are recorded as conflicts along the way.
//
// The comparison relies on the numeric ordering of the action type
// enumerators (declared elsewhere): the pair is always examined with the
// lower-valued type in the "old" position.
bool ConflictManager::resolve_parse_action(const rules::Symbol &symbol,
                                           const ParseAction &old_action,
                                           const ParseAction &new_action) {
    // Normalize the ordering by evaluating the mirrored question and
    // inverting its answer.
    if (new_action.type < old_action.type)
        return !resolve_parse_action(symbol, new_action, old_action);

    // Anything replaces an error action.
    if (old_action.type == ParseActionTypeError)
        return true;

    // An existing shift is always kept; competing shifts or reduces are
    // reported as conflicts.
    if (old_action.type == ParseActionTypeShift) {
        const bool is_conflict = (new_action.type == ParseActionTypeShift ||
                                  new_action.type == ParseActionTypeReduce);
        if (is_conflict)
            record_conflict(symbol, old_action, new_action);
        return false;
    }

    // Reduce/reduce: report it, then prefer the symbol with the smaller
    // index in the parse grammar.
    if (old_action.type == ParseActionTypeReduce &&
        new_action.type == ParseActionTypeReduce) {
        record_conflict(symbol, old_action, new_action);
        const size_t existing_index = parse_grammar.index_of(old_action.symbol);
        const size_t candidate_index = parse_grammar.index_of(new_action.symbol);
        return candidate_index < existing_index;
    }

    return false;
}
// Decides whether `new_action` should take the place of `old_action` in a
// lex state. Returns true when the new action wins.
//
//  - An existing error action is always replaced.
//  - Between two accept actions, the one whose symbol has the smaller index
//    in the lexical grammar wins.
//  - In every other case the existing action is kept.
bool ConflictManager::resolve_lex_action(const LexAction &old_action,
                                         const LexAction &new_action) {
    if (old_action.type == LexActionTypeError)
        return true;

    const bool both_accept = (old_action.type == LexActionTypeAccept &&
                              new_action.type == LexActionTypeAccept);
    if (both_accept) {
        const size_t existing_index = lex_grammar.index_of(old_action.symbol);
        const size_t candidate_index = lex_grammar.index_of(new_action.symbol);
        return candidate_index < existing_index;
    }

    return false;
}
// Returns a read-only reference to the conflicts appended so far by
// record_conflict().
const vector<Conflict> & ConflictManager::conflicts() const {
return conflicts_;
}
}
}

View file

@ -1,92 +0,0 @@
#include "compiler/build_tables/conflict_manager.h"
#include <vector>
namespace tree_sitter {
namespace build_tables {
using rules::Symbol;
using std::vector;
using std::string;
// Builds the human-readable description of a parse action that is embedded
// in conflict messages. Reduce actions include the raw name of the symbol
// being reduced.
string message_for_action(const ParseAction &action) {
    switch (action.type) {
        case ParseActionTypeShift:
            return "shift";
        case ParseActionTypeReduce:
            return "reduce " + action.symbol.name;
        case ParseActionTypeAccept:
            return "accept";
        case ParseActionTypeError:
            return "error";
    }

    // Reached only if `action.type` holds a value outside the cases above.
    // Without this return, control would flow off the end of a non-void
    // function, which is undefined behavior. No `default:` label is used so
    // that compilers can still warn about unhandled enumerators.
    return "unknown";
}
// Appends a conflict of the form "<symbol name>: <left action> / <right action>"
// to the list returned by conflicts().
void ConflictManager::record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right) {
    string description = symbol.name + ": ";
    description += message_for_action(left);
    description += " / ";
    description += message_for_action(right);
    conflicts_.push_back(Conflict(description));
}
// Constructs a conflict manager for one table-building run.
//
// parse_grammar: grammar consulted via index_of() when two reduce actions compete.
// lex_grammar:   grammar consulted via index_of() when two accept lex actions compete.
//
// Each argument initializes the member of the same name.
ConflictManager::ConflictManager(const PreparedGrammar &parse_grammar, const PreparedGrammar &lex_grammar) :
parse_grammar(parse_grammar),
lex_grammar(lex_grammar) {}
// Picks which of two competing parse actions for the lookahead `symbol`
// should be kept and returns it. Shift/shift and shift/reduce pairings are
// recorded as conflicts along the way.
//
// The comparison relies on the numeric ordering of the action type
// enumerators (declared elsewhere): the two actions are first ordered so
// that `left` holds the lower-valued type.
ParseAction ConflictManager::resolve_parse_action(const rules::Symbol &symbol, ParseAction left, ParseAction right) {
    // Order the pair so `left` has the lower-valued type.
    if (right.type < left.type) {
        ParseAction held = left;
        left = right;
        right = held;
    }

    // An error action always yields to the other action.
    if (left.type == ParseActionTypeError)
        return right;

    // A shift is always kept; competing shifts or reduces are reported.
    if (left.type == ParseActionTypeShift) {
        const bool is_conflict = (right.type == ParseActionTypeShift ||
                                  right.type == ParseActionTypeReduce);
        if (is_conflict)
            record_conflict(symbol, left, right);
        return left;
    }

    if (left.type == ParseActionTypeReduce) {
        if (right.type != ParseActionTypeReduce)
            return right;

        // Reduce/reduce: prefer the symbol with the smaller index in the
        // parse grammar.
        const size_t left_position = parse_grammar.index_of(left.symbol);
        const size_t right_position = parse_grammar.index_of(right.symbol);
        if (right_position < left_position)
            return right;
        return left;
    }

    return left;
}
// Picks which of two competing lex actions should be kept and returns it.
//
//  - If `left` is an error action, `right` is returned.
//  - Between two accept actions, the one whose symbol has the smaller index
//    in the lexical grammar is returned.
//  - In every other case `left` is returned.
LexAction ConflictManager::resolve_lex_action(const LexAction &left, const LexAction &right) {
    if (left.type == LexActionTypeError)
        return right;

    const bool both_accept = (left.type == LexActionTypeAccept &&
                              right.type == LexActionTypeAccept);
    if (both_accept) {
        const size_t left_position = lex_grammar.index_of(left.symbol);
        const size_t right_position = lex_grammar.index_of(right.symbol);
        if (right_position < left_position)
            return right;
    }

    return left;
}
// Returns a read-only reference to the conflicts appended so far by
// record_conflict().
const vector<Conflict> & ConflictManager::conflicts() const {
return conflicts_;
}
}
}

View file

@ -2,6 +2,8 @@
#define COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
#include <vector>
#include <map>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/parse_table.h"
#include "compiler/prepared_grammar.h"
@ -11,13 +13,19 @@ namespace tree_sitter {
class ConflictManager {
const PreparedGrammar parse_grammar;
const PreparedGrammar lex_grammar;
const std::map<rules::Symbol, std::string> rule_names;
std::vector<Conflict> conflicts_;
public:
ConflictManager(const PreparedGrammar &parse_grammar, const PreparedGrammar &lex_grammar);
ConflictManager(const PreparedGrammar &parse_grammar,
const PreparedGrammar &lex_grammar,
const std::map<rules::Symbol, std::string> &rule_names);
LexAction resolve_lex_action(const LexAction &left, const LexAction &right);
ParseAction resolve_parse_action(const rules::Symbol &symbol, ParseAction left, ParseAction right);
bool resolve_lex_action(const LexAction &old_action,
const LexAction &new_action);
bool resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action);
void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right);
const std::vector<Conflict> & conflicts() const;

View file

@ -15,13 +15,15 @@ namespace tree_sitter {
PreparedGrammar &syntax_grammar = grammars.first;
PreparedGrammar &lexical_grammar = grammars.second;
auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar);
auto symbol_names = name_symbols::name_symbols(syntax_grammar, lexical_grammar);
auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar, symbol_names);
auto tables = table_build_result.first;
auto conflicts = table_build_result.second;
ParseTable &parse_table = tables.first;
LexTable &lex_table = tables.second;
auto symbol_names = name_symbols::name_symbols(parse_table.symbols, lexical_grammar);
return { generate_code::c_code(name, parse_table, lex_table, symbol_names), conflicts };
}
}

View file

@ -5,12 +5,16 @@
#include "compiler/rules/pattern.h"
#include "compiler/rules/string.h"
#include "compiler/util/string_helpers.h"
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
namespace name_symbols {
using std::map;
using std::set;
using std::string;
using rules::Symbol;
using rules::SymbolTypeNormal;
using rules::SymbolTypeAuxiliary;
class TokenName : public rules::RuleFn<string> {
protected:
@ -23,15 +27,25 @@ namespace tree_sitter {
}
};
map<rules::Symbol, string> name_symbols(const set<rules::Symbol> &symbols,
map<rules::Symbol, string> name_symbols(const PreparedGrammar &syntactic_grammar,
const PreparedGrammar &lexical_grammar) {
map<rules::Symbol, string> result;
for (auto &symbol : symbols) {
string name = (symbol.is_auxiliary() && lexical_grammar.has_definition(symbol)) ?
TokenName().apply(lexical_grammar.rule(symbol)) :
symbol.name;
result.insert({ symbol, name });
}
for (const auto &pair : syntactic_grammar.rules)
result.insert({ Symbol(pair.first, SymbolTypeNormal), pair.first });
for (const auto &pair : lexical_grammar.rules)
result.insert({ Symbol(pair.first, SymbolTypeNormal), pair.first });
for (const auto &pair : syntactic_grammar.aux_rules)
result.insert({ Symbol(pair.first, SymbolTypeAuxiliary), pair.first });
for (const auto &pair : lexical_grammar.aux_rules)
result.insert({
Symbol(pair.first, SymbolTypeAuxiliary),
TokenName().apply(pair.second)
});
result.insert({ rules::END_OF_INPUT(), "EOF" });
result.insert({ rules::ERROR(), "ERROR" });
return result;
}
}

View file

@ -10,7 +10,7 @@ namespace tree_sitter {
class PreparedGrammar;
namespace name_symbols {
std::map<rules::Symbol, std::string> name_symbols(const std::set<rules::Symbol> &symbols,
std::map<rules::Symbol, std::string> name_symbols(const PreparedGrammar &syntactic_grammar,
const PreparedGrammar &lexical_grammar);
}
}