Respect expected_conflicts field when building parse table

This commit is contained in:
Max Brunsfeld 2015-06-28 16:22:31 -05:00
parent c9a482bbf3
commit aabcb10cfb
15 changed files with 5054 additions and 348 deletions

View file

@ -16,20 +16,22 @@
namespace tree_sitter {
namespace build_tables {
using std::get;
using std::pair;
using std::vector;
using std::set;
using std::map;
using std::string;
using std::to_string;
using std::unordered_map;
using std::make_shared;
using rules::Symbol;
class ParseTableBuilder {
const SyntaxGrammar grammar;
const LexicalGrammar lexical_grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
vector<vector<Symbol>> productions;
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
ParseTable parse_table;
std::set<string> conflicts;
@ -37,7 +39,7 @@ class ParseTableBuilder {
public:
// Creates a builder for the given syntax and lexical grammars.
// The grammar argument is copied into the by-value `grammar` member and the
// conflict manager is constructed from it.
// NOTE(review): the two initializer lists below look like the previous and
// the new revision of the same line; only one of them can be in effect.
ParseTableBuilder(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar)
: grammar(grammar), conflict_manager(grammar, lex_grammar) {}
: grammar(grammar), lexical_grammar(lex_grammar), conflict_manager(grammar) {}
pair<ParseTable, const GrammarError *> build() {
auto start_symbol = grammar.rules.empty()
@ -55,7 +57,7 @@ class ParseTableBuilder {
add_reduce_actions(item_set, state_id);
add_shift_actions(item_set, state_id);
add_shift_extra_actions(state_id);
add_shift_extra_actions(item_set, state_id);
if (!conflicts.empty())
return {
@ -92,13 +94,11 @@ class ParseTableBuilder {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
ParseAction new_action =
ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
if (should_add_action(state_id, item_set, symbol, new_action)) {
ParseStateId new_state_id = add_parse_state(next_item_set);
new_action.state_index = new_state_id;
parse_table.add_action(state_id, symbol, new_action);
}
ParseAction *new_action = add_action(state_id, symbol,
ParseAction::Shift(0, precedence_values_for_item_set(next_item_set)),
item_set);
if (new_action)
new_action->state_index = add_parse_state(next_item_set);
}
}
@ -113,73 +113,85 @@ class ParseTableBuilder {
? ParseAction::Accept()
: ParseAction::Reduce(item.lhs, item.consumed_symbols.size(),
item.precedence(), item.associativity(),
conflict_manager.get_production_id(item.consumed_symbols));
get_production_id(item.consumed_symbols));
for (const auto &lookahead_sym : lookahead_symbols)
if (should_add_action(state_id, item_set, lookahead_sym, action))
parse_table.add_action(state_id, lookahead_sym, action);
add_action(state_id, lookahead_sym, action, item_set);
}
}
}
void add_shift_extra_actions(ParseStateId state_id) {
const map<Symbol, ParseAction> &actions =
parse_table.states[state_id].actions;
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
if (pair_for_symbol == actions.end()) {
parse_table.add_action(state_id, ubiquitous_symbol,
ParseAction::ShiftExtra());
}
}
// Offers a ShiftExtra action for every ubiquitous token in state `state_id`.
// Whether the action is actually stored is decided by add_action.
void add_shift_extra_actions(const ParseItemSet &item_set, ParseStateId state_id) {
  for (const Symbol &extra_token : grammar.ubiquitous_tokens) {
    const ParseAction shift_extra = ParseAction::ShiftExtra();
    add_action(state_id, extra_token, shift_extra, item_set);
  }
}
// For every ubiquitous token that has a Shift action in state `state_id`,
// offers a ReduceExtra action in the shifted-to state for each lookahead
// symbol that has an entry in `state_id`'s action map.
// A default-constructed ParseItemSet is passed along with each offered action.
// NOTE(review): this span interleaves the previous and the new lines of the
// same method (e.g. `pair_for_symbol` vs. `entry`).
// NOTE(review): if a shift target can be `state_id` itself, the calls below
// would modify the action map while it is being iterated -- confirm that
// cannot happen.
void add_reduce_extra_actions(ParseStateId state_id) {
const map<Symbol, ParseAction> &actions =
const ParseItemSet item_set;
const map<Symbol, vector<ParseAction>> &actions =
parse_table.states[state_id].actions;
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
const auto &entry = actions.find(ubiquitous_symbol);
if (entry == actions.end())
continue;
if (pair_for_symbol != actions.end() &&
pair_for_symbol->second.type == ParseActionTypeShift) {
size_t shift_state_id = pair_for_symbol->second.state_index;
for (const auto &pair : actions) {
const Symbol &lookahead_sym = pair.first;
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
if (should_add_action(shift_state_id, ParseItemSet(), lookahead_sym, reduce_extra))
parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra);
for (const auto &action : entry->second) {
if (action.type == ParseActionTypeShift) {
size_t shift_state_id = action.state_index;
for (const auto &pair : actions) {
const Symbol &lookahead_sym = pair.first;
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
add_action(shift_state_id, lookahead_sym, reduce_extra, item_set);
}
}
}
}
}
// Tries to record `action` for `lookahead_sym` in state `state_id`.
//
// When the state has no entry for the symbol, the action is stored directly.
// Otherwise it is compared against the FIRST action already stored for that
// symbol via conflict_manager.resolve():
//   - ConflictTypeNone / ConflictTypeResolved: the new action replaces the
//     entry when the resolution's flag is true; for Resolved, a Reduce on
//     either side has its production id added to fragile_production_ids.
//   - ConflictTypeUnresolved: if the item set's goal symbols match an
//     expected conflict, the new action is appended next to the existing
//     one(s); otherwise a description is added to `conflicts`.
//
// Returns a pointer to the stored action, or nullptr when nothing was stored.
// NOTE(review): the pointer refers to an element of a vector held by the
// parse table; presumably callers use it before adding further actions for
// the same state/symbol -- confirm.
// NOTE(review): this span interleaves the previous `should_add_action`
// implementation with the new `add_action` one.
bool should_add_action(ParseStateId state_id, const ParseItemSet &item_set,
const Symbol &symbol, const ParseAction &action) {
ParseAction *add_action(ParseStateId state_id, Symbol lookahead_sym,
const ParseAction &action, const ParseItemSet &item_set) {
auto &current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(symbol);
if (current_action == current_actions.end())
return true;
auto current_entry = current_actions.find(lookahead_sym);
if (current_entry == current_actions.end())
return &parse_table.set_action(state_id, lookahead_sym, action);
auto result = conflict_manager.resolve(action, current_action->second,
symbol, item_set);
const ParseAction current_action = current_entry->second[0];
auto resolution = conflict_manager.resolve(action, current_action, lookahead_sym);
switch (resolution.second) {
case ConflictTypeNone:
if (resolution.first)
return &parse_table.set_action(state_id, lookahead_sym, action);
break;
switch (get<1>(result)) {
case ConflictTypeResolved:
if (action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(action.production_id);
if (current_action->second.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(current_action->second.production_id);
if (current_action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(current_action.production_id);
if (resolution.first)
return &parse_table.set_action(state_id, lookahead_sym, action);
break;
case ConflictTypeError:
conflicts.insert(get<2>(result));
break;
default:
case ConflictTypeUnresolved: {
set<Symbol> goal_symbols = item_set_goal_symbols(item_set);
if (has_expected_conflict(goal_symbols))
return &parse_table.add_action(state_id, lookahead_sym, action);
else
conflicts.insert(conflict_description(action, current_action, lookahead_sym, goal_symbols));
break;
}
}
return get<0>(result);
return nullptr;
}
// Reports whether `symbols` is equal to one of the entries in
// grammar.expected_conflicts.
bool has_expected_conflict(const set<Symbol> &symbols) {
  auto candidate = grammar.expected_conflicts.begin();
  const auto last = grammar.expected_conflicts.end();
  while (candidate != last) {
    if (symbols == *candidate)
      return true;
    ++candidate;
  }
  return false;
}
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
@ -191,6 +203,93 @@ class ParseTableBuilder {
}
return result;
}
// Collects the left-hand-side symbol of every item in `item_set` that has
// already consumed at least one symbol.
set<Symbol> item_set_goal_symbols(const ParseItemSet &item_set) {
  set<Symbol> goals;
  for (const auto &entry : item_set) {
    const ParseItem &item = entry.first;
    if (item.consumed_symbols.empty())
      continue;
    goals.insert(item.lhs);
  }
  return goals;
}
// Builds the multi-line message reported for an unresolved conflict:
// the comma-separated goal symbols, the lookahead symbol, and a description
// of the existing action followed by the new one.
string conflict_description(const ParseAction &new_action,
                            const ParseAction &old_action,
                            const rules::Symbol &symbol,
                            const set<Symbol> &goal_symbols) const {
  // Join the goal symbol names with ", "; the separator precedes every
  // element except the first, regardless of whether a name is empty.
  string within;
  for (auto goal = goal_symbols.begin(); goal != goal_symbols.end(); ++goal) {
    if (goal != goal_symbols.begin())
      within += ", ";
    within += symbol_name(*goal);
  }

  string description = "Within: ";
  description += within;
  description += "\nLookahead: ";
  description += symbol_name(symbol);
  description += "\nPossible Actions:\n* ";
  description += action_description(old_action);
  description += "\n* ";
  description += action_description(new_action);
  return description;
}
// Returns the display name of `symbol`: the rule name from the lexical
// grammar for tokens, from the syntax grammar for other non-built-in symbols,
// "ERROR" / "END_OF_INPUT" for those built-ins, and "" for any other built-in.
string symbol_name(const rules::Symbol &symbol) const {
  if (!symbol.is_built_in()) {
    if (symbol.is_token())
      return lexical_grammar.rule_name(symbol);
    return grammar.rule_name(symbol);
  }

  if (symbol == rules::ERROR())
    return "ERROR";
  if (symbol == rules::END_OF_INPUT())
    return "END_OF_INPUT";
  return "";
}
// Produces a human-readable description of `action` for conflict messages.
//
// Reduce actions are rendered as "Reduce <production symbols> -> <symbol>",
// Shift actions as "Shift"; every other action type yields an empty string.
// A precedence suffix follows: "(Precedences <lowest>, <highest>)" when the
// action carries more than one precedence value, "(Precedence <value>)" when
// it carries exactly one. With no precedence values the suffix is omitted,
// because dereferencing begin() of an empty set is undefined behavior.
string action_description(const ParseAction &action) const {
  string result;
  switch (action.type) {
    case ParseActionTypeReduce: {
      result = "Reduce";
      for (const rules::Symbol &symbol : productions[action.production_id])
        result += " " + symbol_name(symbol);
      result += " -> " + symbol_name(action.symbol);
      break;
    }
    case ParseActionTypeShift: {
      result = "Shift";
      break;
    }
    default:
      return "";
  }

  const auto &precedences = action.precedence_values;
  if (precedences.size() > 1) {
    result += " (Precedences " + to_string(*precedences.begin()) +
              ", " + to_string(*precedences.rbegin()) + ")";
  } else if (!precedences.empty()) {
    result += " (Precedence " + to_string(*precedences.begin()) + ")";
  }
  return result;
}
// Returns the index of `symbols` within `productions`, appending it first
// when no equal production has been recorded yet.
size_t get_production_id(const vector<rules::Symbol> &symbols) {
  const auto first = productions.begin();
  const auto match = find(first, productions.end(), symbols);
  if (match != productions.end())
    return match - first;

  productions.push_back(symbols);
  return productions.size() - 1;
}
};
pair<ParseTable, const GrammarError *> build_parse_table(

View file

@ -8,31 +8,26 @@ namespace tree_sitter {
namespace build_tables {
using std::find;
using std::get;
using std::make_tuple;
using std::string;
using std::to_string;
using std::tuple;
using std::pair;
using std::vector;
// Constructs a conflict manager; each grammar argument is copied into the
// member of the same name.
// NOTE(review): the two-grammar and the single-grammar definitions below look
// like the previous and the new revision of the constructor.
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) :
syntax_grammar(syntax_grammar),
lexical_grammar(lexical_grammar) {}
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &syntax_grammar)
: syntax_grammar(syntax_grammar) {}
tuple<bool, ConflictType, string>
pair<bool, ConflictType>
ParseConflictManager::resolve(const ParseAction &new_action,
const ParseAction &old_action,
const rules::Symbol &symbol,
const ParseItemSet &item_set) const {
const rules::Symbol &symbol) const {
if (new_action.type < old_action.type) {
auto opposite = resolve(old_action, new_action, symbol, item_set);
return make_tuple(!get<0>(opposite), get<1>(opposite), get<2>(opposite));
auto opposite = resolve(old_action, new_action, symbol);
return {!opposite.first, opposite.second};
}
switch (old_action.type) {
case ParseActionTypeError:
return make_tuple(true, ConflictTypeNone, "");
case ParseActionTypeShiftExtra:
case ParseActionTypeReduceExtra:
return {true, ConflictTypeNone};
case ParseActionTypeShift:
if (new_action.type == ParseActionTypeReduce) {
@ -40,20 +35,20 @@ ParseConflictManager::resolve(const ParseAction &new_action,
int max_precedence = *old_action.precedence_values.rbegin();
int new_precedence = *new_action.precedence_values.rbegin();
if (new_precedence < min_precedence)
return make_tuple(false, ConflictTypeResolved, "");
return {false, ConflictTypeResolved};
else if (new_precedence > max_precedence)
return make_tuple(true, ConflictTypeResolved, "");
return {true, ConflictTypeResolved};
else if (min_precedence == max_precedence) {
switch (new_action.associativity) {
case rules::AssociativityLeft:
return make_tuple(true, ConflictTypeResolved, "");
return {true, ConflictTypeResolved};
case rules::AssociativityRight:
return make_tuple(false, ConflictTypeResolved, "");
return {false, ConflictTypeResolved};
default:
return make_tuple(false, ConflictTypeError, conflict_description(new_action, old_action, symbol, item_set));
return {false, ConflictTypeUnresolved};
}
} else {
return make_tuple(false, ConflictTypeError, conflict_description(new_action, old_action, symbol, item_set));
return {false, ConflictTypeUnresolved};
}
}
@ -62,99 +57,19 @@ ParseConflictManager::resolve(const ParseAction &new_action,
int old_precedence = *old_action.precedence_values.begin();
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return make_tuple(true, ConflictTypeResolved, "");
return {true, ConflictTypeResolved};
} else if (new_precedence < old_precedence) {
return make_tuple(false, ConflictTypeResolved, "");
return {false, ConflictTypeResolved};
} else {
return make_tuple(false, ConflictTypeError, conflict_description(new_action, old_action, symbol, item_set));
return {false, ConflictTypeUnresolved};
}
}
default:
return make_tuple(false, ConflictTypeNone, "");
}
}
// Returns the index of `symbols` within `productions`, appending it first
// when no equal production has been recorded yet.
size_t ParseConflictManager::get_production_id(const vector<rules::Symbol> &symbols) {
  const auto first = productions.begin();
  const auto match = find(first, productions.end(), symbols);
  if (match != productions.end())
    return match - first;

  productions.push_back(symbols);
  return productions.size() - 1;
}
// Joins, with ", ", the names of the left-hand-side symbols of every item in
// `item_set` that has already consumed at least one symbol.
string ParseConflictManager::item_set_description(const ParseItemSet &item_set) const {
  string description;
  const char *separator = "";  // becomes ", " once the first name is written
  for (const auto &entry : item_set) {
    const ParseItem &item = entry.first;
    if (item.consumed_symbols.empty())
      continue;
    description += separator;
    description += symbol_name(item.lhs);
    separator = ", ";
  }
  return description;
}
// Builds the multi-line message for a conflict: the item set description,
// the lookahead symbol, and a description of the existing action followed by
// the new one.
string ParseConflictManager::conflict_description(const ParseAction &new_action,
                                                  const ParseAction &old_action,
                                                  const rules::Symbol &symbol,
                                                  const ParseItemSet &item_set) const {
  string description = "Within: ";
  description += item_set_description(item_set);
  description += "\nLookahead: ";
  description += symbol_name(symbol);
  description += "\nPossible Actions:\n* ";
  description += action_description(old_action);
  description += "\n* ";
  description += action_description(new_action);
  return description;
}
// Returns the display name of `symbol`: the rule name from the lexical
// grammar for tokens, from the syntax grammar for other non-built-in symbols,
// "ERROR" / "END_OF_INPUT" for those built-ins, and "" for any other built-in.
string ParseConflictManager::symbol_name(const rules::Symbol &symbol) const {
  if (!symbol.is_built_in()) {
    if (symbol.is_token())
      return lexical_grammar.rule_name(symbol);
    return syntax_grammar.rule_name(symbol);
  }

  if (symbol == rules::ERROR())
    return "ERROR";
  if (symbol == rules::END_OF_INPUT())
    return "END_OF_INPUT";
  return "";
}
// Produces a human-readable description of `action`.
// Reduce actions are rendered as "Reduce <production symbols> -> <symbol>",
// Shift actions as "Shift"; every other action type yields an empty string.
// A suffix with the lowest and highest precedence value (or the single value)
// is appended afterwards.
// NOTE(review): the else branch dereferences precedence_values.begin()
// without checking for an empty set -- confirm Shift/Reduce actions always
// carry at least one precedence value.
// NOTE(review): the `return {false, ConflictTypeNone};` line near the end
// appears to belong to the new revision of resolve(), not to this function.
string ParseConflictManager::action_description(const ParseAction &action) const {
string result;
switch (action.type) {
case ParseActionTypeReduce: {
result = "Reduce";
for (const rules::Symbol &symbol : productions[action.production_id])
result += " " + symbol_name(symbol);
result += " -> " + symbol_name(action.symbol);
break;
}
case ParseActionTypeShift: {
result = "Shift";
break;
}
default:
return "";
}
if (action.precedence_values.size() > 1) {
result += " (Precedences " + to_string(*action.precedence_values.begin()) +
", " + to_string(*action.precedence_values.rbegin()) + ")";
} else {
result += " (Precedence " + to_string(*action.precedence_values.begin()) + ")";
}
return result;
return {false, ConflictTypeNone};
}
} // namespace build_tables

View file

@ -17,25 +17,16 @@ namespace build_tables {
// Outcome category reported by ParseConflictManager::resolve together with
// its boolean flag.
// NOTE(review): ConflictTypeError and ConflictTypeUnresolved look like the
// previous and the new name of the same enumerator.
enum ConflictType {
ConflictTypeNone,  // used when the existing action is Error/ShiftExtra/ReduceExtra, and as the fallback
ConflictTypeResolved,  // precedence or associativity picked one of the two actions
ConflictTypeError
ConflictTypeUnresolved  // neither precedence nor associativity picked an action
};
// Decides which of two parse actions competing for the same lookahead symbol
// should be kept.
// NOTE(review): this span interleaves the previous declarations (two-grammar
// constructor, tuple-returning resolve, description helpers, productions)
// with the new ones (single-grammar constructor, pair-returning resolve).
class ParseConflictManager {
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
std::vector<std::vector<rules::Symbol>> productions;
public:
ParseConflictManager(const SyntaxGrammar &, const LexicalGrammar &);
size_t get_production_id(const std::vector<rules::Symbol> &);
std::tuple<bool, ConflictType, std::string> resolve(
const ParseAction &, const ParseAction &, const rules::Symbol &, const ParseItemSet &) const;
private:
std::string symbol_name(const rules::Symbol &) const;
std::string item_set_description(const ParseItemSet &) const;
std::string action_description(const ParseAction &) const;
std::string conflict_description(const ParseAction &, const ParseAction &, const rules::Symbol &, const ParseItemSet &) const;
ParseConflictManager(const SyntaxGrammar &);
// Arguments are (new action, existing action, lookahead symbol). The bool is
// true when the new action should be stored in place of the existing one.
std::pair<bool, ConflictType> resolve(
const ParseAction &, const ParseAction &, const rules::Symbol &) const;
};
} // namespace build_tables

View file

@ -192,7 +192,7 @@ class CCodeGenerator {
for (const auto &pair : state.actions) {
line("[" + symbol_id(pair.first) + "] = ");
add("ACTIONS(");
add_parse_action(pair.second);
add_parse_actions(pair.second);
add("),");
}
});
@ -279,29 +279,35 @@ class CCodeGenerator {
}
}
void add_parse_action(const ParseAction &action) {
switch (action.type) {
case ParseActionTypeAccept:
add("ACCEPT_INPUT()");
break;
case ParseActionTypeShift:
add("SHIFT(" + to_string(action.state_index) + ")");
break;
case ParseActionTypeShiftExtra:
add("SHIFT_EXTRA()");
break;
case ParseActionTypeReduce:
if (reduce_action_is_fragile(action))
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
else
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduceExtra:
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
break;
default: {}
void add_parse_actions(const vector<ParseAction> &actions) {
bool started = false;
for (const auto &action : actions) {
if (started)
add(", ");
switch (action.type) {
case ParseActionTypeAccept:
add("ACCEPT_INPUT()");
break;
case ParseActionTypeShift:
add("SHIFT(" + to_string(action.state_index) + ")");
break;
case ParseActionTypeShiftExtra:
add("SHIFT_EXTRA()");
break;
case ParseActionTypeReduce:
if (reduce_action_is_fragile(action))
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
else
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduceExtra:
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
break;
default: {}
}
started = true;
}
}

View file

@ -122,7 +122,11 @@ ostream &operator<<(ostream &stream, const ParseState &state) {
for (auto pair : state.actions) {
if (started)
stream << string(", ");
stream << pair.first << string(" => ") << pair.second;
stream << pair.first << string(" => {");
for (auto &action : pair.second) {
stream << string(" ") << action;
}
stream << string("}");
started = true;
}
stream << string(">");
@ -134,9 +138,16 @@ ParseStateId ParseTable::add_state() {
return states.size() - 1;
}
// Registers `symbol` in `symbols`, replaces whatever is stored for it in
// state `id` with the single `action`, and returns a reference to the
// stored copy.
// NOTE(review): this span interleaves the previous void `add_action`
// signature and assignment with the new `set_action` lines.
void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
ParseAction &ParseTable::set_action(ParseStateId id, Symbol symbol, ParseAction action) {
symbols.insert(symbol);
states[id].actions[symbol] = action;
states[id].actions[symbol] = vector<ParseAction>({ action });
return *states[id].actions[symbol].begin();
}
// Registers `symbol` in `symbols`, appends `action` to the actions stored for
// it in state `id` (creating the entry if needed), and returns a reference to
// the stored copy.
ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
  symbols.insert(symbol);
  auto &actions_for_symbol = states[id].actions[symbol];
  actions_for_symbol.push_back(action);
  return actions_for_symbol.back();
}
} // namespace tree_sitter

View file

@ -15,10 +15,11 @@ typedef uint64_t ParseStateId;
// Kinds of parse action.
// The declaration order is significant: ParseConflictManager::resolve
// compares action types with `<` to decide which argument to treat as the
// existing action.
// NOTE(review): both the previous and the new enumerator ordering appear in
// this span, so several names are listed twice.
typedef enum {
ParseActionTypeError,
ParseActionTypeShift,
ParseActionTypeShiftExtra,
ParseActionTypeReduce,
ParseActionTypeReduceExtra,
ParseActionTypeShiftExtra,
ParseActionTypeShift,
ParseActionTypeReduce,
ParseActionTypeAccept,
} ParseActionType;
@ -74,7 +75,7 @@ namespace tree_sitter {
// One state of the parse table: its actions keyed by lookahead symbol, plus
// a lex state id.
// NOTE(review): the two `actions` declarations are the previous (one action
// per symbol) and the new (a vector of actions per symbol) revision of the
// same member.
class ParseState {
public:
ParseState();
std::map<rules::Symbol, ParseAction> actions;
std::map<rules::Symbol, std::vector<ParseAction>> actions;
std::set<rules::Symbol> expected_inputs() const;
LexStateId lex_state_id;
};
@ -84,7 +85,9 @@ std::ostream &operator<<(std::ostream &stream, const ParseState &state);
class ParseTable {
public:
ParseStateId add_state();
void add_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction &set_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction action);
ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction action);
std::vector<ParseState> states;