2014-05-04 22:07:52 -07:00
|
|
|
#include "compiler/build_tables/build_parse_table.h"
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <utility>
|
|
|
|
|
#include <map>
|
|
|
|
|
#include <set>
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
#include "compiler/prepared_grammar.h"
|
|
|
|
|
#include "compiler/rules/built_in_symbols.h"
|
|
|
|
|
#include "compiler/rules/symbol.h"
|
|
|
|
|
#include "compiler/build_tables/parse_conflict_manager.h"
|
|
|
|
|
#include "compiler/build_tables/parse_item.h"
|
|
|
|
|
#include "compiler/build_tables/item_set_closure.h"
|
|
|
|
|
#include "compiler/build_tables/item_set_transitions.h"
|
2014-07-01 20:47:35 -07:00
|
|
|
#include "compiler/build_tables/first_set.h"
|
|
|
|
|
|
2014-05-04 22:07:52 -07:00
|
|
|
namespace tree_sitter {
|
|
|
|
|
using std::pair;
|
|
|
|
|
using std::string;
|
|
|
|
|
using std::vector;
|
|
|
|
|
using std::set;
|
2014-07-01 20:47:35 -07:00
|
|
|
using std::map;
|
2014-05-04 22:07:52 -07:00
|
|
|
using std::unordered_map;
|
|
|
|
|
using std::make_shared;
|
|
|
|
|
using rules::Symbol;
|
|
|
|
|
|
|
|
|
|
namespace build_tables {
|
|
|
|
|
class ParseTableBuilder {
|
2014-06-25 13:27:16 -07:00
|
|
|
const SyntaxGrammar grammar;
|
2014-05-04 22:07:52 -07:00
|
|
|
ParseConflictManager conflict_manager;
|
|
|
|
|
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
2014-07-13 18:06:33 -07:00
|
|
|
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
|
2014-05-04 22:07:52 -07:00
|
|
|
ParseTable parse_table;
|
2014-05-06 12:54:04 -07:00
|
|
|
|
2014-05-04 23:04:34 -07:00
|
|
|
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
|
|
|
|
auto pair = parse_state_ids.find(item_set);
|
|
|
|
|
if (pair == parse_state_ids.end()) {
|
|
|
|
|
ParseStateId state_id = parse_table.add_state();
|
|
|
|
|
parse_state_ids[item_set] = state_id;
|
2014-07-13 18:06:33 -07:00
|
|
|
item_sets_to_process.push_back({ item_set, state_id });
|
2014-05-04 23:04:34 -07:00
|
|
|
return state_id;
|
|
|
|
|
} else {
|
|
|
|
|
return pair->second;
|
|
|
|
|
}
|
2014-05-04 22:07:52 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
2014-06-16 08:35:20 -07:00
|
|
|
for (const auto &pair : item_set) {
|
|
|
|
|
const ParseItem &item = pair.first;
|
|
|
|
|
const set<Symbol> &lookahead_symbols = pair.second;
|
2014-06-16 21:33:35 -07:00
|
|
|
|
2014-05-04 22:07:52 -07:00
|
|
|
if (item.is_done()) {
|
|
|
|
|
ParseAction action = (item.lhs == rules::START()) ?
|
2014-05-04 23:04:34 -07:00
|
|
|
ParseAction::Accept() :
|
|
|
|
|
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
|
2014-07-01 20:47:35 -07:00
|
|
|
for (auto &lookahead_sym : lookahead_symbols)
|
|
|
|
|
if (should_add_action(state_id, lookahead_sym, action))
|
2014-06-16 08:35:20 -07:00
|
|
|
parse_table.add_action(state_id, lookahead_sym, action);
|
2014-05-04 22:07:52 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-05-06 12:54:04 -07:00
|
|
|
|
2014-07-01 21:43:26 -07:00
|
|
|
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
|
|
|
|
for (const auto &transition : sym_transitions(item_set, grammar)) {
|
|
|
|
|
const Symbol &symbol = transition.first;
|
|
|
|
|
const ParseItemSet &next_item_set = transition.second;
|
|
|
|
|
|
|
|
|
|
ParseAction new_action = ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
|
|
|
|
|
if (should_add_action(state_id, symbol, new_action)) {
|
|
|
|
|
ParseStateId new_state_id = add_parse_state(next_item_set);
|
|
|
|
|
new_action.state_index = new_state_id;
|
|
|
|
|
parse_table.add_action(state_id, symbol, new_action);
|
|
|
|
|
}
|
2014-07-01 20:47:35 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-07-13 18:06:33 -07:00
|
|
|
void add_shift_extra_actions(ParseStateId state_id) {
|
2014-07-11 13:21:44 -07:00
|
|
|
const map<Symbol, ParseAction> &actions = parse_table.states[state_id].actions;
|
|
|
|
|
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
|
|
|
|
|
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
|
|
|
|
|
if (pair_for_symbol == actions.end()) {
|
|
|
|
|
parse_table.add_action(state_id, ubiquitous_symbol, ParseAction::ShiftExtra());
|
2014-07-13 18:06:33 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void add_reduce_extra_actions(ParseStateId state_id) {
|
|
|
|
|
const map<Symbol, ParseAction> &actions = parse_table.states[state_id].actions;
|
|
|
|
|
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
|
|
|
|
|
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
|
|
|
|
|
|
|
|
|
|
if (pair_for_symbol != actions.end() && pair_for_symbol->second.type == ParseActionTypeShift) {
|
2014-07-11 13:21:44 -07:00
|
|
|
size_t shift_state_id = pair_for_symbol->second.state_index;
|
2014-07-01 21:43:26 -07:00
|
|
|
for (const auto &pair : actions) {
|
|
|
|
|
const Symbol &lookahead_sym = pair.first;
|
2014-07-11 13:21:44 -07:00
|
|
|
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
|
|
|
|
|
if (should_add_action(shift_state_id, lookahead_sym, reduce_extra))
|
|
|
|
|
parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra);
|
2014-07-01 21:43:26 -07:00
|
|
|
}
|
|
|
|
|
}
|
2014-07-01 20:47:35 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-07-07 13:21:30 -07:00
|
|
|
bool should_add_action(ParseStateId state_id, const Symbol &symbol, const ParseAction &action) {
|
2014-07-01 20:47:35 -07:00
|
|
|
auto current_actions = parse_table.states[state_id].actions;
|
|
|
|
|
auto current_action = current_actions.find(symbol);
|
|
|
|
|
return (
|
|
|
|
|
current_action == current_actions.end() ||
|
2014-07-02 09:01:38 -07:00
|
|
|
conflict_manager.resolve_parse_action(symbol, current_action->second, action));
|
2014-07-01 20:47:35 -07:00
|
|
|
}
|
|
|
|
|
|
2014-05-04 23:04:34 -07:00
|
|
|
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
|
|
|
|
|
set<int> result;
|
2014-06-16 08:35:20 -07:00
|
|
|
for (const auto &pair : item_set) {
|
|
|
|
|
const ParseItem &item = pair.first;
|
2014-05-04 23:04:34 -07:00
|
|
|
if (item.consumed_symbol_count > 0)
|
|
|
|
|
result.insert(item.precedence());
|
2014-06-16 08:35:20 -07:00
|
|
|
}
|
2014-05-04 23:04:34 -07:00
|
|
|
return result;
|
2014-05-04 22:07:52 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public:
|
2014-06-25 13:27:16 -07:00
|
|
|
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
|
2014-05-04 22:07:52 -07:00
|
|
|
grammar(grammar),
|
|
|
|
|
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
|
|
|
|
|
|
|
|
|
|
pair<ParseTable, vector<Conflict>> build() {
|
2014-06-16 08:35:20 -07:00
|
|
|
ParseItem start_item(rules::START(), make_shared<Symbol>(0), 0);
|
|
|
|
|
add_parse_state(item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
|
2014-07-13 18:06:33 -07:00
|
|
|
|
2014-07-13 21:26:21 -07:00
|
|
|
parse_table.symbols.insert(rules::ERROR());
|
|
|
|
|
|
2014-07-13 18:06:33 -07:00
|
|
|
while (!item_sets_to_process.empty()) {
|
|
|
|
|
auto pair = item_sets_to_process.back();
|
|
|
|
|
ParseItemSet &item_set = pair.first;
|
|
|
|
|
ParseStateId &state_id = pair.second;
|
|
|
|
|
item_sets_to_process.pop_back();
|
|
|
|
|
|
|
|
|
|
add_reduce_actions(item_set, state_id);
|
|
|
|
|
add_shift_actions(item_set, state_id);
|
|
|
|
|
add_shift_extra_actions(state_id);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (ParseStateId state_id = 0; state_id < parse_table.states.size(); state_id++)
|
|
|
|
|
add_reduce_extra_actions(state_id);
|
|
|
|
|
|
2014-05-04 22:07:52 -07:00
|
|
|
return { parse_table, conflict_manager.conflicts() };
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Entry point: constructs a ParseTableBuilder for the given syntax and
// lexical grammars and returns the result of its build() call, i.e. the
// parse table paired with the list of conflicts.
pair<ParseTable, vector<Conflict>>
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
  ParseTableBuilder builder(grammar, lex_grammar);
  return builder.build();
}
|
|
|
|
|
}
|
|
|
|
|
}
|