2014-01-11 15:14:17 -08:00
|
|
|
#include "./perform.h"
|
2014-01-13 18:47:57 -08:00
|
|
|
#include "item.h"
|
2014-01-12 09:13:53 -08:00
|
|
|
#include "item_set_closure.h"
|
2014-01-19 01:49:56 -08:00
|
|
|
#include "first_set.h"
|
2014-01-11 16:48:40 -08:00
|
|
|
#include "item_set_transitions.h"
|
2013-12-27 17:31:08 -08:00
|
|
|
#include "rules.h"
|
|
|
|
|
#include "grammar.h"
|
|
|
|
|
|
2014-01-04 15:30:05 -08:00
|
|
|
using std::pair;
|
2014-01-25 20:47:08 -08:00
|
|
|
using std::string;
|
2014-01-04 15:30:05 -08:00
|
|
|
using std::vector;
|
2014-01-11 16:48:40 -08:00
|
|
|
using std::unordered_map;
|
2013-12-27 17:31:08 -08:00
|
|
|
|
|
|
|
|
namespace tree_sitter {
|
2014-01-11 15:14:17 -08:00
|
|
|
namespace build_tables {
|
2013-12-27 17:31:08 -08:00
|
|
|
static int NOT_FOUND = -1;
|
2014-02-10 18:53:01 -08:00
|
|
|
static rules::Symbol START("start", true);
|
|
|
|
|
static rules::Symbol END_OF_INPUT("end", true);
|
2013-12-27 17:31:08 -08:00
|
|
|
|
2014-01-03 12:45:47 -08:00
|
|
|
class TableBuilder {
|
2013-12-27 17:31:08 -08:00
|
|
|
const Grammar grammar;
|
2014-01-03 22:42:05 -08:00
|
|
|
const Grammar lex_grammar;
|
2014-01-18 09:47:26 -08:00
|
|
|
unordered_map<const ParseItemSet, size_t> parse_state_indices;
|
|
|
|
|
unordered_map<const LexItemSet, size_t> lex_state_indices;
|
2013-12-27 17:31:08 -08:00
|
|
|
ParseTable parse_table;
|
|
|
|
|
LexTable lex_table;
|
|
|
|
|
|
2014-01-18 09:47:26 -08:00
|
|
|
long parse_state_index_for_item_set(const ParseItemSet &item_set) const {
|
2013-12-27 17:31:08 -08:00
|
|
|
auto entry = parse_state_indices.find(item_set);
|
|
|
|
|
return (entry == parse_state_indices.end()) ? NOT_FOUND : entry->second;
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-18 09:47:26 -08:00
|
|
|
long lex_state_index_for_item_set(const LexItemSet &item_set) const {
|
2013-12-27 17:31:08 -08:00
|
|
|
auto entry = lex_state_indices.find(item_set);
|
|
|
|
|
return (entry == lex_state_indices.end()) ? NOT_FOUND : entry->second;
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-18 09:47:26 -08:00
|
|
|
void add_shift_actions(const ParseItemSet &item_set, size_t state_index) {
|
2014-01-25 23:40:51 -08:00
|
|
|
for (auto transition : sym_transitions(item_set, grammar)) {
|
2013-12-27 17:31:08 -08:00
|
|
|
rules::Symbol symbol = *transition.first;
|
2014-01-18 09:47:26 -08:00
|
|
|
ParseItemSet item_set = *transition.second;
|
2013-12-27 17:31:08 -08:00
|
|
|
size_t new_state_index = add_parse_state(item_set);
|
2014-01-27 12:40:06 -08:00
|
|
|
parse_table.add_action(state_index, symbol, ParseAction::Shift(new_state_index));
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-18 09:47:26 -08:00
|
|
|
void add_advance_actions(const LexItemSet &item_set, size_t state_index) {
|
2014-01-11 16:48:40 -08:00
|
|
|
for (auto transition : char_transitions(item_set, grammar)) {
|
2014-02-03 13:05:51 -08:00
|
|
|
rules::CharacterSet rule = *transition.first;
|
2014-01-18 09:47:26 -08:00
|
|
|
LexItemSet item_set = *transition.second;
|
2013-12-27 17:31:08 -08:00
|
|
|
size_t new_state_index = add_lex_state(item_set);
|
2014-01-29 19:18:21 -08:00
|
|
|
lex_table.add_action(state_index, rule, LexAction::Advance(new_state_index));
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-18 09:47:26 -08:00
|
|
|
void add_accept_token_actions(const LexItemSet &item_set, size_t state_index) {
|
|
|
|
|
for (LexItem item : item_set) {
|
2013-12-27 17:31:08 -08:00
|
|
|
if (item.is_done()) {
|
2014-01-27 12:40:06 -08:00
|
|
|
lex_table.add_default_action(state_index, LexAction::Accept(item.lhs));
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-01-28 22:09:37 -08:00
|
|
|
|
2014-01-18 09:47:26 -08:00
|
|
|
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
|
|
|
|
|
for (ParseItem item : item_set) {
|
2013-12-27 17:31:08 -08:00
|
|
|
if (item.is_done()) {
|
2014-02-10 18:53:01 -08:00
|
|
|
ParseAction action = (item.lhs == START) ?
|
2014-01-21 23:38:23 -08:00
|
|
|
ParseAction::Accept() :
|
2014-01-31 00:13:05 -08:00
|
|
|
ParseAction::Reduce(item.lhs, item.consumed_symbols);
|
2014-01-27 12:40:06 -08:00
|
|
|
parse_table.add_action(state_index, item.lookahead_sym, action);
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-21 23:38:23 -08:00
|
|
|
void assign_lex_state(size_t state_index) {
|
|
|
|
|
ParseState &state = parse_table.states[state_index];
|
|
|
|
|
LexItemSet item_set;
|
2014-01-27 12:40:06 -08:00
|
|
|
for (auto &symbol : state.expected_inputs()) {
|
2014-02-10 18:53:01 -08:00
|
|
|
if (symbol == END_OF_INPUT)
|
2014-01-27 12:40:06 -08:00
|
|
|
item_set.insert(LexItem(symbol, rules::character('\0')));
|
2014-01-21 23:38:23 -08:00
|
|
|
if (lex_grammar.has_definition(symbol))
|
2014-01-27 12:40:06 -08:00
|
|
|
item_set.insert(LexItem(symbol, lex_grammar.rule(symbol)));
|
2014-01-21 23:38:23 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
state.lex_state_index = add_lex_state(item_set);
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-18 09:47:26 -08:00
|
|
|
size_t add_lex_state(const LexItemSet &item_set) {
|
2013-12-27 17:31:08 -08:00
|
|
|
auto state_index = lex_state_index_for_item_set(item_set);
|
|
|
|
|
if (state_index == NOT_FOUND) {
|
|
|
|
|
state_index = lex_table.add_state();
|
|
|
|
|
lex_state_indices[item_set] = state_index;
|
|
|
|
|
add_advance_actions(item_set, state_index);
|
|
|
|
|
add_accept_token_actions(item_set, state_index);
|
|
|
|
|
}
|
|
|
|
|
return state_index;
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-18 09:47:26 -08:00
|
|
|
size_t add_parse_state(const ParseItemSet &item_set) {
|
2013-12-27 17:31:08 -08:00
|
|
|
auto state_index = parse_state_index_for_item_set(item_set);
|
|
|
|
|
if (state_index == NOT_FOUND) {
|
|
|
|
|
state_index = parse_table.add_state();
|
|
|
|
|
parse_state_indices[item_set] = state_index;
|
|
|
|
|
|
|
|
|
|
add_shift_actions(item_set, state_index);
|
|
|
|
|
add_reduce_actions(item_set, state_index);
|
2014-01-21 23:38:23 -08:00
|
|
|
assign_lex_state(state_index);
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
|
|
|
|
return state_index;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
2014-01-03 22:42:05 -08:00
|
|
|
TableBuilder(const Grammar &grammar, const Grammar &lex_grammar) :
|
2013-12-27 17:31:08 -08:00
|
|
|
grammar(grammar),
|
2014-01-03 22:42:05 -08:00
|
|
|
lex_grammar(lex_grammar) {};
|
2013-12-27 17:31:08 -08:00
|
|
|
|
2014-01-04 15:30:05 -08:00
|
|
|
pair<ParseTable, LexTable> build() {
|
2014-01-28 22:09:37 -08:00
|
|
|
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), {}, END_OF_INPUT);
|
2014-01-18 09:47:26 -08:00
|
|
|
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
|
2013-12-27 17:31:08 -08:00
|
|
|
add_parse_state(item_set);
|
2014-01-04 15:30:05 -08:00
|
|
|
return pair<ParseTable, LexTable>(parse_table, lex_table);
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2014-01-11 15:14:17 -08:00
|
|
|
// Entry point of table construction: runs a TableBuilder over the given
// grammars and returns the resulting (parse table, lex table) pair.
pair<ParseTable, LexTable> perform(const Grammar &grammar, const Grammar &lex_grammar) {
    TableBuilder builder(grammar, lex_grammar);
    return builder.build();
}
|
|
|
|
|
}
|
|
|
|
|
}
|