tree-sitter/src/compiler/build_tables/perform.cpp

148 lines
6.3 KiB
C++
Raw Normal View History

2014-01-11 15:14:17 -08:00
#include "./perform.h"
2014-01-13 18:47:57 -08:00
#include "item.h"
#include "item_set_closure.h"
#include "item_set_transitions.h"
#include "tree_sitter/compiler.h"
2013-12-27 17:31:08 -08:00
2014-02-15 15:43:32 -08:00
#include "stream_methods.h"
2013-12-27 17:31:08 -08:00
namespace tree_sitter {
2014-02-12 22:56:44 -08:00
using std::pair;
using std::string;
using std::map;
using std::make_shared;
2014-02-12 23:06:26 -08:00
using rules::Symbol;
using rules::CharacterSet;
2014-02-12 22:56:44 -08:00
2014-01-11 15:14:17 -08:00
namespace build_tables {
2013-12-27 17:31:08 -08:00
// Sentinel returned by the state-index lookup helpers when an item set has
// not been assigned a state yet. It is only ever read, so it is const.
static const int NOT_FOUND = -1;
2014-02-12 23:06:26 -08:00
// Symbol used as the left-hand side of the initial parse item created in
// TableBuilder::build(); a completed item with this lhs yields an Accept
// action in add_reduce_actions().
// NOTE(review): the meaning of the second constructor argument (`true`) is
// not visible in this file - confirm against Symbol's declaration.
static Symbol START("start", true);
// Symbol used as the lookahead of the initial parse item; assign_lex_state()
// maps it to a lex item whose rule is a character set containing '\0'.
static Symbol END_OF_INPUT("end", true);
2013-12-27 17:31:08 -08:00
class TableBuilder {
2013-12-27 17:31:08 -08:00
const Grammar grammar;
const Grammar lex_grammar;
2014-02-10 21:09:43 -08:00
map<const ParseItemSet, size_t> parse_state_indices;
map<const LexItemSet, size_t> lex_state_indices;
2013-12-27 17:31:08 -08:00
ParseTable parse_table;
LexTable lex_table;
long parse_state_index_for_item_set(const ParseItemSet &item_set) const {
2013-12-27 17:31:08 -08:00
auto entry = parse_state_indices.find(item_set);
return (entry == parse_state_indices.end()) ? NOT_FOUND : entry->second;
}
long lex_state_index_for_item_set(const LexItemSet &item_set) const {
2013-12-27 17:31:08 -08:00
auto entry = lex_state_indices.find(item_set);
return (entry == lex_state_indices.end()) ? NOT_FOUND : entry->second;
}
void add_shift_actions(const ParseItemSet &item_set, size_t state_index) {
2014-01-25 23:40:51 -08:00
for (auto transition : sym_transitions(item_set, grammar)) {
2014-02-12 23:06:26 -08:00
Symbol symbol = transition.first;
2014-02-11 13:15:44 -08:00
ParseItemSet item_set = transition.second;
2013-12-27 17:31:08 -08:00
size_t new_state_index = add_parse_state(item_set);
parse_table.add_action(state_index, symbol, ParseAction::Shift(new_state_index));
2013-12-27 17:31:08 -08:00
}
}
void add_advance_actions(const LexItemSet &item_set, size_t state_index) {
for (auto transition : char_transitions(item_set, grammar)) {
2014-02-12 23:06:26 -08:00
CharacterSet rule = transition.first;
2014-02-11 13:15:44 -08:00
LexItemSet item_set = transition.second;
2013-12-27 17:31:08 -08:00
size_t new_state_index = add_lex_state(item_set);
lex_table.add_action(state_index, rule, LexAction::Advance(new_state_index));
2013-12-27 17:31:08 -08:00
}
}
void add_accept_token_actions(const LexItemSet &item_set, size_t state_index) {
for (LexItem item : item_set) {
2013-12-27 17:31:08 -08:00
if (item.is_done()) {
lex_table.add_default_action(state_index, LexAction::Accept(item.lhs));
2013-12-27 17:31:08 -08:00
}
}
}
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
for (ParseItem item : item_set) {
2013-12-27 17:31:08 -08:00
if (item.is_done()) {
ParseAction action = (item.lhs == START) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbols);
parse_table.add_action(state_index, item.lookahead_sym, action);
2013-12-27 17:31:08 -08:00
}
}
}
void assign_lex_state(size_t state_index) {
ParseState &state = parse_table.states[state_index];
LexItemSet item_set;
for (auto &symbol : state.expected_inputs()) {
if (symbol == END_OF_INPUT)
item_set.insert(LexItem(symbol, make_shared<CharacterSet>(std::set<rules::CharacterRange>{ '\0' })));
if (lex_grammar.has_definition(symbol))
item_set.insert(LexItem(symbol, lex_grammar.rule(symbol)));
}
state.lex_state_index = add_lex_state(item_set);
}
size_t add_lex_state(const LexItemSet &item_set) {
2013-12-27 17:31:08 -08:00
auto state_index = lex_state_index_for_item_set(item_set);
if (state_index == NOT_FOUND) {
state_index = lex_table.add_state();
lex_state_indices[item_set] = state_index;
add_advance_actions(item_set, state_index);
add_accept_token_actions(item_set, state_index);
}
return state_index;
}
size_t add_parse_state(const ParseItemSet &item_set) {
2013-12-27 17:31:08 -08:00
auto state_index = parse_state_index_for_item_set(item_set);
if (state_index == NOT_FOUND) {
state_index = parse_table.add_state();
parse_state_indices[item_set] = state_index;
add_shift_actions(item_set, state_index);
add_reduce_actions(item_set, state_index);
assign_lex_state(state_index);
2013-12-27 17:31:08 -08:00
}
return state_index;
}
2014-02-15 15:43:32 -08:00
// TODO - remove
void dump_item_sets() {
std::vector<const ParseItemSet *> item_sets(parse_state_indices.size());
for (auto &pair : parse_state_indices)
item_sets[pair.second] = &pair.first;
for (int i = 0; i < item_sets.size(); i++) {
std:cout << "\n\n" << i;
for (auto &item : *item_sets[i]) {
cout << "\n" << item.lhs;
cout << "\n " << item.rule;
cout << "\n " << item.lookahead_sym.name;
}
}
}
2013-12-27 17:31:08 -08:00
public:
TableBuilder(const Grammar &grammar, const Grammar &lex_grammar) :
2013-12-27 17:31:08 -08:00
grammar(grammar),
lex_grammar(lex_grammar) {};
2013-12-27 17:31:08 -08:00
2014-01-04 15:30:05 -08:00
pair<ParseTable, LexTable> build() {
auto item = ParseItem(START, make_shared<Symbol>(grammar.start_rule_name), {}, END_OF_INPUT);
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
2013-12-27 17:31:08 -08:00
add_parse_state(item_set);
2014-01-04 15:30:05 -08:00
return pair<ParseTable, LexTable>(parse_table, lex_table);
2013-12-27 17:31:08 -08:00
}
};
2014-01-11 15:14:17 -08:00
// Entry point for table construction: runs a TableBuilder over the two
// grammars and returns the resulting (parse table, lex table) pair.
pair<ParseTable, LexTable> perform(const Grammar &grammar, const Grammar &lex_grammar) {
    TableBuilder builder(grammar, lex_grammar);
    return builder.build();
}
}
}