#include "./perform.h"
#include "prepared_grammar.h"
#include "item.h"
#include "item_set_closure.h"
#include "item_set_transitions.h"
#include "tree_sitter/compiler.h"

namespace tree_sitter {
using std::pair;
using std::string;
using std::map;
using std::make_shared;
using rules::Symbol;
using rules::CharacterSet;

namespace build_tables {

// Sentinel returned by the *_state_id_for_item_set lookups when the item set
// has not been assigned a state yet.
static const long NOT_FOUND = -2;

// Auxiliary symbols used to build the initial parse item in build().
static const Symbol START("start", rules::SymbolTypeAuxiliary);
static const Symbol END_OF_INPUT("end", rules::SymbolTypeAuxiliary);

// Builds a ParseTable and a LexTable from a pair of prepared grammars.
//
// States are created on demand: every distinct item set is given one state id
// the first time it is seen, and the id is remembered so later occurrences of
// the same item set reuse the existing state.
class TableBuilder {
    const PreparedGrammar grammar;
    const PreparedGrammar lex_grammar;

    // Item set -> id of the state that was created for it.
    // NOTE(review): the template arguments were missing from the reviewed text
    // and have been reconstructed from how the maps are used; confirm the
    // mapped types against the original declarations.
    map<ParseItemSet, ParseStateId> parse_state_ids;
    map<LexItemSet, LexStateId> lex_state_ids;

    ParseTable parse_table;
    LexTable lex_table;

    // Returns the id of the parse state already created for `item_set`,
    // or NOT_FOUND if there is none.
    long parse_state_id_for_item_set(const ParseItemSet &item_set) const {
        auto entry = parse_state_ids.find(item_set);
        if (entry == parse_state_ids.end())
            return NOT_FOUND;
        return entry->second;
    }

    // Returns the id of the lex state already created for `item_set`,
    // or NOT_FOUND if there is none.
    long lex_state_id_for_item_set(const LexItemSet &item_set) const {
        auto entry = lex_state_ids.find(item_set);
        if (entry == lex_state_ids.end())
            return NOT_FOUND;
        return entry->second;
    }

    // For every symbol transition out of `item_set`, creates (or reuses) the
    // target parse state and records a Shift action to it on `state_id`.
    void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
        for (const auto &transition : sym_transitions(item_set, grammar)) {
            const Symbol &symbol = transition.first;
            const ParseItemSet &next_item_set = transition.second;
            ParseStateId new_state_id = add_parse_state(next_item_set);
            parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id));
        }
    }

    // For every character transition out of `item_set`, creates (or reuses)
    // the target lex state and records an Advance action to it on `state_id`.
    void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
        // NOTE(review): this passes `grammar` rather than `lex_grammar` even
        // though the items are built from lex_grammar rules; confirm intended.
        for (const auto &transition : char_transitions(item_set, grammar)) {
            const CharacterSet &rule = transition.first;
            const LexItemSet &next_item_set = transition.second;
            LexStateId new_state_id = add_lex_state(next_item_set);
            lex_table.add_action(state_id, rule, LexAction::Advance(new_state_id));
        }
    }

    // Records an Accept default action on `state_id` for every finished item.
    void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
        for (const LexItem &item : item_set) {
            if (item.is_done())
                lex_table.add_default_action(state_id, LexAction::Accept(item.lhs));
        }
    }

    // Records, for every finished item, an action on its lookahead symbol:
    // Accept when the item's left-hand side is START, otherwise Reduce.
    void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
        for (const ParseItem &item : item_set) {
            if (!item.is_done())
                continue;
            ParseAction action = (item.lhs == START)
                ? ParseAction::Accept()
                : ParseAction::Reduce(item.lhs, item.consumed_symbols);
            parse_table.add_action(state_id, item.lookahead_sym, action);
        }
    }

    // Gives the parse state a lex state built from those of its expected
    // inputs that have a definition in the lex grammar.
    void assign_lex_state(ParseStateId state_id) {
        ParseState &state = parse_table.states[state_id];
        LexItemSet item_set;
        for (const auto &symbol : state.expected_inputs()) {
            if (lex_grammar.has_definition(symbol))
                item_set.insert(LexItem(symbol, lex_grammar.rule(symbol)));
        }
        state.lex_state_id = add_lex_state(item_set);
    }

    // Returns the lex state id for `item_set`, creating and populating the
    // state if this item set has not been seen before.
    LexStateId add_lex_state(const LexItemSet &item_set) {
        auto state_id = lex_state_id_for_item_set(item_set);
        if (state_id == NOT_FOUND) {
            state_id = lex_table.add_state();
            // Register the id before adding actions: add_advance_actions calls
            // back into add_lex_state and must find this item set already known.
            lex_state_ids[item_set] = state_id;
            add_advance_actions(item_set, state_id);
            add_accept_token_actions(item_set, state_id);
        }
        return state_id;
    }

    // Returns the parse state id for `item_set`, creating and populating the
    // state if this item set has not been seen before.
    ParseStateId add_parse_state(const ParseItemSet &item_set) {
        auto state_id = parse_state_id_for_item_set(item_set);
        if (state_id == NOT_FOUND) {
            state_id = parse_table.add_state();
            // Register the id before adding actions: add_shift_actions calls
            // back into add_parse_state and must find this item set already known.
            parse_state_ids[item_set] = state_id;
            add_shift_actions(item_set, state_id);
            add_reduce_actions(item_set, state_id);
            assign_lex_state(state_id);
        }
        return state_id;
    }

    // Populates LexTable::ERROR_STATE_ID from an item set that contains one
    // item for every rule in the lex grammar.
    void add_error_lex_state() {
        LexItemSet error_item_set;
        for (const auto &rule_entry : lex_grammar.rules)
            error_item_set.insert(LexItem(rule_entry.first, rule_entry.second));
        add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID);
        add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID);
    }

    // Debugging aid (disabled): prints each parse item set, indexed by state id.
    // NOTE(review): the vector's element type was missing from the reviewed
    // text and is reconstructed here.
    //
    // void dump_item_sets() {
    //     std::vector<const ParseItemSet *> item_sets(parse_state_ids.size());
    //     for (auto &pair : parse_state_ids)
    //         item_sets[pair.second] = &pair.first;
    //
    //     for (size_t i = 0; i < item_sets.size(); i++) {
    //         std::cout << "\n\n" << i;
    //         for (auto &item : *item_sets[i]) {
    //             std::cout << "\n" << item.lhs;
    //             std::cout << "\n    " << item.rule;
    //             std::cout << "\n    " << item.lookahead_sym.name;
    //         }
    //     }
    // }

public:
    // Stores copies of both grammars; no table construction happens here.
    TableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar)
        : grammar(grammar), lex_grammar(lex_grammar) {}

    // Builds both tables, starting from the closure of a single START item
    // over the grammar's start rule with END_OF_INPUT as lookahead, then fills
    // in the error lex state. Returns (parse_table, lex_table).
    pair<ParseTable, LexTable> build() {
        // NOTE(review): make_shared's template argument was missing from the
        // reviewed text; Symbol is assumed from the argument it is given.
        auto item = ParseItem(START, make_shared<Symbol>(grammar.start_rule_name), {}, END_OF_INPUT);
        ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
        add_parse_state(item_set);
        add_error_lex_state();
        return pair<ParseTable, LexTable>(parse_table, lex_table);
    }
};

// Builds the parse table and lex table for the given grammars.
pair<ParseTable, LexTable> perform(const PreparedGrammar &grammar,
                                   const PreparedGrammar &lex_grammar) {
    return TableBuilder(grammar, lex_grammar).build();
}

}  // namespace build_tables
}  // namespace tree_sitter