#include #include #include #include #include #include #include "compiler/parse_table.h" #include "compiler/build_tables/item_set_closure.h" #include "compiler/build_tables/item_set_transitions.h" #include "compiler/build_tables/parse_conflict_manager.h" #include "compiler/build_tables/parse_item.h" #include "compiler/build_tables/get_completion_status.h" #include "compiler/build_tables/get_metadata.h" #include "compiler/prepared_grammar.h" #include "compiler/rules/symbol.h" #include "compiler/rules/built_in_symbols.h" namespace tree_sitter { namespace build_tables { using std::find; using std::pair; using std::vector; using std::set; using std::map; using std::string; using std::to_string; using std::unordered_map; using std::make_shared; using rules::Symbol; class ParseTableBuilder { const SyntaxGrammar grammar; const LexicalGrammar lexical_grammar; ParseConflictManager conflict_manager; unordered_map parse_state_ids; vector> productions; vector> item_sets_to_process; ParseTable parse_table; std::set conflicts; public: ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) : grammar(grammar), lexical_grammar(lex_grammar), conflict_manager(grammar) {} pair build() { auto start_symbol = grammar.rules.empty() ? make_shared(0, true) : make_shared(0); ParseItem start_item(rules::START(), start_symbol, {}); add_parse_state( item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar)); while (!item_sets_to_process.empty()) { auto pair = item_sets_to_process.back(); ParseItemSet &item_set = pair.first; ParseStateId &state_id = pair.second; item_sets_to_process.pop_back(); add_reduce_actions(item_set, state_id); add_shift_actions(item_set, state_id); add_shift_extra_actions(item_set, state_id); if (!conflicts.empty()) return { parse_table, new GrammarError(GrammarErrorTypeParseConflict, "Unresolved conflict.\n\n" + *conflicts.begin()) }; } for (ParseStateId state = 0; state < parse_table.states.size(); state++) add_reduce_extra_actions(state); parse_table.symbols.insert(rules::ERROR()); return { parse_table, nullptr }; } private: ParseStateId add_parse_state(const ParseItemSet &item_set) { auto pair = parse_state_ids.find(item_set); if (pair == parse_state_ids.end()) { ParseStateId state_id = parse_table.add_state(); parse_state_ids[item_set] = state_id; item_sets_to_process.push_back({ item_set, state_id }); return state_id; } else { return pair->second; } } void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (const auto &transition : sym_transitions(item_set, grammar)) { const Symbol &symbol = transition.first; const ParseItemSet &next_item_set = transition.second; ParseAction *new_action = add_action( state_id, symbol, ParseAction::Shift(0, precedence_values_for_item_set(next_item_set)), item_set); if (new_action) new_action->state_index = add_parse_state(next_item_set); } } void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (const auto &pair : item_set) { const ParseItem &item = pair.first; const set &lookahead_symbols = pair.second; CompletionStatus completion_status = get_completion_status(item.rule); if (completion_status.is_done) { ParseAction action = (item.lhs == rules::START()) ? ParseAction::Accept() : ParseAction::Reduce(item.lhs, item.consumed_symbols.size(), completion_status.precedence, completion_status.associativity, get_production_id(item.consumed_symbols)); for (const auto &lookahead_sym : lookahead_symbols) add_action(state_id, lookahead_sym, action, item_set); } } } void add_shift_extra_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) add_action(state_id, ubiquitous_symbol, ParseAction::ShiftExtra(), item_set); } void add_reduce_extra_actions(ParseStateId state_id) { const ParseItemSet item_set; const map> &actions = parse_table.states[state_id].actions; for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) { const auto &entry = actions.find(ubiquitous_symbol); if (entry == actions.end()) continue; for (const auto &action : entry->second) { if (action.type == ParseActionTypeShift) { size_t shift_state_id = action.state_index; for (const auto &pair : actions) { const Symbol &lookahead_sym = pair.first; ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol); add_action(shift_state_id, lookahead_sym, reduce_extra, item_set); } } } } } ParseAction *add_action(ParseStateId state_id, Symbol lookahead_sym, const ParseAction &action, const ParseItemSet &item_set) { auto ¤t_actions = parse_table.states[state_id].actions; auto current_entry = current_actions.find(lookahead_sym); if (current_entry == current_actions.end()) return &parse_table.set_action(state_id, lookahead_sym, action); const ParseAction current_action = current_entry->second[0]; auto resolution = conflict_manager.resolve(action, current_action, lookahead_sym); switch (resolution.second) { case ConflictTypeNone: if (resolution.first) return &parse_table.set_action(state_id, lookahead_sym, action); break; case ConflictTypeResolved: if (action.type == ParseActionTypeReduce) parse_table.fragile_production_ids.insert(action.production_id); if (current_action.type == ParseActionTypeReduce) parse_table.fragile_production_ids.insert(current_action.production_id); if (resolution.first) return &parse_table.set_action(state_id, lookahead_sym, action); break; case ConflictTypeUnresolved: { set goal_symbols = item_set_goal_symbols(item_set); if (has_expected_conflict(goal_symbols)) return &parse_table.add_action(state_id, lookahead_sym, action); else conflicts.insert(conflict_description(action, current_action, lookahead_sym, goal_symbols)); break; } } return nullptr; } bool has_expected_conflict(const set &symbols) { for (const auto &conflicting_symbols : grammar.expected_conflicts) if (symbols == conflicting_symbols) return true; return false; } set precedence_values_for_item_set(const ParseItemSet &item_set) { set result; for (const auto &pair : item_set) { const ParseItem &item = pair.first; if (!item.consumed_symbols.empty()) { auto precedence_range = get_metadata(item.rule, rules::PRECEDENCE); result.insert(precedence_range.min); result.insert(precedence_range.max); } } return result; } set item_set_goal_symbols(const ParseItemSet &item_set) { set result; for (const auto &pair : item_set) { const ParseItem &item = pair.first; if (!item.consumed_symbols.empty()) result.insert(item.lhs); } return result; } string conflict_description(const ParseAction &new_action, const ParseAction &old_action, const rules::Symbol &symbol, const set &goal_symbols) const { string symbols_string; bool started = false; for (const auto &symbol : goal_symbols) { if (started) symbols_string += ", "; symbols_string += symbol_name(symbol); started = true; } return "Within: " + symbols_string + "\n" "Lookahead: " + symbol_name(symbol) + "\n" + "Possible Actions:\n" "* " + action_description(old_action) + "\n" + "* " + action_description(new_action); } string symbol_name(const rules::Symbol &symbol) const { if (symbol.is_built_in()) { if (symbol == rules::ERROR()) return "ERROR"; else if (symbol == rules::END_OF_INPUT()) return "END_OF_INPUT"; else return ""; } else if (symbol.is_token) return lexical_grammar.rules[symbol.index].name; else return grammar.rules[symbol.index].name; } string action_description(const ParseAction &action) const { string result; switch (action.type) { case ParseActionTypeReduce: { result = "Reduce"; for (const rules::Symbol &symbol : productions[action.production_id]) result += " " + symbol_name(symbol); result += " -> " + symbol_name(action.symbol); break; } case ParseActionTypeShift: { result = "Shift"; break; } default: return ""; } if (action.precedence_values.size() > 1) { result += " (Precedences " + to_string(*action.precedence_values.begin()) + ", " + to_string(*action.precedence_values.rbegin()) + ")"; } else { result += " (Precedence " + to_string(*action.precedence_values.begin()) + ")"; } return result; } size_t get_production_id(const vector &symbols) { auto begin = productions.begin(); auto end = productions.end(); auto iter = find(begin, end, symbols); if (iter == end) { productions.push_back(symbols); return productions.size() - 1; } return iter - begin; } }; pair build_parse_table( const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) { return ParseTableBuilder(grammar, lex_grammar).build(); } } // namespace build_tables } // namespace tree_sitter