diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 97803a9b..23f41438 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -13,12 +13,12 @@ #include "compiler/build_tables/lex_conflict_manager.h" #include "compiler/build_tables/lex_item.h" #include "compiler/build_tables/item_set_transitions.h" -#include "compiler/build_tables/first_set.h" namespace tree_sitter { using std::string; using std::map; using std::unordered_map; + using std::set; using std::make_shared; using rules::Symbol; using rules::CharacterSet; @@ -30,10 +30,43 @@ namespace tree_sitter { LexConflictManager conflict_manager; unordered_map lex_state_ids; LexTable lex_table; + + LexItemSet build_lex_item_set(const set &symbols) { + LexItemSet result; + for (const auto &symbol : symbols) { + if (symbol == rules::ERROR()) + continue; + else if (symbol == rules::END_OF_INPUT()) + result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy()))); + else if (symbol.is_token()) + result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol)))); + } + return result; + } + + LexStateId add_lex_state(const LexItemSet &item_set) { + auto pair = lex_state_ids.find(item_set); + if (pair == lex_state_ids.end()) { + LexStateId state_id = lex_table.add_state(); + lex_state_ids[item_set] = state_id; + add_advance_actions(item_set, state_id); + add_accept_token_actions(item_set, state_id); + add_token_start(item_set, state_id); + return state_id; + } else { + return pair->second; + } + } + + void add_error_lex_state() { + LexItemSet item_set = build_lex_item_set(parse_table->symbols); + add_advance_actions(item_set, LexTable::ERROR_STATE_ID); + add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID); + } void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { auto transitions = char_transitions(item_set, lex_grammar); - for (auto transition : transitions) { + for (const auto &transition : transitions) { CharacterSet rule = transition.first; LexItemSet new_item_set = transition.second; LexStateId new_state_id = add_lex_state(new_item_set); @@ -41,14 +74,8 @@ namespace tree_sitter { } } - void add_token_start(const LexItemSet &item_set, LexStateId state_id) { - for (auto &item : item_set) - if (item.is_token_start()) - lex_table.state(state_id).is_token_start = true; - } - void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) { - for (LexItem item : item_set) { + for (const LexItem &item : item_set) { if (item.is_done()) { auto current_action = lex_table.state(state_id).default_action; auto new_action = LexAction::Accept(item.lhs, item.precedence()); @@ -57,6 +84,12 @@ namespace tree_sitter { } } } + + void add_token_start(const LexItemSet &item_set, LexStateId state_id) { + for (const auto &item : item_set) + if (item.is_token_start()) + lex_table.state(state_id).is_token_start = true; + } rules::rule_ptr after_separators(rules::rule_ptr rule) { return rules::Seq::Build({ @@ -68,50 +101,6 @@ namespace tree_sitter { }); } - LexItemSet lex_item_set_for_parse_state(const ParseState &state) { - LexItemSet result; - for (auto &symbol : state.expected_inputs()) { - if (symbol.is_token() && !symbol.is_built_in()) - result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol)))); - if (symbol == rules::END_OF_INPUT()) - result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy()))); - } - return result; - } - - void assign_lex_state(ParseState *state) { - state->lex_state_id = add_lex_state(lex_item_set_for_parse_state(*state)); - } - - LexStateId add_lex_state(const LexItemSet &item_set) { - auto pair = lex_state_ids.find(item_set); - if (pair == lex_state_ids.end()) { - LexStateId state_id = lex_table.add_state(); - lex_state_ids[item_set] = state_id; - add_token_start(item_set, state_id); - add_advance_actions(item_set, state_id); - add_accept_token_actions(item_set, state_id); - return state_id; - } else { - return pair->second; - } - } - - void add_error_lex_state() { - LexItemSet error_item_set; - for (size_t i = 0; i < lex_grammar.rules.size(); i++) { - LexItem item(Symbol(i, rules::SymbolOptionToken), after_separators(lex_grammar.rules[i].second)); - error_item_set.insert(item); - } - for (size_t i = 0; i < lex_grammar.aux_rules.size(); i++) { - LexItem item(Symbol(i, rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary)), after_separators(lex_grammar.aux_rules[i].second)); - error_item_set.insert(item); - } - error_item_set.insert(LexItem(rules::END_OF_INPUT(), after_separators(CharacterSet({ 0 }).copy()))); - add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID); - add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID); - } - public: LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) : lex_grammar(lex_grammar), @@ -119,8 +108,10 @@ namespace tree_sitter { conflict_manager(LexConflictManager(lex_grammar)) {} LexTable build() { - for (auto &parse_state : parse_table->states) - assign_lex_state(&parse_state); + for (auto &parse_state : parse_table->states) { + LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs()); + parse_state.lex_state_id = add_lex_state(item_set); + } add_error_lex_state(); return lex_table; } diff --git a/src/compiler/build_tables/build_lex_table.h b/src/compiler/build_tables/build_lex_table.h index 5e9e0b37..c6a28fb3 100644 --- a/src/compiler/build_tables/build_lex_table.h +++ b/src/compiler/build_tables/build_lex_table.h @@ -9,7 +9,8 @@ namespace tree_sitter { class ParseTable; namespace build_tables { - LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar); + LexTable + build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar); } } diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 7a38f639..b04a8271 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -11,7 +11,6 @@ #include "compiler/build_tables/parse_item.h" #include "compiler/build_tables/item_set_closure.h" #include "compiler/build_tables/item_set_transitions.h" -#include "compiler/build_tables/first_set.h" namespace tree_sitter { using std::pair; @@ -29,49 +28,7 @@ namespace tree_sitter { unordered_map parse_state_ids; SymTransitions sym_transitions; ParseTable parse_table; - - set precedence_values_for_item_set(const ParseItemSet &item_set) { - set result; - for (const auto &item : item_set) - if (item.consumed_symbol_count > 0) - result.insert(item.precedence()); - return result; - } - - void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { - for (auto &transition : sym_transitions(item_set, grammar)) { - const Symbol &symbol = transition.first; - const ParseItemSet &item_set = transition.second; - set precedence_values = precedence_values_for_item_set(item_set); - - auto current_actions = parse_table.states[state_id].actions; - auto current_action = current_actions.find(symbol); - - if (current_action == current_actions.end() || - conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) { - ParseStateId new_state_id = add_parse_state(item_set); - parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values)); - } - } - } - - void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { - for (ParseItem item : item_set) { - if (item.is_done()) { - ParseAction action = (item.lhs == rules::START()) ? - ParseAction::Accept() : - ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence()); - auto current_actions = parse_table.states[state_id].actions; - auto current_action = current_actions.find(item.lookahead_sym); - - if (current_action == current_actions.end() || - conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) { - parse_table.add_action(state_id, item.lookahead_sym, action); - } - } - } - } - + ParseStateId add_parse_state(const ParseItemSet &item_set) { auto pair = parse_state_ids.find(item_set); if (pair == parse_state_ids.end()) { @@ -85,6 +42,47 @@ namespace tree_sitter { } } + void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { + for (const auto &transition : sym_transitions(item_set, grammar)) { + const Symbol &symbol = transition.first; + const ParseItemSet &item_set = transition.second; + auto current_actions = parse_table.states[state_id].actions; + auto current_action = current_actions.find(symbol); + + set precedence_values = precedence_values_for_item_set(item_set); + if (current_action == current_actions.end() || + conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) { + ParseStateId new_state_id = add_parse_state(item_set); + parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values)); + } + } + } + + void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { + for (const ParseItem &item : item_set) { + if (item.is_done()) { + ParseAction action = (item.lhs == rules::START()) ? + ParseAction::Accept() : + ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence()); + auto current_actions = parse_table.states[state_id].actions; + auto current_action = current_actions.find(item.lookahead_sym); + + if (current_action == current_actions.end() || + conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) { + parse_table.add_action(state_id, item.lookahead_sym, action); + } + } + } + } + + set precedence_values_for_item_set(const ParseItemSet &item_set) { + set result; + for (const auto &item : item_set) + if (item.consumed_symbol_count > 0) + result.insert(item.precedence()); + return result; + } + public: ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) : grammar(grammar), diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc index 3f285248..43b74b69 100644 --- a/src/compiler/build_tables/build_tables.cc +++ b/src/compiler/build_tables/build_tables.cc @@ -14,7 +14,7 @@ namespace tree_sitter { auto parse_table_result = build_parse_table(grammar, lex_grammar); ParseTable parse_table = parse_table_result.first; vector conflicts = parse_table_result.second; - auto lex_table = build_lex_table(&parse_table, lex_grammar); + LexTable lex_table = build_lex_table(&parse_table, lex_grammar); return make_tuple(parse_table, lex_table, conflicts); } } diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index ed7c2953..b82dac7e 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -21,15 +21,16 @@ namespace tree_sitter { bool LexItem::is_token_start() const { class IsTokenStart : public rules::RuleFn { bool apply_to(const rules::Seq *rule) { - bool result = apply(rule->left); - if (!result && rule_can_be_blank(rule->left)) - result = apply(rule->right); - return result; + if (apply(rule->left)) + return true; + else if (rule_can_be_blank(rule->left)) + return apply(rule->right); + else + return false; } bool apply_to(const rules::Metadata *rule) { - auto pair = rule->value.find(rules::START_TOKEN); - return (pair != rule->value.end()) && pair->second; + return rule->value_for(rules::START_TOKEN); } };