From b4f2407a49c21d9ef255c09ed70645cce67d9813 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 12 Feb 2016 23:44:05 -0800 Subject: [PATCH] Add forward move states for each terminal symbol --- .../build_tables/build_parse_table.cc | 79 ++++++++++++++----- src/compiler/build_tables/item_set_closure.cc | 10 +-- src/compiler/build_tables/parse_item.cc | 8 ++ src/compiler/build_tables/parse_item.h | 1 + .../build_tables/symbols_by_first_symbol.h | 3 +- src/compiler/parse_table.cc | 5 ++ 6 files changed, 81 insertions(+), 25 deletions(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 4805ba3c..354ee8f6 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -10,6 +10,7 @@ #include "compiler/build_tables/remove_duplicate_states.h" #include "compiler/build_tables/parse_item.h" #include "compiler/build_tables/item_set_closure.h" +#include "compiler/build_tables/symbols_by_first_symbol.h" #include "compiler/lexical_grammar.h" #include "compiler/syntax_grammar.h" #include "compiler/rules/symbol.h" @@ -39,11 +40,14 @@ class ParseTableBuilder { std::set conflicts; ParseItemSet null_item_set; std::set fragile_productions; + bool allow_any_conflict; public: ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) - : grammar(grammar), lexical_grammar(lex_grammar) {} + : grammar(grammar), + lexical_grammar(lex_grammar), + allow_any_conflict(false) {} pair build() { Symbol start_symbol = Symbol(0, grammar.variables.empty()); @@ -58,21 +62,16 @@ class ParseTableBuilder { }, })); - while (!item_sets_to_process.empty()) { - auto pair = item_sets_to_process.back(); - ParseItemSet item_set = item_set_closure(pair.first, grammar); - ParseStateId state_id = pair.second; - item_sets_to_process.pop_back(); - - add_reduce_actions(item_set, state_id); - add_shift_actions(item_set, state_id); - - if (!conflicts.empty()) - return { 
parse_table, - CompileError(TSCompileErrorTypeParseConflict, - "Unresolved conflict.\n\n" + *conflicts.begin()) }; + CompileError error = process_part_state_queue(); + if (error.type != TSCompileErrorTypeNone) { + return { parse_table, error }; } + add_out_of_context_parse_states(); + allow_any_conflict = true; + process_part_state_queue(); + allow_any_conflict = false; + for (ParseStateId state = 0; state < parse_table.states.size(); state++) { add_shift_extra_actions(state); add_reduce_extra_actions(state); @@ -81,12 +80,55 @@ class ParseTableBuilder { mark_fragile_actions(); remove_duplicate_parse_states(); - parse_table.symbols.insert({ rules::ERROR(), {true} }); + parse_table.symbols.insert({ rules::ERROR(), { true } }); return { parse_table, CompileError::none() }; } private: + CompileError process_part_state_queue() { + while (!item_sets_to_process.empty()) { + auto pair = item_sets_to_process.back(); + ParseItemSet item_set = item_set_closure(pair.first, grammar); + + ParseStateId state_id = pair.second; + item_sets_to_process.pop_back(); + + add_reduce_actions(item_set, state_id); + add_shift_actions(item_set, state_id); + + if (!conflicts.empty()) { + return CompileError(TSCompileErrorTypeParseConflict, + "Unresolved conflict.\n\n" + *conflicts.begin()); + } + } + + return CompileError::none(); + } + + void add_out_of_context_parse_states() { + map> symbols_by_first_symbol = + build_tables::symbols_by_first_symbol(grammar); + for (size_t token_index = 0; token_index < lexical_grammar.variables.size(); + token_index++) { + ParseItemSet item_set; + const set &symbols = + symbols_by_first_symbol[Symbol(token_index, true)]; + + for (const auto &parse_state_entry : parse_state_ids) { + for (const auto &pair : parse_state_entry.first.entries) { + const ParseItem &item = pair.first; + const LookaheadSet &lookahead_set = pair.second; + + if (symbols.count(item.next_symbol())) + item_set.entries[item].insert_all(lookahead_set); + } + } + + 
add_parse_state(item_set); + } + } + ParseStateId add_parse_state(const ParseItemSet &item_set) { auto pair = parse_state_ids.find(item_set); if (pair == parse_state_ids.end()) { @@ -203,6 +245,8 @@ class ParseTableBuilder { const auto &current_entry = current_actions.find(lookahead); if (current_entry == current_actions.end()) return &parse_table.set_action(state_id, lookahead, new_action); + if (allow_any_conflict) + return &parse_table.add_action(state_id, lookahead, new_action); const ParseAction old_action = current_entry->second[0]; auto resolution = conflict_manager.resolve(new_action, old_action); @@ -251,14 +295,13 @@ class ParseTableBuilder { const ParseItem &item = pair.first; const LookaheadSet &lookahead_set = pair.second; - if (item.step_index == item.production->size()) { + Symbol next_symbol = item.next_symbol(); + if (next_symbol == rules::NONE()) { if (lookahead_set.contains(lookahead)) { involved_symbols.insert(item.lhs()); reduce_items.insert(item); } } else { - Symbol next_symbol = item.production->at(item.step_index).symbol; - if (item.step_index > 0) { set first_set = get_first_set(next_symbol); if (first_set.count(lookahead)) { diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc index 56ed5066..4f890e98 100644 --- a/src/compiler/build_tables/item_set_closure.cc +++ b/src/compiler/build_tables/item_set_closure.cc @@ -3,6 +3,7 @@ #include #include #include "compiler/syntax_grammar.h" +#include "compiler/rules/built_in_symbols.h" namespace tree_sitter { namespace build_tables { @@ -12,6 +13,7 @@ using std::pair; using std::shared_ptr; using std::make_shared; using rules::Symbol; +using rules::NONE; ParseItemSet item_set_closure(const ParseItemSet &input_item_set, const SyntaxGrammar &grammar) { @@ -32,14 +34,10 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set, if (!result.entries[item].insert_all(lookahead_symbols)) continue; - // If the item is at the end of its production, 
skip to the next item. - if (item.step_index == item.production->size()) - continue; - // If the next symbol in the production is not a non-terminal, skip to the // next item. - Symbol next_symbol = item.production->at(item.step_index).symbol; - if (next_symbol.is_token || next_symbol.is_built_in()) + Symbol next_symbol = item.next_symbol(); + if (next_symbol == NONE() || next_symbol.is_token || next_symbol.is_built_in()) continue; // If the next symbol is the last symbol in the item's production, then the diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index a9e6a20b..01ea3ec1 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -1,6 +1,7 @@ #include "compiler/build_tables/parse_item.h" #include #include "compiler/syntax_grammar.h" +#include "compiler/rules/built_in_symbols.h" namespace tree_sitter { namespace build_tables { @@ -61,6 +62,13 @@ int ParseItem::precedence() const { return production->at(step_index).precedence; } +Symbol ParseItem::next_symbol() const { + if (step_index >= production->size()) + return rules::NONE(); + else + return production->at(step_index).symbol; +} + rules::Associativity ParseItem::associativity() const { if (production->empty()) return rules::AssociativityNone; diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index a8b06546..31a4a59a 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -29,6 +29,7 @@ class ParseItem { bool operator==(const ParseItem &other) const; bool operator<(const ParseItem &other) const; rules::Symbol lhs() const; + rules::Symbol next_symbol() const; int precedence() const; rules::Associativity associativity() const; CompletionStatus completion_status() const; diff --git a/src/compiler/build_tables/symbols_by_first_symbol.h b/src/compiler/build_tables/symbols_by_first_symbol.h index 0cd15178..9a2b5ed2 100644 --- 
a/src/compiler/build_tables/symbols_by_first_symbol.h +++ b/src/compiler/build_tables/symbols_by_first_symbol.h @@ -11,7 +11,8 @@ struct SyntaxGrammar; namespace build_tables { -std::map> symbols_by_first_symbol(const SyntaxGrammar &); +std::map> symbols_by_first_symbol( + const SyntaxGrammar &); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 81e77df6..de217cb7 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -167,6 +167,11 @@ ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol, symbols[symbol]; else symbols[symbol].structural = true; + + for (ParseAction &existing_action : states[id].actions[symbol]) + if (existing_action == action) + return existing_action; + states[id].actions[symbol].push_back(action); return *states[id].actions[symbol].rbegin(); }