From 39c1ab2d5093cfc292303b64030b35cd1c569cae Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 16 Jun 2014 13:20:39 -0700 Subject: [PATCH] Refactor item_set_closure Inline unnecessary function --- .../compiler/build_tables/follow_sets_spec.cc | 59 ------------------- src/compiler/build_tables/follow_sets.cc | 31 ---------- src/compiler/build_tables/follow_sets.h | 26 -------- src/compiler/build_tables/item_set_closure.cc | 57 ++++++++++-------- 4 files changed, 33 insertions(+), 140 deletions(-) delete mode 100644 spec/compiler/build_tables/follow_sets_spec.cc delete mode 100644 src/compiler/build_tables/follow_sets.cc delete mode 100644 src/compiler/build_tables/follow_sets.h diff --git a/spec/compiler/build_tables/follow_sets_spec.cc b/spec/compiler/build_tables/follow_sets_spec.cc deleted file mode 100644 index 2c0d21e9..00000000 --- a/spec/compiler/build_tables/follow_sets_spec.cc +++ /dev/null @@ -1,59 +0,0 @@ -#include "compiler_spec_helper.h" -#include "compiler/prepared_grammar.h" -#include "compiler/build_tables/follow_sets.h" - -using std::set; -using namespace build_tables; -using namespace rules; - -START_TEST - -describe("computing FOLLOW sets", []() { - const PreparedGrammar grammar({ - { "A", sym("a") }, - { "B", sym("b") }, - { "C", choice({ i_sym(0), i_sym(1) }) }, - }, {}); - - it("includes all of the starting non-terminals for the item, and their following terminals", [&]() { - ParseItem item(Symbol(2), choice({ - seq({ i_sym(0), choice({ i_token(0), i_token(1) }) }), - seq({ i_sym(1), i_token(2) }), - }), 0); - - AssertThat(follow_sets(item, { Symbol(10, SymbolOptionToken) }, grammar), Equals(map>({ - { Symbol(0), set({ - Symbol(0, SymbolOptionToken), - Symbol(1, SymbolOptionToken) }) }, - { Symbol(1), set({ - Symbol(2, SymbolOptionToken) }) }, - }))); - }); - - it("does not include terminals at the beginning of the item", [&]() { - ParseItem item(Symbol(2), choice({ - seq({ i_sym(0), choice({ i_token(0), i_token(1) }) }), - seq({ i_token(2), i_token(3) }), - }), 0); - - AssertThat(follow_sets(item, { Symbol(10, SymbolOptionToken) }, grammar), Equals(map>({ - { Symbol(0), set({ - Symbol(0, SymbolOptionToken), - Symbol(1, SymbolOptionToken) }) }, - }))); - }); - - it("includes the item's lookahead symbol if the rule after the non-terminal might be blank", [&]() { - ParseItem item(Symbol(2), choice({ - seq({ i_sym(0), choice({ i_token(0), blank() }) }), - }), 0); - - AssertThat(follow_sets(item, { Symbol(10, SymbolOptionToken) }, grammar), Equals(map>({ - { Symbol(0), set({ - Symbol(0, SymbolOptionToken), - Symbol(10, SymbolOptionToken) }) }, - }))); - }); -}); - -END_TEST diff --git a/src/compiler/build_tables/follow_sets.cc b/src/compiler/build_tables/follow_sets.cc deleted file mode 100644 index 76a701b6..00000000 --- a/src/compiler/build_tables/follow_sets.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include "compiler/build_tables/follow_sets.h" -#include "compiler/build_tables/first_set.h" -#include "compiler/build_tables/rule_transitions.h" -#include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/prepared_grammar.h" - -namespace tree_sitter { - using std::set; - using std::map; - using rules::Symbol; - using rules::rule_ptr; - - namespace build_tables { - map> follow_sets(const ParseItem &item, - const set &lookahead_symbols, - const PreparedGrammar &grammar) { - map> result; - for (auto &pair : sym_transitions(item.rule)) { - Symbol symbol = pair.first; - rule_ptr next_rule = pair.second; - if (!symbol.is_token() && !symbol.is_built_in()) { - set following_terminals = first_set(next_rule, grammar); - if (rule_can_be_blank(next_rule, grammar)) - following_terminals.insert(lookahead_symbols.begin(), lookahead_symbols.end()); - result.insert({ symbol, following_terminals }); - } - } - return result; - } - } -} diff --git a/src/compiler/build_tables/follow_sets.h b/src/compiler/build_tables/follow_sets.h deleted file mode 100644 index f55c24ba..00000000 --- a/src/compiler/build_tables/follow_sets.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_FOLLOW_SETS_H_ -#define COMPILER_BUILD_TABLES_FOLLOW_SETS_H_ - -#include -#include -#include "compiler/build_tables/parse_item.h" -#include "compiler/rules/symbol.h" - -namespace tree_sitter { - class PreparedGrammar; - - namespace build_tables { - - /* - * Returns a map of non-terminal symbols to sets of terminal symbols. - * The keys are the non-terminals which may appear first in the given - * item. The values are the sets of terminals which can appear immediately - * after the corresponding non-terminals. - */ - std::map> - follow_sets(const ParseItem &item, const std::set &lookahead_symbols, const PreparedGrammar &grammar); - - } -} - -#endif // COMPILER_BUILD_TABLES_FOLLOW_SETS_H_ diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc index 141b2bb5..305f26fc 100644 --- a/src/compiler/build_tables/item_set_closure.cc +++ b/src/compiler/build_tables/item_set_closure.cc @@ -3,44 +3,53 @@ #include #include #include "tree_sitter/compiler.h" -#include "compiler/build_tables/follow_sets.h" +#include "compiler/build_tables/first_set.h" +#include "compiler/build_tables/rule_transitions.h" +#include "compiler/build_tables/rule_can_be_blank.h" #include "compiler/build_tables/item.h" #include "compiler/prepared_grammar.h" namespace tree_sitter { using std::set; - using rules::Symbol; using std::vector; using std::pair; + using rules::Symbol; + using rules::rule_ptr; namespace build_tables { const ParseItemSet item_set_closure(const ParseItem &starting_item, const set &starting_lookahead_symbols, const PreparedGrammar &grammar) { ParseItemSet result; - vector>> pairs_to_add = { {starting_item, starting_lookahead_symbols} }; - while (!pairs_to_add.empty()) { - auto pair = pairs_to_add.back(); - pairs_to_add.pop_back(); - auto &item = pair.first; - auto &lookahead_symbols = pair.second; - - bool new_stuff_added = false; - auto &existing_lookahead_symbols = result[item]; - for (auto &sym : lookahead_symbols) { - auto insertion_result = existing_lookahead_symbols.insert(sym); - if (insertion_result.second) new_stuff_added = true; - } - if (new_stuff_added) { - for (const auto &pair : follow_sets(item, lookahead_symbols, grammar)) { - const Symbol &non_terminal = pair.first; - const set &terminals = pair.second; - pairs_to_add.push_back({ - ParseItem(non_terminal, grammar.rule(non_terminal), 0), - terminals - }); - } + vector>> items_to_process = {{starting_item, starting_lookahead_symbols}}; + while (!items_to_process.empty()) { + ParseItem item = items_to_process.back().first; + set new_lookahead_symbols = items_to_process.back().second; + items_to_process.pop_back(); + + set &lookahead_symbols = result[item]; + size_t previous_size = lookahead_symbols.size(); + lookahead_symbols.insert(new_lookahead_symbols.begin(), new_lookahead_symbols.end()); + + if (lookahead_symbols.size() == previous_size) + continue; + + for (const auto &pair : sym_transitions(item.rule)) { + const Symbol &symbol = pair.first; + const rule_ptr &next_rule = pair.second; + + if (symbol.is_token() || symbol.is_built_in()) + continue; + + set next_lookahead_symbols = first_set(next_rule, grammar); + if (rule_can_be_blank(next_rule, grammar)) + next_lookahead_symbols.insert(lookahead_symbols.begin(), lookahead_symbols.end()); + + items_to_process.push_back({ + ParseItem(symbol, grammar.rule(symbol), 0), + next_lookahead_symbols + }); } }