From c4ef228397c3eeb2f2a27dd1356ccc730a869593 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 4 Oct 2015 21:33:54 -0700 Subject: [PATCH] Share common lookahead sets between parse item sets --- project.gyp | 1 + .../build_tables/item_set_closure_spec.cc | 10 ++-- .../build_tables/item_set_transitions_spec.cc | 9 ++-- spec/compiler/helpers/stream_methods.cc | 4 ++ spec/compiler/helpers/stream_methods.h | 2 + .../build_tables/build_parse_table.cc | 14 ++--- src/compiler/build_tables/item_set_closure.cc | 54 +++++++++++-------- .../build_tables/item_set_transitions.cc | 8 +-- src/compiler/build_tables/lookahead_set.cc | 41 ++++++++++++++ src/compiler/build_tables/lookahead_set.h | 27 ++++++++++ src/compiler/build_tables/parse_item.cc | 1 - src/compiler/build_tables/parse_item.h | 10 ++-- 12 files changed, 132 insertions(+), 49 deletions(-) create mode 100644 src/compiler/build_tables/lookahead_set.cc create mode 100644 src/compiler/build_tables/lookahead_set.h diff --git a/project.gyp b/project.gyp index 03639d0e..0a08330e 100644 --- a/project.gyp +++ b/project.gyp @@ -20,6 +20,7 @@ 'src/compiler/build_tables/item_set_transitions.cc', 'src/compiler/build_tables/lex_item.cc', 'src/compiler/build_tables/lex_conflict_manager.cc', + 'src/compiler/build_tables/lookahead_set.cc', 'src/compiler/build_tables/parse_item.cc', 'src/compiler/build_tables/parse_conflict_manager.cc', 'src/compiler/build_tables/rule_can_be_blank.cc', diff --git a/spec/compiler/build_tables/item_set_closure_spec.cc b/spec/compiler/build_tables/item_set_closure_spec.cc index 9fc309fe..dace0a23 100644 --- a/spec/compiler/build_tables/item_set_closure_spec.cc +++ b/spec/compiler/build_tables/item_set_closure_spec.cc @@ -1,7 +1,7 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/syntax_grammar.h" #include "compiler/build_tables/item_set_closure.h" -#include "compiler/build_tables/item_set_transitions.h" +#include "compiler/build_tables/lookahead_set.h" #include "compiler/rules/built_in_symbols.h" using namespace build_tables; @@ -45,19 +45,19 @@ describe("item_set_closure", []() { AssertThat(item_set, Equals(ParseItemSet({ { ParseItem(Symbol(0), 0, 0, 100), - set({ Symbol(10, true) }) + LookaheadSet({ Symbol(10, true) }) }, { ParseItem(Symbol(1), 0, 0, 102), - set({ Symbol(11, true) }) + LookaheadSet({ Symbol(11, true) }) }, { ParseItem(Symbol(1), 1, 0, 104), - set({ Symbol(11, true) }) + LookaheadSet({ Symbol(11, true) }) }, { ParseItem(Symbol(2), 0, 0, 105), - set({ Symbol(11, true) }) + LookaheadSet({ Symbol(11, true) }) }, }))); }); diff --git a/spec/compiler/build_tables/item_set_transitions_spec.cc b/spec/compiler/build_tables/item_set_transitions_spec.cc index a211aa06..0b74917a 100644 --- a/spec/compiler/build_tables/item_set_transitions_spec.cc +++ b/spec/compiler/build_tables/item_set_transitions_spec.cc @@ -1,5 +1,6 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/build_tables/item_set_transitions.h" +#include "compiler/build_tables/lookahead_set.h" #include "compiler/syntax_grammar.h" #include "compiler/helpers/rule_helpers.h" @@ -69,7 +70,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() { { // Step 2 of rule_0's production: right before the reference to rule_1. ParseItem(Symbol(0), 0, 2, 103), - set({ Symbol(16, true) }) + LookaheadSet({ Symbol(16, true) }) } }); @@ -81,7 +82,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() { ParseItemSet({ { ParseItem(Symbol(0), 0, 3, 104), - set({ Symbol(16, true) }) + LookaheadSet({ Symbol(16, true) }) } }) }, @@ -92,7 +93,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() { ParseItemSet({ { ParseItem(Symbol(1), 0, 1, 106), - set({ Symbol(13, true) }) + LookaheadSet({ Symbol(13, true) }) }, }) }, @@ -103,7 +104,7 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() { ParseItemSet({ { ParseItem(Symbol(2), 0, 1, 0), - set({ Symbol(14, true) }) + LookaheadSet({ Symbol(14, true) }) }, }) }, diff --git a/spec/compiler/helpers/stream_methods.cc b/spec/compiler/helpers/stream_methods.cc index 9dadc5d4..7d1391ad 100644 --- a/spec/compiler/helpers/stream_methods.cc +++ b/spec/compiler/helpers/stream_methods.cc @@ -129,6 +129,10 @@ std::ostream &operator<<(std::ostream &stream, const MetadataRange &range) { << to_string(range.max) << string("}"); } +std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) { + return stream << *set.entries; +} + } // namespace build_tables } // namespace tree_sitter diff --git a/spec/compiler/helpers/stream_methods.h b/spec/compiler/helpers/stream_methods.h index e9805ec9..e5621ca2 100644 --- a/spec/compiler/helpers/stream_methods.h +++ b/spec/compiler/helpers/stream_methods.h @@ -112,10 +112,12 @@ namespace build_tables { struct MetadataRange; class LexItem; class ParseItem; +class LookaheadSet; ostream &operator<<(ostream &, const MetadataRange &); ostream &operator<<(ostream &, const LexItem &); ostream &operator<<(ostream &, const ParseItem &); +ostream &operator<<(ostream &, const LookaheadSet &); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 6396b3a1..a0d18b1d 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -47,15 +47,17 @@ class ParseTableBuilder { conflict_manager(grammar) {} pair build() { - ParseItem start_item(rules::START(), 0, 0, -2); add_parse_state(ParseItemSet({ - { start_item, set({ rules::END_OF_INPUT() }) }, + { + ParseItem(rules::START(), 0, 0, -2), + LookaheadSet({ rules::END_OF_INPUT() }), + }, })); while (!item_sets_to_process.empty()) { auto pair = item_sets_to_process.back(); - ParseItemSet &item_set = pair.first; - ParseStateId &state_id = pair.second; + ParseItemSet item_set = std::move(pair.first); + ParseStateId state_id = pair.second; item_sets_to_process.pop_back(); add_reduce_actions(item_set, state_id); @@ -127,7 +129,7 @@ class ParseTableBuilder { void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (const auto &pair : item_set) { const ParseItem &item = pair.first; - const set &lookahead_symbols = pair.second; + const auto &lookahead_symbols = pair.second; CompletionStatus completion_status = get_completion_status(item); if (completion_status.is_done) { @@ -139,7 +141,7 @@ class ParseTableBuilder { completion_status.associativity, item.production_index); - for (const auto &lookahead_sym : lookahead_symbols) + for (const auto &lookahead_sym : *lookahead_symbols.entries) add_action(state_id, lookahead_sym, action, item_set); } } diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc index e6daba1e..04d52861 100644 --- a/src/compiler/build_tables/item_set_closure.cc +++ b/src/compiler/build_tables/item_set_closure.cc @@ -11,50 +11,58 @@ namespace tree_sitter { namespace build_tables { -using std::set; using std::vector; using std::pair; +using std::shared_ptr; +using std::make_shared; using rules::Symbol; -ParseItemSet item_set_closure(const ParseItemSet &input_item_set, const SyntaxGrammar &grammar) { +ParseItemSet item_set_closure(const ParseItemSet &input_item_set, + const SyntaxGrammar &grammar) { ParseItemSet result; - vector>> items_to_process(input_item_set.begin(), - input_item_set.end()); + // An item set's closure is defined recursively. Use an explicit stack to + // store the recursively-added items. + vector> items_to_process(input_item_set.begin(), + input_item_set.end()); while (!items_to_process.empty()) { ParseItem item = items_to_process.back().first; - set new_lookahead_symbols = items_to_process.back().second; + LookaheadSet lookahead_symbols = items_to_process.back().second; items_to_process.pop_back(); - set &lookahead_symbols = result[item]; - size_t previous_size = lookahead_symbols.size(); - lookahead_symbols.insert(new_lookahead_symbols.begin(), - new_lookahead_symbols.end()); - if (lookahead_symbols.size() == previous_size) + // Add the parse-item and lookahead symbols to the item set. + // If they were already present, skip to the next item. + if (!result[item].insert_all(lookahead_symbols)) continue; + // If the item is at the end of its production, skip to the next item. const Production &item_production = grammar.productions(item.lhs())[item.production_index]; - if (item.step_index == item_production.size()) continue; - Symbol symbol = item_production[item.step_index].symbol; - - if (symbol.is_token || symbol.is_built_in()) + // If the next symbol in the production is not a non-terminal, skip to the + // next item. + Symbol next_symbol = item_production[item.step_index].symbol; + if (next_symbol.is_token || next_symbol.is_built_in()) continue; - set next_lookahead_symbols; - unsigned int next_step = item.step_index + 1; + // If the next symbol is the last symbol in the item's production, then the + // lookahead symbols for the new items are the same as for the current item. + // Otherwise, compute the FOLLOW-SET of the symbol in this production. This + // is defined recursively as well, so use another queue to store the + // recursively-added follow symbols. + LookaheadSet next_lookahead_symbols; + size_t next_step = item.step_index + 1; if (next_step == item_production.size()) { next_lookahead_symbols = lookahead_symbols; } else { vector symbols_to_process({ item_production[next_step].symbol }); - while (!symbols_to_process.empty()) { Symbol following_symbol = symbols_to_process.back(); symbols_to_process.pop_back(); - if (!next_lookahead_symbols.insert(following_symbol).second) + + if (!next_lookahead_symbols.insert(following_symbol)) continue; for (const auto &production : grammar.productions(following_symbol)) @@ -63,12 +71,14 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set, const SyntaxGr } } + // Add each of the next symbol's productions to be processed recursively. size_t i = 0; - for (const Production &production : grammar.productions(symbol)) { + for (const Production &production : grammar.productions(next_symbol)) { if (!production.empty()) - items_to_process.push_back( - { ParseItem(symbol, i, 0, production[0].rule_id), - next_lookahead_symbols }); + items_to_process.push_back({ + ParseItem(next_symbol, i, 0, production[0].rule_id), + next_lookahead_symbols, + }); i++; } } diff --git a/src/compiler/build_tables/item_set_transitions.cc b/src/compiler/build_tables/item_set_transitions.cc index 9b54da1c..273b4419 100644 --- a/src/compiler/build_tables/item_set_transitions.cc +++ b/src/compiler/build_tables/item_set_transitions.cc @@ -11,19 +11,16 @@ namespace tree_sitter { namespace build_tables { using std::map; -using std::set; -using std::vector; using rules::CharacterSet; using rules::Symbol; map sym_transitions(const ParseItemSet &input_item_set, const SyntaxGrammar &grammar) { - ParseItemSet item_set(item_set_closure(input_item_set, grammar)); map result; for (const auto &pair : item_set) { const ParseItem &item = pair.first; - const set &lookahead_symbols = pair.second; + const LookaheadSet &lookahead_symbols = pair.second; const Production &production = grammar.productions(item.lhs())[item.production_index]; if (item.step_index == production.size()) @@ -34,8 +31,7 @@ map sym_transitions(const ParseItemSet &input_item_set, int rule_id = step < production.size() ? production[step].rule_id : 0; ParseItem new_item(item.lhs(), item.production_index, step, rule_id); - result[symbol][new_item].insert(lookahead_symbols.begin(), - lookahead_symbols.end()); + result[symbol][new_item] = lookahead_symbols; } return result; diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc new file mode 100644 index 00000000..aba3a5fb --- /dev/null +++ b/src/compiler/build_tables/lookahead_set.cc @@ -0,0 +1,41 @@ +#include "compiler/build_tables/lookahead_set.h" +#include +#include +#include "compiler/rules/symbol.h" + +namespace tree_sitter { +namespace build_tables { + +using std::set; +using std::make_shared; +using rules::Symbol; + +LookaheadSet::LookaheadSet() : entries(nullptr) {} + +LookaheadSet::LookaheadSet(const set &symbols) + : entries(make_shared>(symbols)) {} + +bool LookaheadSet::empty() const { + return !entries.get() || entries->empty(); +} + +bool LookaheadSet::operator==(const LookaheadSet &other) const { + return *entries == *other.entries; +} + +bool LookaheadSet::insert_all(const LookaheadSet &other) { + if (!entries.get()) + entries = make_shared>(); + size_t previous_size = entries->size(); + entries->insert(other.entries->begin(), other.entries->end()); + return entries->size() > previous_size; +} + +bool LookaheadSet::insert(const Symbol &symbol) { + if (!entries.get()) + entries = make_shared>(); + return entries->insert(symbol).second; +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h new file mode 100644 index 00000000..ff2f25fc --- /dev/null +++ b/src/compiler/build_tables/lookahead_set.h @@ -0,0 +1,27 @@ +#ifndef COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_ +#define COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_ + +#include +#include +#include "compiler/rules/symbol.h" + +namespace tree_sitter { +namespace build_tables { + +class LookaheadSet { + public: + LookaheadSet(); + LookaheadSet(const std::set &); + + bool empty() const; + bool operator==(const LookaheadSet &) const; + bool insert_all(const LookaheadSet &); + bool insert(const rules::Symbol &); + + std::shared_ptr> entries; +}; + +} // namespace build_tables +} // namespace tree_sitter + +#endif // COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_ diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 785d9642..5b4c1767 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -8,7 +8,6 @@ namespace build_tables { using std::string; using std::to_string; -using std::ostream; using rules::Symbol; ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index, diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index fdbadea3..93d1c5c5 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -1,10 +1,9 @@ #ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_H_ #define COMPILER_BUILD_TABLES_PARSE_ITEM_H_ -#include #include -#include #include "compiler/build_tables/item.h" +#include "compiler/build_tables/lookahead_set.h" #include "compiler/rules/symbol.h" namespace tree_sitter { @@ -13,6 +12,7 @@ namespace build_tables { class ParseItem { public: ParseItem(const rules::Symbol &, unsigned int, unsigned int, int); + bool operator==(const ParseItem &other) const; bool operator<(const ParseItem &other) const; rules::Symbol lhs() const; @@ -23,7 +23,7 @@ class ParseItem { int rule_id; }; -typedef std::map> ParseItemSet; +typedef std::map ParseItemSet; } // namespace build_tables } // namespace tree_sitter @@ -44,8 +44,8 @@ struct hash { size_t result = hash()(set.size()); for (auto &pair : set) { result ^= hash()(pair.first); - result ^= hash()(pair.second.size()); - for (auto &symbol : pair.second) + result ^= hash()(pair.second.entries->size()); + for (auto &symbol : *pair.second.entries) result ^= hash()(symbol); } return result;