From a3006bc2b58c361295c9786186a38cc782c5b080 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 12 Jul 2017 16:02:01 -0700 Subject: [PATCH] Represent LookaheadSet using vectors of bool --- .../build_tables/build_parse_table.cc | 12 +-- src/compiler/build_tables/lookahead_set.cc | 98 +++++++++++++++---- src/compiler/build_tables/lookahead_set.h | 37 ++++++- src/compiler/build_tables/parse_item.cc | 7 +- test/helpers/stream_methods.cc | 10 +- 5 files changed, 128 insertions(+), 36 deletions(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 872177bf..f61e1271 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -196,7 +196,7 @@ class ParseTableBuilder { ParseAction::Reduce(item.lhs(), item.step_index, *item.production); int precedence = item.precedence(); - for (Symbol lookahead : *lookahead_symbols.entries) { + lookahead_symbols.for_each([&](Symbol lookahead) { ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead]; // Only add the highest-precedence Reduce actions to the parse table. @@ -223,7 +223,7 @@ class ParseTableBuilder { } } } - } + }); // If the item is unfinished, create a new item by advancing one symbol. // Add that new item to a successor item set. 
@@ -694,15 +694,15 @@ class ParseTableBuilder { const LookaheadSet &right_tokens = item_set_builder.get_first_set(symbol); if (!left_tokens.empty() && !right_tokens.empty()) { - for (const Symbol &left_symbol : *left_tokens.entries) { + left_tokens.for_each([&](Symbol left_symbol) { if (left_symbol.is_terminal() && !left_symbol.is_built_in()) { - for (const Symbol &right_symbol : *right_tokens.entries) { + right_tokens.for_each([&](Symbol right_symbol) { if (right_symbol.is_terminal() && !right_symbol.is_built_in()) { following_terminals_by_terminal_index[left_symbol.index].insert(right_symbol.index); } - } + }); } - } + }); } } diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index b9604c24..443ba0cd 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -6,41 +6,103 @@ namespace tree_sitter { namespace build_tables { -using std::set; -using std::make_shared; +using std::vector; using rules::Symbol; -LookaheadSet::LookaheadSet() : entries(nullptr) {} +LookaheadSet::LookaheadSet() {} -LookaheadSet::LookaheadSet(const set<Symbol> &symbols) - : entries(make_shared<set<Symbol>>(symbols)) {} +LookaheadSet::LookaheadSet(const vector<Symbol> &symbols) { + for (auto symbol : symbols) insert(symbol); +} bool LookaheadSet::empty() const { - return !entries.get() || entries->empty(); + return terminal_bits.empty() && external_bits.empty() && !eof; } bool LookaheadSet::operator==(const LookaheadSet &other) const { - return *entries == *other.entries; + return + eof == other.eof && + external_bits == other.external_bits && + terminal_bits == other.terminal_bits; } bool LookaheadSet::contains(const Symbol &symbol) const { - return entries->find(symbol) != entries->end(); + if (symbol == rules::END_OF_INPUT()) return eof; + auto &bits = symbol.is_external() ? 
external_bits : terminal_bits; + return bits.size() > symbol.index && bits[symbol.index]; +} + +size_t LookaheadSet::size() const { + size_t result = 0; + for (bool bit : external_bits) if (bit) result++; + for (bool bit : terminal_bits) if (bit) result++; + if (eof) result++; + return result; } bool LookaheadSet::insert_all(const LookaheadSet &other) { - if (!other.entries.get()) - return false; - if (!entries.get()) - entries = make_shared<set<Symbol>>(); - size_t previous_size = entries->size(); - entries->insert(other.entries->begin(), other.entries->end()); - return entries->size() > previous_size; + bool result = false; + + if (other.eof) { + if (!eof) { + eof = true; + result = true; + } + } + + if (other.external_bits.size() > external_bits.size()) { + external_bits.resize(other.external_bits.size()); + } + + auto iter = external_bits.begin(); + auto other_iter = other.external_bits.begin(); + auto other_end = other.external_bits.end(); + while (other_iter != other_end) { + if (*other_iter && !*iter) { + result = true; + *iter = true; + } + ++iter; + ++other_iter; + } + + if (other.terminal_bits.size() > terminal_bits.size()) { + terminal_bits.resize(other.terminal_bits.size()); + } + + iter = terminal_bits.begin(); + other_iter = other.terminal_bits.begin(); + other_end = other.terminal_bits.end(); + while (other_iter != other_end) { + if (*other_iter && !*iter) { + result = true; + *iter = true; + } + ++iter; + ++other_iter; + } + + return result; } bool LookaheadSet::insert(const Symbol &symbol) { - if (!entries.get()) - entries = make_shared<set<Symbol>>(); - return entries->insert(symbol).second; + if (symbol == rules::END_OF_INPUT()) { + if (!eof) { + eof = true; + return true; + } + return false; + } + + auto &bits = symbol.is_external() ? 
external_bits : terminal_bits; + if (bits.size() <= symbol.index) { + bits.resize(symbol.index + 1); + } + if (!bits[symbol.index]) { + bits[symbol.index] = true; + return true; + } + return false; } } // namespace build_tables diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h index 74cd63e2..d0aa9ee7 100644 --- a/src/compiler/build_tables/lookahead_set.h +++ b/src/compiler/build_tables/lookahead_set.h @@ -1,25 +1,54 @@ #ifndef COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_ #define COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_ -#include <set> -#include <memory> +#include <vector> #include "compiler/rule.h" namespace tree_sitter { namespace build_tables { class LookaheadSet { + std::vector<bool> terminal_bits; + std::vector<bool> external_bits; + bool eof = false; + public: LookaheadSet(); - explicit LookaheadSet(const std::set<rules::Symbol> &); + explicit LookaheadSet(const std::vector<rules::Symbol> &); bool empty() const; + size_t size() const; bool operator==(const LookaheadSet &) const; bool contains(const rules::Symbol &) const; bool insert_all(const LookaheadSet &); bool insert(const rules::Symbol &); - std::shared_ptr<std::set<rules::Symbol>> entries; + template <typename Callback> + void for_each(const Callback &callback) const { + for (auto begin = external_bits.begin(), + end = external_bits.end(), + iter = begin; + iter != end; + ++iter) { + if (*iter) { + callback(rules::Symbol::external(iter - begin)); + } + } + + if (eof) { + callback(rules::END_OF_INPUT()); + } + + for (auto begin = terminal_bits.begin(), + end = terminal_bits.end(), + iter = begin; + iter != end; + ++iter) { + if (*iter) { + callback(rules::Symbol::terminal(iter - begin)); + } + } + } }; } // namespace build_tables diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 2d4257b6..55db646c 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -178,9 +178,10 @@ size_t hash<ParseItemSet>::operator()(const ParseItemSet &item_set) const { const auto &lookahead_set = pair.second; 
hash_combine(&result, item); - hash_combine(&result, lookahead_set.entries->size()); - for (auto index : *pair.second.entries) - hash_combine(&result, index); + hash_combine(&result, lookahead_set.size()); + lookahead_set.for_each([&result](Symbol symbol) { + hash_combine(&result, symbol); + }); } return result; } diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc index c8c4eb30..9b13303c 100644 --- a/test/helpers/stream_methods.cc +++ b/test/helpers/stream_methods.cc @@ -202,11 +202,11 @@ ostream &operator<<(ostream &stream, const ParseItemSet &item_set) { } ostream &operator<<(ostream &stream, const LookaheadSet &lookaheads) { - if (lookaheads.entries.get()) { - return stream << *lookaheads.entries; - } else { - return stream << "()"; - } + stream << "(LookaheadSet"; + lookaheads.for_each([&stream](Symbol symbol) { + stream << " " << symbol; + }); + return stream << ")"; } ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) {