From 65bf1389e1e63f04f8f4b47753d79580130a0beb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2017 21:17:27 -0700 Subject: [PATCH 1/4] Add a way to automatically inline rules --- src/compiler/build_tables/parse_item.cc | 4 +- .../build_tables/parse_item_set_builder.cc | 103 ++++++++++++++++-- .../build_tables/parse_item_set_builder.h | 9 ++ src/compiler/grammar.h | 1 + src/compiler/parse_grammar.cc | 20 +++- src/compiler/parse_grammar.h | 1 + .../prepare_grammar/expand_repeats.cc | 1 + .../prepare_grammar/extract_tokens.cc | 4 + .../prepare_grammar/flatten_grammar.cc | 1 + .../prepare_grammar/initial_syntax_grammar.h | 1 + .../prepare_grammar/intern_symbols.cc | 7 ++ .../prepare_grammar/interned_grammar.h | 1 + src/compiler/syntax_grammar.h | 1 + .../test_grammars/inline_rules/corpus.txt | 11 ++ .../test_grammars/inline_rules/grammar.json | 68 ++++++++++++ 15 files changed, 219 insertions(+), 14 deletions(-) create mode 100644 test/fixtures/test_grammars/inline_rules/corpus.txt create mode 100644 test/fixtures/test_grammars/inline_rules/grammar.json diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 0ef56e79..2d4257b6 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -156,14 +156,14 @@ struct hash { if (item.is_done()) { if (!item.production->empty()) { hash_combine(&result, item.production->back().precedence); - hash_combine(&result, item.production->back().associativity); + hash_combine(&result, item.production->back().associativity); } } else { for (size_t i = 0, n = item.production->size(); i < n; i++) { auto &step = item.production->at(i); hash_combine(&result, step.symbol); hash_combine(&result, step.precedence); - hash_combine(&result, step.associativity); + hash_combine(&result, step.associativity); } } return result; diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index 236be6f0..3b119157 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -1,4 +1,5 @@ #include "compiler/build_tables/parse_item_set_builder.h" +#include #include #include #include @@ -11,8 +12,10 @@ namespace tree_sitter { namespace build_tables { +using std::move; using std::vector; using std::set; +using std::find; using std::get; using std::pair; using std::tuple; @@ -21,8 +24,36 @@ using std::make_tuple; using rules::Symbol; using rules::NONE; +static vector inline_production(const ParseItem &item, const SyntaxGrammar &grammar) { + vector result; + for (const Production &production_to_insert : grammar.variables[item.next_symbol().index].productions) { + auto begin = item.production->steps.begin(); + auto end = item.production->steps.end(); + auto step = begin + item.step_index; + + Production production{{begin, step}, item.production->dynamic_precedence}; + production.steps.insert( + production.steps.end(), + production_to_insert.steps.begin(), + production_to_insert.steps.end() + ); + production.back().precedence = item.precedence(); + production.back().associativity = item.associativity(); + production.steps.insert( + production.steps.end(), + step + 1, + end + ); + + if (find(result.begin(), result.end(), production) == result.end()) { + result.push_back(move(production)); + } + } + return result; +} + ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, - const LexicalGrammar &lexical_grammar) { + const LexicalGrammar &lexical_grammar) : grammar{grammar} { vector symbols_to_process; set processed_non_terminals; @@ -145,24 +176,56 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, for (auto &pair : cached_lookaheads_by_non_terminal) { for (const Production &production : grammar.variables[pair.first].productions) { - component_cache[i].push_back({ - ParseItem(Symbol::non_terminal(pair.first), production, 0), - pair.second.first, - pair.second.second - }); + Symbol lhs = Symbol::non_terminal(pair.first); + ParseItem item(lhs, production, 0); + + if (grammar.variables_to_inline.count(item.next_symbol())) { + vector &inlined_productions = inlined_productions_by_original_production[item]; + if (inlined_productions.empty()) { + inlined_productions = inline_production(item, grammar); + } + + for (const Production &inlined_production : inlined_productions) { + ParseItemSetComponent component{ + ParseItem(lhs, inlined_production, 0), + pair.second.first, + pair.second.second + }; + + if (find(component_cache[i].begin(), component_cache[i].end(), component) == component_cache[i].end()) { + component_cache[i].push_back(component); + } + } + } else if (!grammar.variables_to_inline.count(lhs)) { + ParseItemSetComponent component{ + ParseItem(lhs, production, 0), + pair.second.first, + pair.second.second + }; + + if (find(component_cache[i].begin(), component_cache[i].end(), component) == component_cache[i].end()) { + component_cache[i].push_back(component); + } + } } } } } void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { - for (const auto &pair : item_set->entries) { - const ParseItem &item = pair.first; - const LookaheadSet &lookaheads = pair.second; - if (item.lhs() != rules::START() && item.step_index == 0) continue; + for (auto iter = item_set->entries.begin(), end = item_set->entries.end(); iter != end;) { + const ParseItem &item = iter->first; + const LookaheadSet &lookaheads = iter->second; + if (item.lhs() != rules::START() && item.step_index == 0) { + ++iter; + continue; + } const Symbol &next_symbol = item.next_symbol(); - if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) continue; + if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) { + ++iter; + continue; + } LookaheadSet next_lookaheads; size_t next_step = item.step_index + 1; @@ -178,6 +241,24 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { current_lookaheads.insert_all(component.lookaheads); if (component.propagates_lookaheads) current_lookaheads.insert_all(next_lookaheads); } + + if (grammar.variables_to_inline.count(next_symbol)) { + vector &inlined_productions = inlined_productions_by_original_production[item]; + if (inlined_productions.empty()) { + inlined_productions = inline_production(item, grammar); + } + + for (const Production &inlined_production : inlined_productions) { + item_set->entries.insert({ + ParseItem(item.lhs(), inlined_production, item.step_index), + lookaheads + }); + } + + iter = item_set->entries.erase(iter); + } else { + ++iter; + } } } diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index 5357a385..c180f38b 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -4,6 +4,7 @@ #include "compiler/build_tables/parse_item.h" #include "compiler/rule.h" #include +#include namespace tree_sitter { @@ -17,11 +18,19 @@ class ParseItemSetBuilder { ParseItem item; LookaheadSet lookaheads; bool propagates_lookaheads; + + inline bool operator==(const ParseItemSetComponent &other) { + return item == other.item && + lookaheads == other.lookaheads && + propagates_lookaheads == other.propagates_lookaheads; + } }; + const SyntaxGrammar &grammar; std::map first_sets; std::map last_sets; std::map> component_cache; + std::map> inlined_productions_by_original_production; public: ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &); diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h index 6d16524b..54fe69e9 100644 --- a/src/compiler/grammar.h +++ b/src/compiler/grammar.h @@ -31,6 +31,7 @@ struct InputGrammar { std::vector extra_tokens; std::vector> expected_conflicts; std::vector external_tokens; + std::unordered_set variables_to_inline; }; } // namespace tree_sitter diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc index 7589904c..43ceed51 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_grammar.cc @@ -205,7 +205,7 @@ ParseGrammarResult parse_grammar(const string &input) { string error_message; string name; InputGrammar grammar; - json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json; + json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json, inline_rules_json; json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; char parse_error[json_error_max]; @@ -299,6 +299,24 @@ ParseGrammarResult parse_grammar(const string &input) { } } + inline_rules_json = grammar_json->operator[]("inline"); + if (inline_rules_json.type != json_none) { + if (inline_rules_json.type != json_array) { + error_message = "Inline rules must be an array"; + goto error; + } + + for (size_t i = 0, length = inline_rules_json.u.array.length; i < length; i++) { + json_value *inline_rule_json = inline_rules_json.u.array.values[i]; + if (inline_rule_json->type != json_string) { + error_message = "Inline rules must be an array of rule names"; + goto error; + } + + grammar.variables_to_inline.insert(rules::NamedSymbol{string(inline_rule_json->u.string.ptr)}); + } + } + external_tokens_json = grammar_json->operator[]("externals"); if (external_tokens_json.type != json_none) { if (external_tokens_json.type != json_array) { diff --git a/src/compiler/parse_grammar.h b/src/compiler/parse_grammar.h index 04e7672b..c24cd9ca 100644 --- a/src/compiler/parse_grammar.h +++ b/src/compiler/parse_grammar.h @@ -2,6 +2,7 @@ #define COMPILER_GRAMMAR_JSON_H_ #include +#include #include "tree_sitter/compiler.h" #include "compiler/grammar.h" diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc index 39b2075d..c4a25634 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cc +++ b/src/compiler/prepare_grammar/expand_repeats.cc @@ -94,6 +94,7 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) { result.extra_tokens = grammar.extra_tokens; result.expected_conflicts = grammar.expected_conflicts; result.external_tokens = grammar.external_tokens; + result.variables_to_inline = grammar.variables_to_inline; ExpandRepeats expander(result.variables.size()); for (auto &variable : result.variables) { diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 6893cde4..73d3d866 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -235,6 +235,10 @@ tuple extract_tokens( syntax_grammar.expected_conflicts.insert(new_conflict_set); } + for (const Symbol &symbol : grammar.variables_to_inline) { + syntax_grammar.variables_to_inline.insert(symbol_replacer.replace_symbol(symbol)); + } + // The grammar's extra tokens can be either token rules or symbols // pointing to token rules. If they are symbols, then they'll be handled by // the parser; add them to the syntax grammar's extra tokens. If they diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index 86c76a86..e41e8398 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -111,6 +111,7 @@ SyntaxVariable flatten_rule(const Variable &variable) { pair flatten_grammar(const InitialSyntaxGrammar &grammar) { SyntaxGrammar result; result.external_tokens = grammar.external_tokens; + result.variables_to_inline = grammar.variables_to_inline; for (const auto &expected_conflict : grammar.expected_conflicts) { result.expected_conflicts.insert({ diff --git a/src/compiler/prepare_grammar/initial_syntax_grammar.h b/src/compiler/prepare_grammar/initial_syntax_grammar.h index 55eb2b7e..881c6396 100644 --- a/src/compiler/prepare_grammar/initial_syntax_grammar.h +++ b/src/compiler/prepare_grammar/initial_syntax_grammar.h @@ -16,6 +16,7 @@ struct InitialSyntaxGrammar { std::set extra_tokens; std::set> expected_conflicts; std::vector external_tokens; + std::set variables_to_inline; }; } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index deaeb122..7bb2a80b 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -142,6 +142,13 @@ pair intern_symbols(const InputGrammar &grammar) result.expected_conflicts.insert(entry); } + for (auto &named_symbol : grammar.variables_to_inline) { + auto symbol = interner.intern_symbol(named_symbol); + if (symbol != rules::NONE()) { + result.variables_to_inline.insert(symbol); + } + } + return {result, CompileError::none()}; } diff --git a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h index 99987f42..c96dfa66 100644 --- a/src/compiler/prepare_grammar/interned_grammar.h +++ b/src/compiler/prepare_grammar/interned_grammar.h @@ -15,6 +15,7 @@ struct InternedGrammar { std::vector extra_tokens; std::set> expected_conflicts; std::vector external_tokens; + std::set variables_to_inline; }; } // namespace prepare_grammar diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h index 55e55568..3c3d3b66 100644 --- a/src/compiler/syntax_grammar.h +++ b/src/compiler/syntax_grammar.h @@ -74,6 +74,7 @@ struct SyntaxGrammar { std::set extra_tokens; std::set expected_conflicts; std::vector external_tokens; + std::set variables_to_inline; }; } // namespace tree_sitter diff --git a/test/fixtures/test_grammars/inline_rules/corpus.txt b/test/fixtures/test_grammars/inline_rules/corpus.txt new file mode 100644 index 00000000..af5e496e --- /dev/null +++ b/test/fixtures/test_grammars/inline_rules/corpus.txt @@ -0,0 +1,11 @@ +================================== +Expressions +================================== + +1 + 2 * 3; + +--- + +(statement (sum + (number) + (product (number) (number)))) diff --git a/test/fixtures/test_grammars/inline_rules/grammar.json b/test/fixtures/test_grammars/inline_rules/grammar.json new file mode 100644 index 00000000..4438004f --- /dev/null +++ b/test/fixtures/test_grammars/inline_rules/grammar.json @@ -0,0 +1,68 @@ +{ + "name": "inline_rules", + + "extras": [ + {"type": "PATTERN", "value": "\\s"} + ], + + "inline": [ + "expression" + ], + + "rules": { + "statement": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": ";"} + ] + }, + + "expression": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "sum"}, + {"type": "SYMBOL", "name": "product"}, + {"type": "SYMBOL", "name": "number"}, + {"type": "SYMBOL", "name": "parenthesized_expression"} + ] + }, + + "parenthesized_expression": { + "type": "SEQ", + "members": [ + {"type": "STRING", "value": "("}, + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": ")"} + ] + }, + + "sum": { + "type": "PREC_LEFT", + "value": 0, + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "+"}, + {"type": "SYMBOL", "name": "expression"} + ] + } + }, + + "product": { + "type": "PREC_LEFT", + "value": 2, + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "*"}, + {"type": "SYMBOL", "name": "expression"} + ] + } + }, + + "number": {"type": "PATTERN", "value": "\\d+"} + } +} From 5c8f7c035e7cfa8b65f119114a3a0f181ecf418a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 12 Jul 2017 09:42:56 -0700 Subject: [PATCH 2/4] Add stream operator for ParseItemSet --- test/helpers/stream_methods.cc | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc index f88ccaee..c8c4eb30 100644 --- a/test/helpers/stream_methods.cc +++ b/test/helpers/stream_methods.cc @@ -180,6 +180,35 @@ ostream &operator<<(ostream &stream, const LexItemSet &item_set) { return stream << item_set.entries; } +ostream &operator<<(ostream &stream, const ParseItem &item) { + stream << "(ParseItem " << item.lhs() << " ->"; + for (size_t i = 0; i < item.production->size(); i++) { + if (i == item.step_index) { + stream << " •"; + } + stream << " " << item.production->at(i).symbol << " " << item.production->at(i).precedence << + " " << (int)item.production->at(i).associativity; + } + + if (item.step_index == item.production->size()) { + stream << " • "; + } + + return stream << ")"; +} + +ostream &operator<<(ostream &stream, const ParseItemSet &item_set) { + return stream << item_set.entries; +} + +ostream &operator<<(ostream &stream, const LookaheadSet &lookaheads) { + if (lookaheads.entries.get()) { + return stream << *lookaheads.entries; + } else { + return stream << "()"; + } +} + ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) { return stream << "(Transition " << transition.destination << " prec:" << transition.precedence << ")"; } From e4f57d6fee2ce6c7cc68a1502d1e181ee8c0179f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 12 Jul 2017 10:12:42 -0700 Subject: [PATCH 3/4] Test more cases in fixture grammar with inline rules --- .../test_grammars/inline_rules/corpus.txt | 15 ++++++++++++--- .../test_grammars/inline_rules/grammar.json | 7 +++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/test/fixtures/test_grammars/inline_rules/corpus.txt b/test/fixtures/test_grammars/inline_rules/corpus.txt index af5e496e..df37566f 100644 --- a/test/fixtures/test_grammars/inline_rules/corpus.txt +++ b/test/fixtures/test_grammars/inline_rules/corpus.txt @@ -3,9 +3,18 @@ Expressions ================================== 1 + 2 * 3; +4 * 5 + 6; +7 * (8 + 9); --- -(statement (sum - (number) - (product (number) (number)))) +(program + (statement (sum + (number) + (product (number) (number)))) + (statement (sum + (product (number) (number)) + (number))) + (statement (product + (number) + (parenthesized_expression (sum (number) (number)))))) diff --git a/test/fixtures/test_grammars/inline_rules/grammar.json b/test/fixtures/test_grammars/inline_rules/grammar.json index 4438004f..7825314b 100644 --- a/test/fixtures/test_grammars/inline_rules/grammar.json +++ b/test/fixtures/test_grammars/inline_rules/grammar.json @@ -10,6 +10,13 @@ ], "rules": { + "program": { + "type": "REPEAT1", + "content": { + "type": "SYMBOL", + "name": "statement" + } + }, "statement": { "type": "SEQ", "members": [ From a3006bc2b58c361295c9786186a38cc782c5b080 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 12 Jul 2017 16:02:01 -0700 Subject: [PATCH 4/4] Represent LookaheadSet using vectors of bool --- .../build_tables/build_parse_table.cc | 12 +-- src/compiler/build_tables/lookahead_set.cc | 98 +++++++++++++++---- src/compiler/build_tables/lookahead_set.h | 37 ++++++- src/compiler/build_tables/parse_item.cc | 7 +- test/helpers/stream_methods.cc | 10 +- 5 files changed, 128 insertions(+), 36 deletions(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 872177bf..f61e1271 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -196,7 +196,7 @@ class ParseTableBuilder { ParseAction::Reduce(item.lhs(), item.step_index, *item.production); int precedence = item.precedence(); - for (Symbol lookahead : *lookahead_symbols.entries) { + lookahead_symbols.for_each([&](Symbol lookahead) { ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead]; // Only add the highest-precedence Reduce actions to the parse table. @@ -223,7 +223,7 @@ class ParseTableBuilder { } } } - } + }); // If the item is unfinished, create a new item by advancing one symbol. // Add that new item to a successor item set. @@ -694,15 +694,15 @@ class ParseTableBuilder { const LookaheadSet &right_tokens = item_set_builder.get_first_set(symbol); if (!left_tokens.empty() && !right_tokens.empty()) { - for (const Symbol &left_symbol : *left_tokens.entries) { + left_tokens.for_each([&](Symbol left_symbol) { if (left_symbol.is_terminal() && !left_symbol.is_built_in()) { - for (const Symbol &right_symbol : *right_tokens.entries) { + right_tokens.for_each([&](Symbol right_symbol) { if (right_symbol.is_terminal() && !right_symbol.is_built_in()) { following_terminals_by_terminal_index[left_symbol.index].insert(right_symbol.index); } - } + }); } - } + }); } } diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index b9604c24..443ba0cd 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -6,41 +6,103 @@ namespace tree_sitter { namespace build_tables { -using std::set; -using std::make_shared; +using std::vector; using rules::Symbol; -LookaheadSet::LookaheadSet() : entries(nullptr) {} +LookaheadSet::LookaheadSet() {} -LookaheadSet::LookaheadSet(const set &symbols) - : entries(make_shared>(symbols)) {} +LookaheadSet::LookaheadSet(const vector &symbols) { + for (auto symbol : symbols) insert(symbol); +} bool LookaheadSet::empty() const { - return !entries.get() || entries->empty(); + return terminal_bits.empty() && external_bits.empty() && !eof; } bool LookaheadSet::operator==(const LookaheadSet &other) const { - return *entries == *other.entries; + return + eof == other.eof && + external_bits == other.external_bits && + terminal_bits == other.terminal_bits; } bool LookaheadSet::contains(const Symbol &symbol) const { - return entries->find(symbol) != entries->end(); + if (symbol == rules::END_OF_INPUT()) return eof; + auto &bits = symbol.is_external() ? external_bits : terminal_bits; + return bits.size() > symbol.index && bits[symbol.index]; +} + +size_t LookaheadSet::size() const { + size_t result = 0; + for (bool bit : external_bits) if (bit) result++; + for (bool bit : terminal_bits) if (bit) result++; + if (eof) result++; + return result; } bool LookaheadSet::insert_all(const LookaheadSet &other) { - if (!other.entries.get()) - return false; - if (!entries.get()) - entries = make_shared>(); - size_t previous_size = entries->size(); - entries->insert(other.entries->begin(), other.entries->end()); - return entries->size() > previous_size; + bool result = false; + + if (other.eof) { + if (!eof) { + eof = true; + result = true; + } + } + + if (other.external_bits.size() > external_bits.size()) { + external_bits.resize(other.external_bits.size()); + } + + auto iter = external_bits.begin(); + auto other_iter = other.external_bits.begin(); + auto other_end = other.external_bits.end(); + while (other_iter != other_end) { + if (*other_iter && !*iter) { + result = true; + *iter = true; + } + ++iter; + ++other_iter; + } + + if (other.terminal_bits.size() > terminal_bits.size()) { + terminal_bits.resize(other.terminal_bits.size()); + } + + iter = terminal_bits.begin(); + other_iter = other.terminal_bits.begin(); + other_end = other.terminal_bits.end(); + while (other_iter != other_end) { + if (*other_iter && !*iter) { + result = true; + *iter = true; + } + ++iter; + ++other_iter; + } + + return result; } bool LookaheadSet::insert(const Symbol &symbol) { - if (!entries.get()) - entries = make_shared>(); - return entries->insert(symbol).second; + if (symbol == rules::END_OF_INPUT()) { + if (!eof) { + eof = true; + return true; + } + return false; + } + + auto &bits = symbol.is_external() ? external_bits : terminal_bits; + if (bits.size() <= symbol.index) { + bits.resize(symbol.index + 1); + } + if (!bits[symbol.index]) { + bits[symbol.index] = true; + return true; + } + return false; } } // namespace build_tables diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h index 74cd63e2..d0aa9ee7 100644 --- a/src/compiler/build_tables/lookahead_set.h +++ b/src/compiler/build_tables/lookahead_set.h @@ -1,25 +1,54 @@ #ifndef COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_ #define COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_ -#include -#include +#include #include "compiler/rule.h" namespace tree_sitter { namespace build_tables { class LookaheadSet { + std::vector terminal_bits; + std::vector external_bits; + bool eof = false; + public: LookaheadSet(); - explicit LookaheadSet(const std::set &); + explicit LookaheadSet(const std::vector &); bool empty() const; + size_t size() const; bool operator==(const LookaheadSet &) const; bool contains(const rules::Symbol &) const; bool insert_all(const LookaheadSet &); bool insert(const rules::Symbol &); - std::shared_ptr> entries; + template + void for_each(const Callback &callback) const { + for (auto begin = external_bits.begin(), + end = external_bits.end(), + iter = begin; + iter != end; + ++iter) { + if (*iter) { + callback(rules::Symbol::external(iter - begin)); + } + } + + if (eof) { + callback(rules::END_OF_INPUT()); + } + + for (auto begin = terminal_bits.begin(), + end = terminal_bits.end(), + iter = begin; + iter != end; + ++iter) { + if (*iter) { + callback(rules::Symbol::terminal(iter - begin)); + } + } + } }; } // namespace build_tables diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 2d4257b6..55db646c 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -178,9 +178,10 @@ size_t hash::operator()(const ParseItemSet &item_set) const { const auto &lookahead_set = pair.second; hash_combine(&result, item); - hash_combine(&result, lookahead_set.entries->size()); - for (auto index : *pair.second.entries) - hash_combine(&result, index); + hash_combine(&result, lookahead_set.size()); + lookahead_set.for_each([&result](Symbol symbol) { + hash_combine(&result, symbol); + }); } return result; } diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc index c8c4eb30..9b13303c 100644 --- a/test/helpers/stream_methods.cc +++ b/test/helpers/stream_methods.cc @@ -202,11 +202,11 @@ ostream &operator<<(ostream &stream, const ParseItemSet &item_set) { } ostream &operator<<(ostream &stream, const LookaheadSet &lookaheads) { - if (lookaheads.entries.get()) { - return stream << *lookaheads.entries; - } else { - return stream << "()"; - } + stream << "(LookaheadSet"; + lookaheads.for_each([&stream](Symbol symbol) { + stream << " " << symbol; + }); + return stream << ")"; } ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) {