diff --git a/spec/compiler/build_tables/build_parse_table_spec.cc b/spec/compiler/build_tables/build_parse_table_spec.cc index c67a9418..dd670799 100644 --- a/spec/compiler/build_tables/build_parse_table_spec.cc +++ b/spec/compiler/build_tables/build_parse_table_spec.cc @@ -15,20 +15,32 @@ describe("build_parse_table", []() { { "rule0", { - Production({ {Symbol(1), 0, 1} }, 0), - Production({ {Symbol(2), 0, 2} }, 0) + Production({ + {Symbol(1), 0, 1}, + {rules::NONE(), 0, 5} + }), + Production({ + {Symbol(2), 0, 2}, + {rules::NONE(), 0, 6} + }) } }, { "rule1", { - Production({ {Symbol(0, SymbolOptionToken), 0, 3} }, 0) + Production({ + {Symbol(0, SymbolOptionToken), 0, 3}, + {rules::NONE(), 0, 7} + }) } }, { "rule2", { - Production({ {Symbol(1, SymbolOptionToken), 0, 4} }, 0) + Production({ + {Symbol(1, SymbolOptionToken), 0, 4}, + {rules::NONE(), 0, 8} + }) } }, }, {}, { Symbol(2, SymbolOptionToken) }); diff --git a/spec/compiler/build_tables/item_set_closure_spec.cc b/spec/compiler/build_tables/item_set_closure_spec.cc index 07d9aab8..4f395c28 100644 --- a/spec/compiler/build_tables/item_set_closure_spec.cc +++ b/spec/compiler/build_tables/item_set_closure_spec.cc @@ -2,6 +2,7 @@ #include "compiler/syntax_grammar.h" #include "compiler/build_tables/item_set_closure.h" #include "compiler/build_tables/item_set_transitions.h" +#include "compiler/rules/built_in_symbols.h" using namespace build_tables; using namespace rules; @@ -15,8 +16,9 @@ describe("item_set_closure", []() { { Production({ {Symbol(1), 0, 100}, - {Symbol(11, SymbolOptionToken), 0, 101} - }, 107), + {Symbol(11, SymbolOptionToken), 0, 101}, + {rules::NONE(), 0, 107} + }), } }, { @@ -24,11 +26,13 @@ describe("item_set_closure", []() { { Production({ {Symbol(12, SymbolOptionToken), 0, 102}, - {Symbol(13, SymbolOptionToken), 0, 103} - }, 108), + {Symbol(13, SymbolOptionToken), 0, 103}, + {rules::NONE(), 0, 108} + }), Production({ {Symbol(2), 0, 104}, - }, 109) + {rules::NONE(), 0, 109} + }) } }, { @@ -36,8 +40,9 @@ describe("item_set_closure", []() { { Production({ {Symbol(14, SymbolOptionToken), 0, 105}, - {Symbol(15, SymbolOptionToken), 0, 106} - }, 110) + {Symbol(15, SymbolOptionToken), 0, 106}, + {rules::NONE(), 0, 110} + }) } }, }, {}, set()); diff --git a/spec/compiler/build_tables/item_set_transitions_spec.cc b/spec/compiler/build_tables/item_set_transitions_spec.cc index 05df4c44..64fe6d5a 100644 --- a/spec/compiler/build_tables/item_set_transitions_spec.cc +++ b/spec/compiler/build_tables/item_set_transitions_spec.cc @@ -52,14 +52,14 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() { {Symbol(13, SymbolOptionToken), 0, 103}, {Symbol(1), 0, 104}, {Symbol(14, SymbolOptionToken), 0, 105}, - }, 1) + }) }, }, { "B", { Production({ {Symbol(15, SymbolOptionToken), 0, 106}, - }, 2) + }) }, } }, {}, set()); diff --git a/spec/compiler/prepare_grammar/flatten_grammar_spec.cc b/spec/compiler/prepare_grammar/flatten_grammar_spec.cc index 99872010..988cc45b 100644 --- a/spec/compiler/prepare_grammar/flatten_grammar_spec.cc +++ b/spec/compiler/prepare_grammar/flatten_grammar_spec.cc @@ -3,6 +3,7 @@ #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/syntax_grammar.h" #include "compiler/helpers/containers.h" +#include "compiler/rules/built_in_symbols.h" START_TEST @@ -48,15 +49,15 @@ describe("flatten_grammar", []() { AssertThat( get_symbol_lists(0), Equals(vector>({ - { Symbol(1), Symbol(2), Symbol(4) }, - { Symbol(1), Symbol(3), Symbol(4) } + { Symbol(1), Symbol(2), Symbol(4), rules::NONE() }, + { Symbol(1), Symbol(3), Symbol(4), rules::NONE() } }))); AssertThat( get_symbol_lists(1), Equals(vector>({ - { Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) }, - { Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) } + { Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7), rules::NONE() }, + { Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7), rules::NONE() } }))); }); @@ -74,15 +75,15 @@ describe("flatten_grammar", []() { AssertThat( get_precedence_lists(0), Equals(vector>({ - { 0, 0, 0 }, - { 0, 0, 0 } + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 } }))); AssertThat( get_precedence_lists(1), Equals(vector>({ - { 0, 0, 50, 100, 50, 0 }, - { 0, 0, 50, 50, 0 } + { 0, 0, 50, 100, 50, 0, 0 }, + { 0, 0, 50, 50, 0, 0 } }))); }); @@ -90,7 +91,7 @@ describe("flatten_grammar", []() { SyntaxGrammar grammar = flatten_grammar(input_grammar); auto rule_id = [&](int rule_index, int production_index, int symbol_index) { - return grammar.rules[rule_index].second[production_index].rule_id_at(symbol_index); + return grammar.rules[rule_index].second[production_index][symbol_index].rule_id; }; // Rule 1: last symbol is the same for both productions. diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 8d386064..a3001b42 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -176,11 +176,12 @@ class ParseTableBuilder { } bool item_is_done(const ParseItem &item) { - return item.consumed_symbol_count == grammar.productions(item.lhs)[item.production_index].size(); + return item.consumed_symbol_count == + grammar.productions(item.lhs)[item.production_index].symbol_count(); } int item_precedence(const ParseItem &item) { - return grammar.productions(item.lhs)[item.production_index].precedence_at(item.consumed_symbol_count - 1); + return grammar.productions(item.lhs)[item.production_index][item.consumed_symbol_count - 1].precedence; } void record_conflict(const Symbol &sym, const ParseAction &left, diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc index 6f22b7d3..f2a431a9 100644 --- a/src/compiler/build_tables/item_set_closure.cc +++ b/src/compiler/build_tables/item_set_closure.cc @@ -35,18 +35,18 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) { continue; const Production &item_production = grammar.productions(item.lhs)[item.production_index]; - if (item_production.size() <= item.consumed_symbol_count) + if (item.consumed_symbol_count >= item_production.symbol_count()) continue; - Symbol symbol = item_production.symbol_at(item.consumed_symbol_count); + Symbol symbol = item_production[item.consumed_symbol_count].symbol; if (symbol.is_token() || symbol.is_built_in()) continue; set next_lookahead_symbols; - if (item.consumed_symbol_count + 1 >= item_production.size()) { + if (item.consumed_symbol_count + 1 >= item_production.symbol_count()) { next_lookahead_symbols = lookahead_symbols; } else { - vector symbols_to_process({ item_production.symbol_at(item.consumed_symbol_count + 1) }); + vector symbols_to_process({ item_production[item.consumed_symbol_count + 1].symbol }); while (!symbols_to_process.empty()) { Symbol following_symbol = symbols_to_process.back(); @@ -55,14 +55,14 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) { continue; for (const auto &production : grammar.productions(following_symbol)) - symbols_to_process.push_back(production.symbol_at(0)); + symbols_to_process.push_back(production[0].symbol); } } size_t i = 0; for (const Production &production : grammar.productions(symbol)) { items_to_process.push_back({ - ParseItem(symbol, i, production.rule_id_at(0), 0), + ParseItem(symbol, i, production[0].rule_id, 0), next_lookahead_symbols }); i++; diff --git a/src/compiler/build_tables/item_set_transitions.cc b/src/compiler/build_tables/item_set_transitions.cc index 15032028..f1f3abc9 100644 --- a/src/compiler/build_tables/item_set_transitions.cc +++ b/src/compiler/build_tables/item_set_transitions.cc @@ -23,11 +23,11 @@ map sym_transitions(const ParseItemSet &item_set, const ParseItem &item = pair.first; const set &lookahead_symbols = pair.second; const Production &production = grammar.productions(item.lhs)[item.production_index]; - if (production.size() <= item.consumed_symbol_count) + if (item.consumed_symbol_count >= production.symbol_count()) continue; - const Symbol &symbol = production.symbol_at(item.consumed_symbol_count); - int rule_id = production.rule_id_at(item.consumed_symbol_count + 1); + const Symbol &symbol = production[item.consumed_symbol_count].symbol; + int rule_id = production[item.consumed_symbol_count + 1].rule_id; ParseItem new_item(item.lhs, item.production_index, rule_id, item.consumed_symbol_count + 1); result[symbol][new_item].insert(lookahead_symbols.begin(), lookahead_symbols.end()); diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index cc368d25..1c2b82b3 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -5,6 +5,7 @@ #include "compiler/rules/seq.h" #include "compiler/rules/symbol.h" #include "compiler/rules/metadata.h" +#include "compiler/rules/built_in_symbols.h" #include #include @@ -63,16 +64,16 @@ class FlattenRule : public rules::RuleFn { Production flatten_rule(const rule_ptr &rule) { FlattenRule flattener; flattener.apply(rule); - return Production(flattener.entries, 0); + int end_precedence = flattener.entries.back().precedence; + flattener.entries.push_back({ rules::NONE(), end_precedence, 0 }); + return Production(flattener.entries); } struct ProductionSlice { vector::const_iterator start; vector::const_iterator end; - int end_precedence; bool operator==(const ProductionSlice &other) const { - if (end_precedence != other.end_precedence) return false; if (end - start != other.end - other.start) return false; for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2) if (!(iter1->symbol == iter2->symbol) || iter1->precedence != iter2->precedence) @@ -82,11 +83,10 @@ struct ProductionSlice { }; void assign_rule_ids(Production *production, vector *unique_slices) { - auto &entries = production->entries; - auto end = entries.end(); + auto end = production->entries.end(); - for (auto iter = entries.begin(); iter != end; ++iter) { - ProductionSlice slice{iter, end, 0}; + for (auto iter = production->entries.begin(); iter != end; ++iter) { + ProductionSlice slice{iter, end}; auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice); if (existing_id == unique_slices->end()) { unique_slices->push_back(slice); @@ -95,15 +95,6 @@ void assign_rule_ids(Production *production, vector *unique_sli iter->rule_id = existing_id - unique_slices->cbegin(); } } - - ProductionSlice slice{end, end, production->precedence_at(production->size() - 1)}; - auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice); - if (existing_id == unique_slices->end()) { - unique_slices->push_back(slice); - production->end_rule_id = unique_slices->size() - 1; - } else { - production->end_rule_id = existing_id - unique_slices->cbegin(); - } } SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) { @@ -126,7 +117,7 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) { if (rules.empty()) { rules.push_back({ "START", - { Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }, 0) } + { Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }) } }); } diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc index 7a648a3d..4ca355a2 100644 --- a/src/compiler/rules/built_in_symbols.cc +++ b/src/compiler/rules/built_in_symbols.cc @@ -7,6 +7,7 @@ Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); } Symbol ERROR() { return Symbol(-2, SymbolOptionToken); } Symbol START() { return Symbol(-3); } Symbol DOCUMENT() { return Symbol(-4); } +Symbol NONE() { return Symbol(-5); } } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/built_in_symbols.h b/src/compiler/rules/built_in_symbols.h index 63ad3df4..640f99db 100644 --- a/src/compiler/rules/built_in_symbols.h +++ b/src/compiler/rules/built_in_symbols.h @@ -10,6 +10,7 @@ Symbol ERROR(); Symbol START(); Symbol END_OF_INPUT(); Symbol DOCUMENT(); +Symbol NONE(); } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/syntax_grammar.cc b/src/compiler/syntax_grammar.cc index 7a237cdd..82798309 100644 --- a/src/compiler/syntax_grammar.cc +++ b/src/compiler/syntax_grammar.cc @@ -14,7 +14,7 @@ using std::vector; using std::set; static const vector START_PRODUCTIONS({ - Production({ {rules::Symbol(0), 0, -1} }, 2) + Production({ {rules::Symbol(0), 0, -1}, { rules::NONE(), 0, -2} }) }); static const vector NO_PRODUCTIONS({}); @@ -24,29 +24,14 @@ bool ProductionEntry::operator==(const ProductionEntry &other) const { rule_id == other.rule_id; } -Production::Production(const vector &entries, int last_rule_id) : - entries(entries), end_rule_id(last_rule_id) {} +Production::Production(const vector &entries) : entries(entries) {} -int Production::precedence_at(size_t index) const { - if (index >= size()) - return 0; - else - return entries[index].precedence; +size_t Production::symbol_count() const { + return entries.size() - 1; } -int Production::rule_id_at(size_t index) const { - if (index >= size()) - return end_rule_id; - else - return entries[index].rule_id; -} - -const rules::Symbol &Production::symbol_at(size_t index) const { - return entries[index].symbol; -} - -size_t Production::size() const { - return entries.size(); +const ProductionEntry &Production::operator[](int i) const { + return entries[i]; } SyntaxGrammar::SyntaxGrammar() {} @@ -87,8 +72,7 @@ std::ostream &operator<<(std::ostream &stream, const Production &production) { stream << entry; started = true; } - return stream << string(") end_rule_id: ") << - to_string(production.end_rule_id) << string(")"); + return stream << string(")"); } } // namespace tree_sitter diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h index d0f2d157..e192309c 100644 --- a/src/compiler/syntax_grammar.h +++ b/src/compiler/syntax_grammar.h @@ -18,15 +18,12 @@ struct ProductionEntry { bool operator==(const ProductionEntry &) const; }; -class Production { -public: +struct Production { + Production(); + Production(const std::vector &); + size_t symbol_count() const; + const ProductionEntry &operator[](int) const; std::vector entries; - int end_rule_id; - Production(const std::vector &, int); - size_t size() const; - const rules::Symbol &symbol_at(size_t) const; - int precedence_at(size_t) const; - int rule_id_at(size_t) const; }; std::ostream &operator<<(std::ostream &, const ProductionEntry &);