diff --git a/spec/compiler/build_tables/first_set_spec.cc b/spec/compiler/build_tables/first_set_spec.cc index 931f185e..753bf54b 100644 --- a/spec/compiler/build_tables/first_set_spec.cc +++ b/spec/compiler/build_tables/first_set_spec.cc @@ -40,14 +40,14 @@ describe("computing FIRST sets", []() { i_token(0), i_token(1) }), i_sym(0) }); - + Grammar grammar({ { "rule0", seq({ i_token(2), i_token(3), i_token(4) }) } }); - + AssertThat(first_set(rule, grammar), Equals(set({ ISymbol(0, SymbolOptionToken), ISymbol(2, SymbolOptionToken), @@ -58,7 +58,7 @@ describe("computing FIRST sets", []() { auto rule = seq({ i_sym(0), i_token(1) }); - + Grammar grammar({ { "rule0", choice({ i_token(0), @@ -80,7 +80,7 @@ describe("computing FIRST sets", []() { i_token(11), }) }, }); - + auto rule = i_sym(0); AssertThat(first_set(rule, grammar), Equals(set({ diff --git a/spec/compiler/helpers/rule_helpers.cc b/spec/compiler/helpers/rule_helpers.cc index c1000b05..4ff03be1 100644 --- a/spec/compiler/helpers/rule_helpers.cc +++ b/spec/compiler/helpers/rule_helpers.cc @@ -17,19 +17,19 @@ namespace tree_sitter { else return CharacterSet(ranges).complement().copy(); } - + rule_ptr i_sym(size_t index) { return make_shared(index); } - + rule_ptr i_aux_sym(size_t index) { return make_shared(index, SymbolOptionAuxiliary); } - + rule_ptr i_token(size_t index) { return make_shared(index, SymbolOptionToken); } - + rule_ptr i_aux_token(size_t index) { return make_shared(index, SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken)); } diff --git a/spec/compiler/prepare_grammar/expand_repeats_spec.cc b/spec/compiler/prepare_grammar/expand_repeats_spec.cc index d39066ac..f8924dd0 100644 --- a/spec/compiler/prepare_grammar/expand_repeats_spec.cc +++ b/spec/compiler/prepare_grammar/expand_repeats_spec.cc @@ -13,7 +13,7 @@ describe("expanding repeat rules in a grammar", []() { PreparedGrammar grammar({ { "rule0", repeat(i_token(0)) }, }, {}); - + AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ { "rule0", i_aux_sym(0) }, }, { @@ -24,12 +24,12 @@ describe("expanding repeat rules in a grammar", []() { blank() }) }, }))); }); - + it("replaces repeats inside of sequences", [&]() { PreparedGrammar grammar({ { "rule0", seq({ i_token(10), repeat(i_token(11)) }) }, }, {}); - + AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ { "rule0", seq({ i_token(10), i_aux_sym(0) }) }, }, { @@ -38,12 +38,12 @@ describe("expanding repeat rules in a grammar", []() { blank() }) }, }))); }); - + it("replaces repeats inside of choices", [&]() { PreparedGrammar grammar({ { "rule0", choice({ i_token(10), repeat(i_token(11)) }) }, }, {}); - + AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ { "rule0", choice({ i_token(10), i_aux_sym(0) }) }, }, { @@ -52,12 +52,12 @@ describe("expanding repeat rules in a grammar", []() { blank() }) }, }))); }); - + it("can replace multiple repeats in the same rule", [&]() { PreparedGrammar grammar({ { "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) }, }, {}); - + AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ { "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) }, }, { @@ -73,13 +73,13 @@ describe("expanding repeat rules in a grammar", []() { blank() }) }, }))); }); - + it("can replace repeats in multiple rules", [&]() { PreparedGrammar grammar({ { "rule0", repeat(i_token(10)) }, { "rule1", repeat(i_token(11)) }, }, {}); - + AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ { "rule0", i_aux_sym(0) }, { "rule1", i_aux_sym(1) }, diff --git a/spec/compiler/prepare_grammar/extract_tokens_spec.cc b/spec/compiler/prepare_grammar/extract_tokens_spec.cc index 059f8ca1..a13e37b8 100644 --- a/spec/compiler/prepare_grammar/extract_tokens_spec.cc +++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cc @@ -13,39 +13,39 @@ describe("extracting tokens from a grammar", []() { pair result = extract_tokens(PreparedGrammar({ { "rule0", seq({ str("ab"), i_sym(0) }) } }, {})); - + AssertThat(result.first, Equals(PreparedGrammar({ { "rule0", seq({ i_aux_token(0), i_sym(0) }) } }, {}))); - + AssertThat(result.second, Equals(PreparedGrammar({}, { { "token0", str("ab") }, }))); }); - + it("moves patterns into the lexical grammar", [&]() { pair result = extract_tokens(PreparedGrammar({ { "rule0", seq({ pattern("a+"), i_sym(0) }) } }, {})); - + AssertThat(result.first, Equals(PreparedGrammar({ { "rule0", seq({ i_aux_token(0), i_sym(0) }) } }, {}))); - + AssertThat(result.second, Equals(PreparedGrammar({}, { { "token0", pattern("a+") }, }))); }); - + it("does not extract blanks into tokens", [&]() { pair result = extract_tokens(Grammar({ { "rule1", choice({ i_sym(0), blank() }) }, })); - + AssertThat(result.first, Equals(PreparedGrammar({ { "rule1", choice({ i_sym(0), blank() }) }, }, {}))); - + AssertThat(result.second, Equals(PreparedGrammar({}, {}))); }); @@ -53,43 +53,43 @@ describe("extracting tokens from a grammar", []() { pair result = extract_tokens(PreparedGrammar({ { "rule0", seq({ str("ab"), i_sym(0), str("ab") }) }, }, {})); - + AssertThat(result.first, Equals(PreparedGrammar({ { "rule0", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) } }, {}))); - + AssertThat(result.second, Equals(PreparedGrammar({}, { { "token0", str("ab") }, }))); }); - + it("moves entire rules into the lexical grammar when possible, updating referencing symbols", [&]() { auto result = extract_tokens(PreparedGrammar({ { "rule0", i_sym(1) }, { "rule1", pattern("a|b") }, }, {})); - + AssertThat(result.first, Equals(PreparedGrammar({ { "rule0", i_token(0) } }, {}))); - + AssertThat(result.second, Equals(PreparedGrammar({ { "rule1", pattern("a|b") }, }, {}))); }); - + it("updates symbols whose indices need to change due to deleted rules", [&]() { auto result = extract_tokens(PreparedGrammar({ { "rule0", str("ab") }, { "rule1", i_sym(0) }, { "rule2", i_sym(1) }, }, {})); - + AssertThat(result.first, Equals(PreparedGrammar({ { "rule1", i_token(0) }, { "rule2", i_sym(0) }, }, {}))); - + AssertThat(result.second, Equals(PreparedGrammar({ { "rule0", str("ab") }, }, {}))); diff --git a/spec/compiler/prepare_grammar/intern_symbols_spec.cc b/spec/compiler/prepare_grammar/intern_symbols_spec.cc index 6fd1164a..baee0563 100644 --- a/spec/compiler/prepare_grammar/intern_symbols_spec.cc +++ b/spec/compiler/prepare_grammar/intern_symbols_spec.cc @@ -18,7 +18,7 @@ describe("interning symbols in a grammar", []() { }); auto result = intern_symbols(grammar); - + AssertThat((bool)result.second, IsFalse()); AssertThat(result.first, Equals(PreparedGrammar({ { "x", choice({ i_sym(1), i_sym(2) }) }, @@ -26,15 +26,15 @@ describe("interning symbols in a grammar", []() { { "z", str("stuff") }, }, {}))); }); - + describe("when there are symbols that reference undefined rules", [&]() { it("returns an error", []() { Grammar grammar({ { "x", sym("y") }, }); - + auto result = intern_symbols(grammar); - + AssertThat(result.second->message(), Equals("Undefined rule 'y'")); }); }); diff --git a/spec/compiler/rules/pattern_spec.cc b/spec/compiler/rules/pattern_spec.cc index b4cb5546..5947269b 100644 --- a/spec/compiler/rules/pattern_spec.cc +++ b/spec/compiler/rules/pattern_spec.cc @@ -17,7 +17,7 @@ describe("parsing regex pattern rules", []() { character({ 'c' }) }))); }); - + it("parses wildcard '.' characters", [&]() { Pattern rule("."); AssertThat( @@ -121,7 +121,7 @@ describe("parsing regex pattern rules", []() { character({ '(' }), character({ 'b' }) }))); - + Pattern rule2("a\\."); AssertThat( rule2.to_rule_tree(), diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 108a6ccd..2ae1325c 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -83,11 +83,11 @@ namespace tree_sitter { } private: - + const PreparedGrammar & grammar_for_symbol(const rules::ISymbol &symbol) { return symbol.is_token() ? lexical_grammar : syntax_grammar; } - + string symbol_id(const rules::ISymbol &symbol) { if (symbol.is_built_in()) { return (symbol == rules::ERROR()) ? @@ -101,7 +101,7 @@ namespace tree_sitter { return "ts_sym_" + name; } } - + string symbol_name(const rules::ISymbol &symbol) { if (symbol.is_built_in()) { return (symbol == rules::ERROR()) ? "error" : "end"; diff --git a/src/compiler/generate_code/c_code.h b/src/compiler/generate_code/c_code.h index 30e27b58..fc8530ed 100644 --- a/src/compiler/generate_code/c_code.h +++ b/src/compiler/generate_code/c_code.h @@ -8,7 +8,7 @@ namespace tree_sitter { class PreparedGrammar; - + namespace generate_code { std::string c_code(std::string name, const ParseTable &parse_table, diff --git a/src/compiler/generate_code/token_description.cc b/src/compiler/generate_code/token_description.cc index 3dea4ebc..25bc828e 100644 --- a/src/compiler/generate_code/token_description.cc +++ b/src/compiler/generate_code/token_description.cc @@ -6,18 +6,18 @@ namespace tree_sitter { using std::string; - + namespace generate_code { class TokenDescription : public rules::RuleFn { string apply_to(const rules::Pattern *rule) { return "/" + rule->value + "/"; } - + string apply_to(const rules::String *rule) { return "'" + rule->value + "'"; } }; - + std::string token_description(const rules::rule_ptr &rule) { return TokenDescription().apply(rule); } diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 97578491..11fdb615 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -29,20 +29,20 @@ namespace tree_sitter { bool apply_to(const rules::String *rule) { return true; } bool apply_to(const rules::Pattern *rule) { return true; } }; - + class SymbolInliner : public rules::IdentityRuleFn { map replacements; using rules::IdentityRuleFn::apply_to; - + int new_index_for_symbol(const ISymbol &symbol) { int result = symbol.index; for (const auto &pair : replacements) - if (pair.first.index < symbol.index && + if (pair.first.index < symbol.index && pair.first.is_auxiliary() == symbol.is_auxiliary()) result--; return result; } - + rule_ptr apply_to(const ISymbol *rule) { auto replacement_pair = replacements.find(*rule); if (replacement_pair != replacements.end()) @@ -52,7 +52,7 @@ namespace tree_sitter { else return make_shared(new_index_for_symbol(*rule), rule->options); } - + public: SymbolInliner(const map &replacements, size_t rule_count, size_t aux_rule_count) : replacements(replacements) @@ -78,7 +78,7 @@ namespace tree_sitter { return result; } } - + public: vector> tokens; }; diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index 43eef891..86bdbb70 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -15,23 +15,23 @@ namespace tree_sitter { using std::exception; GrammarError::GrammarError(string rule_name) : rule_name(rule_name) {} - + string GrammarError::message() const { return "Undefined rule '" + rule_name + "'"; } - + namespace prepare_grammar { class InternSymbols : public rules::IdentityRuleFn { const Grammar grammar; using rules::IdentityRuleFn::apply_to; - + long index_of(string rule_name) { for (size_t i = 0; i < grammar.rules.size(); i++) if (grammar.rules[i].first == rule_name) return i; return -1; } - + rule_ptr apply_to(const rules::Symbol *rule) { long index = index_of(rule->name); if (index == -1) @@ -44,7 +44,7 @@ namespace tree_sitter { string missing_rule_name; }; - + pair intern_symbols(const Grammar &grammar) { InternSymbols interner(grammar); vector> rules; @@ -58,7 +58,7 @@ namespace tree_sitter { }; rules.push_back({ pair.first, new_rule }); } - + return { PreparedGrammar(rules), nullptr }; } } diff --git a/src/compiler/prepare_grammar/intern_symbols.h b/src/compiler/prepare_grammar/intern_symbols.h index 11c9ba03..d0a12f4d 100644 --- a/src/compiler/prepare_grammar/intern_symbols.h +++ b/src/compiler/prepare_grammar/intern_symbols.h @@ -7,7 +7,7 @@ namespace tree_sitter { class Grammar; class PreparedGrammar; - + class GrammarError { std::string rule_name; public: diff --git a/src/compiler/prepared_grammar.cc b/src/compiler/prepared_grammar.cc index 1e0b04dd..a953ee0b 100644 --- a/src/compiler/prepared_grammar.cc +++ b/src/compiler/prepared_grammar.cc @@ -22,13 +22,13 @@ namespace tree_sitter { const rule_ptr & PreparedGrammar::rule(const ISymbol &symbol) const { return symbol.is_auxiliary() ? - aux_rules[symbol.index].second : + aux_rules[symbol.index].second : rules[symbol.index].second; } - + const string & PreparedGrammar::rule_name(const ISymbol &symbol) const { return symbol.is_auxiliary() ? - aux_rules[symbol.index].first : + aux_rules[symbol.index].first : rules[symbol.index].first; } diff --git a/src/compiler/rules/interned_symbol.cc b/src/compiler/rules/interned_symbol.cc index d7007229..1cd2d0c7 100644 --- a/src/compiler/rules/interned_symbol.cc +++ b/src/compiler/rules/interned_symbol.cc @@ -7,16 +7,16 @@ namespace tree_sitter { using std::string; using std::to_string; using std::hash; - + namespace rules { ISymbol::ISymbol(int index) : index(index), options(SymbolOption(0)) {} - + ISymbol::ISymbol(int index, SymbolOption options) : index(index), options(options) {} - + bool ISymbol::operator==(const ISymbol &other) const { return (other.index == index) && (other.options == options); } @@ -25,39 +25,39 @@ namespace tree_sitter { const ISymbol *other = dynamic_cast(&rule); return other && this->operator==(*other); } - + size_t ISymbol::hash_code() const { return hash()(index) ^ hash()(options); } - + rule_ptr ISymbol::copy() const { return std::make_shared(*this); } - + string ISymbol::to_string() const { string name = (options & SymbolOptionAuxiliary) ? "aux_" : ""; name += (options & SymbolOptionToken) ? "token" : "sym"; return "#<" + name + std::to_string(index) + ">"; } - + bool ISymbol::operator<(const ISymbol &other) const { if (options < other.options) return true; if (options > other.options) return false; return (index < other.index); } - + bool ISymbol::is_token() const { return options & SymbolOptionToken; } - + bool ISymbol::is_built_in() const { return index < 0; } - + bool ISymbol::is_auxiliary() const { return options & SymbolOptionAuxiliary; } - + void ISymbol::accept(Visitor *visitor) const { visitor->visit(this); } diff --git a/src/compiler/rules/interned_symbol.h b/src/compiler/rules/interned_symbol.h index 6028fbeb..03e65c1e 100644 --- a/src/compiler/rules/interned_symbol.h +++ b/src/compiler/rules/interned_symbol.h @@ -9,25 +9,25 @@ namespace tree_sitter { SymbolOptionToken = 1 << 0, SymbolOptionAuxiliary = 1 << 1, } SymbolOption; - + class ISymbol : public Rule { public: explicit ISymbol(int index); ISymbol(int index, SymbolOption options); - + bool operator==(const ISymbol &other) const; bool operator==(const Rule &other) const; - + size_t hash_code() const; rule_ptr copy() const; std::string to_string() const; void accept(Visitor *visitor) const; bool operator<(const ISymbol &other) const; - + bool is_token() const; bool is_built_in() const; bool is_auxiliary() const; - + int index; SymbolOption options; };