diff --git a/examples/grammars/javascript.cc b/examples/grammars/javascript.cc index afafcd6b..e229bd5b 100644 --- a/examples/grammars/javascript.cc +++ b/examples/grammars/javascript.cc @@ -191,5 +191,7 @@ namespace tree_sitter_examples { { "null", keyword("null") }, { "true", keyword("true") }, { "false", keyword("false") }, - }).ubiquitous_tokens({ "comment" }); + }) + .ubiquitous_tokens({ "comment" }) + .separators({ ' ', '\t', '\r' }); } diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index f2ea2956..24a1875a 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -30,6 +30,7 @@ namespace tree_sitter { protected: const std::vector> rules_; std::vector ubiquitous_tokens_; + std::vector separators_; public: Grammar(const std::vector> &rules); @@ -37,9 +38,11 @@ namespace tree_sitter { std::string start_rule_name() const; const rules::rule_ptr rule(const std::string &name) const; - const std::vector & ubiquitous_tokens() const; - const Grammar & ubiquitous_tokens(const std::vector &ubiquitous_tokens); const std::vector> & rules() const; + const std::vector & ubiquitous_tokens() const; + Grammar & ubiquitous_tokens(const std::vector &ubiquitous_tokens); + const std::vector & separators() const; + Grammar & separators(const std::vector &separators); }; struct Conflict { diff --git a/spec/compiler/build_tables/build_parse_table_spec.cc b/spec/compiler/build_tables/build_parse_table_spec.cc index 5a8277fc..720b55d1 100644 --- a/spec/compiler/build_tables/build_parse_table_spec.cc +++ b/spec/compiler/build_tables/build_parse_table_spec.cc @@ -10,16 +10,16 @@ using namespace build_tables; START_TEST describe("building parse tables", []() { - auto parse_grammar = PreparedGrammar({ + SyntaxGrammar parse_grammar({ { "rule0", choice({ i_sym(1), i_sym(2) }) }, { "rule1", i_token(0) }, { "rule2", i_token(1) }, - }, {}).ubiquitous_tokens({ Symbol(2, SymbolOptionToken) }); + }, {}, { Symbol(2, SymbolOptionToken) }); - PreparedGrammar lex_grammar({ + LexicalGrammar lex_grammar({ { "token0", pattern("[a-c]") }, { "token1", pattern("[b-d]") }, - }, {}); + }, {}, {}); it("first looks for the start rule and its item set closure", [&]() { auto result = build_parse_table(parse_grammar, lex_grammar); diff --git a/spec/compiler/build_tables/conflict_manager_spec.cc b/spec/compiler/build_tables/conflict_manager_spec.cc index 358a7c51..ea91a0db 100644 --- a/spec/compiler/build_tables/conflict_manager_spec.cc +++ b/spec/compiler/build_tables/conflict_manager_spec.cc @@ -1,6 +1,7 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/build_tables/parse_conflict_manager.h" #include "compiler/build_tables/lex_conflict_manager.h" +#include "compiler/prepared_grammar.h" using namespace rules; using namespace build_tables; @@ -10,16 +11,16 @@ START_TEST describe("resolving parse conflicts", []() { bool update; - PreparedGrammar parse_grammar({ + SyntaxGrammar parse_grammar({ { "rule1", seq({ sym("rule2"), sym("token2") }) }, { "rule2", sym("token1") }, - }, {}); + }, {}, {}); - PreparedGrammar lex_grammar({ + LexicalGrammar lex_grammar({ { "token1", pattern("[a-c]") }, { "token2", pattern("[b-d]") }, { "token3", keyword("stuff") }, - }, {}); + }, {}, {}); describe("lexical conflicts", [&]() { Symbol sym1(0, SymbolOptionToken); diff --git a/spec/compiler/build_tables/first_set_spec.cc b/spec/compiler/build_tables/first_set_spec.cc index 187705cc..eb30592b 100644 --- a/spec/compiler/build_tables/first_set_spec.cc +++ b/spec/compiler/build_tables/first_set_spec.cc @@ -10,7 +10,7 @@ using namespace rules; START_TEST describe("computing FIRST sets", []() { - const PreparedGrammar null_grammar({}, {}); + const SyntaxGrammar null_grammar; describe("for a sequence AB", [&]() { it("ignores B when A cannot be blank", [&]() { @@ -41,12 +41,12 @@ describe("computing FIRST sets", []() { i_token(1) }), i_sym(0) }); - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "rule0", seq({ i_token(2), i_token(3), i_token(4) }) } - }, {}); + }, {}, {}); AssertThat(first_set(rule, grammar), Equals(set({ Symbol(0, SymbolOptionToken), @@ -59,11 +59,11 @@ describe("computing FIRST sets", []() { i_sym(0), i_token(1) }); - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "rule0", choice({ i_token(0), blank() }) } - }, {}); + }, {}, {}); AssertThat(first_set(rule, grammar), Equals(set({ Symbol(0, SymbolOptionToken), @@ -74,12 +74,12 @@ describe("computing FIRST sets", []() { describe("when there are left-recursive rules", [&]() { it("terminates", [&]() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "rule0", choice({ seq({ i_sym(0), i_token(10) }), i_token(11), }) }, - }, {}); + }, {}, {}); auto rule = i_sym(0); diff --git a/spec/compiler/build_tables/item_set_closure_spec.cc b/spec/compiler/build_tables/item_set_closure_spec.cc index 3d867f09..c29528dd 100644 --- a/spec/compiler/build_tables/item_set_closure_spec.cc +++ b/spec/compiler/build_tables/item_set_closure_spec.cc @@ -9,14 +9,14 @@ using namespace rules; START_TEST describe("computing closures of item sets", []() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "E", seq({ i_sym(1), i_token(11) }) }, { "T", seq({ i_token(12), i_token(13) }) }, - }, {}); + }, {}, {}); it("adds items at the beginnings of referenced rules", [&]() { ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0), diff --git a/spec/compiler/build_tables/item_set_transitions_spec.cc b/spec/compiler/build_tables/item_set_transitions_spec.cc index 04760266..9e5e55d4 100644 --- a/spec/compiler/build_tables/item_set_transitions_spec.cc +++ b/spec/compiler/build_tables/item_set_transitions_spec.cc @@ -8,15 +8,13 @@ using namespace build_tables; START_TEST describe("lexical item set transitions", []() { - PreparedGrammar grammar({}, {}); - describe("when two items in the set have transitions on the same character", [&]() { it("merges the transitions by computing the union of the two item sets", [&]() { LexItemSet set1({ LexItem(Symbol(1), character({ {'a', 'f'} })), LexItem(Symbol(2), character({ {'e', 'x'} })) }); - AssertThat(char_transitions(set1, grammar), Equals(map({ + AssertThat(char_transitions(set1), Equals(map({ { CharacterSet({ {'a', 'd'} }), LexItemSet({ LexItem(Symbol(1), blank()) }) }, { CharacterSet({ {'e', 'f'} }), LexItemSet({ @@ -30,10 +28,10 @@ describe("lexical item set transitions", []() { }); describe("syntactic item set transitions", [&]() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "A", blank() }, { "B", i_token(21) }, - }, {}); + }, {}, {}); it("computes the closure of the new item sets", [&]() { ParseItemSet set1({ diff --git a/spec/compiler/build_tables/rule_can_be_blank_spec.cc b/spec/compiler/build_tables/rule_can_be_blank_spec.cc index f5ed5029..a1d97b13 100644 --- a/spec/compiler/build_tables/rule_can_be_blank_spec.cc +++ b/spec/compiler/build_tables/rule_can_be_blank_spec.cc @@ -56,14 +56,14 @@ describe("checking if rules can be blank", [&]() { }); describe("checking recursively (by expanding non-terminals)", [&]() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "A", choice({ seq({ i_sym(0), i_token(11) }), blank() }) }, { "B", choice({ seq({ i_sym(1), i_token(12) }), i_token(13) }) }, - }, {}); + }, {}, {}); it("terminates for left-recursive rules that can be blank", [&]() { rule = i_sym(0); diff --git a/spec/compiler/build_tables/rule_transitions_spec.cc b/spec/compiler/build_tables/rule_transitions_spec.cc index 35f3c9ab..00b2d7fb 100644 --- a/spec/compiler/build_tables/rule_transitions_spec.cc +++ b/spec/compiler/build_tables/rule_transitions_spec.cc @@ -1,26 +1,11 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/build_tables/rule_transitions.h" #include "compiler/rules/metadata.h" +#include "compiler/helpers/containers.h" using namespace rules; using namespace build_tables; -template -class rule_map : public map { -public: - bool operator==(const map &other) const { - if (this->size() != other.size()) return false; - for (const auto &pair : *this) { - auto other_pair = other.find(pair.first); - if (other_pair == other.end()) return false; - if (!pair.second->operator==(*other_pair->second)) return false; - } - return true; - } - - rule_map(const initializer_list> &list) : map(list) {} -}; - START_TEST describe("rule transitions", []() { diff --git a/spec/compiler/helpers/containers.h b/spec/compiler/helpers/containers.h new file mode 100644 index 00000000..38bf7e73 --- /dev/null +++ b/spec/compiler/helpers/containers.h @@ -0,0 +1,52 @@ +#ifndef HELPERS_CONTAINERS_H_ +#define HELPERS_CONTAINERS_H_ + +#include +#include +#include +#include +#include "tree_sitter/compiler.h" +#include "compiler/rules/rule.h" + +using std::map; +using std::vector; +using std::string; +using std::initializer_list; +using std::pair; +using tree_sitter::rules::rule_ptr; + +template +class rule_map : public map { +public: + bool operator==(const map &other) const { + if (this->size() != other.size()) return false; + for (const auto &pair : *this) { + auto other_pair = other.find(pair.first); + if (other_pair == other.end()) return false; + if (!pair.second->operator==(*other_pair->second)) return false; + } + return true; + } + + rule_map(const initializer_list> &list) : map(list) {} +}; + +class rule_list : public vector> { +public: + bool operator==(const vector> &other) const { + if (this->size() != other.size()) return false; + for (size_t i = 0; i < this->size(); i++) { + auto pair = this->operator[](i); + auto other_pair = other[i]; + if (!pair.second->operator==(*other_pair.second)) + return false; + } + return true; + } + + rule_list(const initializer_list> &list) : + vector>(list) {} +}; + + +#endif // HELPERS_CONTAINERS_H_ diff --git a/spec/compiler/prepare_grammar/expand_repeats_spec.cc b/spec/compiler/prepare_grammar/expand_repeats_spec.cc index d965f49b..66f2a141 100644 --- a/spec/compiler/prepare_grammar/expand_repeats_spec.cc +++ b/spec/compiler/prepare_grammar/expand_repeats_spec.cc @@ -1,6 +1,7 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/prepared_grammar.h" #include "compiler/prepare_grammar/expand_repeats.h" +#include "compiler/helpers/containers.h" START_TEST @@ -9,29 +10,33 @@ using prepare_grammar::expand_repeats; describe("expanding repeat rules in a grammar", []() { it("replaces repeat rules with pairs of recursive rules", [&]() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "rule0", repeat(i_token(0)) }, - }, {}); + }, {}, {}); - AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ + auto match = expand_repeats(grammar); + + AssertThat(match.rules, Equals(rule_list({ { "rule0", i_aux_sym(0) }, - }, { - { "rule0_repeat0", choice({ - seq({ - i_token(0), - i_aux_sym(0) }), - blank() }) }, + }))); + + AssertThat(match.aux_rules, Equals(rule_list({ + { "rule0_repeat0", choice({ seq({ i_token(0), i_aux_sym(0) }), blank() }) }, }))); }); it("replaces repeats inside of sequences", [&]() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "rule0", seq({ i_token(10), repeat(i_token(11)) }) }, - }, {}); + }, {}, {}); - AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ + auto match = expand_repeats(grammar); + + AssertThat(match.rules, Equals(rule_list({ { "rule0", seq({ i_token(10), i_aux_sym(0) }) }, - }, { + }))); + + AssertThat(match.aux_rules, Equals(rule_list({ { "rule0_repeat0", choice({ seq({ i_token(11), i_aux_sym(0) }), blank() }) }, @@ -39,13 +44,17 @@ describe("expanding repeat rules in a grammar", []() { }); it("replaces repeats inside of choices", [&]() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "rule0", choice({ i_token(10), repeat(i_token(11)) }) }, - }, {}); + }, {}, {}); - AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ + auto match = expand_repeats(grammar); + + AssertThat(match.rules, Equals(rule_list({ { "rule0", choice({ i_token(10), i_aux_sym(0) }) }, - }, { + }))); + + AssertThat(match.aux_rules, Equals(rule_list({ { "rule0_repeat0", choice({ seq({ i_token(11), i_aux_sym(0) }), blank() }) }, @@ -53,13 +62,17 @@ describe("expanding repeat rules in a grammar", []() { }); it("can replace multiple repeats in the same rule", [&]() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) }, - }, {}); + }, {}, {}); - AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ + auto match = expand_repeats(grammar); + + AssertThat(match.rules, Equals(rule_list({ { "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) }, - }, { + }))); + + AssertThat(match.aux_rules, Equals(rule_list({ { "rule0_repeat0", choice({ seq({ i_token(10), @@ -74,15 +87,19 @@ describe("expanding repeat rules in a grammar", []() { }); it("can replace repeats in multiple rules", [&]() { - PreparedGrammar grammar({ + SyntaxGrammar grammar({ { "rule0", repeat(i_token(10)) }, { "rule1", repeat(i_token(11)) }, - }, {}); + }, {}, {}); - AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({ + auto match = expand_repeats(grammar); + + AssertThat(match.rules, Equals(rule_list({ { "rule0", i_aux_sym(0) }, { "rule1", i_aux_sym(1) }, - }, { + }))); + + AssertThat(match.aux_rules, Equals(rule_list({ { "rule0_repeat0", choice({ seq({ i_token(10), i_aux_sym(0) }), blank() }) }, diff --git a/spec/compiler/prepare_grammar/expand_tokens_spec.cc b/spec/compiler/prepare_grammar/expand_tokens_spec.cc index 7bba1836..08f6fac6 100644 --- a/spec/compiler/prepare_grammar/expand_tokens_spec.cc +++ b/spec/compiler/prepare_grammar/expand_tokens_spec.cc @@ -1,5 +1,6 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/prepared_grammar.h" +#include "compiler/helpers/containers.h" #include "compiler/prepare_grammar/expand_tokens.h" START_TEST @@ -9,50 +10,50 @@ using prepare_grammar::expand_tokens; describe("expanding token rules", []() { it("replaces regex patterns with their expansion", [&]() { - PreparedGrammar grammar({ + LexicalGrammar grammar({ { "rule_A", seq({ i_sym(10), pattern("x*"), i_sym(11) }) }, - }, {}); + }, {}, {}); auto result = expand_tokens(grammar); AssertThat(result.second, Equals((const GrammarError *)nullptr)); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_A", seq({ i_sym(10), repeat(character({ 'x' })), i_sym(11) }) }, - }, {}))); + }))); }); it("replaces string rules with a sequence of characters", [&]() { - PreparedGrammar grammar({ + LexicalGrammar grammar({ { "rule_A", seq({ i_sym(10), str("xyz"), i_sym(11) }) }, - }, {}); + }, {}, {}); auto result = expand_tokens(grammar); AssertThat(result.second, Equals((const GrammarError *)nullptr)); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_A", seq({ i_sym(10), seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }), i_sym(11) }) }, - }, {}))); + }))); }); it("returns an error when the grammar contains an invalid regex", [&]() { - PreparedGrammar grammar({ + LexicalGrammar grammar({ { "rule_A", seq({ pattern("("), str("xyz"), pattern("[") }) }, - }, {}); + }, {}, {}); auto result = expand_tokens(grammar); diff --git a/spec/compiler/prepare_grammar/extract_tokens_spec.cc b/spec/compiler/prepare_grammar/extract_tokens_spec.cc index 0fd858b9..14bd7e5c 100644 --- a/spec/compiler/prepare_grammar/extract_tokens_spec.cc +++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cc @@ -1,160 +1,172 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/prepared_grammar.h" #include "compiler/prepare_grammar/extract_tokens.h" +#include "compiler/prepare_grammar/interned_grammar.h" +#include "compiler/prepared_grammar.h" +#include "compiler/helpers/containers.h" START_TEST using namespace rules; using prepare_grammar::extract_tokens; +using prepare_grammar::InternedGrammar; describe("extracting tokens from a grammar", []() { it("moves string rules into the lexical grammar", [&]() { - pair result = extract_tokens(PreparedGrammar({ - { "rule_A", seq({ str("ab"), i_sym(0) }) } - }, {})); + pair result = extract_tokens(InternedGrammar{ + { + { "rule_A", seq({ str("ab"), i_sym(0) }) } + }, + {}, + {} + }); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_A", seq({ i_aux_token(0), i_sym(0) }) } - }, {}))); - - AssertThat(result.second, Equals(PreparedGrammar({}, { + }))); + AssertThat(result.first.aux_rules, IsEmpty()) + AssertThat(result.second.rules, IsEmpty()) + AssertThat(result.second.aux_rules, Equals(rule_list({ { "'ab'", str("ab") }, }))); }); it("moves pattern rules into the lexical grammar", [&]() { - pair result = extract_tokens(PreparedGrammar({ - { "rule_A", seq({ pattern("a+"), i_sym(0) }) } - }, {})); + pair result = extract_tokens(InternedGrammar{ + { + { "rule_A", seq({ pattern("a+"), i_sym(0) }) } + }, + {}, + {} + }); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_A", seq({ i_aux_token(0), i_sym(0) }) } - }, {}))); - - AssertThat(result.second, Equals(PreparedGrammar({}, { + }))); + AssertThat(result.first.aux_rules, IsEmpty()) + AssertThat(result.second.rules, IsEmpty()) + AssertThat(result.second.aux_rules, Equals(rule_list({ { "/a+/", pattern("a+") }, }))); }); it("moves other rules marked as tokens into the lexical grammar", [&]() { - pair result = extract_tokens(PreparedGrammar({ - { "rule_A", seq({ - token(seq({ pattern("."), choice({ str("a"), str("b") }) })), - i_sym(0) }) } - }, {})); + pair result = extract_tokens(InternedGrammar{ + { + { "rule_A", seq({ + token(seq({ pattern("."), choice({ str("a"), str("b") }) })), + i_sym(0) }) } + }, + {}, + {} + }); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_A", seq({ i_aux_token(0), i_sym(0) }) } - }, {}))); - - AssertThat(result.second, Equals(PreparedGrammar({}, { + }))); + AssertThat(result.first.aux_rules, IsEmpty()) + AssertThat(result.second.rules, IsEmpty()) + AssertThat(result.second.aux_rules, Equals(rule_list({ { "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) }, }))); }); it("does not extract blanks", [&]() { - pair result = extract_tokens(PreparedGrammar({ - { "rule_A", choice({ i_sym(0), blank() }) }, - }, {})); + pair result = extract_tokens(InternedGrammar{ + { + { "rule_A", choice({ i_sym(0), blank() }) }, + }, + {}, + {} + }); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_A", choice({ i_sym(0), blank() }) }, - }, {}))); - - AssertThat(result.second, Equals(PreparedGrammar({}, {}))); + }))); + AssertThat(result.first.aux_rules, IsEmpty()) + AssertThat(result.second.rules, IsEmpty()) + AssertThat(result.second.aux_rules, IsEmpty()) }); it("does not create duplicate tokens in the lexical grammar", [&]() { - pair result = extract_tokens(PreparedGrammar({ - { "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) }, - }, {})); + pair result = extract_tokens(InternedGrammar{ + { + { "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) }, + }, + {}, + {} + }); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) } - }, {}))); - - AssertThat(result.second, Equals(PreparedGrammar({}, { + }))); + AssertThat(result.first.aux_rules, IsEmpty()) + AssertThat(result.second.rules, IsEmpty()) + AssertThat(result.second.aux_rules, Equals(rule_list({ { "'ab'", str("ab") }, - }))); - }); - - it("extracts tokens from the grammar's auxiliary rules", [&]() { - pair result = extract_tokens(PreparedGrammar({}, { - { "rule_A", seq({ str("ab"), i_sym(0) }) } - })); - - AssertThat(result.first, Equals(PreparedGrammar({}, { - { "rule_A", seq({ i_aux_token(0), i_sym(0) }) } - }))); - - AssertThat(result.second, Equals(PreparedGrammar({}, { - { "'ab'", str("ab") }, - }))); + }))) }); describe("when an entire rule can be extracted", [&]() { it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() { - auto result = extract_tokens(PreparedGrammar({ - { "rule_A", i_sym(1) }, - { "rule_B", pattern("a|b") }, - { "rule_C", token(seq({ str("a"), str("b") })) }, - }, {})); + auto result = extract_tokens(InternedGrammar{ + { + { "rule_A", i_sym(1) }, + { "rule_B", pattern("a|b") }, + { "rule_C", token(seq({ str("a"), str("b") })) }, + }, + {}, + {} + }); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_A", i_token(0) } - }, {}))); - - AssertThat(result.second, Equals(PreparedGrammar({ + }))); + AssertThat(result.first.aux_rules, IsEmpty()); + AssertThat(result.second.rules, Equals(rule_list({ { "rule_B", pattern("a|b") }, { "rule_C", token(seq({ str("a"), str("b") })) }, - }, {}))); + }))); + AssertThat(result.second.aux_rules, IsEmpty()); }); it("updates symbols whose indices need to change due to deleted rules", [&]() { - auto result = extract_tokens(PreparedGrammar({ - { "rule_A", str("ab") }, - { "rule_B", i_sym(0) }, - { "rule_C", i_sym(1) }, - }, {})); + auto result = extract_tokens(InternedGrammar{ + { + { "rule_A", str("ab") }, + { "rule_B", i_sym(0) }, + { "rule_C", i_sym(1) }, + }, + {}, + {} + }); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "rule_B", i_token(0) }, { "rule_C", i_sym(0) }, - }, {}))); - - AssertThat(result.second, Equals(PreparedGrammar({ + }))); + AssertThat(result.first.aux_rules, IsEmpty()); + AssertThat(result.second.rules, Equals(rule_list({ { "rule_A", str("ab") }, - }, {}))); + }))); + AssertThat(result.second.aux_rules, IsEmpty()); }); it("updates the grammar's ubiquitous_tokens", [&]() { - auto result = extract_tokens(PreparedGrammar({ - { "rule_A", str("ab") }, - { "rule_B", i_sym(0) }, - { "rule_C", i_sym(1) }, - }, {}).ubiquitous_tokens({ Symbol(0) })); + auto result = extract_tokens(InternedGrammar{ + { + { "rule_A", str("ab") }, + { "rule_B", i_sym(0) }, + { "rule_C", i_sym(1) }, + }, + { Symbol(0) }, + {} + }); - AssertThat(result.first.ubiquitous_tokens(), Equals(vector({ + AssertThat(result.first.ubiquitous_tokens, Equals(vector({ { Symbol(0, SymbolOptionToken) } }))); }); - - it("extracts entire auxiliary rules", [&]() { - auto result = extract_tokens(PreparedGrammar({}, { - { "rule_A", str("ab") }, - { "rule_B", i_aux_sym(0) }, - { "rule_C", i_aux_sym(1) }, - })); - - AssertThat(result.first, Equals(PreparedGrammar({}, { - { "rule_B", i_aux_token(0) }, - { "rule_C", i_aux_sym(0) }, - }))); - - AssertThat(result.second, Equals(PreparedGrammar({}, { - { "rule_A", str("ab") }, - }))); - }); }); }); diff --git a/spec/compiler/prepare_grammar/intern_symbols_spec.cc b/spec/compiler/prepare_grammar/intern_symbols_spec.cc index 61196f08..3804ece6 100644 --- a/spec/compiler/prepare_grammar/intern_symbols_spec.cc +++ b/spec/compiler/prepare_grammar/intern_symbols_spec.cc @@ -3,6 +3,7 @@ #include "compiler/prepare_grammar/intern_symbols.h" #include "compiler/rules/named_symbol.h" #include "compiler/rules/symbol.h" +#include "compiler/helpers/containers.h" START_TEST @@ -20,11 +21,11 @@ describe("interning symbols in a grammar", []() { auto result = intern_symbols(grammar); AssertThat(result.second, Equals((GrammarError *)nullptr)); - AssertThat(result.first, Equals(PreparedGrammar({ + AssertThat(result.first.rules, Equals(rule_list({ { "x", choice({ i_sym(1), i_sym(2) }) }, { "y", i_sym(2) }, { "z", str("stuff") }, - }, {}))); + }))); }); describe("when there are symbols that reference undefined rules", [&]() { @@ -49,10 +50,20 @@ describe("interning symbols in a grammar", []() { auto result = intern_symbols(grammar); AssertThat(result.second, Equals((GrammarError *)nullptr)); - AssertThat(result.first.ubiquitous_tokens(), Equals(vector({ + AssertThat(result.first.ubiquitous_tokens, Equals(vector({ Symbol(2) }))); }); + + it("preserves the grammar's separator character set", [&]() { + auto grammar = Grammar({ + { "z", str("stuff") } + }).separators({ 'x', 'y' }); + + auto result = intern_symbols(grammar); + + AssertThat(result.first.separators, Equals(vector({ 'x', 'y' }))) + }); }); END_TEST diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 4076ab0b..a88bf185 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -25,7 +25,7 @@ namespace tree_sitter { namespace build_tables { class LexTableBuilder { - const PreparedGrammar lex_grammar; + const LexicalGrammar lex_grammar; ParseTable *parse_table; LexConflictManager conflict_manager; unordered_map lex_state_ids; @@ -65,7 +65,7 @@ namespace tree_sitter { } void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { - auto transitions = char_transitions(item_set, lex_grammar); + auto transitions = char_transitions(item_set); for (const auto &transition : transitions) { CharacterSet rule = transition.first; LexItemSet new_item_set = transition.second; @@ -114,7 +114,7 @@ namespace tree_sitter { } public: - LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) : + LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) : lex_grammar(lex_grammar), parse_table(parse_table), conflict_manager(LexConflictManager(lex_grammar)) {} @@ -129,7 +129,7 @@ namespace tree_sitter { } }; - LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar) { + LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) { return LexTableBuilder(parse_table, lex_grammar).build(); } } diff --git a/src/compiler/build_tables/build_lex_table.h b/src/compiler/build_tables/build_lex_table.h index c6a28fb3..8f0209d9 100644 --- a/src/compiler/build_tables/build_lex_table.h +++ b/src/compiler/build_tables/build_lex_table.h @@ -5,12 +5,11 @@ #include "compiler/lex_table.h" namespace tree_sitter { - class PreparedGrammar; + class LexicalGrammar; class ParseTable; namespace build_tables { - LexTable - build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar); + LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar); } } diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 6fbc444f..4296da29 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -23,7 +23,7 @@ namespace tree_sitter { namespace build_tables { class ParseTableBuilder { - const PreparedGrammar grammar; + const SyntaxGrammar grammar; ParseConflictManager conflict_manager; unordered_map parse_state_ids; ParseTable parse_table; @@ -59,7 +59,7 @@ namespace tree_sitter { } void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) { - for (const Symbol &symbol : grammar.ubiquitous_tokens()) { + for (const Symbol &symbol : grammar.ubiquitous_tokens) { auto &actions = parse_table.states[state_id].actions; if (actions.find(symbol) == actions.end()) parse_table.add_action(state_id, symbol, ParseAction::Shift(state_id, { 0 })); @@ -99,7 +99,7 @@ namespace tree_sitter { } public: - ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) : + ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) : grammar(grammar), conflict_manager(ParseConflictManager(grammar, lex_grammar)) {} @@ -111,7 +111,7 @@ namespace tree_sitter { }; pair> - build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) { + build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) { return ParseTableBuilder(grammar, lex_grammar).build(); } } diff --git a/src/compiler/build_tables/build_parse_table.h b/src/compiler/build_tables/build_parse_table.h index 64eaad39..d1414235 100644 --- a/src/compiler/build_tables/build_parse_table.h +++ b/src/compiler/build_tables/build_parse_table.h @@ -7,11 +7,12 @@ #include "compiler/parse_table.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; + class LexicalGrammar; namespace build_tables { std::pair> - build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar); + build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar); } } diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc index 43b74b69..a6ac96ab 100644 --- a/src/compiler/build_tables/build_tables.cc +++ b/src/compiler/build_tables/build_tables.cc @@ -1,6 +1,7 @@ #include "compiler/build_tables/build_tables.h" #include "compiler/build_tables/build_parse_table.h" #include "compiler/build_tables/build_lex_table.h" +#include "compiler/prepared_grammar.h" namespace tree_sitter { using std::tuple; @@ -9,8 +10,8 @@ namespace tree_sitter { namespace build_tables { tuple> - build_tables(const PreparedGrammar &grammar, - const PreparedGrammar &lex_grammar) { + build_tables(const SyntaxGrammar &grammar, + const LexicalGrammar &lex_grammar) { auto parse_table_result = build_parse_table(grammar, lex_grammar); ParseTable parse_table = parse_table_result.first; vector conflicts = parse_table_result.second; diff --git a/src/compiler/build_tables/build_tables.h b/src/compiler/build_tables/build_tables.h index 9b5ff9be..b4bbd0ed 100644 --- a/src/compiler/build_tables/build_tables.h +++ b/src/compiler/build_tables/build_tables.h @@ -8,12 +8,13 @@ #include "compiler/lex_table.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; + class LexicalGrammar; namespace build_tables { std::tuple> - build_tables(const PreparedGrammar &grammar, - const PreparedGrammar &lex_grammar); + build_tables(const SyntaxGrammar &grammar, + const LexicalGrammar &lex_grammar); } } diff --git a/src/compiler/build_tables/first_set.cc b/src/compiler/build_tables/first_set.cc index fcf2045f..b61e433f 100644 --- a/src/compiler/build_tables/first_set.cc +++ b/src/compiler/build_tables/first_set.cc @@ -14,11 +14,11 @@ namespace tree_sitter { namespace build_tables { class FirstSet : public rules::RuleFn> { - const PreparedGrammar *grammar; + const SyntaxGrammar *grammar; set visited_symbols; public: - explicit FirstSet(const PreparedGrammar *grammar) : grammar(grammar) {} + explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {} set apply_to(const Symbol *rule) { auto insertion_result = visited_symbols.insert(*rule); @@ -54,7 +54,7 @@ namespace tree_sitter { } }; - set first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar) { + set first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) { return FirstSet(&grammar).apply(rule); } } diff --git a/src/compiler/build_tables/first_set.h b/src/compiler/build_tables/first_set.h index 1011bf02..733203b1 100644 --- a/src/compiler/build_tables/first_set.h +++ b/src/compiler/build_tables/first_set.h @@ -6,17 +6,17 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; namespace build_tables { /* * Returns the set of terminal symbols that can appear at * the beginning of a string derivable from a given rule, - * in a given gramamr. + * in a given grammar. */ std::set - first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar); + first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar); } } diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc index 4f315b48..1be3b9e5 100644 --- a/src/compiler/build_tables/item_set_closure.cc +++ b/src/compiler/build_tables/item_set_closure.cc @@ -19,7 +19,7 @@ namespace tree_sitter { namespace build_tables { const ParseItemSet item_set_closure(const ParseItem &starting_item, const set &starting_lookahead_symbols, - const PreparedGrammar &grammar) { + const SyntaxGrammar &grammar) { ParseItemSet result; vector>> items_to_process = {{starting_item, starting_lookahead_symbols}}; diff --git a/src/compiler/build_tables/item_set_closure.h b/src/compiler/build_tables/item_set_closure.h index b25a2869..d0e75955 100644 --- a/src/compiler/build_tables/item_set_closure.h +++ b/src/compiler/build_tables/item_set_closure.h @@ -6,12 +6,12 @@ #include "compiler/build_tables/parse_item.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; namespace build_tables { const ParseItemSet item_set_closure(const ParseItem &item, const std::set &lookahead_symbols, - const PreparedGrammar &grammar); + const SyntaxGrammar &grammar); } } diff --git a/src/compiler/build_tables/item_set_transitions.cc b/src/compiler/build_tables/item_set_transitions.cc index acdf5cc2..9c5ab986 100644 --- a/src/compiler/build_tables/item_set_transitions.cc +++ b/src/compiler/build_tables/item_set_transitions.cc @@ -4,6 +4,7 @@ #include "compiler/build_tables/rule_transitions.h" #include "compiler/build_tables/merge_transitions.h" #include "compiler/rules/symbol.h" +#include "compiler/prepared_grammar.h" namespace tree_sitter { using std::map; @@ -13,7 +14,7 @@ namespace tree_sitter { namespace build_tables { map - sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) { + sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) { map result; for (const auto &pair : item_set) { const ParseItem &item = pair.first; @@ -31,7 +32,7 @@ namespace tree_sitter { } map - char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) { + char_transitions(const LexItemSet &item_set) { map result; for (const LexItem &item : item_set) { for (auto &transition : char_transitions(item.rule)) { diff --git a/src/compiler/build_tables/item_set_transitions.h b/src/compiler/build_tables/item_set_transitions.h index 6cb62ebc..faf2b37c 100644 --- a/src/compiler/build_tables/item_set_transitions.h +++ b/src/compiler/build_tables/item_set_transitions.h @@ -6,7 +6,7 @@ #include "compiler/build_tables/parse_item.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; namespace rules { class CharacterSet; class Symbol; @@ -14,10 +14,10 @@ namespace tree_sitter { namespace build_tables { std::map - sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar); + sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar); std::map - char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar); + char_transitions(const LexItemSet &item_set); } } diff --git a/src/compiler/build_tables/lex_conflict_manager.cc b/src/compiler/build_tables/lex_conflict_manager.cc index 1860c40e..8133bca7 100644 --- a/src/compiler/build_tables/lex_conflict_manager.cc +++ b/src/compiler/build_tables/lex_conflict_manager.cc @@ -4,6 +4,7 @@ #include #include #include "compiler/util/string_helpers.h" +#include "compiler/prepared_grammar.h" namespace tree_sitter { namespace build_tables { @@ -13,7 +14,7 @@ namespace tree_sitter { using std::set; using std::vector; - LexConflictManager::LexConflictManager(const PreparedGrammar &grammar) : + LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) : grammar(grammar) {} bool LexConflictManager::resolve_lex_action(const LexAction &old_action, diff --git a/src/compiler/build_tables/lex_conflict_manager.h b/src/compiler/build_tables/lex_conflict_manager.h index 6bb38abf..76ee3f45 100644 --- a/src/compiler/build_tables/lex_conflict_manager.h +++ b/src/compiler/build_tables/lex_conflict_manager.h @@ -8,10 +8,10 @@ namespace tree_sitter { namespace build_tables { class LexConflictManager { - const PreparedGrammar grammar; + const LexicalGrammar grammar; public: - explicit LexConflictManager(const PreparedGrammar &grammar); + explicit LexConflictManager(const LexicalGrammar &grammar); bool resolve_lex_action(const LexAction &old_action, const LexAction &new_action); }; diff --git a/src/compiler/build_tables/parse_conflict_manager.cc b/src/compiler/build_tables/parse_conflict_manager.cc index 2e4cdcd2..5ca802b7 100644 --- a/src/compiler/build_tables/parse_conflict_manager.cc +++ b/src/compiler/build_tables/parse_conflict_manager.cc @@ -4,6 +4,7 @@ #include #include #include "compiler/util/string_helpers.h" +#include "compiler/prepared_grammar.h" namespace tree_sitter { namespace build_tables { @@ -13,8 +14,8 @@ namespace tree_sitter { using std::set; using std::vector; - ParseConflictManager::ParseConflictManager(const PreparedGrammar &parse_grammar, - const PreparedGrammar &lex_grammar) : + ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar, + const LexicalGrammar &lex_grammar) : parse_grammar(parse_grammar), lex_grammar(lex_grammar) {} @@ -87,7 +88,7 @@ namespace tree_sitter { return precedences + ")"; } - string message_for_action(const ParseAction &action, const PreparedGrammar &parse_grammar) { + string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) { switch (action.type) { case ParseActionTypeShift: return "shift " + precedence_string(action); diff --git a/src/compiler/build_tables/parse_conflict_manager.h b/src/compiler/build_tables/parse_conflict_manager.h index 8f54e24e..b4fa9287 100644 --- a/src/compiler/build_tables/parse_conflict_manager.h +++ b/src/compiler/build_tables/parse_conflict_manager.h @@ -13,13 +13,13 @@ namespace tree_sitter { namespace build_tables { class ParseConflictManager { - const PreparedGrammar parse_grammar; - const PreparedGrammar lex_grammar; + const SyntaxGrammar parse_grammar; + const LexicalGrammar lex_grammar; std::set conflicts_; public: - ParseConflictManager(const PreparedGrammar &parse_grammar, - const PreparedGrammar &lex_grammar); + ParseConflictManager(const SyntaxGrammar &parse_grammar, + const LexicalGrammar &lex_grammar); bool resolve_parse_action(const rules::Symbol &symbol, const ParseAction &old_action, const ParseAction &new_action); diff --git a/src/compiler/build_tables/rule_can_be_blank.cc b/src/compiler/build_tables/rule_can_be_blank.cc index 090ec99b..6a7d8f83 100644 --- a/src/compiler/build_tables/rule_can_be_blank.cc +++ b/src/compiler/build_tables/rule_can_be_blank.cc @@ -39,13 +39,13 @@ namespace tree_sitter { }; class CanBeBlankRecursive : public CanBeBlank { - const PreparedGrammar *grammar; + const SyntaxGrammar *grammar; set visited_symbols; using CanBeBlank::visit; public: using CanBeBlank::apply_to; - explicit CanBeBlankRecursive(const PreparedGrammar *grammar) : grammar(grammar) {} + explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {} bool apply_to(const rules::Symbol *rule) { if (visited_symbols.find(*rule) == visited_symbols.end()) { @@ -61,7 +61,7 @@ namespace tree_sitter { return CanBeBlank().apply(rule); } - bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar) { + bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) { return CanBeBlankRecursive(&grammar).apply(rule); } } diff --git a/src/compiler/build_tables/rule_can_be_blank.h b/src/compiler/build_tables/rule_can_be_blank.h index c62689d9..bf1fbe90 100644 --- a/src/compiler/build_tables/rule_can_be_blank.h +++ b/src/compiler/build_tables/rule_can_be_blank.h @@ -4,11 +4,11 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; namespace build_tables { bool rule_can_be_blank(const rules::rule_ptr &rule); - bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar); + bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar); } } diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index 3549a053..0177ac32 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -14,8 +14,8 @@ namespace tree_sitter { tuple, const GrammarError *> compile(const Grammar &grammar, std::string name) { auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar); - const PreparedGrammar &syntax_grammar = get<0>(prepare_grammar_result); - const PreparedGrammar &lexical_grammar = get<1>(prepare_grammar_result); + const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); + const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); const GrammarError *error = get<2>(prepare_grammar_result); if (error) diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 16820d36..157432d9 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -27,16 +27,16 @@ namespace tree_sitter { const string name; const ParseTable parse_table; const LexTable lex_table; - const PreparedGrammar syntax_grammar; - const PreparedGrammar lexical_grammar; + const SyntaxGrammar syntax_grammar; + const LexicalGrammar lexical_grammar; map sanitized_names; public: CCodeGenerator(string name, const ParseTable &parse_table, const LexTable &lex_table, - const PreparedGrammar &syntax_grammar, - const PreparedGrammar &lexical_grammar) : + const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar) : indent_level(0), name(name), parse_table(parse_table), @@ -107,7 +107,7 @@ namespace tree_sitter { void ubiquitous_symbols_list() { line("UBIQUITOUS_SYMBOLS = {"); indent([&]() { - for (auto &symbol : syntax_grammar.ubiquitous_tokens()) + for (auto &symbol : syntax_grammar.ubiquitous_tokens) line("[" + symbol_id(symbol) + "] = 1,"); }); line("};"); @@ -118,7 +118,7 @@ namespace tree_sitter { line("HIDDEN_SYMBOLS = {"); indent([&]() { for (auto &symbol : parse_table.symbols) - if (!symbol.is_built_in() && (symbol.is_auxiliary() || grammar_for_symbol(symbol).rule_name(symbol)[0] == '_')) + if (!symbol.is_built_in() && (symbol.is_auxiliary() || rule_name(symbol)[0] == '_')) line("[" + symbol_id(symbol) + "] = 1,"); }); line("};"); @@ -178,8 +178,10 @@ namespace tree_sitter { line(); } - const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) { - return symbol.is_token() ? lexical_grammar : syntax_grammar; + string rule_name(const rules::Symbol &symbol) { + return symbol.is_token() ? + lexical_grammar.rule_name(symbol) : + syntax_grammar.rule_name(symbol); } string symbol_id(const rules::Symbol &symbol) { @@ -188,7 +190,7 @@ namespace tree_sitter { "ts_builtin_sym_error" : "ts_builtin_sym_end"; } else { - string name = sanitize_name(grammar_for_symbol(symbol).rule_name(symbol)); + string name = sanitize_name(rule_name(symbol)); if (symbol.is_auxiliary()) return "ts_aux_sym_" + name; else @@ -238,9 +240,9 @@ namespace tree_sitter { if (symbol.is_built_in()) { return (symbol == rules::ERROR()) ? "error" : "end"; } else if (symbol.is_token() && symbol.is_auxiliary()) { - return grammar_for_symbol(symbol).rule_name(symbol); + return rule_name(symbol); } else { - return grammar_for_symbol(symbol).rule_name(symbol); + return rule_name(symbol); } } @@ -397,8 +399,8 @@ namespace tree_sitter { string c_code(string name, const ParseTable &parse_table, const LexTable &lex_table, - const PreparedGrammar &syntax_grammar, - const PreparedGrammar &lexical_grammar) { + const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar) { return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code(); } } diff --git a/src/compiler/generate_code/c_code.h b/src/compiler/generate_code/c_code.h index fc8530ed..7a4cd1fa 100644 --- a/src/compiler/generate_code/c_code.h +++ b/src/compiler/generate_code/c_code.h @@ -7,14 +7,15 @@ #include "compiler/lex_table.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; + class LexicalGrammar; namespace generate_code { std::string c_code(std::string name, const ParseTable &parse_table, const LexTable &lex_table, - const PreparedGrammar &syntax_grammar, - const PreparedGrammar &lexical_grammar); + const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar); } } diff --git a/src/compiler/grammar.cc b/src/compiler/grammar.cc index f2cce4f9..a8aa7b7b 100644 --- a/src/compiler/grammar.cc +++ b/src/compiler/grammar.cc @@ -62,11 +62,20 @@ namespace tree_sitter { return ubiquitous_tokens_; } - const Grammar & Grammar::ubiquitous_tokens(const vector &ubiquitous_tokens) { + Grammar & Grammar::ubiquitous_tokens(const vector &ubiquitous_tokens) { ubiquitous_tokens_ = ubiquitous_tokens; return *this; } + const vector & Grammar::separators() const { + return separators_; + } + + Grammar & Grammar::separators(const vector &separators) { + separators_ = separators; + return *this; + } + const vector> & Grammar::rules() const { return rules_; } diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc index d950d00e..f69fe230 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cc +++ b/src/compiler/prepare_grammar/expand_repeats.cc @@ -50,17 +50,16 @@ namespace tree_sitter { vector> aux_rules; }; - PreparedGrammar expand_repeats(const PreparedGrammar &grammar) { - vector> rules, aux_rules(grammar.aux_rules()); + SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) { + vector> rules, aux_rules(grammar.aux_rules); - for (auto &pair : grammar.rules()) { + for (auto &pair : grammar.rules) { ExpandRepeats expander(pair.first, aux_rules.size()); rules.push_back({ pair.first, expander.apply(pair.second) }); aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end()); } - return PreparedGrammar(rules, aux_rules). - ubiquitous_tokens(grammar.ubiquitous_tokens()); + return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens); } } } diff --git a/src/compiler/prepare_grammar/expand_repeats.h b/src/compiler/prepare_grammar/expand_repeats.h index 636e7546..7a8b6e44 100644 --- a/src/compiler/prepare_grammar/expand_repeats.h +++ b/src/compiler/prepare_grammar/expand_repeats.h @@ -4,10 +4,10 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; namespace prepare_grammar { - PreparedGrammar expand_repeats(const PreparedGrammar &); + SyntaxGrammar expand_repeats(const SyntaxGrammar &); } } diff --git a/src/compiler/prepare_grammar/expand_tokens.cc b/src/compiler/prepare_grammar/expand_tokens.cc index d28d1d9d..11c26189 100644 --- a/src/compiler/prepare_grammar/expand_tokens.cc +++ b/src/compiler/prepare_grammar/expand_tokens.cc @@ -43,28 +43,29 @@ namespace tree_sitter { ExpandTokens() : error(nullptr) {} }; - pair - expand_tokens(const PreparedGrammar &grammar) { + pair + expand_tokens(const LexicalGrammar &grammar) { vector> rules, aux_rules; ExpandTokens expander; - for (auto &pair : grammar.rules()) { + for (auto &pair : grammar.rules) { auto rule = expander.apply(pair.second); if (expander.error) - return { PreparedGrammar(), expander.error }; + return { LexicalGrammar({}, {}, {}), expander.error }; rules.push_back({ pair.first, rule }); } - for (auto &pair : grammar.aux_rules()) { + for (auto &pair : grammar.aux_rules) { auto rule = expander.apply(pair.second); if (expander.error) - return { PreparedGrammar(), expander.error }; + return { LexicalGrammar({}, {}, {}), expander.error }; aux_rules.push_back({ pair.first, rule }); } return { - PreparedGrammar(rules, aux_rules).ubiquitous_tokens(grammar.ubiquitous_tokens()), - nullptr }; + LexicalGrammar(rules, aux_rules, grammar.separators), + nullptr, + }; } } } diff --git a/src/compiler/prepare_grammar/expand_tokens.h b/src/compiler/prepare_grammar/expand_tokens.h index 5a6e5ecc..8deb55f4 100644 --- a/src/compiler/prepare_grammar/expand_tokens.h +++ b/src/compiler/prepare_grammar/expand_tokens.h @@ -5,11 +5,11 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - class PreparedGrammar; + class LexicalGrammar; namespace prepare_grammar { - std::pair - expand_tokens(const PreparedGrammar &); + std::pair + expand_tokens(const LexicalGrammar &); } } diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 2d07e175..274f9a54 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -9,6 +9,7 @@ #include "compiler/rules/string.h" #include "compiler/rules/metadata.h" #include "compiler/rules/pattern.h" +#include "compiler/prepare_grammar/interned_grammar.h" #include "compiler/prepare_grammar/token_description.h" namespace tree_sitter { @@ -93,15 +94,15 @@ namespace tree_sitter { vector> tokens; }; - pair extract_tokens(const PreparedGrammar &input_grammar) { + pair extract_tokens(const InternedGrammar &input_grammar) { vector> rules, tokens, aux_rules, aux_tokens; vector ubiquitous_tokens; TokenExtractor extractor; map symbol_replacements; - for (size_t i = 0; i < input_grammar.rules().size(); i++) { - auto pair = input_grammar.rules()[i]; + for (size_t i = 0; i < input_grammar.rules.size(); i++) { + auto pair = input_grammar.rules[i]; if (IsToken().apply(pair.second)) { tokens.push_back(pair); symbol_replacements.insert({ @@ -113,32 +114,17 @@ namespace tree_sitter { } } - for (size_t i = 0; i < input_grammar.aux_rules().size(); i++) { - auto pair = input_grammar.aux_rules()[i]; - if (IsToken().apply(pair.second)) { - aux_tokens.push_back(pair); - symbol_replacements.insert({ - Symbol(i, rules::SymbolOptionAuxiliary), - Symbol(aux_tokens.size() - 1, rules::SymbolOption(rules::SymbolOptionAuxiliary|rules::SymbolOptionToken)) - }); - } else { - aux_rules.push_back({ pair.first, extractor.apply(pair.second) }); - } - } - aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end()); SymbolInliner inliner(symbol_replacements); for (auto &pair : rules) pair.second = inliner.apply(pair.second); - for (auto &pair : aux_rules) - pair.second = inliner.apply(pair.second); - for (auto &symbol : input_grammar.ubiquitous_tokens()) + for (auto &symbol : input_grammar.ubiquitous_tokens) ubiquitous_tokens.push_back(inliner.replace_symbol(symbol)); return { - PreparedGrammar(rules, aux_rules).ubiquitous_tokens(ubiquitous_tokens), - PreparedGrammar(tokens, aux_tokens) + SyntaxGrammar(rules, aux_rules, ubiquitous_tokens), + LexicalGrammar(tokens, aux_tokens, {}), }; } } diff --git a/src/compiler/prepare_grammar/extract_tokens.h b/src/compiler/prepare_grammar/extract_tokens.h index 99277774..4f6d1ef6 100644 --- a/src/compiler/prepare_grammar/extract_tokens.h +++ b/src/compiler/prepare_grammar/extract_tokens.h @@ -2,12 +2,14 @@ #define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_ #include +#include "compiler/prepare_grammar/interned_grammar.h" namespace tree_sitter { - class PreparedGrammar; + class SyntaxGrammar; + class LexicalGrammar; namespace prepare_grammar { - std::pair extract_tokens(const PreparedGrammar &); + std::pair extract_tokens(const InternedGrammar &); } } diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index be48056b..250c117e 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -2,6 +2,7 @@ #include #include #include "tree_sitter/compiler.h" +#include "compiler/prepare_grammar/interned_grammar.h" #include "compiler/prepared_grammar.h" #include "compiler/rules/visitor.h" #include "compiler/rules/named_symbol.h" @@ -37,15 +38,16 @@ namespace tree_sitter { string missing_rule_name; }; - pair missing_rule_error(string rule_name) { + pair missing_rule_error(string rule_name) { + InternedGrammar grammar; return { - PreparedGrammar({}, {}), + grammar, new GrammarError(GrammarErrorTypeUndefinedSymbol, "Undefined rule '" + rule_name + "'") }; } - pair intern_symbols(const Grammar &grammar) { + pair intern_symbols(const Grammar &grammar) { InternSymbols interner(grammar); vector> rules; @@ -64,10 +66,12 @@ namespace tree_sitter { ubiquitous_tokens.push_back(*token); } - return { - PreparedGrammar(rules, {}).ubiquitous_tokens(ubiquitous_tokens), - nullptr - }; + InternedGrammar result; + result.rules = rules; + result.ubiquitous_tokens = ubiquitous_tokens; + result.separators = grammar.separators(); + + return { result, nullptr }; } } } diff --git a/src/compiler/prepare_grammar/intern_symbols.h b/src/compiler/prepare_grammar/intern_symbols.h index 11e791c7..34281c30 100644 --- a/src/compiler/prepare_grammar/intern_symbols.h +++ b/src/compiler/prepare_grammar/intern_symbols.h @@ -4,13 +4,13 @@ #include #include #include "tree_sitter/compiler.h" +#include "compiler/prepare_grammar/interned_grammar.h" namespace tree_sitter { class Grammar; - class PreparedGrammar; namespace prepare_grammar { - std::pair intern_symbols(const Grammar &); + std::pair intern_symbols(const Grammar &); } } diff --git a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h new file mode 100644 index 00000000..f4919aad --- /dev/null +++ b/src/compiler/prepare_grammar/interned_grammar.h @@ -0,0 +1,21 @@ +#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_ +#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_ + +#include +#include +#include +#include "tree_sitter/compiler.h" +#include "compiler/rules/symbol.h" + +namespace tree_sitter { + namespace prepare_grammar { + class InternedGrammar { + public: + std::vector> rules; + std::vector ubiquitous_tokens; + std::vector separators; + }; + } +} + +#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_ diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc index 4fb75ea3..12a42a33 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.cc +++ b/src/compiler/prepare_grammar/prepare_grammar.cc @@ -4,29 +4,31 @@ #include "compiler/prepare_grammar/expand_repeats.h" #include "compiler/prepare_grammar/expand_tokens.h" #include "compiler/prepare_grammar/intern_symbols.h" +#include "compiler/prepare_grammar/interned_grammar.h" +#include "compiler/prepared_grammar.h" namespace tree_sitter { using std::tuple; using std::make_tuple; namespace prepare_grammar { - tuple + tuple prepare_grammar(const Grammar &input_grammar) { auto result = intern_symbols(input_grammar); - const PreparedGrammar &grammar = result.first; + const InternedGrammar &grammar = result.first; const GrammarError *error = result.second; if (error) - return make_tuple(PreparedGrammar(), PreparedGrammar(), error); + return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); auto grammars = extract_tokens(grammar); - const PreparedGrammar &rule_grammar = expand_repeats(grammars.first); + const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first); auto expand_tokens_result = expand_tokens(grammars.second); - const PreparedGrammar &lex_grammar = expand_tokens_result.first; + const LexicalGrammar &lex_grammar = expand_tokens_result.first; error = expand_tokens_result.second; if (error) - return make_tuple(PreparedGrammar(), PreparedGrammar(), error); + return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); return make_tuple(rule_grammar, lex_grammar, nullptr); } diff --git a/src/compiler/prepare_grammar/prepare_grammar.h b/src/compiler/prepare_grammar/prepare_grammar.h index d21317e8..7c382be9 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.h +++ b/src/compiler/prepare_grammar/prepare_grammar.h @@ -2,14 +2,14 @@ #define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_ #include +#include "compiler/prepared_grammar.h" namespace tree_sitter { class Grammar; class GrammarError; - class PreparedGrammar; namespace prepare_grammar { - std::tuple + std::tuple prepare_grammar(const Grammar &); } } diff --git a/src/compiler/prepared_grammar.cc b/src/compiler/prepared_grammar.cc index b3c50819..84c64723 100644 --- a/src/compiler/prepared_grammar.cc +++ b/src/compiler/prepared_grammar.cc @@ -7,98 +7,41 @@ namespace tree_sitter { using std::string; using std::pair; - using std::ostream; using std::vector; - using rules::rule_ptr; - using rules::Symbol; - PreparedGrammar::PreparedGrammar() : - rules_({}), - aux_rules_({}), - ubiquitous_tokens_({}) {} - - PreparedGrammar::PreparedGrammar(const std::vector> &rules, - const std::vector> &aux_rules) : - rules_(rules), - aux_rules_(aux_rules), - ubiquitous_tokens_({}) {} - - const rule_ptr & PreparedGrammar::rule(const Symbol &symbol) const { + const rules::rule_ptr & PreparedGrammar::rule(const rules::Symbol &symbol) const { return symbol.is_auxiliary() ? - aux_rules_[symbol.index].second : - rules_[symbol.index].second; + aux_rules[symbol.index].second : + rules[symbol.index].second; } - const string & PreparedGrammar::rule_name(const Symbol &symbol) const { + const string & PreparedGrammar::rule_name(const rules::Symbol &symbol) const { return symbol.is_auxiliary() ? - aux_rules_[symbol.index].first : - rules_[symbol.index].first; + aux_rules[symbol.index].first : + rules[symbol.index].first; } - bool PreparedGrammar::operator==(const PreparedGrammar &other) const { - if (other.rules_.size() != rules_.size()) return false; + PreparedGrammar::PreparedGrammar() {} + SyntaxGrammar::SyntaxGrammar() {} + LexicalGrammar::LexicalGrammar() {} - for (size_t i = 0; i < rules_.size(); i++) { - auto &pair = rules_[i]; - auto &other_pair = other.rules_[i]; - if (other_pair.first != pair.first) return false; - if (!other_pair.second->operator==(*pair.second)) return false; - } + PreparedGrammar::PreparedGrammar( + const vector> &rules, + const vector> &aux_rules) : + rules(rules), + aux_rules(aux_rules) {} - if (other.aux_rules_.size() != aux_rules_.size()) return false; - for (size_t i = 0; i < aux_rules_ - .size(); i++) { - auto &pair = aux_rules_[i]; - auto &other_pair = other.aux_rules_[i]; - if (other_pair.first != pair.first) return false; - if (!other_pair.second->operator==(*pair.second)) return false; - } + SyntaxGrammar::SyntaxGrammar( + const vector> &rules, + const vector> &aux_rules, + const vector &ubiquitous_tokens) : + PreparedGrammar(rules, aux_rules), + ubiquitous_tokens(ubiquitous_tokens) {} - return true; - } - - const vector> & PreparedGrammar::rules() const { - return rules_; - } - - const vector> & PreparedGrammar::aux_rules() const { - return aux_rules_; - } - - const vector & PreparedGrammar::ubiquitous_tokens() const { - return ubiquitous_tokens_; - } - - const PreparedGrammar & PreparedGrammar::ubiquitous_tokens(const vector &ubiquitous_tokens) { - ubiquitous_tokens_ = ubiquitous_tokens; - return *this; - } - - ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) { - stream << string("# "); - stream << pair.second; - started = true; - } - stream << string("}"); - - stream << string(" aux_rules: {"); - started = false; - for (auto pair : grammar.aux_rules()) { - if (started) stream << string(", "); - stream << pair.first; - stream << string(" => "); - stream << pair.second; - started = true; - } - stream << string("}"); - - return stream << string(">"); - } + LexicalGrammar::LexicalGrammar( + const vector> &rules, + const vector> &aux_rules, + const vector &separators) : + PreparedGrammar(rules, aux_rules), + separators(separators) {} } diff --git a/src/compiler/prepared_grammar.h b/src/compiler/prepared_grammar.h index 78f2baf0..6ae727e9 100644 --- a/src/compiler/prepared_grammar.h +++ b/src/compiler/prepared_grammar.h @@ -9,25 +9,40 @@ namespace tree_sitter { class PreparedGrammar { - const std::vector> rules_; - const std::vector> aux_rules_; - std::vector ubiquitous_tokens_; - public: PreparedGrammar(); - PreparedGrammar(const std::vector> &rules, - const std::vector> &aux_rules); + PreparedGrammar( + const std::vector> &rules, + const std::vector> &aux_rules); + + const std::vector> rules; + const std::vector> aux_rules; - bool operator==(const PreparedGrammar &other) const; const std::string & rule_name(const rules::Symbol &symbol) const; const rules::rule_ptr & rule(const rules::Symbol &symbol) const; - const std::vector & ubiquitous_tokens() const; - const PreparedGrammar & ubiquitous_tokens(const std::vector &ubiquitous_tokens); - const std::vector> & rules() const; - const std::vector> & aux_rules() const; }; - std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar); + class SyntaxGrammar : public PreparedGrammar { + public: + SyntaxGrammar(); + SyntaxGrammar( + const std::vector> &rules, + const std::vector> &aux_rules, + const std::vector &ubiquitous_tokens); + + std::vector ubiquitous_tokens; + }; + + class LexicalGrammar : public PreparedGrammar { + public: + LexicalGrammar(); + LexicalGrammar( + const std::vector> &rules, + const std::vector> &aux_rules, + const std::vector &separators); + + std::vector separators; + }; } #endif // COMPILER_PREPARED_GRAMMAR_H_