Make separate types for syntax and lexical grammars

This way, the separator characters can be added as a field to lexical grammars only
2014-06-25 13:27:16 -07:00 · 2014-06-25 13:27:16 -07:00 · 7df35f9b8d
commit 7df35f9b8d
parent d5674d33c4
49 changed files with 467 additions and 395 deletions
--- a/examples/grammars/javascript.cc
+++ b/examples/grammars/javascript.cc
@ -191,5 +191,7 @@ namespace tree_sitter_examples {
        { "null", keyword("null") },
        { "true", keyword("true") },
        { "false", keyword("false") },
-    }).ubiquitous_tokens({ "comment" });
+    })
+        .ubiquitous_tokens({ "comment" })
+        .separators({ ' ', '\t', '\r' });
 }
--- a/include/tree_sitter/compiler.h
+++ b/include/tree_sitter/compiler.h
@ -30,6 +30,7 @@ namespace tree_sitter {
    protected:
        const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
        std::vector<std::string> ubiquitous_tokens_;
+        std::vector<char> separators_;

    public:
        Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
@ -37,9 +38,11 @@ namespace tree_sitter {
        std::string start_rule_name() const;
        const rules::rule_ptr rule(const std::string &name) const;

-        const std::vector<std::string> & ubiquitous_tokens() const;
-        const Grammar & ubiquitous_tokens(const std::vector<std::string> &ubiquitous_tokens);
        const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
+        const std::vector<std::string> & ubiquitous_tokens() const;
+        Grammar & ubiquitous_tokens(const std::vector<std::string> &ubiquitous_tokens);
+        const std::vector<char> & separators() const;
+        Grammar & separators(const std::vector<char> &separators);
    };

    struct Conflict {
--- a/spec/compiler/build_tables/build_parse_table_spec.cc
+++ b/spec/compiler/build_tables/build_parse_table_spec.cc
@ -10,16 +10,16 @@ using namespace build_tables;
 START_TEST

 describe("building parse tables", []() {
-    auto parse_grammar = PreparedGrammar({
+    SyntaxGrammar parse_grammar({
        { "rule0", choice({ i_sym(1), i_sym(2) }) },
        { "rule1", i_token(0) },
        { "rule2", i_token(1) },
-    }, {}).ubiquitous_tokens({ Symbol(2, SymbolOptionToken) });
+    }, {}, { Symbol(2, SymbolOptionToken) });

-    PreparedGrammar lex_grammar({
+    LexicalGrammar lex_grammar({
        { "token0", pattern("[a-c]") },
        { "token1", pattern("[b-d]") },
-    }, {});
+    }, {}, {});

    it("first looks for the start rule and its item set closure", [&]() {
        auto result = build_parse_table(parse_grammar, lex_grammar);
--- a/spec/compiler/build_tables/conflict_manager_spec.cc
+++ b/spec/compiler/build_tables/conflict_manager_spec.cc
@ -1,6 +1,7 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/build_tables/parse_conflict_manager.h"
 #include "compiler/build_tables/lex_conflict_manager.h"
+#include "compiler/prepared_grammar.h"

 using namespace rules;
 using namespace build_tables;
@ -10,16 +11,16 @@ START_TEST
 describe("resolving parse conflicts", []() {
    bool update;

-    PreparedGrammar parse_grammar({
+    SyntaxGrammar parse_grammar({
        { "rule1", seq({ sym("rule2"), sym("token2") }) },
        { "rule2", sym("token1") },
-    }, {});
+    }, {}, {});

-    PreparedGrammar lex_grammar({
+    LexicalGrammar lex_grammar({
        { "token1", pattern("[a-c]") },
        { "token2", pattern("[b-d]") },
        { "token3", keyword("stuff") },
-    }, {});
+    }, {}, {});

    describe("lexical conflicts", [&]() {
        Symbol sym1(0, SymbolOptionToken);
--- a/spec/compiler/build_tables/first_set_spec.cc
+++ b/spec/compiler/build_tables/first_set_spec.cc
@ -10,7 +10,7 @@ using namespace rules;
 START_TEST

 describe("computing FIRST sets", []() {
-    const PreparedGrammar null_grammar({}, {});
+    const SyntaxGrammar null_grammar;

    describe("for a sequence AB", [&]() {
        it("ignores B when A cannot be blank", [&]() {
@ -41,12 +41,12 @@ describe("computing FIRST sets", []() {
                    i_token(1) }),
                i_sym(0) });

-            PreparedGrammar grammar({
+            SyntaxGrammar grammar({
                { "rule0", seq({
                    i_token(2),
                    i_token(3),
                    i_token(4) }) }
-            }, {});
+            }, {}, {});

            AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
                Symbol(0, SymbolOptionToken),
@ -59,11 +59,11 @@ describe("computing FIRST sets", []() {
                i_sym(0),
                i_token(1) });

-            PreparedGrammar grammar({
+            SyntaxGrammar grammar({
                { "rule0", choice({
                    i_token(0),
                    blank() }) }
-            }, {});
+            }, {}, {});

            AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
                Symbol(0, SymbolOptionToken),
@ -74,12 +74,12 @@ describe("computing FIRST sets", []() {

    describe("when there are left-recursive rules", [&]() {
        it("terminates", [&]() {
-            PreparedGrammar grammar({
+            SyntaxGrammar grammar({
                { "rule0", choice({
                    seq({ i_sym(0), i_token(10) }),
                    i_token(11),
                }) },
-            }, {});
+            }, {}, {});

            auto rule = i_sym(0);

--- a/spec/compiler/build_tables/item_set_closure_spec.cc
+++ b/spec/compiler/build_tables/item_set_closure_spec.cc
@ -9,14 +9,14 @@ using namespace rules;
 START_TEST

 describe("computing closures of item sets", []() {
-    PreparedGrammar grammar({
+    SyntaxGrammar grammar({
        { "E", seq({
            i_sym(1),
            i_token(11) }) },
        { "T", seq({
            i_token(12),
            i_token(13) }) },
-    }, {});
+    }, {}, {});

    it("adds items at the beginnings of referenced rules", [&]() {
        ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),
--- a/spec/compiler/build_tables/item_set_transitions_spec.cc
+++ b/spec/compiler/build_tables/item_set_transitions_spec.cc
@ -8,15 +8,13 @@ using namespace build_tables;
 START_TEST

 describe("lexical item set transitions", []() {
-    PreparedGrammar grammar({}, {});
-
    describe("when two items in the set have transitions on the same character", [&]() {
        it("merges the transitions by computing the union of the two item sets", [&]() {
            LexItemSet set1({
                LexItem(Symbol(1), character({ {'a', 'f'} })),
                LexItem(Symbol(2), character({ {'e', 'x'} })) });

-            AssertThat(char_transitions(set1, grammar), Equals(map<CharacterSet, LexItemSet>({
+            AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
                { CharacterSet({ {'a', 'd'} }), LexItemSet({
                    LexItem(Symbol(1), blank()) }) },
                { CharacterSet({ {'e', 'f'} }), LexItemSet({
@ -30,10 +28,10 @@ describe("lexical item set transitions", []() {
 });

 describe("syntactic item set transitions", [&]() {
-    PreparedGrammar grammar({
+    SyntaxGrammar grammar({
        { "A", blank() },
        { "B", i_token(21) },
-    }, {});
+    }, {}, {});

    it("computes the closure of the new item sets", [&]() {
        ParseItemSet set1({
--- a/spec/compiler/build_tables/rule_can_be_blank_spec.cc
+++ b/spec/compiler/build_tables/rule_can_be_blank_spec.cc
@ -56,14 +56,14 @@ describe("checking if rules can be blank", [&]() {
    });

    describe("checking recursively (by expanding non-terminals)", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "A", choice({
                seq({ i_sym(0), i_token(11) }),
                blank() }) },
            { "B", choice({
                seq({ i_sym(1), i_token(12) }),
                i_token(13) }) },
-        }, {});
+        }, {}, {});

        it("terminates for left-recursive rules that can be blank", [&]() {
            rule = i_sym(0);
--- a/spec/compiler/build_tables/rule_transitions_spec.cc
+++ b/spec/compiler/build_tables/rule_transitions_spec.cc
@ -1,26 +1,11 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/build_tables/rule_transitions.h"
 #include "compiler/rules/metadata.h"
+#include "compiler/helpers/containers.h"

 using namespace rules;
 using namespace build_tables;

-template<typename K>
-class rule_map : public map<K, rule_ptr> {
-public:
-    bool operator==(const map<K, rule_ptr> &other) const {
-        if (this->size() != other.size()) return false;
-        for (const auto &pair : *this) {
-            auto other_pair = other.find(pair.first);
-            if (other_pair == other.end()) return false;
-            if (!pair.second->operator==(*other_pair->second)) return false;
-        }
-        return true;
-    }
-
-    rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
-};
-
 START_TEST

 describe("rule transitions", []() {
--- a/spec/compiler/helpers/containers.h
+++ b/spec/compiler/helpers/containers.h
@ -0,0 +1,52 @@
+#ifndef HELPERS_CONTAINERS_H_
+#define HELPERS_CONTAINERS_H_
+
+#include <map>
+#include <vector>
+#include <string>
+#include <initializer_list>
+#include "tree_sitter/compiler.h"
+#include "compiler/rules/rule.h"
+
+using std::map;
+using std::vector;
+using std::string;
+using std::initializer_list;
+using std::pair;
+using tree_sitter::rules::rule_ptr;
+
+// Keyed rule collection for specs: == compares the rules the pointers refer to (pointee operator==).
+template<typename K> class rule_map : public map<K, rule_ptr> {
+public:
+    rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
+    // Equal when the sizes match and every key of ours exists in `other` with an equal rule.
+    bool operator==(const map<K, rule_ptr> &other) const {
+        if (this->size() != other.size()) return false;
+        for (auto entry = this->begin(); entry != this->end(); ++entry) {
+            const auto match = other.find(entry->first);
+            if (match == other.end() || !entry->second->operator==(*match->second))
+                return false;
+        }
+        return true;
+    }
+};
+
+// Ordered (name, rule) list for specs: == compares names and the pointed-to rules, index by index.
+class rule_list : public vector<pair<string, rule_ptr>> {
+public:
+    rule_list(const initializer_list<pair<string, rule_ptr>> &list) : vector<pair<string, rule_ptr>>(list) {}
+    // Names are part of equality: an expected list that names a rule differently does not match.
+    bool operator==(const vector<pair<string, rule_ptr>> &other) const {
+        if (this->size() != other.size()) return false;
+        for (size_t i = 0; i < this->size(); i++) {
+            const auto &entry = (*this)[i];  // bind by reference: no per-element copy
+            const auto &other_entry = other[i];
+            if (entry.first != other_entry.first) return false;
+            if (!entry.second->operator==(*other_entry.second)) return false;
+        }
+        return true;
+    }
+};
+
+
+#endif  // HELPERS_CONTAINERS_H_
--- a/spec/compiler/prepare_grammar/expand_repeats_spec.cc
+++ b/spec/compiler/prepare_grammar/expand_repeats_spec.cc
@ -1,6 +1,7 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/prepared_grammar.h"
 #include "compiler/prepare_grammar/expand_repeats.h"
+#include "compiler/helpers/containers.h"

 START_TEST

@ -9,29 +10,33 @@ using prepare_grammar::expand_repeats;

 describe("expanding repeat rules in a grammar", []() {
    it("replaces repeat rules with pairs of recursive rules", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", repeat(i_token(0)) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", i_aux_sym(0) },
-        }, {
-            { "rule0_repeat0", choice({
-                seq({
-                    i_token(0),
-                    i_aux_sym(0) }),
-                blank() }) },
+        })));
+
+        AssertThat(match.aux_rules, Equals(rule_list({
+            { "rule0_repeat0", choice({ seq({ i_token(0), i_aux_sym(0) }), blank() }) },
        })));
    });

    it("replaces repeats inside of sequences", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", seq({ i_token(10), repeat(i_token(11)) }) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", seq({ i_token(10), i_aux_sym(0) }) },
-        }, {
+        })));
+
+        AssertThat(match.aux_rules, Equals(rule_list({
            { "rule0_repeat0", choice({
                seq({ i_token(11), i_aux_sym(0) }),
                blank() }) },
@ -39,13 +44,17 @@ describe("expanding repeat rules in a grammar", []() {
    });

    it("replaces repeats inside of choices", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", choice({ i_token(10), i_aux_sym(0) }) },
-        }, {
+        })));
+
+        AssertThat(match.aux_rules, Equals(rule_list({
            { "rule0_repeat0", choice({
                seq({ i_token(11), i_aux_sym(0) }),
                blank() }) },
@ -53,13 +62,17 @@ describe("expanding repeat rules in a grammar", []() {
    });

    it("can replace multiple repeats in the same rule", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) },
-        }, {
+        })));
+        
+        AssertThat(match.aux_rules, Equals(rule_list({
            { "rule0_repeat0", choice({
                seq({
                    i_token(10),
@ -74,15 +87,19 @@ describe("expanding repeat rules in a grammar", []() {
    });

    it("can replace repeats in multiple rules", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", repeat(i_token(10)) },
            { "rule1", repeat(i_token(11)) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", i_aux_sym(0) },
            { "rule1", i_aux_sym(1) },
-        }, {
+        })));
+
+        AssertThat(match.aux_rules, Equals(rule_list({
            { "rule0_repeat0", choice({
                seq({ i_token(10), i_aux_sym(0) }),
                blank() }) },
--- a/spec/compiler/prepare_grammar/expand_tokens_spec.cc
+++ b/spec/compiler/prepare_grammar/expand_tokens_spec.cc
@ -1,5 +1,6 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/prepared_grammar.h"
+#include "compiler/helpers/containers.h"
 #include "compiler/prepare_grammar/expand_tokens.h"

 START_TEST
@ -9,50 +10,50 @@ using prepare_grammar::expand_tokens;

 describe("expanding token rules", []() {
    it("replaces regex patterns with their expansion", [&]() {
-        PreparedGrammar grammar({
+        LexicalGrammar grammar({
            { "rule_A", seq({
                i_sym(10),
                pattern("x*"),
                i_sym(11) }) },
-        }, {});
+        }, {}, {});

        auto result = expand_tokens(grammar);

        AssertThat(result.second, Equals((const GrammarError *)nullptr));
-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({
                i_sym(10),
                repeat(character({ 'x' })),
                i_sym(11) }) },
-        }, {})));
+        })));
    });

    it("replaces string rules with a sequence of characters", [&]() {
-        PreparedGrammar grammar({
+        LexicalGrammar grammar({
            { "rule_A", seq({
                i_sym(10),
                str("xyz"),
                i_sym(11) }) },
-        }, {});
+        }, {}, {});

        auto result = expand_tokens(grammar);

        AssertThat(result.second, Equals((const GrammarError *)nullptr));
-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({
                i_sym(10),
                seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }),
                i_sym(11) }) },
-        }, {})));
+        })));
    });

    it("returns an error when the grammar contains an invalid regex", [&]() {
-        PreparedGrammar grammar({
+        LexicalGrammar grammar({
            { "rule_A", seq({
                pattern("("),
                str("xyz"),
                pattern("[") }) },
-        }, {});
+        }, {}, {});

        auto result = expand_tokens(grammar);

--- a/spec/compiler/prepare_grammar/extract_tokens_spec.cc
+++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cc
@ -1,160 +1,172 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/prepared_grammar.h"
 #include "compiler/prepare_grammar/extract_tokens.h"
+#include "compiler/prepare_grammar/interned_grammar.h"
+#include "compiler/prepared_grammar.h"
+#include "compiler/helpers/containers.h"

 START_TEST

 using namespace rules;
 using prepare_grammar::extract_tokens;
+using prepare_grammar::InternedGrammar;

 describe("extracting tokens from a grammar", []() {
    it("moves string rules into the lexical grammar", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", seq({ str("ab"), i_sym(0) }) }
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", seq({ str("ab"), i_sym(0) }) }
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // syntax grammar: no auxiliary rules expected
+        AssertThat(result.second.rules, IsEmpty());  // lexical grammar: no named rules expected
+        AssertThat(result.second.aux_rules, Equals(rule_list({
            { "'ab'", str("ab") },
        })));
    });

    it("moves pattern rules into the lexical grammar", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", seq({ pattern("a+"), i_sym(0) }) }
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", seq({ pattern("a+"), i_sym(0) }) }
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // syntax grammar: no auxiliary rules expected
+        AssertThat(result.second.rules, IsEmpty());  // lexical grammar: no named rules expected
+        AssertThat(result.second.aux_rules, Equals(rule_list({
            { "/a+/", pattern("a+") },
        })));
    });

    it("moves other rules marked as tokens into the lexical grammar", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", seq({
-                token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
-                i_sym(0) }) }
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", seq({
+                    token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
+                    i_sym(0) }) }
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // syntax grammar: no auxiliary rules expected
+        AssertThat(result.second.rules, IsEmpty());  // lexical grammar: no named rules expected
+        AssertThat(result.second.aux_rules, Equals(rule_list({
            { "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) },
        })));
    });

    it("does not extract blanks", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", choice({ i_sym(0), blank() }) },
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", choice({ i_sym(0), blank() }) },
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", choice({ i_sym(0), blank() }) },
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {})));
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // syntax grammar: no auxiliary rules expected
+        AssertThat(result.second.rules, IsEmpty());  // lexical grammar: no named rules expected
+        AssertThat(result.second.aux_rules, IsEmpty());  // lexical grammar: no auxiliary rules either
    });

    it("does not create duplicate tokens in the lexical grammar", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // syntax grammar: no auxiliary rules expected
+        AssertThat(result.second.rules, IsEmpty());  // lexical grammar: no named rules expected
+        AssertThat(result.second.aux_rules, Equals(rule_list({
            { "'ab'", str("ab") },
-        })));
-    });
-
-    it("extracts tokens from the grammar's auxiliary rules", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
-            { "rule_A", seq({ str("ab"), i_sym(0) }) }
-        }));
-
-        AssertThat(result.first, Equals(PreparedGrammar({}, {
-            { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
-        })));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
-            { "'ab'", str("ab") },
-        })));
+        })));  // both "ab" occurrences map to the single aux token 0 listed above
    });

    describe("when an entire rule can be extracted", [&]() {
        it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
-            auto result = extract_tokens(PreparedGrammar({
-                { "rule_A", i_sym(1) },
-                { "rule_B", pattern("a|b") },
-                { "rule_C", token(seq({ str("a"), str("b") })) },
-            }, {}));
+            auto result = extract_tokens(InternedGrammar{
+                {
+                    { "rule_A", i_sym(1) },
+                    { "rule_B", pattern("a|b") },
+                    { "rule_C", token(seq({ str("a"), str("b") })) },
+                },
+                {},
+                {}
+            });

-            AssertThat(result.first, Equals(PreparedGrammar({
+            AssertThat(result.first.rules, Equals(rule_list({
                { "rule_A", i_token(0) }
-            }, {})));
-
-            AssertThat(result.second, Equals(PreparedGrammar({
+            })));
+            AssertThat(result.first.aux_rules, IsEmpty());
+            AssertThat(result.second.rules, Equals(rule_list({
                { "rule_B", pattern("a|b") },
                { "rule_C", token(seq({ str("a"), str("b") })) },
-            }, {})));
+            })));
+            AssertThat(result.second.aux_rules, IsEmpty());
        });

        it("updates symbols whose indices need to change due to deleted rules", [&]() {
-            auto result = extract_tokens(PreparedGrammar({
-                { "rule_A", str("ab") },
-                { "rule_B", i_sym(0) },
-                { "rule_C", i_sym(1) },
-            }, {}));
+            auto result = extract_tokens(InternedGrammar{
+                {
+                    { "rule_A", str("ab") },
+                    { "rule_B", i_sym(0) },
+                    { "rule_C", i_sym(1) },
+                },
+                {},
+                {}
+            });

-            AssertThat(result.first, Equals(PreparedGrammar({
+            AssertThat(result.first.rules, Equals(rule_list({
                { "rule_B", i_token(0) },
                { "rule_C", i_sym(0) },
-            }, {})));
-
-            AssertThat(result.second, Equals(PreparedGrammar({
+            })));
+            AssertThat(result.first.aux_rules, IsEmpty());
+            AssertThat(result.second.rules, Equals(rule_list({
                { "rule_A", str("ab") },
-            }, {})));
+            })));
+            AssertThat(result.second.aux_rules, IsEmpty());
        });

        it("updates the grammar's ubiquitous_tokens", [&]() {
-            auto result = extract_tokens(PreparedGrammar({
-                { "rule_A", str("ab") },
-                { "rule_B", i_sym(0) },
-                { "rule_C", i_sym(1) },
-            }, {}).ubiquitous_tokens({ Symbol(0) }));
+            auto result = extract_tokens(InternedGrammar{
+                {
+                    { "rule_A", str("ab") },
+                    { "rule_B", i_sym(0) },
+                    { "rule_C", i_sym(1) },
+                },
+                { Symbol(0) },
+                {}
+            });

-            AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
+            AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
                { Symbol(0, SymbolOptionToken) }
            })));
        });
-
-        it("extracts entire auxiliary rules", [&]() {
-            auto result = extract_tokens(PreparedGrammar({}, {
-                { "rule_A", str("ab") },
-                { "rule_B", i_aux_sym(0) },
-                { "rule_C", i_aux_sym(1) },
-            }));
-
-            AssertThat(result.first, Equals(PreparedGrammar({}, {
-                { "rule_B", i_aux_token(0) },
-                { "rule_C", i_aux_sym(0) },
-            })));
-
-            AssertThat(result.second, Equals(PreparedGrammar({}, {
-                { "rule_A", str("ab") },
-            })));
-        });
    });
 });

--- a/spec/compiler/prepare_grammar/intern_symbols_spec.cc
+++ b/spec/compiler/prepare_grammar/intern_symbols_spec.cc
@ -3,6 +3,7 @@
 #include "compiler/prepare_grammar/intern_symbols.h"
 #include "compiler/rules/named_symbol.h"
 #include "compiler/rules/symbol.h"
+#include "compiler/helpers/containers.h"

 START_TEST

@ -20,11 +21,11 @@ describe("interning symbols in a grammar", []() {
        auto result = intern_symbols(grammar);

        AssertThat(result.second, Equals((GrammarError *)nullptr));
-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "x", choice({ i_sym(1), i_sym(2) }) },
            { "y", i_sym(2) },
            { "z", str("stuff") },
-        }, {})));
+        })));
    });

    describe("when there are symbols that reference undefined rules", [&]() {
@ -49,10 +50,20 @@ describe("interning symbols in a grammar", []() {
        auto result = intern_symbols(grammar);

        AssertThat(result.second, Equals((GrammarError *)nullptr));
-        AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
+        AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
            Symbol(2)
        })));
    });
+
+    it("preserves the grammar's separator character set", [&]() {
+        auto grammar = Grammar({
+            { "z", str("stuff") }
+        }).separators({ 'x', 'y' });
+
+        auto result = intern_symbols(grammar);
+
+        AssertThat(result.first.separators, Equals(vector<char>({ 'x', 'y' })));  // same chars, same order
+    });
 });

 END_TEST
--- a/src/compiler/build_tables/build_lex_table.cc
+++ b/src/compiler/build_tables/build_lex_table.cc
@ -25,7 +25,7 @@ namespace tree_sitter {

    namespace build_tables {
        class LexTableBuilder {
-            const PreparedGrammar lex_grammar;
+            const LexicalGrammar lex_grammar;
            ParseTable *parse_table;
            LexConflictManager conflict_manager;
            unordered_map<const LexItemSet, LexStateId> lex_state_ids;
@ -65,7 +65,7 @@ namespace tree_sitter {
            }

            void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
-                auto transitions = char_transitions(item_set, lex_grammar);
+                auto transitions = char_transitions(item_set);
                for (const auto &transition : transitions) {
                    CharacterSet rule = transition.first;
                    LexItemSet new_item_set = transition.second;
@ -114,7 +114,7 @@ namespace tree_sitter {
            }

        public:
-            LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
+            LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) :
                lex_grammar(lex_grammar),
                parse_table(parse_table),
                conflict_manager(LexConflictManager(lex_grammar)) {}
@ -129,7 +129,7 @@ namespace tree_sitter {
            }
        };

-        LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar) {
+        LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) {
            return LexTableBuilder(parse_table, lex_grammar).build();
        }
    }
--- a/src/compiler/build_tables/build_lex_table.h
+++ b/src/compiler/build_tables/build_lex_table.h
@ -5,12 +5,11 @@
 #include "compiler/lex_table.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class LexicalGrammar;
    class ParseTable;

    namespace build_tables {
-        LexTable
-        build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar);
+        LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar);
    }
 }

--- a/src/compiler/build_tables/build_parse_table.cc
+++ b/src/compiler/build_tables/build_parse_table.cc
@ -23,7 +23,7 @@ namespace tree_sitter {

    namespace build_tables {
        class ParseTableBuilder {
-            const PreparedGrammar grammar;
+            const SyntaxGrammar grammar;
            ParseConflictManager conflict_manager;
            unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
            ParseTable parse_table;
@ -59,7 +59,7 @@ namespace tree_sitter {
            }

            void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
-                for (const Symbol &symbol : grammar.ubiquitous_tokens()) {
+                for (const Symbol &symbol : grammar.ubiquitous_tokens) {
                    auto &actions = parse_table.states[state_id].actions;
                    if (actions.find(symbol) == actions.end())
                        parse_table.add_action(state_id, symbol, ParseAction::Shift(state_id, { 0 }));
@ -99,7 +99,7 @@ namespace tree_sitter {
            }

        public:
-            ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
+            ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
                grammar(grammar),
                conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}

@ -111,7 +111,7 @@ namespace tree_sitter {
        };

        pair<ParseTable, vector<Conflict>>
-        build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) {
+        build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
            return ParseTableBuilder(grammar, lex_grammar).build();
        }
    }
--- a/src/compiler/build_tables/build_parse_table.h
+++ b/src/compiler/build_tables/build_parse_table.h
@ -7,11 +7,12 @@
 #include "compiler/parse_table.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;
+    class LexicalGrammar;

    namespace build_tables {
        std::pair<ParseTable, std::vector<Conflict>>
-        build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
+        build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
    }
 }

--- a/src/compiler/build_tables/build_tables.cc
+++ b/src/compiler/build_tables/build_tables.cc
@ -1,6 +1,7 @@
 #include "compiler/build_tables/build_tables.h"
 #include "compiler/build_tables/build_parse_table.h"
 #include "compiler/build_tables/build_lex_table.h"
+#include "compiler/prepared_grammar.h"

 namespace tree_sitter {
    using std::tuple;
@ -9,8 +10,8 @@ namespace tree_sitter {

    namespace build_tables {
        tuple<ParseTable, LexTable, vector<Conflict>>
-        build_tables(const PreparedGrammar &grammar,
-                     const PreparedGrammar &lex_grammar) {
+        build_tables(const SyntaxGrammar &grammar,
+                     const LexicalGrammar &lex_grammar) {
            auto parse_table_result = build_parse_table(grammar, lex_grammar);
            ParseTable parse_table = parse_table_result.first;
            vector<Conflict> conflicts = parse_table_result.second;
--- a/src/compiler/build_tables/build_tables.h
+++ b/src/compiler/build_tables/build_tables.h
@ -8,12 +8,13 @@
 #include "compiler/lex_table.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;
+    class LexicalGrammar;

    namespace build_tables {
        std::tuple<ParseTable, LexTable, std::vector<Conflict>>
-        build_tables(const PreparedGrammar &grammar,
-                     const PreparedGrammar &lex_grammar);
+        build_tables(const SyntaxGrammar &grammar,
+                     const LexicalGrammar &lex_grammar);
    }
 }

--- a/src/compiler/build_tables/first_set.cc
+++ b/src/compiler/build_tables/first_set.cc
@ -14,11 +14,11 @@ namespace tree_sitter {

    namespace build_tables {
        class FirstSet : public rules::RuleFn<set<Symbol>> {
-            const PreparedGrammar *grammar;
+            const SyntaxGrammar *grammar;
            set<Symbol> visited_symbols;

        public:
-            explicit FirstSet(const PreparedGrammar *grammar) : grammar(grammar) {}
+            explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}

            set<Symbol> apply_to(const Symbol *rule) {
                auto insertion_result = visited_symbols.insert(*rule);
@ -54,7 +54,7 @@ namespace tree_sitter {
            }
        };

-        set<Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
+        set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
            return FirstSet(&grammar).apply(rule);
        }
    }
--- a/src/compiler/build_tables/first_set.h
+++ b/src/compiler/build_tables/first_set.h
@ -6,17 +6,17 @@
 #include "compiler/rules/symbol.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;

    namespace build_tables {

        /*
         *  Returns the set of terminal symbols that can appear at
         *  the beginning of a string derivable from a given rule,
-         *  in a given gramamr.
+         *  in a given grammar.
         */
        std::set<rules::Symbol>
-        first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
+        first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
    }
 }

--- a/src/compiler/build_tables/item_set_closure.cc
+++ b/src/compiler/build_tables/item_set_closure.cc
@ -19,7 +19,7 @@ namespace tree_sitter {
    namespace build_tables {
        const ParseItemSet item_set_closure(const ParseItem &starting_item,
                                            const set<Symbol> &starting_lookahead_symbols,
-                                            const PreparedGrammar &grammar) {
+                                            const SyntaxGrammar &grammar) {
            ParseItemSet result;

            vector<pair<ParseItem, set<Symbol>>> items_to_process = {{starting_item, starting_lookahead_symbols}};
--- a/src/compiler/build_tables/item_set_closure.h
+++ b/src/compiler/build_tables/item_set_closure.h
@ -6,12 +6,12 @@
 #include "compiler/build_tables/parse_item.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;

    namespace build_tables {
        const ParseItemSet item_set_closure(const ParseItem &item,
                                            const std::set<rules::Symbol> &lookahead_symbols,
-                                            const PreparedGrammar &grammar);
+                                            const SyntaxGrammar &grammar);
    }
 }

--- a/src/compiler/build_tables/item_set_transitions.cc
+++ b/src/compiler/build_tables/item_set_transitions.cc
@ -4,6 +4,7 @@
 #include "compiler/build_tables/rule_transitions.h"
 #include "compiler/build_tables/merge_transitions.h"
 #include "compiler/rules/symbol.h"
+#include "compiler/prepared_grammar.h"

 namespace tree_sitter {
    using std::map;
@ -13,7 +14,7 @@ namespace tree_sitter {

    namespace build_tables {
        map<Symbol, ParseItemSet>
-        sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
+        sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
            map<Symbol, ParseItemSet> result;
            for (const auto &pair : item_set) {
                const ParseItem &item = pair.first;
@ -31,7 +32,7 @@ namespace tree_sitter {
        }

        map<CharacterSet, LexItemSet>
-        char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) {
+        char_transitions(const LexItemSet &item_set) {
            map<CharacterSet, LexItemSet> result;
            for (const LexItem &item : item_set) {
                for (auto &transition : char_transitions(item.rule)) {
--- a/src/compiler/build_tables/item_set_transitions.h
+++ b/src/compiler/build_tables/item_set_transitions.h
@ -6,7 +6,7 @@
 #include "compiler/build_tables/parse_item.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;
    namespace rules {
        class CharacterSet;
        class Symbol;
@ -14,10 +14,10 @@ namespace tree_sitter {

    namespace build_tables {
        std::map<rules::Symbol, ParseItemSet>
-        sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar);
+        sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar);

        std::map<rules::CharacterSet, LexItemSet>
-        char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar);
+        char_transitions(const LexItemSet &item_set);
    }
 }

--- a/src/compiler/build_tables/lex_conflict_manager.cc
+++ b/src/compiler/build_tables/lex_conflict_manager.cc
@ -4,6 +4,7 @@
 #include <string>
 #include <set>
 #include "compiler/util/string_helpers.h"
+#include "compiler/prepared_grammar.h"

 namespace tree_sitter {
    namespace build_tables {
@ -13,7 +14,7 @@ namespace tree_sitter {
        using std::set;
        using std::vector;

-        LexConflictManager::LexConflictManager(const PreparedGrammar &grammar) :
+        LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) :
            grammar(grammar) {}

        bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
--- a/src/compiler/build_tables/lex_conflict_manager.h
+++ b/src/compiler/build_tables/lex_conflict_manager.h
@ -8,10 +8,10 @@
 namespace tree_sitter {
    namespace build_tables {
        class LexConflictManager {
-            const PreparedGrammar grammar;
+            const LexicalGrammar grammar;

        public:
-            explicit LexConflictManager(const PreparedGrammar &grammar);
+            explicit LexConflictManager(const LexicalGrammar &grammar);
            bool resolve_lex_action(const LexAction &old_action,
                                    const LexAction &new_action);
        };
--- a/src/compiler/build_tables/parse_conflict_manager.cc
+++ b/src/compiler/build_tables/parse_conflict_manager.cc
@ -4,6 +4,7 @@
 #include <string>
 #include <set>
 #include "compiler/util/string_helpers.h"
+#include "compiler/prepared_grammar.h"

 namespace tree_sitter {
    namespace build_tables {
@ -13,8 +14,8 @@ namespace tree_sitter {
        using std::set;
        using std::vector;

-        ParseConflictManager::ParseConflictManager(const PreparedGrammar &parse_grammar,
-                                                   const PreparedGrammar &lex_grammar) :
+        ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
+                                                   const LexicalGrammar &lex_grammar) :
            parse_grammar(parse_grammar),
            lex_grammar(lex_grammar) {}

@ -87,7 +88,7 @@ namespace tree_sitter {
            return precedences + ")";
        }

-        string message_for_action(const ParseAction &action, const PreparedGrammar &parse_grammar) {
+        string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) {
            switch (action.type) {
                case ParseActionTypeShift:
                    return "shift " + precedence_string(action);
--- a/src/compiler/build_tables/parse_conflict_manager.h
+++ b/src/compiler/build_tables/parse_conflict_manager.h
@ -13,13 +13,13 @@
 namespace tree_sitter {
    namespace build_tables {
        class ParseConflictManager {
-            const PreparedGrammar parse_grammar;
-            const PreparedGrammar lex_grammar;
+            const SyntaxGrammar parse_grammar;
+            const LexicalGrammar lex_grammar;
            std::set<Conflict> conflicts_;

        public:
-            ParseConflictManager(const PreparedGrammar &parse_grammar,
-                                 const PreparedGrammar &lex_grammar);
+            ParseConflictManager(const SyntaxGrammar &parse_grammar,
+                                 const LexicalGrammar &lex_grammar);
            bool resolve_parse_action(const rules::Symbol &symbol,
                                      const ParseAction &old_action,
                                      const ParseAction &new_action);
--- a/src/compiler/build_tables/rule_can_be_blank.cc
+++ b/src/compiler/build_tables/rule_can_be_blank.cc
@ -39,13 +39,13 @@ namespace tree_sitter  {
        };

        class CanBeBlankRecursive : public CanBeBlank {
-            const PreparedGrammar *grammar;
+            const SyntaxGrammar *grammar;
            set<rules::Symbol> visited_symbols;
            using CanBeBlank::visit;

        public:
            using CanBeBlank::apply_to;
-            explicit CanBeBlankRecursive(const PreparedGrammar *grammar) : grammar(grammar) {}
+            explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {}

            bool apply_to(const rules::Symbol *rule) {
                if (visited_symbols.find(*rule) == visited_symbols.end()) {
@ -61,7 +61,7 @@ namespace tree_sitter  {
            return CanBeBlank().apply(rule);
        }

-        bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
+        bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
            return CanBeBlankRecursive(&grammar).apply(rule);
        }
    }
--- a/src/compiler/build_tables/rule_can_be_blank.h
+++ b/src/compiler/build_tables/rule_can_be_blank.h
@ -4,11 +4,11 @@
 #include "tree_sitter/compiler.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;

    namespace build_tables {
        bool rule_can_be_blank(const rules::rule_ptr &rule);
-        bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
+        bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
    }
 }

--- a/src/compiler/compile.cc
+++ b/src/compiler/compile.cc
@ -14,8 +14,8 @@ namespace tree_sitter {
    tuple<string, vector<Conflict>, const GrammarError *>
    compile(const Grammar &grammar, std::string name) {
        auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
-        const PreparedGrammar &syntax_grammar = get<0>(prepare_grammar_result);
-        const PreparedGrammar &lexical_grammar = get<1>(prepare_grammar_result);
+        const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
+        const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
        const GrammarError *error = get<2>(prepare_grammar_result);

        if (error)
--- a/src/compiler/generate_code/c_code.cc
+++ b/src/compiler/generate_code/c_code.cc
@ -27,16 +27,16 @@ namespace tree_sitter {
            const string name;
            const ParseTable parse_table;
            const LexTable lex_table;
-            const PreparedGrammar syntax_grammar;
-            const PreparedGrammar lexical_grammar;
+            const SyntaxGrammar syntax_grammar;
+            const LexicalGrammar lexical_grammar;
            map<string, string> sanitized_names;

        public:
            CCodeGenerator(string name,
                           const ParseTable &parse_table,
                           const LexTable &lex_table,
-                           const PreparedGrammar &syntax_grammar,
-                           const PreparedGrammar &lexical_grammar) :
+                           const SyntaxGrammar &syntax_grammar,
+                           const LexicalGrammar &lexical_grammar) :
                indent_level(0),
                name(name),
                parse_table(parse_table),
@ -107,7 +107,7 @@ namespace tree_sitter {
            void ubiquitous_symbols_list() {
                line("UBIQUITOUS_SYMBOLS = {");
                indent([&]() {
-                    for (auto &symbol : syntax_grammar.ubiquitous_tokens())
+                    for (auto &symbol : syntax_grammar.ubiquitous_tokens)
                        line("[" + symbol_id(symbol) + "] = 1,");
                });
                line("};");
@ -118,7 +118,7 @@ namespace tree_sitter {
                line("HIDDEN_SYMBOLS = {");
                indent([&]() {
                    for (auto &symbol : parse_table.symbols)
-                        if (!symbol.is_built_in() && (symbol.is_auxiliary() || grammar_for_symbol(symbol).rule_name(symbol)[0] == '_'))
+                        if (!symbol.is_built_in() && (symbol.is_auxiliary() || rule_name(symbol)[0] == '_'))
                            line("[" + symbol_id(symbol) + "] = 1,");
                });
                line("};");
@ -178,8 +178,10 @@ namespace tree_sitter {
                line();
            }

-            const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) {
-                return symbol.is_token() ? lexical_grammar : syntax_grammar;
+            string rule_name(const rules::Symbol &symbol) {
+                return symbol.is_token() ?
+                    lexical_grammar.rule_name(symbol) :
+                    syntax_grammar.rule_name(symbol);
            }

            string symbol_id(const rules::Symbol &symbol) {
@ -188,7 +190,7 @@ namespace tree_sitter {
                        "ts_builtin_sym_error" :
                        "ts_builtin_sym_end";
                } else {
-                    string name = sanitize_name(grammar_for_symbol(symbol).rule_name(symbol));
+                    string name = sanitize_name(rule_name(symbol));
                    if (symbol.is_auxiliary())
                        return "ts_aux_sym_" + name;
                    else
@ -238,9 +240,9 @@ namespace tree_sitter {
                if (symbol.is_built_in()) {
                    return (symbol == rules::ERROR()) ? "error" : "end";
                } else if (symbol.is_token() && symbol.is_auxiliary()) {
-                    return grammar_for_symbol(symbol).rule_name(symbol);
+                    return rule_name(symbol);
                } else {
-                    return grammar_for_symbol(symbol).rule_name(symbol);
+                    return rule_name(symbol);
                }
            }

@ -397,8 +399,8 @@ namespace tree_sitter {
        string c_code(string name,
                      const ParseTable &parse_table,
                      const LexTable &lex_table,
-                      const PreparedGrammar &syntax_grammar,
-                      const PreparedGrammar &lexical_grammar) {
+                      const SyntaxGrammar &syntax_grammar,
+                      const LexicalGrammar &lexical_grammar) {
            return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code();
        }
    }
--- a/src/compiler/generate_code/c_code.h
+++ b/src/compiler/generate_code/c_code.h
@ -7,14 +7,15 @@
 #include "compiler/lex_table.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;
+    class LexicalGrammar;

    namespace generate_code {
        std::string c_code(std::string name,
                           const ParseTable &parse_table,
                           const LexTable &lex_table,
-                           const PreparedGrammar &syntax_grammar,
-                           const PreparedGrammar &lexical_grammar);
+                           const SyntaxGrammar &syntax_grammar,
+                           const LexicalGrammar &lexical_grammar);
    }
 }

--- a/src/compiler/grammar.cc
+++ b/src/compiler/grammar.cc
@ -62,11 +62,20 @@ namespace tree_sitter {
        return ubiquitous_tokens_;
    }

-    const Grammar & Grammar::ubiquitous_tokens(const vector<string> &ubiquitous_tokens) {
+    Grammar & Grammar::ubiquitous_tokens(const vector<string> &ubiquitous_tokens) {
        ubiquitous_tokens_ = ubiquitous_tokens;
        return *this;
    }

+    const vector<char> & Grammar::separators() const {
+        return separators_;
+    }
+
+    Grammar & Grammar::separators(const vector<char> &separators) {
+        separators_ = separators;
+        return *this;
+    }
+
    const vector<pair<string, rule_ptr>> & Grammar::rules() const {
        return rules_;
    }
--- a/src/compiler/prepare_grammar/expand_repeats.cc
+++ b/src/compiler/prepare_grammar/expand_repeats.cc
@ -50,17 +50,16 @@ namespace tree_sitter {
            vector<pair<string, rules::rule_ptr>> aux_rules;
        };

-        PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
-            vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules());
+        SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
+            vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);

-            for (auto &pair : grammar.rules()) {
+            for (auto &pair : grammar.rules) {
                ExpandRepeats expander(pair.first, aux_rules.size());
                rules.push_back({ pair.first, expander.apply(pair.second) });
                aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
            }

-            return PreparedGrammar(rules, aux_rules).
-                ubiquitous_tokens(grammar.ubiquitous_tokens());
+            return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
        }
    }
 }
--- a/src/compiler/prepare_grammar/expand_repeats.h
+++ b/src/compiler/prepare_grammar/expand_repeats.h
@ -4,10 +4,10 @@
 #include "tree_sitter/compiler.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;

    namespace prepare_grammar {
-        PreparedGrammar expand_repeats(const PreparedGrammar &);
+        SyntaxGrammar expand_repeats(const SyntaxGrammar &);
    }
 }

--- a/src/compiler/prepare_grammar/expand_tokens.cc
+++ b/src/compiler/prepare_grammar/expand_tokens.cc
@ -43,28 +43,29 @@ namespace tree_sitter {
            ExpandTokens() : error(nullptr) {}
        };

-        pair<PreparedGrammar, const GrammarError *>
-        expand_tokens(const PreparedGrammar &grammar) {
+        pair<LexicalGrammar, const GrammarError *>
+        expand_tokens(const LexicalGrammar &grammar) {
            vector<pair<string, rule_ptr>> rules, aux_rules;
            ExpandTokens expander;

-            for (auto &pair : grammar.rules()) {
+            for (auto &pair : grammar.rules) {
                auto rule = expander.apply(pair.second);
                if (expander.error)
-                    return { PreparedGrammar(), expander.error };
+                    return { LexicalGrammar({}, {}, {}), expander.error };
                rules.push_back({ pair.first, rule });
            }

-            for (auto &pair : grammar.aux_rules()) {
+            for (auto &pair : grammar.aux_rules) {
                auto rule = expander.apply(pair.second);
                if (expander.error)
-                    return { PreparedGrammar(), expander.error };
+                    return { LexicalGrammar({}, {}, {}), expander.error };
                aux_rules.push_back({ pair.first, rule });
            }

            return {
-                PreparedGrammar(rules, aux_rules).ubiquitous_tokens(grammar.ubiquitous_tokens()),
-                nullptr };
+                LexicalGrammar(rules, aux_rules, grammar.separators),
+                nullptr,
+            };
        }
    }
 }
--- a/src/compiler/prepare_grammar/expand_tokens.h
+++ b/src/compiler/prepare_grammar/expand_tokens.h
@ -5,11 +5,11 @@
 #include "tree_sitter/compiler.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class LexicalGrammar;

    namespace prepare_grammar {
-        std::pair<PreparedGrammar, const GrammarError *>
-        expand_tokens(const PreparedGrammar &);
+        std::pair<LexicalGrammar, const GrammarError *>
+        expand_tokens(const LexicalGrammar &);
    }
 }

--- a/src/compiler/prepare_grammar/extract_tokens.cc
+++ b/src/compiler/prepare_grammar/extract_tokens.cc
@ -9,6 +9,7 @@
 #include "compiler/rules/string.h"
 #include "compiler/rules/metadata.h"
 #include "compiler/rules/pattern.h"
+#include "compiler/prepare_grammar/interned_grammar.h"
 #include "compiler/prepare_grammar/token_description.h"

 namespace tree_sitter {
@ -93,15 +94,15 @@ namespace tree_sitter {
            vector<pair<string, rule_ptr>> tokens;
        };

-        pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
+        pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
            vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
            vector<Symbol> ubiquitous_tokens;

            TokenExtractor extractor;
            map<Symbol, Symbol> symbol_replacements;

-            for (size_t i = 0; i < input_grammar.rules().size(); i++) {
-                auto pair = input_grammar.rules()[i];
+            for (size_t i = 0; i < input_grammar.rules.size(); i++) {
+                auto pair = input_grammar.rules[i];
                if (IsToken().apply(pair.second)) {
                    tokens.push_back(pair);
                    symbol_replacements.insert({
@ -113,32 +114,17 @@ namespace tree_sitter {
                }
            }

-            for (size_t i = 0; i < input_grammar.aux_rules().size(); i++) {
-                auto pair = input_grammar.aux_rules()[i];
-                if (IsToken().apply(pair.second)) {
-                    aux_tokens.push_back(pair);
-                    symbol_replacements.insert({
-                        Symbol(i, rules::SymbolOptionAuxiliary),
-                        Symbol(aux_tokens.size() - 1, rules::SymbolOption(rules::SymbolOptionAuxiliary|rules::SymbolOptionToken))
-                    });
-                } else {
-                    aux_rules.push_back({ pair.first, extractor.apply(pair.second) });
-                }
-            }
-
            aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());

            SymbolInliner inliner(symbol_replacements);
            for (auto &pair : rules)
                pair.second = inliner.apply(pair.second);
-            for (auto &pair : aux_rules)
-                pair.second = inliner.apply(pair.second);
-            for (auto &symbol : input_grammar.ubiquitous_tokens())
+            for (auto &symbol : input_grammar.ubiquitous_tokens)
                ubiquitous_tokens.push_back(inliner.replace_symbol(symbol));

            return {
-                PreparedGrammar(rules, aux_rules).ubiquitous_tokens(ubiquitous_tokens),
-                PreparedGrammar(tokens, aux_tokens)
+                SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
+                LexicalGrammar(tokens, aux_tokens, {}),
            };
        }
    }
--- a/src/compiler/prepare_grammar/extract_tokens.h
+++ b/src/compiler/prepare_grammar/extract_tokens.h
@ -2,12 +2,14 @@
 #define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_

 #include <utility>
+#include "compiler/prepare_grammar/interned_grammar.h"

 namespace tree_sitter {
-    class PreparedGrammar;
+    class SyntaxGrammar;
+    class LexicalGrammar;

    namespace prepare_grammar {
-        std::pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &);
+        std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &);
    }
 }

--- a/src/compiler/prepare_grammar/intern_symbols.cc
+++ b/src/compiler/prepare_grammar/intern_symbols.cc
@ -2,6 +2,7 @@
 #include <memory>
 #include <vector>
 #include "tree_sitter/compiler.h"
+#include "compiler/prepare_grammar/interned_grammar.h"
 #include "compiler/prepared_grammar.h"
 #include "compiler/rules/visitor.h"
 #include "compiler/rules/named_symbol.h"
@ -37,15 +38,16 @@ namespace tree_sitter {
            string missing_rule_name;
        };

-        pair<PreparedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
+        pair<InternedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
+            InternedGrammar grammar;
            return {
-                PreparedGrammar({}, {}),
+                grammar,
                new GrammarError(GrammarErrorTypeUndefinedSymbol,
                                 "Undefined rule '" + rule_name + "'")
            };
        }

-        pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
+        pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
            InternSymbols interner(grammar);
            vector<pair<string, rule_ptr>> rules;

@ -64,10 +66,12 @@ namespace tree_sitter {
                ubiquitous_tokens.push_back(*token);
            }

-            return {
-                PreparedGrammar(rules, {}).ubiquitous_tokens(ubiquitous_tokens),
-                nullptr
-            };
+            InternedGrammar result;
+            result.rules = rules;
+            result.ubiquitous_tokens = ubiquitous_tokens;
+            result.separators = grammar.separators();
+
+            return { result, nullptr };
        }
    }
 }
--- a/src/compiler/prepare_grammar/intern_symbols.h
+++ b/src/compiler/prepare_grammar/intern_symbols.h
@ -4,13 +4,13 @@
 #include <utility>
 #include <string>
 #include "tree_sitter/compiler.h"
+#include "compiler/prepare_grammar/interned_grammar.h"

 namespace tree_sitter {
    class Grammar;
-    class PreparedGrammar;

    namespace prepare_grammar {
-        std::pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &);
+        std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
    }
 }

--- a/src/compiler/prepare_grammar/interned_grammar.h
+++ b/src/compiler/prepare_grammar/interned_grammar.h
@ -0,0 +1,21 @@
+#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
+#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
+
+#include <utility>
+#include <vector>
+#include <string>
+#include "tree_sitter/compiler.h"
+#include "compiler/rules/symbol.h"
+
+namespace tree_sitter {
+    namespace prepare_grammar {
+        class InternedGrammar {
+        public:
+            std::vector<std::pair<std::string, rules::rule_ptr>> rules;
+            std::vector<rules::Symbol> ubiquitous_tokens;
+            std::vector<char> separators;
+        };
+    }
+}
+
+#endif  // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
--- a/src/compiler/prepare_grammar/prepare_grammar.cc
+++ b/src/compiler/prepare_grammar/prepare_grammar.cc
@ -4,29 +4,31 @@
 #include "compiler/prepare_grammar/expand_repeats.h"
 #include "compiler/prepare_grammar/expand_tokens.h"
 #include "compiler/prepare_grammar/intern_symbols.h"
+#include "compiler/prepare_grammar/interned_grammar.h"
+#include "compiler/prepared_grammar.h"

 namespace tree_sitter {
    using std::tuple;
    using std::make_tuple;

    namespace prepare_grammar {
-        tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
+        tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
        prepare_grammar(const Grammar &input_grammar) {
            auto result = intern_symbols(input_grammar);
-            const PreparedGrammar &grammar = result.first;
+            const InternedGrammar &grammar = result.first;
            const GrammarError *error = result.second;

            if (error)
-                return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
+                return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);

            auto grammars = extract_tokens(grammar);
-            const PreparedGrammar &rule_grammar = expand_repeats(grammars.first);
+            const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
            auto expand_tokens_result = expand_tokens(grammars.second);
-            const PreparedGrammar &lex_grammar = expand_tokens_result.first;
+            const LexicalGrammar &lex_grammar = expand_tokens_result.first;
            error = expand_tokens_result.second;

            if (error)
-                return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
+                return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);

            return make_tuple(rule_grammar, lex_grammar, nullptr);
        }
--- a/src/compiler/prepare_grammar/prepare_grammar.h
+++ b/src/compiler/prepare_grammar/prepare_grammar.h
@ -2,14 +2,14 @@
 #define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_

 #include <utility>
+#include "compiler/prepared_grammar.h"

 namespace tree_sitter {
    class Grammar;
    class GrammarError;
-    class PreparedGrammar;

    namespace prepare_grammar {
-        std::tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
+        std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
        prepare_grammar(const Grammar &);
    }
 }
--- a/src/compiler/prepared_grammar.cc
+++ b/src/compiler/prepared_grammar.cc
@ -7,98 +7,41 @@
 namespace tree_sitter {
    using std::string;
    using std::pair;
-    using std::ostream;
    using std::vector;
-    using rules::rule_ptr;
-    using rules::Symbol;

-    PreparedGrammar::PreparedGrammar() :
-        rules_({}),
-        aux_rules_({}),
-        ubiquitous_tokens_({}) {}
-
-    PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
-                                     const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules) :
-        rules_(rules),
-        aux_rules_(aux_rules),
-        ubiquitous_tokens_({}) {}
-
-    const rule_ptr & PreparedGrammar::rule(const Symbol &symbol) const {
+    const rules::rule_ptr & PreparedGrammar::rule(const rules::Symbol &symbol) const {  // rule stored for `symbol`; symbol.index is not bounds-checked
        return symbol.is_auxiliary() ?
-            aux_rules_[symbol.index].second :
-            rules_[symbol.index].second;
+            aux_rules[symbol.index].second :  // auxiliary symbols index into aux_rules
+            rules[symbol.index].second;  // all other symbols index into rules
    }

-    const string & PreparedGrammar::rule_name(const Symbol &symbol) const {
+    const string & PreparedGrammar::rule_name(const rules::Symbol &symbol) const {  // name stored for `symbol`; symbol.index is not bounds-checked
        return symbol.is_auxiliary() ?
-            aux_rules_[symbol.index].first :
-            rules_[symbol.index].first;
+            aux_rules[symbol.index].first :  // auxiliary symbols index into aux_rules
+            rules[symbol.index].first;  // all other symbols index into rules
    }

-    bool PreparedGrammar::operator==(const PreparedGrammar &other) const {
-        if (other.rules_.size() != rules_.size()) return false;
+    PreparedGrammar::PreparedGrammar() {}  // default constructors: no initializers, so every vector member starts empty
+    SyntaxGrammar::SyntaxGrammar() {}
+    LexicalGrammar::LexicalGrammar() {}

-        for (size_t i = 0; i < rules_.size(); i++) {
-            auto &pair = rules_[i];
-            auto &other_pair = other.rules_[i];
-            if (other_pair.first != pair.first) return false;
-            if (!other_pair.second->operator==(*pair.second)) return false;
-        }
+    PreparedGrammar::PreparedGrammar(
+        const vector<pair<string, rules::rule_ptr>> &rules,
+        const vector<pair<string, rules::rule_ptr>> &aux_rules) :
+        rules(rules),  // member and parameter share a name; the member is copy-initialized from the parameter
+        aux_rules(aux_rules) {}

-        if (other.aux_rules_.size() != aux_rules_.size()) return false;
-        for (size_t i = 0; i < aux_rules_
-             .size(); i++) {
-            auto &pair = aux_rules_[i];
-            auto &other_pair = other.aux_rules_[i];
-            if (other_pair.first != pair.first) return false;
-            if (!other_pair.second->operator==(*pair.second)) return false;
-        }
+    SyntaxGrammar::SyntaxGrammar(
+        const vector<pair<string, rules::rule_ptr>> &rules,
+        const vector<pair<string, rules::rule_ptr>> &aux_rules,
+        const vector<rules::Symbol> &ubiquitous_tokens) :
+        PreparedGrammar(rules, aux_rules),  // base class stores both rule lists
+        ubiquitous_tokens(ubiquitous_tokens) {}  // copies the symbol list into the member

-        return true;
-    }
-
-    const vector<pair<string, rule_ptr>> & PreparedGrammar::rules() const {
-        return rules_;
-    }
-
-    const vector<pair<string, rule_ptr>> & PreparedGrammar::aux_rules() const {
-        return aux_rules_;
-    }
-
-    const vector<Symbol> & PreparedGrammar::ubiquitous_tokens() const {
-        return ubiquitous_tokens_;
-    }
-
-    const PreparedGrammar & PreparedGrammar::ubiquitous_tokens(const vector<Symbol> &ubiquitous_tokens) {
-        ubiquitous_tokens_ = ubiquitous_tokens;
-        return *this;
-    }
-
-    ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) {
-        stream << string("#<grammar");
-
-        stream << string(" rules: {");
-        bool started = false;
-        for (auto pair : grammar.rules()) {
-            if (started) stream << string(", ");
-            stream << pair.first;
-            stream << string(" => ");
-            stream << pair.second;
-            started = true;
-        }
-        stream << string("}");
-
-        stream << string(" aux_rules: {");
-        started = false;
-        for (auto pair : grammar.aux_rules()) {
-            if (started) stream << string(", ");
-            stream << pair.first;
-            stream << string(" => ");
-            stream << pair.second;
-            started = true;
-        }
-        stream << string("}");
-
-        return stream << string(">");
-    }
+    LexicalGrammar::LexicalGrammar(
+        const vector<pair<string, rules::rule_ptr>> &rules,
+        const vector<pair<string, rules::rule_ptr>> &aux_rules,
+        const vector<char> &separators) :
+        PreparedGrammar(rules, aux_rules),  // base class stores both rule lists
+        separators(separators) {}  // copies the separator characters into the member
 }
--- a/src/compiler/prepared_grammar.h
+++ b/src/compiler/prepared_grammar.h
@ -9,25 +9,40 @@

 namespace tree_sitter {
    class PreparedGrammar {
-        const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
-        const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules_;
-        std::vector<rules::Symbol> ubiquitous_tokens_;
-
    public:
        PreparedGrammar();
-        PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
-                        const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
+        PreparedGrammar(
+            const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
+            const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
+
+        const std::vector<std::pair<std::string, rules::rule_ptr>> rules;  // (name, rule) pairs; public and const, so fixed at construction
+        const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;  // (name, rule) pairs consulted when a symbol's is_auxiliary() is true

-        bool operator==(const PreparedGrammar &other) const;
        const std::string & rule_name(const rules::Symbol &symbol) const;
        const rules::rule_ptr & rule(const rules::Symbol &symbol) const;
-        const std::vector<rules::Symbol> & ubiquitous_tokens() const;
-        const PreparedGrammar & ubiquitous_tokens(const std::vector<rules::Symbol> &ubiquitous_tokens);
-        const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
-        const std::vector<std::pair<std::string, rules::rule_ptr>> & aux_rules() const;
    };

-    std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
+    class SyntaxGrammar : public PreparedGrammar {  // PreparedGrammar plus a list of ubiquitous-token symbols
+    public:
+        SyntaxGrammar();
+        SyntaxGrammar(
+            const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
+            const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
+            const std::vector<rules::Symbol> &ubiquitous_tokens);
+
+        std::vector<rules::Symbol> ubiquitous_tokens;  // public and non-const, unlike the inherited rule lists
+    };
+
+    class LexicalGrammar : public PreparedGrammar {  // PreparedGrammar plus the separator characters, which only lexical grammars carry
+    public:
+        LexicalGrammar();
+        LexicalGrammar(
+            const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
+            const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
+            const std::vector<char> &separators);
+
+        std::vector<char> separators;  // public and non-const, unlike the inherited rule lists
+    };
 }

 #endif  // COMPILER_PREPARED_GRAMMAR_H_