Make separate types for syntax and lexical grammars

This way, the separator characters can be added as a field to lexical grammars only
2014-06-25 13:27:16 -07:00 · 2014-06-25 13:27:16 -07:00 · 7df35f9b8d
commit 7df35f9b8d
parent d5674d33c4
49 changed files with 467 additions and 395 deletions
--- a/spec/compiler/build_tables/build_parse_table_spec.cc
+++ b/spec/compiler/build_tables/build_parse_table_spec.cc
@ -10,16 +10,16 @@ using namespace build_tables;
 START_TEST

 describe("building parse tables", []() {
-    auto parse_grammar = PreparedGrammar({
+    SyntaxGrammar parse_grammar({
        { "rule0", choice({ i_sym(1), i_sym(2) }) },
        { "rule1", i_token(0) },
        { "rule2", i_token(1) },
-    }, {}).ubiquitous_tokens({ Symbol(2, SymbolOptionToken) });
+    }, {}, { Symbol(2, SymbolOptionToken) });

-    PreparedGrammar lex_grammar({
+    LexicalGrammar lex_grammar({
        { "token0", pattern("[a-c]") },
        { "token1", pattern("[b-d]") },
-    }, {});
+    }, {}, {});

    it("first looks for the start rule and its item set closure", [&]() {
        auto result = build_parse_table(parse_grammar, lex_grammar);
--- a/spec/compiler/build_tables/conflict_manager_spec.cc
+++ b/spec/compiler/build_tables/conflict_manager_spec.cc
@ -1,6 +1,7 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/build_tables/parse_conflict_manager.h"
 #include "compiler/build_tables/lex_conflict_manager.h"
+#include "compiler/prepared_grammar.h"

 using namespace rules;
 using namespace build_tables;
@ -10,16 +11,16 @@ START_TEST
 describe("resolving parse conflicts", []() {
    bool update;

-    PreparedGrammar parse_grammar({
+    SyntaxGrammar parse_grammar({
        { "rule1", seq({ sym("rule2"), sym("token2") }) },
        { "rule2", sym("token1") },
-    }, {});
+    }, {}, {});

-    PreparedGrammar lex_grammar({
+    LexicalGrammar lex_grammar({
        { "token1", pattern("[a-c]") },
        { "token2", pattern("[b-d]") },
        { "token3", keyword("stuff") },
-    }, {});
+    }, {}, {});

    describe("lexical conflicts", [&]() {
        Symbol sym1(0, SymbolOptionToken);
--- a/spec/compiler/build_tables/first_set_spec.cc
+++ b/spec/compiler/build_tables/first_set_spec.cc
@ -10,7 +10,7 @@ using namespace rules;
 START_TEST

 describe("computing FIRST sets", []() {
-    const PreparedGrammar null_grammar({}, {});
+    const SyntaxGrammar null_grammar;

    describe("for a sequence AB", [&]() {
        it("ignores B when A cannot be blank", [&]() {
@ -41,12 +41,12 @@ describe("computing FIRST sets", []() {
                    i_token(1) }),
                i_sym(0) });

-            PreparedGrammar grammar({
+            SyntaxGrammar grammar({
                { "rule0", seq({
                    i_token(2),
                    i_token(3),
                    i_token(4) }) }
-            }, {});
+            }, {}, {});

            AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
                Symbol(0, SymbolOptionToken),
@ -59,11 +59,11 @@ describe("computing FIRST sets", []() {
                i_sym(0),
                i_token(1) });

-            PreparedGrammar grammar({
+            SyntaxGrammar grammar({
                { "rule0", choice({
                    i_token(0),
                    blank() }) }
-            }, {});
+            }, {}, {});

            AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
                Symbol(0, SymbolOptionToken),
@ -74,12 +74,12 @@ describe("computing FIRST sets", []() {

    describe("when there are left-recursive rules", [&]() {
        it("terminates", [&]() {
-            PreparedGrammar grammar({
+            SyntaxGrammar grammar({
                { "rule0", choice({
                    seq({ i_sym(0), i_token(10) }),
                    i_token(11),
                }) },
-            }, {});
+            }, {}, {});

            auto rule = i_sym(0);

--- a/spec/compiler/build_tables/item_set_closure_spec.cc
+++ b/spec/compiler/build_tables/item_set_closure_spec.cc
@ -9,14 +9,14 @@ using namespace rules;
 START_TEST

 describe("computing closures of item sets", []() {
-    PreparedGrammar grammar({
+    SyntaxGrammar grammar({
        { "E", seq({
            i_sym(1),
            i_token(11) }) },
        { "T", seq({
            i_token(12),
            i_token(13) }) },
-    }, {});
+    }, {}, {});

    it("adds items at the beginnings of referenced rules", [&]() {
        ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),
--- a/spec/compiler/build_tables/item_set_transitions_spec.cc
+++ b/spec/compiler/build_tables/item_set_transitions_spec.cc
@ -8,15 +8,13 @@ using namespace build_tables;
 START_TEST

 describe("lexical item set transitions", []() {
-    PreparedGrammar grammar({}, {});
-
    describe("when two items in the set have transitions on the same character", [&]() {
        it("merges the transitions by computing the union of the two item sets", [&]() {
            LexItemSet set1({
                LexItem(Symbol(1), character({ {'a', 'f'} })),
                LexItem(Symbol(2), character({ {'e', 'x'} })) });

-            AssertThat(char_transitions(set1, grammar), Equals(map<CharacterSet, LexItemSet>({
+            AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
                { CharacterSet({ {'a', 'd'} }), LexItemSet({
                    LexItem(Symbol(1), blank()) }) },
                { CharacterSet({ {'e', 'f'} }), LexItemSet({
@ -30,10 +28,10 @@ describe("lexical item set transitions", []() {
 });

 describe("syntactic item set transitions", [&]() {
-    PreparedGrammar grammar({
+    SyntaxGrammar grammar({
        { "A", blank() },
        { "B", i_token(21) },
-    }, {});
+    }, {}, {});

    it("computes the closure of the new item sets", [&]() {
        ParseItemSet set1({
--- a/spec/compiler/build_tables/rule_can_be_blank_spec.cc
+++ b/spec/compiler/build_tables/rule_can_be_blank_spec.cc
@ -56,14 +56,14 @@ describe("checking if rules can be blank", [&]() {
    });

    describe("checking recursively (by expanding non-terminals)", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "A", choice({
                seq({ i_sym(0), i_token(11) }),
                blank() }) },
            { "B", choice({
                seq({ i_sym(1), i_token(12) }),
                i_token(13) }) },
-        }, {});
+        }, {}, {});

        it("terminates for left-recursive rules that can be blank", [&]() {
            rule = i_sym(0);
--- a/spec/compiler/build_tables/rule_transitions_spec.cc
+++ b/spec/compiler/build_tables/rule_transitions_spec.cc
@ -1,26 +1,11 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/build_tables/rule_transitions.h"
 #include "compiler/rules/metadata.h"
+#include "compiler/helpers/containers.h"

 using namespace rules;
 using namespace build_tables;

-template<typename K>
-class rule_map : public map<K, rule_ptr> {
-public:
-    bool operator==(const map<K, rule_ptr> &other) const {
-        if (this->size() != other.size()) return false;
-        for (const auto &pair : *this) {
-            auto other_pair = other.find(pair.first);
-            if (other_pair == other.end()) return false;
-            if (!pair.second->operator==(*other_pair->second)) return false;
-        }
-        return true;
-    }
-
-    rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
-};
-
 START_TEST

 describe("rule transitions", []() {
--- a/spec/compiler/helpers/containers.h
+++ b/spec/compiler/helpers/containers.h
@ -0,0 +1,52 @@
+#ifndef HELPERS_CONTAINERS_H_
+#define HELPERS_CONTAINERS_H_
+
+#include <map>
+#include <vector>
+#include <string>
+#include <initializer_list>
+#include "tree_sitter/compiler.h"
+#include "compiler/rules/rule.h"
+
+using std::map;
+using std::vector;
+using std::string;
+using std::initializer_list;
+using std::pair;
+using tree_sitter::rules::rule_ptr;
+
+// Key -> rule map whose equality calls operator== on the pointed-to rules.
+template<typename K>
+class rule_map : public map<K, rule_ptr> {
+public:
+    rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
+
+    bool operator==(const map<K, rule_ptr> &other) const {
+        if (other.size() != this->size()) return false;
+        for (auto entry = this->begin(); entry != this->end(); ++entry) {
+            auto match = other.find(entry->first);  // every key must also be present in `other`
+            if (match == other.end() || !entry->second->operator==(*match->second)) return false;
+        }
+        return true;
+    }
+};
+
+// Ordered (name, rule) list; equality checks, position by position, names and pointed-to rules.
+class rule_list : public vector<pair<string, rule_ptr>> {
+public:
+    bool operator==(const vector<pair<string, rule_ptr>> &other) const {
+        if (this->size() != other.size()) return false;
+        for (size_t i = 0; i < this->size(); i++) {
+            const auto &entry = this->operator[](i);  // bind by reference: no per-element copy
+            const auto &other_entry = other[i];
+            if (entry.first != other_entry.first) return false;  // rule names must match too
+            if (!entry.second->operator==(*other_entry.second)) return false;
+        }
+        return true;
+    }
+
+    rule_list(const initializer_list<pair<string, rule_ptr>> &list) : vector<pair<string, rule_ptr>>(list) {}
+};
+
+
+#endif  // HELPERS_CONTAINERS_H_
--- a/spec/compiler/prepare_grammar/expand_repeats_spec.cc
+++ b/spec/compiler/prepare_grammar/expand_repeats_spec.cc
@ -1,6 +1,7 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/prepared_grammar.h"
 #include "compiler/prepare_grammar/expand_repeats.h"
+#include "compiler/helpers/containers.h"

 START_TEST

@ -9,29 +10,33 @@ using prepare_grammar::expand_repeats;

 describe("expanding repeat rules in a grammar", []() {
    it("replaces repeat rules with pairs of recursive rules", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", repeat(i_token(0)) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", i_aux_sym(0) },
-        }, {
-            { "rule0_repeat0", choice({
-                seq({
-                    i_token(0),
-                    i_aux_sym(0) }),
-                blank() }) },
+        })));
+
+        AssertThat(match.aux_rules, Equals(rule_list({
+            { "rule0_repeat0", choice({ seq({ i_token(0), i_aux_sym(0) }), blank() }) },
        })));
    });

    it("replaces repeats inside of sequences", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", seq({ i_token(10), repeat(i_token(11)) }) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", seq({ i_token(10), i_aux_sym(0) }) },
-        }, {
+        })));
+
+        AssertThat(match.aux_rules, Equals(rule_list({
            { "rule0_repeat0", choice({
                seq({ i_token(11), i_aux_sym(0) }),
                blank() }) },
@ -39,13 +44,17 @@ describe("expanding repeat rules in a grammar", []() {
    });

    it("replaces repeats inside of choices", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", choice({ i_token(10), i_aux_sym(0) }) },
-        }, {
+        })));
+
+        AssertThat(match.aux_rules, Equals(rule_list({
            { "rule0_repeat0", choice({
                seq({ i_token(11), i_aux_sym(0) }),
                blank() }) },
@ -53,13 +62,17 @@ describe("expanding repeat rules in a grammar", []() {
    });

    it("can replace multiple repeats in the same rule", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) },
-        }, {
+        })));
+        
+        AssertThat(match.aux_rules, Equals(rule_list({
            { "rule0_repeat0", choice({
                seq({
                    i_token(10),
@ -74,15 +87,19 @@ describe("expanding repeat rules in a grammar", []() {
    });

    it("can replace repeats in multiple rules", [&]() {
-        PreparedGrammar grammar({
+        SyntaxGrammar grammar({
            { "rule0", repeat(i_token(10)) },
            { "rule1", repeat(i_token(11)) },
-        }, {});
+        }, {}, {});

-        AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
+        auto match = expand_repeats(grammar);
+
+        AssertThat(match.rules, Equals(rule_list({
            { "rule0", i_aux_sym(0) },
            { "rule1", i_aux_sym(1) },
-        }, {
+        })));
+
+        AssertThat(match.aux_rules, Equals(rule_list({
            { "rule0_repeat0", choice({
                seq({ i_token(10), i_aux_sym(0) }),
                blank() }) },
--- a/spec/compiler/prepare_grammar/expand_tokens_spec.cc
+++ b/spec/compiler/prepare_grammar/expand_tokens_spec.cc
@ -1,5 +1,6 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/prepared_grammar.h"
+#include "compiler/helpers/containers.h"
 #include "compiler/prepare_grammar/expand_tokens.h"

 START_TEST
@ -9,50 +10,50 @@ using prepare_grammar::expand_tokens;

 describe("expanding token rules", []() {
    it("replaces regex patterns with their expansion", [&]() {
-        PreparedGrammar grammar({
+        LexicalGrammar grammar({
            { "rule_A", seq({
                i_sym(10),
                pattern("x*"),
                i_sym(11) }) },
-        }, {});
+        }, {}, {});

        auto result = expand_tokens(grammar);

        AssertThat(result.second, Equals((const GrammarError *)nullptr));
-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({
                i_sym(10),
                repeat(character({ 'x' })),
                i_sym(11) }) },
-        }, {})));
+        })));
    });

    it("replaces string rules with a sequence of characters", [&]() {
-        PreparedGrammar grammar({
+        LexicalGrammar grammar({
            { "rule_A", seq({
                i_sym(10),
                str("xyz"),
                i_sym(11) }) },
-        }, {});
+        }, {}, {});

        auto result = expand_tokens(grammar);

        AssertThat(result.second, Equals((const GrammarError *)nullptr));
-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({
                i_sym(10),
                seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }),
                i_sym(11) }) },
-        }, {})));
+        })));
    });

    it("returns an error when the grammar contains an invalid regex", [&]() {
-        PreparedGrammar grammar({
+        LexicalGrammar grammar({
            { "rule_A", seq({
                pattern("("),
                str("xyz"),
                pattern("[") }) },
-        }, {});
+        }, {}, {});

        auto result = expand_tokens(grammar);

--- a/spec/compiler/prepare_grammar/extract_tokens_spec.cc
+++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cc
@ -1,160 +1,172 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/prepared_grammar.h"
 #include "compiler/prepare_grammar/extract_tokens.h"
+#include "compiler/prepare_grammar/interned_grammar.h"
+#include "compiler/prepared_grammar.h"
+#include "compiler/helpers/containers.h"

 START_TEST

 using namespace rules;
 using prepare_grammar::extract_tokens;
+using prepare_grammar::InternedGrammar;

 describe("extracting tokens from a grammar", []() {
    it("moves string rules into the lexical grammar", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", seq({ str("ab"), i_sym(0) }) }
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", seq({ str("ab"), i_sym(0) }) }
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // the syntax grammar gains no auxiliary rules
+        AssertThat(result.second.rules, IsEmpty());
+        AssertThat(result.second.aux_rules, Equals(rule_list({
            { "'ab'", str("ab") },
        })));
    });

    it("moves pattern rules into the lexical grammar", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", seq({ pattern("a+"), i_sym(0) }) }
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", seq({ pattern("a+"), i_sym(0) }) }
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // the syntax grammar gains no auxiliary rules
+        AssertThat(result.second.rules, IsEmpty());
+        AssertThat(result.second.aux_rules, Equals(rule_list({
            { "/a+/", pattern("a+") },
        })));
    });

    it("moves other rules marked as tokens into the lexical grammar", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", seq({
-                token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
-                i_sym(0) }) }
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", seq({
+                    token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
+                    i_sym(0) }) }
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // the syntax grammar gains no auxiliary rules
+        AssertThat(result.second.rules, IsEmpty());
+        AssertThat(result.second.aux_rules, Equals(rule_list({
            { "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) },
        })));
    });

    it("does not extract blanks", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", choice({ i_sym(0), blank() }) },
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", choice({ i_sym(0), blank() }) },
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", choice({ i_sym(0), blank() }) },
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {})));
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // nothing is extracted, so every other list stays empty
+        AssertThat(result.second.rules, IsEmpty());
+        AssertThat(result.second.aux_rules, IsEmpty());
    });

    it("does not create duplicate tokens in the lexical grammar", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
-            { "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
-        }, {}));
+        pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
+            {
+                { "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
+            },
+            {},
+            {}
+        });

-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
-        }, {})));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
+        })));
+        AssertThat(result.first.aux_rules, IsEmpty());  // the syntax grammar gains no auxiliary rules
+        AssertThat(result.second.rules, IsEmpty());
+        AssertThat(result.second.aux_rules, Equals(rule_list({
            { "'ab'", str("ab") },
-        })));
-    });
-
-    it("extracts tokens from the grammar's auxiliary rules", [&]() {
-        pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
-            { "rule_A", seq({ str("ab"), i_sym(0) }) }
-        }));
-
-        AssertThat(result.first, Equals(PreparedGrammar({}, {
-            { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
-        })));
-
-        AssertThat(result.second, Equals(PreparedGrammar({}, {
-            { "'ab'", str("ab") },
-        })));
+        })));
    });

    describe("when an entire rule can be extracted", [&]() {
        it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
-            auto result = extract_tokens(PreparedGrammar({
-                { "rule_A", i_sym(1) },
-                { "rule_B", pattern("a|b") },
-                { "rule_C", token(seq({ str("a"), str("b") })) },
-            }, {}));
+            auto result = extract_tokens(InternedGrammar{
+                {
+                    { "rule_A", i_sym(1) },
+                    { "rule_B", pattern("a|b") },
+                    { "rule_C", token(seq({ str("a"), str("b") })) },
+                },
+                {},
+                {}
+            });

-            AssertThat(result.first, Equals(PreparedGrammar({
+            AssertThat(result.first.rules, Equals(rule_list({
                { "rule_A", i_token(0) }
-            }, {})));
-
-            AssertThat(result.second, Equals(PreparedGrammar({
+            })));
+            AssertThat(result.first.aux_rules, IsEmpty());
+            AssertThat(result.second.rules, Equals(rule_list({
                { "rule_B", pattern("a|b") },
                { "rule_C", token(seq({ str("a"), str("b") })) },
-            }, {})));
+            })));
+            AssertThat(result.second.aux_rules, IsEmpty());
        });

        it("updates symbols whose indices need to change due to deleted rules", [&]() {
-            auto result = extract_tokens(PreparedGrammar({
-                { "rule_A", str("ab") },
-                { "rule_B", i_sym(0) },
-                { "rule_C", i_sym(1) },
-            }, {}));
+            auto result = extract_tokens(InternedGrammar{
+                {
+                    { "rule_A", str("ab") },
+                    { "rule_B", i_sym(0) },
+                    { "rule_C", i_sym(1) },
+                },
+                {},
+                {}
+            });

-            AssertThat(result.first, Equals(PreparedGrammar({
+            AssertThat(result.first.rules, Equals(rule_list({
                { "rule_B", i_token(0) },
                { "rule_C", i_sym(0) },
-            }, {})));
-
-            AssertThat(result.second, Equals(PreparedGrammar({
+            })));
+            AssertThat(result.first.aux_rules, IsEmpty());
+            AssertThat(result.second.rules, Equals(rule_list({
                { "rule_A", str("ab") },
-            }, {})));
+            })));
+            AssertThat(result.second.aux_rules, IsEmpty());
        });

        it("updates the grammar's ubiquitous_tokens", [&]() {
-            auto result = extract_tokens(PreparedGrammar({
-                { "rule_A", str("ab") },
-                { "rule_B", i_sym(0) },
-                { "rule_C", i_sym(1) },
-            }, {}).ubiquitous_tokens({ Symbol(0) }));
+            auto result = extract_tokens(InternedGrammar{
+                {
+                    { "rule_A", str("ab") },
+                    { "rule_B", i_sym(0) },
+                    { "rule_C", i_sym(1) },
+                },
+                { Symbol(0) },
+                {}
+            });

-            AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
+            AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
                { Symbol(0, SymbolOptionToken) }
            })));
        });
-
-        it("extracts entire auxiliary rules", [&]() {
-            auto result = extract_tokens(PreparedGrammar({}, {
-                { "rule_A", str("ab") },
-                { "rule_B", i_aux_sym(0) },
-                { "rule_C", i_aux_sym(1) },
-            }));
-
-            AssertThat(result.first, Equals(PreparedGrammar({}, {
-                { "rule_B", i_aux_token(0) },
-                { "rule_C", i_aux_sym(0) },
-            })));
-
-            AssertThat(result.second, Equals(PreparedGrammar({}, {
-                { "rule_A", str("ab") },
-            })));
-        });
    });
 });

--- a/spec/compiler/prepare_grammar/intern_symbols_spec.cc
+++ b/spec/compiler/prepare_grammar/intern_symbols_spec.cc
@ -3,6 +3,7 @@
 #include "compiler/prepare_grammar/intern_symbols.h"
 #include "compiler/rules/named_symbol.h"
 #include "compiler/rules/symbol.h"
+#include "compiler/helpers/containers.h"

 START_TEST

@ -20,11 +21,11 @@ describe("interning symbols in a grammar", []() {
        auto result = intern_symbols(grammar);

        AssertThat(result.second, Equals((GrammarError *)nullptr));
-        AssertThat(result.first, Equals(PreparedGrammar({
+        AssertThat(result.first.rules, Equals(rule_list({
            { "x", choice({ i_sym(1), i_sym(2) }) },
            { "y", i_sym(2) },
            { "z", str("stuff") },
-        }, {})));
+        })));
    });

    describe("when there are symbols that reference undefined rules", [&]() {
@ -49,10 +50,20 @@ describe("interning symbols in a grammar", []() {
        auto result = intern_symbols(grammar);

        AssertThat(result.second, Equals((GrammarError *)nullptr));
-        AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
+        AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
            Symbol(2)
        })));
    });
+
+    it("preserves the grammar's separator character set", [&]() {
+        auto grammar = Grammar({
+            { "z", str("stuff") }
+        }).separators({ 'x', 'y' });
+
+        auto result = intern_symbols(grammar);
+
+        AssertThat(result.first.separators, Equals(vector<char>({ 'x', 'y' })));  // same separators as the input grammar
+    });
 });

 END_TEST