In compiler, distinguish between anonymous tokens and hidden rules

This commit is contained in:
Max Brunsfeld 2015-09-05 17:05:37 -07:00
parent 4b270c8604
commit 5982b77c97
46 changed files with 41131 additions and 40884 deletions

View file

@ -1,8 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/parse_table.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/rules/built_in_symbols.h"
using namespace rules;
@ -12,15 +11,35 @@ START_TEST
describe("build_parse_table", []() {
SyntaxGrammar parse_grammar{{
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
{ "rule1", i_token(0) },
{ "rule2", i_token(1) },
}, {}, { Symbol(2, SymbolOptionToken) }, {}};
{
"rule0",
choice({ i_sym(1), i_sym(2) }),
RuleEntryTypeNamed
},
{
"rule1",
i_token(0),
RuleEntryTypeNamed
},
{
"rule2",
i_token(1),
RuleEntryTypeNamed
},
}, { Symbol(2, true) }, {}};
LexicalGrammar lex_grammar{{
{ "token0", pattern("[a-c]") },
{ "token1", pattern("[b-d]") },
}, {}, {}};
{
"token0",
pattern("[a-c]"),
RuleEntryTypeNamed
},
{
"token1",
pattern("[b-d]"),
RuleEntryTypeNamed
},
}, {}};
it("first looks for the start rule and its item set closure", [&]() {
auto result = build_parse_table(parse_grammar, lex_grammar);
@ -32,11 +51,11 @@ describe("build_parse_table", []() {
// expanded from the item set closure of the start item
{ Symbol(1), {ParseAction::Shift(2, { 0 })} },
{ Symbol(2), {ParseAction::Shift(2, { 0 })} },
{ Symbol(0, SymbolOptionToken), {ParseAction::Shift(3, { 0 })} },
{ Symbol(1, SymbolOptionToken), {ParseAction::Shift(4, { 0 })} },
{ Symbol(0, true), {ParseAction::Shift(3, { 0 })} },
{ Symbol(1, true), {ParseAction::Shift(4, { 0 })} },
// for the ubiquitous_token 'token2'
{ Symbol(2, SymbolOptionToken), {ParseAction::ShiftExtra()} },
{ Symbol(2, true), {ParseAction::ShiftExtra()} },
})));
});
@ -52,7 +71,7 @@ describe("build_parse_table", []() {
{ END_OF_INPUT(), {ParseAction::Accept()} },
// for the ubiquitous_token 'token2'
{ Symbol(2, SymbolOptionToken), {ParseAction::ShiftExtra()} },
{ Symbol(2, true), {ParseAction::ShiftExtra()} },
})));
});
@ -63,7 +82,7 @@ describe("build_parse_table", []() {
{ END_OF_INPUT(), {ParseAction::Reduce(Symbol(0), 1, 0, AssociativityLeft, 0)} },
// for the ubiquitous_token 'token2'
{ Symbol(2, SymbolOptionToken), {ParseAction::ShiftExtra()} },
{ Symbol(2, true), {ParseAction::ShiftExtra()} },
})));
});
});

View file

@ -1,5 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/build_tables/first_symbols.h"
#include "compiler/rules/metadata.h"
@ -16,7 +16,7 @@ describe("first_symbols", []() {
auto rule = seq({ i_token(0), i_token(1) });
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
Symbol(0, true),
})));
});
@ -28,8 +28,8 @@ describe("first_symbols", []() {
i_token(1) });
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
Symbol(1, SymbolOptionToken)
Symbol(0, true),
Symbol(1, true)
})));
});
@ -41,16 +41,21 @@ describe("first_symbols", []() {
i_sym(0) });
SyntaxGrammar grammar{{
{ "rule0", seq({
i_token(2),
i_token(3),
i_token(4) }) }
}, {}, {}, {}};
{
"rule0",
seq({
i_token(2),
i_token(3),
i_token(4),
}),
RuleEntryTypeNamed
}
}, {}, {}};
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(0, SymbolOptionToken),
Symbol(2, SymbolOptionToken),
Symbol(0, true),
Symbol(2, true),
})));
});
@ -60,15 +65,20 @@ describe("first_symbols", []() {
i_token(1) });
SyntaxGrammar grammar{{
{ "rule0", choice({
i_token(0),
blank() }) }
}, {}, {}, {}};
{
"rule0",
choice({
i_token(0),
blank(),
}),
RuleEntryTypeNamed
},
}, {}, {}};
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(0, SymbolOptionToken),
Symbol(1, SymbolOptionToken),
Symbol(0, true),
Symbol(1, true),
})));
});
});
@ -76,17 +86,21 @@ describe("first_symbols", []() {
describe("when there are left-recursive rules", [&]() {
it("terminates", [&]() {
SyntaxGrammar grammar{{
{ "rule0", choice({
seq({ i_sym(0), i_token(10) }),
i_token(11),
}) },
}, {}, {}, {}};
{
"rule0",
choice({
seq({ i_sym(0), i_token(10) }),
i_token(11),
}),
RuleEntryTypeNamed
},
}, {}, {}};
auto rule = i_sym(0);
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(11, SymbolOptionToken)
Symbol(11, true)
})));
});
});
@ -95,7 +109,7 @@ describe("first_symbols", []() {
auto rule = make_shared<Metadata>(i_token(3), map<rules::MetadataKey, int>());
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(3, SymbolOptionToken),
Symbol(3, true),
})));
});
});

View file

@ -1,5 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
@ -10,29 +10,39 @@ START_TEST
describe("item_set_closure", []() {
SyntaxGrammar grammar{{
{ "E", seq({
i_sym(1),
i_token(11) }) },
{ "T", seq({
i_token(12),
i_token(13) }) },
}, {}, {}, {}};
{
"E",
seq({
i_sym(1),
i_token(11),
}),
RuleEntryTypeNamed,
},
{
"T",
seq({
i_token(12),
i_token(13),
}),
RuleEntryTypeNamed,
},
}, {}, {}};
it("adds items at the beginnings of referenced rules", [&]() {
ParseItemSet item_set = item_set_closure(
ParseItem(Symbol(0), grammar.rule(Symbol(0)), {}),
set<Symbol>({ Symbol(10, SymbolOptionToken) }),
ParseItem(Symbol(0), grammar.rules[0].rule, {}),
set<Symbol>({ Symbol(10, true) }),
grammar
);
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(1), grammar.rule(Symbol(1)), {}),
set<Symbol>({ Symbol(11, SymbolOptionToken) }),
ParseItem(Symbol(1), grammar.rules[1].rule, {}),
set<Symbol>({ Symbol(11, true) }),
},
{
ParseItem(Symbol(0), grammar.rule(Symbol(0)), {}),
set<Symbol>({ Symbol(10, SymbolOptionToken) }),
ParseItem(Symbol(0), grammar.rules[0].rule, {}),
set<Symbol>({ Symbol(10, true) }),
},
})));
});

View file

@ -1,6 +1,6 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/helpers/rule_helpers.h"
using namespace rules;
@ -43,29 +43,37 @@ describe("char_transitions(LexItemSet)", []() {
describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
SyntaxGrammar grammar{{
{ "A", blank() },
{ "B", i_token(21) },
}, {}, {}, {}};
{
"A",
blank(),
RuleEntryTypeNamed
},
{
"B",
i_token(21),
RuleEntryTypeNamed
},
}, {}, {}};
it("computes the closure of the new item sets", [&]() {
ParseItemSet set1({
{
ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), { Symbol(101) }),
set<Symbol>({ Symbol(23, SymbolOptionToken) })
set<Symbol>({ Symbol(23, true) })
},
});
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
{
Symbol(22, SymbolOptionToken),
Symbol(22, true),
ParseItemSet({
{
ParseItem(Symbol(0), i_sym(1), { Symbol(101), Symbol(22) }),
set<Symbol>({ Symbol(23, SymbolOptionToken) }),
set<Symbol>({ Symbol(23, true) }),
},
{
ParseItem(Symbol(1), i_token(21), {}),
set<Symbol>({ Symbol(23, SymbolOptionToken) })
set<Symbol>({ Symbol(23, true) })
},
})
},

View file

@ -2,7 +2,7 @@
#include "compiler/rules/built_in_symbols.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using namespace build_tables;
@ -11,16 +11,24 @@ START_TEST
describe("LexConflictManager", []() {
LexicalGrammar lexical_grammar{{
{ "other_token", pattern("[a-b]") },
{ "lookahead_token", pattern("[c-d]") },
}, {}, {}};
{
"other_token",
pattern("[a-b]"),
RuleEntryTypeNamed
},
{
"lookahead_token",
pattern("[c-d]"),
RuleEntryTypeNamed
},
}, {}};
LexConflictManager conflict_manager(lexical_grammar);
bool update;
Symbol sym1(0, SymbolOptionToken);
Symbol sym2(1, SymbolOptionToken);
Symbol sym3(2, SymbolOptionToken);
Symbol sym1(0, true);
Symbol sym2(1, true);
Symbol sym3(2, true);
it("favors non-errors over lexical errors", [&]() {
update = conflict_manager.resolve(LexAction::Advance(2, {0}), LexAction::Error());

View file

@ -2,7 +2,7 @@
#include "compiler/rules/built_in_symbols.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using namespace build_tables;
@ -11,17 +11,37 @@ START_TEST
describe("ParseConflictManager", []() {
SyntaxGrammar syntax_grammar{{
{ "in_progress_rule1", i_token(0) },
{ "in_progress_rule2", i_token(0) },
{ "reduced_rule", i_token(0) },
{ "other_rule1", i_token(0) },
{ "other_rule2", i_token(0) },
}, {}, { Symbol(2, SymbolOptionToken) }, {}};
{
"in_progress_rule1",
i_token(0),
RuleEntryTypeNamed,
},
{
"in_progress_rule2",
i_token(0),
RuleEntryTypeNamed,
},
{
"reduced_rule",
i_token(0),
RuleEntryTypeNamed,
},
{
"other_rule1",
i_token(0),
RuleEntryTypeNamed,
},
{
"other_rule2",
i_token(0),
RuleEntryTypeNamed,
},
}, { Symbol(2, true) }, {}};
pair<bool, ConflictType> result;
Symbol sym1(0);
Symbol sym2(1);
Symbol lookahead_sym(1, SymbolOptionToken);
Symbol lookahead_sym(1, true);
ParseConflictManager *conflict_manager;
before_each([&]() {

View file

@ -1,7 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using build_tables::rule_can_be_blank;
@ -57,13 +57,23 @@ describe("rule_can_be_blank", [&]() {
describe("checking recursively (by expanding non-terminals)", [&]() {
SyntaxGrammar grammar{{
{ "A", choice({
seq({ i_sym(0), i_token(11) }),
blank() }) },
{ "B", choice({
seq({ i_sym(1), i_token(12) }),
i_token(13) }) },
}, {}, {}, {}};
{
"A",
choice({
seq({ i_sym(0), i_token(11) }),
blank()
}),
RuleEntryTypeNamed,
},
{
"B",
choice({
seq({ i_sym(1), i_token(12) }),
i_token(13)
}),
RuleEntryTypeNamed,
},
}, {}, {}};
it("terminates for left-recursive rules that can be blank", [&]() {
rule = i_sym(0);

View file

@ -48,6 +48,20 @@ class rule_list : public vector<pair<string, rule_ptr>> {
vector<pair<string, rule_ptr>>(list) {}
};
// A vector wrapper whose operator== compares element-by-element against a
// plain std::vector<T>. This exists so spec assertions (AssertThat/Equals)
// can compare against vectors of types (e.g. RuleEntry) whose equality is
// provided by a free operator== rather than a member.
template<typename T>
class eq_vector : public std::vector<T> {
 public:
  // True when both sequences have the same length and each pair of
  // corresponding elements compares equal via T's operator==.
  bool operator==(const std::vector<T> &other) const {
    // std::equal replaces the hand-rolled index loop; the size check is
    // still required because the two-iterator overload assumes equal length.
    return this->size() == other.size() &&
           std::equal(this->begin(), this->end(), other.begin());
  }

  eq_vector(const std::initializer_list<T> &list) : std::vector<T>(list) {}
};
class rule_vector : public vector<rule_ptr> {
public:
bool operator==(const vector<rule_ptr> &other) const {

View file

@ -6,43 +6,41 @@ namespace tree_sitter {
using std::make_shared;
using std::set;
using std::map;
using std::ostream;
using std::string;
using std::to_string;
namespace rules {
rule_ptr character(const set<uint32_t> &ranges) {
return character(ranges, true);
}
rule_ptr character(const set<uint32_t> &ranges) {
return character(ranges, true);
}
rule_ptr character(const set<uint32_t> &chars, bool sign) {
CharacterSet result;
if (sign) {
for (uint32_t c : chars)
result.include(c);
} else {
result.include_all();
for (uint32_t c : chars)
result.exclude(c);
}
return result.copy();
rule_ptr character(const set<uint32_t> &chars, bool sign) {
rules::CharacterSet result;
if (sign) {
for (uint32_t c : chars)
result.include(c);
} else {
result.include_all();
for (uint32_t c : chars)
result.exclude(c);
}
return result.copy();
}
rule_ptr i_sym(size_t index) {
return make_shared<rules::Symbol>(index);
}
rule_ptr i_sym(size_t index) {
return make_shared<rules::Symbol>(index);
}
rule_ptr i_aux_sym(size_t index) {
return make_shared<rules::Symbol>(index, SymbolOptionAuxiliary);
}
rule_ptr i_token(size_t index) {
return make_shared<rules::Symbol>(index, true);
}
rule_ptr i_token(size_t index) {
return make_shared<rules::Symbol>(index, SymbolOptionToken);
}
rule_ptr metadata(rule_ptr rule, map<rules::MetadataKey, int> values) {
return make_shared<rules::Metadata>(rule, values);
}
rule_ptr i_aux_token(size_t index) {
return make_shared<rules::Symbol>(index, SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken));
}
rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
return make_shared<Metadata>(rule, values);
}
// Structural equality for grammar rule entries, used by the spec helpers'
// Equals() assertions: two entries match when their names agree, their rule
// trees compare equal BY VALUE (the stored rule pointers are dereferenced,
// so distinct shared_ptr instances holding equivalent rules still match),
// and their entry types (named / hidden / anonymous) agree.
bool operator==(const RuleEntry &left, const RuleEntry &right) {
return left.name == right.name && left.rule->operator==(*right.rule) &&
left.type == right.type;
}
}

View file

@ -4,17 +4,16 @@
#include "tree_sitter/compiler.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/metadata.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace rules {
rule_ptr metadata(rule_ptr, std::map<MetadataKey, int>);
rule_ptr character(const std::set<uint32_t> &);
rule_ptr character(const std::set<uint32_t> &, bool sign);
rule_ptr i_sym(size_t index);
rule_ptr i_aux_sym(size_t index);
rule_ptr i_token(size_t index);
rule_ptr i_aux_token(size_t index);
}
rule_ptr metadata(rule_ptr, std::map<rules::MetadataKey, int>);
rule_ptr character(const std::set<uint32_t> &);
rule_ptr character(const std::set<uint32_t> &, bool sign);
rule_ptr i_sym(size_t index);
rule_ptr i_token(size_t index);
bool operator==(const RuleEntry &left, const RuleEntry &right);
}
#endif

View file

@ -7,6 +7,7 @@
#include <map>
#include <unordered_set>
#include <vector>
#include "compiler/prepared_grammar.h"
using std::cout;
@ -83,4 +84,16 @@ inline std::ostream& operator<<(std::ostream &stream, const std::pair<T1, T2> &p
} // namespace std
namespace tree_sitter {
using std::ostream;
using std::string;
using std::to_string;
// Debug printer for RuleEntry, used when a spec assertion fails: renders the
// entry as "{<name>, <rule>, <type>}". The type enum is printed as its
// underlying integer via to_string.
// NOTE(review): `entry.rule` is streamed directly; whether this prints the
// rule's contents or just a pointer value depends on an operator<< for the
// rule pointer type being in scope — confirm one is provided elsewhere.
inline ostream &operator<<(ostream &stream, const RuleEntry &entry) {
return stream << string("{") << entry.name << string(", ") << entry.rule << string(", ") << to_string(entry.type) << string("}");
}
}
#endif

View file

@ -1,5 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/expand_repeats.h"
#include "compiler/helpers/containers.h"
@ -11,131 +11,223 @@ using prepare_grammar::expand_repeats;
describe("expand_repeats", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
SyntaxGrammar grammar{{
{ "rule0", repeat(i_token(0)) },
}, {}, {}, {}};
{
"rule0",
repeat(i_token(0)),
RuleEntryTypeNamed,
},
}, {}, {}};
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", choice({ i_aux_sym(0), blank() }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", seq({
i_token(0),
choice({ i_aux_sym(0), blank() }) }) },
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
{
"rule0",
choice({ i_sym(1), blank() }),
RuleEntryTypeNamed,
},
{
"rule0_repeat1",
seq({
i_token(0),
choice({ i_sym(1), blank() })
}),
RuleEntryTypeHidden
},
})));
});
it("replaces repeats inside of sequences", [&]() {
SyntaxGrammar grammar{{
{ "rule0", seq({
i_token(10),
repeat(i_token(11)) }) },
}, {}, {}, {}};
{
"rule0",
seq({
i_token(10),
repeat(i_token(11)),
}),
RuleEntryTypeNamed,
},
}, {}, {}};
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", seq({
i_token(10),
choice({ i_aux_sym(0), blank() }) }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", seq({
i_token(11),
choice({ i_aux_sym(0), blank() }) }) },
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
{
"rule0",
seq({
i_token(10),
choice({ i_sym(1), blank() })
}),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(11),
choice({ i_sym(1), blank() })
}),
RuleEntryTypeHidden
},
})));
});
it("replaces repeats inside of choices", [&]() {
SyntaxGrammar grammar{{
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
}, {}, {}, {}};
{
"rule0",
choice({ i_token(10), repeat(i_token(11)) }),
RuleEntryTypeNamed
},
}, {}, {}};
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", choice({ i_token(10), i_aux_sym(0), blank() }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", seq({
i_token(11),
choice({ i_aux_sym(0), blank() }) }) },
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
{
"rule0",
choice({ i_token(10), i_sym(1), blank() }),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(11),
choice({ i_sym(1), blank() }),
}),
RuleEntryTypeHidden
},
})));
});
it("does not create redundant auxiliary rules", [&]() {
SyntaxGrammar grammar{{
{ "rule0", choice({
seq({ i_token(1), repeat(i_token(4)) }),
seq({ i_token(2), repeat(i_token(4)) }) }) },
{ "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
}, {}, {}, {}};
{
"rule0",
choice({
seq({ i_token(1), repeat(i_token(4)) }),
seq({ i_token(2), repeat(i_token(4)) }),
}),
RuleEntryTypeNamed
},
{
"rule1",
seq({ i_token(3), repeat(i_token(4)) }),
RuleEntryTypeNamed
},
}, {}, {}};
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", choice({
seq({ i_token(1), choice({ i_aux_sym(0), blank() }) }),
seq({ i_token(2), choice({ i_aux_sym(0), blank() }) }) }) },
{ "rule1", seq({ i_token(3), choice({ i_aux_sym(0), blank() }) }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", seq({
i_token(4),
choice({ i_aux_sym(0), blank() }) }) },
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
{
"rule0",
choice({
seq({ i_token(1), choice({ i_sym(2), blank() }) }),
seq({ i_token(2), choice({ i_sym(2), blank() }) }),
}),
RuleEntryTypeNamed
},
{
"rule1",
seq({ i_token(3), choice({ i_sym(2), blank() }) }),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(4),
choice({ i_sym(2), blank() }),
}),
RuleEntryTypeHidden
},
})));
});
it("can replace multiple repeats in the same rule", [&]() {
SyntaxGrammar grammar{{
{ "rule0", seq({
repeat(i_token(10)),
repeat(i_token(11)) }) },
}, {}, {}, {}};
{
"rule0",
seq({
repeat(i_token(10)),
repeat(i_token(11)),
}),
RuleEntryTypeNamed
},
}, {}, {}};
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", seq({
choice({ i_aux_sym(0), blank() }),
choice({ i_aux_sym(1), blank() }) }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", seq({
i_token(10),
choice({ i_aux_sym(0), blank() }) }) },
{ "rule0_repeat1", seq({
i_token(11),
choice({ i_aux_sym(1), blank() }) }) },
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
{
"rule0",
seq({
choice({ i_sym(1), blank() }),
choice({ i_sym(2), blank() }),
}),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(10),
choice({ i_sym(1), blank() }),
}),
RuleEntryTypeHidden
},
{
"rule0_repeat2",
seq({
i_token(11),
choice({ i_sym(2), blank() }),
}),
RuleEntryTypeHidden
},
})));
});
it("can replace repeats in multiple rules", [&]() {
SyntaxGrammar grammar{{
{ "rule0", repeat(i_token(10)) },
{ "rule1", repeat(i_token(11)) },
}, {}, {}, {}};
{
"rule0",
repeat(i_token(10)),
RuleEntryTypeNamed,
},
{
"rule1",
repeat(i_token(11)),
RuleEntryTypeNamed,
},
}, {}, {}};
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", choice({ i_aux_sym(0), blank() }) },
{ "rule1", choice({ i_aux_sym(1), blank() }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", seq({
i_token(10),
choice({ i_aux_sym(0), blank() }) }) },
{ "rule1_repeat0", seq({
i_token(11),
choice({ i_aux_sym(1), blank() }) }) },
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
{
"rule0",
choice({ i_sym(2), blank() }),
RuleEntryTypeNamed
},
{
"rule1",
choice({ i_sym(3), blank() }),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(10),
choice({ i_sym(2), blank() }),
}),
RuleEntryTypeHidden
},
{
"rule1_repeat1",
seq({
i_token(11),
choice({ i_sym(3), blank() })
}),
RuleEntryTypeHidden
},
})));
});
});

View file

@ -1,5 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/helpers/containers.h"
#include "compiler/prepare_grammar/expand_tokens.h"
@ -12,36 +12,64 @@ describe("expand_tokens", []() {
describe("string rules", [&]() {
it("replaces strings with sequences of character sets", [&]() {
LexicalGrammar grammar{{
{ "rule_A", seq({
i_sym(10),
str("xyz"),
i_sym(11) }) },
}, {}, {}};
{
"rule_A",
seq({
i_sym(10),
str("xyz"),
i_sym(11),
}),
RuleEntryTypeNamed
},
}, {}};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({
i_sym(10),
token(prec(1, seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }))),
i_sym(11) }) },
AssertThat(result.first.rules, Equals(eq_vector<RuleEntry>({
{
"rule_A",
seq({
i_sym(10),
metadata(seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
}), {
{PRECEDENCE, 1},
{IS_TOKEN, 1},
}),
i_sym(11),
}),
RuleEntryTypeNamed
},
})));
});
it("handles strings containing non-ASCII UTF8 characters", [&]() {
LexicalGrammar grammar{{
// α β
{ "rule_A", str("\u03B1 \u03B2") },
}, {}, {}};
{
"rule_A",
str("\u03B1 \u03B2"), // α β
RuleEntryTypeNamed
},
}, {}};
auto result = expand_tokens(grammar);
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", token(prec(1, seq({
character({ 945 }),
character({ ' ' }),
character({ 946 }) }))) }
AssertThat(result.first.rules, Equals(eq_vector<RuleEntry>({
{
"rule_A",
metadata(seq({
character({ 945 }),
character({ ' ' }),
character({ 946 }),
}), {
{PRECEDENCE, 1},
{IS_TOKEN, 1},
}),
RuleEntryTypeNamed
}
})));
});
});
@ -49,43 +77,65 @@ describe("expand_tokens", []() {
describe("regexp rules", [&]() {
it("replaces regexps with the equivalent rule tree", [&]() {
LexicalGrammar grammar{{
{ "rule_A", seq({
i_sym(10),
pattern("x*"),
i_sym(11) }) },
}, {}, {}};
{
"rule_A",
seq({
i_sym(10),
pattern("x*"),
i_sym(11),
}),
RuleEntryTypeNamed
},
}, {}};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11) }) },
AssertThat(result.first.rules, Equals(eq_vector<RuleEntry>({
{
"rule_A",
seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11),
}),
RuleEntryTypeNamed
},
})));
});
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
LexicalGrammar grammar{{
// [^α-δ]
{ "rule_A", pattern("[^\u03B1-\u03B4]*") },
}, {}, {}};
{
"rule_A",
pattern("[^\u03B1-\u03B4]*"), // [^α-δ]
RuleEntryTypeNamed
},
}, {}};
auto result = expand_tokens(grammar);
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", repeat(character({ 945, 946, 947, 948 }, false)) }
AssertThat(result.first.rules, Equals(eq_vector<RuleEntry>({
{
"rule_A",
repeat(character({ 945, 946, 947, 948 }, false)),
RuleEntryTypeNamed
}
})));
});
it("returns an error when the grammar contains an invalid regex", [&]() {
LexicalGrammar grammar{{
{ "rule_A", seq({
pattern("("),
str("xyz"),
pattern("[") }) },
}, {}, {}};
{
"rule_A",
seq({
pattern("("),
str("xyz"),
pattern("["),
}),
RuleEntryTypeNamed
},
}, {}};
auto result = expand_tokens(grammar);

View file

@ -1,6 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "compiler/helpers/containers.h"
@ -12,271 +11,301 @@ using prepare_grammar::extract_tokens;
using prepare_grammar::InternedGrammar;
describe("extract_tokens", []() {
it("moves string rules into the lexical grammar", [&]() {
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
{
"rule_A",
repeat(seq({
str("ab"),
pattern("cd*"),
choice({
i_sym(1),
i_sym(2),
token(repeat(choice({ str("ef"), str("gh") }))),
}),
})),
},
{
"rule_B",
pattern("ij+"),
},
{
"rule_C",
choice({ str("kl"), blank() }),
},
{
"rule_D",
repeat(i_sym(3))
}
}, {}, {}});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
SyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
const GrammarError *error = get<2>(result);
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(syntax_grammar.rules, Equals(eq_vector<RuleEntry>({
{
"rule_A",
repeat(seq({
// This string is now the first token in the lexical grammar.
i_token(0),
// This pattern is now the second rule in the lexical grammar.
i_token(1),
choice({
// Rule 1, which this symbol pointed to, has been moved to the
// lexical grammar.
i_token(3),
// This symbol's index has been decremented, because a previous rule
// was moved to the lexical grammar.
i_sym(1),
// This token rule is now the third rule in the lexical grammar.
i_token(2),
}),
})),
RuleEntryTypeNamed,
},
{
"rule_C",
choice({ i_token(4), blank() }),
RuleEntryTypeNamed,
},
{
"rule_D",
repeat(i_sym(2)),
RuleEntryTypeNamed,
}
})));
AssertThat(get<0>(result).aux_rules, IsEmpty())
AssertThat(get<1>(result).rules, IsEmpty())
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
{ "'ab'", str("ab") },
AssertThat(lexical_grammar.rules, Equals(eq_vector<RuleEntry>({
// Strings become anonymous rules.
{
"ab",
str("ab"),
RuleEntryTypeAnonymous,
},
// Patterns become hidden rules.
{
"/cd*/",
pattern("cd*"),
RuleEntryTypeHidden,
},
// Rules marked as tokens become hidden rules.
{
"/(ef|gh)*/",
repeat(choice({ str("ef"), str("gh") })),
RuleEntryTypeHidden,
},
// This named rule was moved wholesale to the lexical grammar.
{
"rule_B",
pattern("ij+"),
RuleEntryTypeNamed,
},
// Strings become anonymous rules.
{
"kl",
str("kl"),
RuleEntryTypeAnonymous,
},
})));
});
it("moves pattern rules into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
}, {}, {}});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
})));
AssertThat(get<0>(result).aux_rules, IsEmpty())
AssertThat(get<1>(result).rules, IsEmpty())
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
{ "/a+/", pattern("a+") },
})));
});
it("moves other rules marked as tokens into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
i_sym(0) }) }
}, {}, {}});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
})));
AssertThat(get<0>(result).aux_rules, IsEmpty())
AssertThat(get<1>(result).rules, IsEmpty())
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
{ "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) },
})));
});
it("does not move blank rules", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", choice({ i_sym(0), blank() }) },
}, {}, {}});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", choice({ i_sym(0), blank() }) },
})));
AssertThat(get<0>(result).aux_rules, IsEmpty())
AssertThat(get<1>(result).rules, IsEmpty())
AssertThat(get<1>(result).aux_rules, IsEmpty())
});
it("does not create duplicate tokens in the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
{
"rule_A",
seq({
str("ab"),
i_sym(0),
str("ab"),
})
},
}, {}, {}});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
})));
AssertThat(get<0>(result).aux_rules, IsEmpty())
SyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
AssertThat(get<1>(result).rules, IsEmpty())
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
{ "'ab'", str("ab") },
AssertThat(syntax_grammar.rules, Equals(eq_vector<RuleEntry>({
{
"rule_A",
seq({ i_token(0), i_sym(0), i_token(0) }),
RuleEntryTypeNamed
}
})));
AssertThat(lexical_grammar.rules, Equals(eq_vector<RuleEntry>({
{
"ab",
str("ab"),
RuleEntryTypeAnonymous
},
})))
});
it("updates the grammar's expected conflict symbols", [&]() {
auto result = extract_tokens(InternedGrammar{
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
{ "rule_A", str("ok") },
{ "rule_B", repeat(i_sym(0)) },
{ "rule_C", repeat(seq({ i_sym(0), i_sym(0) })) },
"rule_A",
seq({ i_sym(1), str("ab") })
},
{ str(" ") },
{ { Symbol(1), Symbol(2) } }
});
{
"rule_B",
str("cd")
},
{
"rule_C",
seq({ str("ef"), str("cd") })
},
}, {}, {}});
AssertThat(get<0>(result).rules.size(), Equals<size_t>(2));
AssertThat(get<1>(result).rules.size(), Equals<size_t>(1));
AssertThat(get<0>(result).expected_conflicts, Equals(set<set<Symbol>>({
SyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
AssertThat(syntax_grammar.rules, Equals(eq_vector<RuleEntry>({
{
"rule_A",
seq({ i_sym(1), i_token(0) }),
RuleEntryTypeNamed
},
{
"rule_B",
i_token(1),
RuleEntryTypeNamed
},
{
"rule_C",
seq({ i_token(2), i_token(1) }),
RuleEntryTypeNamed
},
})));
AssertThat(lexical_grammar.rules, Equals(eq_vector<RuleEntry>({
{
"ab",
str("ab"),
RuleEntryTypeAnonymous
},
{
"cd",
str("cd"),
RuleEntryTypeAnonymous
},
{
"ef",
str("ef"),
RuleEntryTypeAnonymous
},
})));
});
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
str("ok")
},
{
"rule_B",
repeat(i_sym(0))
},
{
"rule_C",
repeat(seq({ i_sym(0), i_sym(0) }))
},
}, { str(" ") }, { { Symbol(1), Symbol(2) } }});
SyntaxGrammar &syntax_grammar = get<0>(result);
AssertThat(syntax_grammar.rules.size(), Equals<size_t>(2));
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
{ Symbol(0), Symbol(1) },
})));
});
describe("when an entire grammar rule is a token", [&]() {
it("moves the rule the lexical grammar and updates referencing symbols", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", i_sym(1) },
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {}, {}});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", i_token(0) }
})));
AssertThat(get<0>(result).aux_rules, IsEmpty());
AssertThat(get<1>(result).rules, Equals(rule_list({
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
})));
// TODO put back
// AssertThat(get<1>(result).aux_rules, IsEmpty());
});
it("updates symbols whose indices need to change due to deleted rules", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}, {}, {}});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_B", i_token(0) },
{ "rule_C", i_sym(0) },
})));
AssertThat(get<0>(result).aux_rules, IsEmpty());
AssertThat(get<1>(result).rules, Equals(rule_list({
{ "rule_A", str("ab") },
})));
// TODO put back
// AssertThat(get<1>(result).aux_rules, IsEmpty());
});
it("does not move the rule if its content is used elsewhere in the grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ i_sym(1), str("ab") }) },
{ "rule_B", str("cd") },
{ "rule_C", seq({ str("ef"), str("cd") }) },
}, {}, {}});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_sym(1), i_aux_token(0) }) },
{ "rule_B", i_aux_token(1) },
{ "rule_C", seq({ i_aux_token(2), i_aux_token(1) }) },
})));
AssertThat(get<0>(result).aux_rules, IsEmpty());
AssertThat(get<1>(result).rules, IsEmpty())
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
{ "'ab'", str("ab") },
{ "'cd'", str("cd") },
{ "'ef'", str("ef") },
})));
});
});
describe("handling ubiquitous tokens", [&]() {
describe("ubiquitous tokens that are not symbols", [&]() {
it("adds them to the lexical grammar's separators", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("x") },
}, {
pattern("\\s+"),
str("y"),
}, {}});
it("adds inline ubiquitous tokens to the lexical grammar's separators", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("x") },
}, {
pattern("\\s+"),
str("y"),
}, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<1>(result).separators, Equals(rule_vector({
pattern("\\s+"),
str("y"),
})));
AssertThat(get<1>(result).separators, Equals(rule_vector({
pattern("\\s+"),
str("y"),
})));
AssertThat(get<0>(result).ubiquitous_tokens, IsEmpty());
});
AssertThat(get<0>(result).ubiquitous_tokens, IsEmpty());
});
describe("ubiquitous tokens that point to moved rules", [&]() {
it("updates them according to the new symbol numbers", [&]() {
auto result = extract_tokens(InternedGrammar{ {
{ "rule_A", seq({ str("w"), i_sym(1) }) },
{ "rule_B", str("x") },
{ "rule_C", str("y") },
}, {
i_sym(2),
}, {}});
it("updates ubiquitous symbols according to the new symbol numbers", [&]() {
auto result = extract_tokens(InternedGrammar{ {
{ "rule_A", seq({ str("w"), str("x"), i_sym(1) }) },
{ "rule_B", str("y") },
{ "rule_C", str("z") },
}, {
i_sym(2),
}, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
{ Symbol(1, SymbolOptionToken) },
})));
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
{ Symbol(3, true) },
})));
AssertThat(get<1>(result).separators, IsEmpty());
});
AssertThat(get<1>(result).separators, IsEmpty());
});
describe("ubiquitous tokens that are visible", [&]() {
it("preserves them in the syntactic grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("ab") },
{ "rule_B", str("bc") },
}, { i_sym(1) }, {}});
it("returns an error if any ubiquitous tokens are non-token symbols", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
seq({ str("x"), i_sym(1) }),
},
{
"rule_B",
seq({ str("y"), str("z") })
},
}, { i_sym(1) }, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
Symbol(1, SymbolOptionToken)
})));
AssertThat(get<1>(result).separators, IsEmpty());
});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
"Not a token: rule_B")));
});
describe("ubiquitous tokens that are used in other grammar rules", [&]() {
it("preserves them in the syntactic grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ i_sym(1), str("ab") }) },
{ "_rule_B", str("bc") },
}, { i_sym(1) }, {}});
it("returns an error if any ubiquitous tokens are non-token rules", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
str("x")
},
{
"rule_B",
str("y")
},
}, { choice({ i_sym(1), blank() }) }, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
})));
AssertThat(get<1>(result).separators, IsEmpty());
});
});
describe("ubiquitous tokens that are non-token symbols", [&]() {
it("returns an error", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ str("x"), i_sym(1) }), },
{ "rule_B", seq({ str("y"), str("z") }) },
}, { i_sym(1) }, {}});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
"Not a token: rule_B")));
});
});
describe("ubiquitous tokens that are not symbols", [&]() {
it("returns an error", [&]() {
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("x") },
{ "rule_B", str("y") },
}, { choice({ i_sym(1), blank() }) }, {}});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
"Not a token: (choice (sym 1) (blank))")));
});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
"Not a token: (choice (sym 1) (blank))")));
});
});
});