Merge branch 'flatten-rules-into-productions'

This branch had diverged considerably, so merging it required changing a lot
of code.

Conflicts:
	project.gyp
	spec/compiler/build_tables/action_takes_precedence_spec.cc
	spec/compiler/build_tables/build_conflict_spec.cc
	spec/compiler/build_tables/build_parse_table_spec.cc
	spec/compiler/build_tables/first_symbols_spec.cc
	spec/compiler/build_tables/item_set_closure_spec.cc
	spec/compiler/build_tables/item_set_transitions_spec.cc
	spec/compiler/build_tables/rule_can_be_blank_spec.cc
	spec/compiler/helpers/containers.h
	spec/compiler/prepare_grammar/expand_repeats_spec.cc
	spec/compiler/prepare_grammar/extract_tokens_spec.cc
	src/compiler/build_tables/action_takes_precedence.h
	src/compiler/build_tables/build_parse_table.cc
	src/compiler/build_tables/first_symbols.cc
	src/compiler/build_tables/first_symbols.h
	src/compiler/build_tables/item_set_closure.cc
	src/compiler/build_tables/item_set_transitions.cc
	src/compiler/build_tables/parse_item.cc
	src/compiler/build_tables/parse_item.h
	src/compiler/build_tables/rule_can_be_blank.cc
	src/compiler/build_tables/rule_can_be_blank.h
	src/compiler/prepare_grammar/expand_repeats.cc
	src/compiler/prepare_grammar/extract_tokens.cc
	src/compiler/prepare_grammar/extract_tokens.h
	src/compiler/prepare_grammar/prepare_grammar.cc
	src/compiler/rules/built_in_symbols.cc
	src/compiler/rules/built_in_symbols.h
	src/compiler/syntax_grammar.cc
	src/compiler/syntax_grammar.h
This commit is contained in:
Max Brunsfeld 2015-10-01 17:10:39 -07:00
commit ebc52f109d
71 changed files with 30354 additions and 33188 deletions

View file

@ -13,7 +13,6 @@
'src/compiler/build_tables/build_lex_table.cc',
'src/compiler/build_tables/build_parse_table.cc',
'src/compiler/build_tables/build_tables.cc',
'src/compiler/build_tables/first_symbols.cc',
'src/compiler/build_tables/get_completion_status.cc',
'src/compiler/build_tables/get_metadata.cc',
'src/compiler/build_tables/item.cc',
@ -32,13 +31,17 @@
'src/compiler/parse_table.cc',
'src/compiler/prepare_grammar/expand_repeats.cc',
'src/compiler/prepare_grammar/expand_tokens.cc',
'src/compiler/prepare_grammar/extract_choices.cc',
'src/compiler/prepare_grammar/extract_tokens.cc',
'src/compiler/prepare_grammar/flatten_grammar.cc',
'src/compiler/prepare_grammar/intern_symbols.cc',
'src/compiler/prepare_grammar/is_token.cc',
'src/compiler/prepare_grammar/parse_regex.cc',
'src/compiler/prepare_grammar/prepare_grammar.cc',
'src/compiler/prepare_grammar/token_description.cc',
'src/compiler/rule.cc',
'src/compiler/syntax_grammar.cc',
'src/compiler/variable.cc',
'src/compiler/rules/blank.cc',
'src/compiler/rules/built_in_symbols.cc',
'src/compiler/rules/character_range.cc',

View file

@ -1,117 +0,0 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/build_tables/first_symbols.h"
#include "compiler/rules/metadata.h"
using namespace build_tables;
using namespace rules;
START_TEST
describe("first_symbols", []() {
SyntaxGrammar null_grammar;
describe("for a sequence AB", [&]() {
it("ignores B when A cannot be blank", [&]() {
auto rule = seq({ i_token(0), i_token(1) });
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(0, true),
})));
});
it("includes first_symbols(B) when A can be blank", [&]() {
auto rule = seq({
choice({
i_token(0),
blank() }),
i_token(1) });
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(0, true),
Symbol(1, true)
})));
});
it("includes first_symbols(A's right hand side) when A is a non-terminal", [&]() {
auto rule = choice({
seq({
i_token(0),
i_token(1) }),
i_sym(0) });
SyntaxGrammar grammar{{
{
"rule0",
seq({
i_token(2),
i_token(3),
i_token(4),
}),
RuleEntryTypeNamed
}
}, {}, {}};
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(0, true),
Symbol(2, true),
})));
});
it("includes first_symbols(B) when A is a non-terminal and its expansion can be blank", [&]() {
auto rule = seq({
i_sym(0),
i_token(1) });
SyntaxGrammar grammar{{
{
"rule0",
choice({
i_token(0),
blank(),
}),
RuleEntryTypeNamed
},
}, {}, {}};
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(0, true),
Symbol(1, true),
})));
});
});
describe("when there are left-recursive rules", [&]() {
it("terminates", [&]() {
SyntaxGrammar grammar{{
{
"rule0",
choice({
seq({ i_sym(0), i_token(10) }),
i_token(11),
}),
RuleEntryTypeNamed
},
}, {}, {}};
auto rule = i_sym(0);
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(11, true)
})));
});
});
it("ignores metadata rules", [&]() {
auto rule = make_shared<Metadata>(i_token(3), map<rules::MetadataKey, int>());
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(3, true),
})));
});
});
END_TEST

View file

@ -1,7 +1,8 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/rules/built_in_symbols.h"
using namespace build_tables;
using namespace rules;
@ -10,39 +11,55 @@ START_TEST
describe("item_set_closure", []() {
SyntaxGrammar grammar{{
{
"E",
seq({
i_sym(1),
i_token(11),
SyntaxVariable("rule0", VariableTypeNamed, {
Production({
{Symbol(1), 0, AssociativityNone, 100},
{Symbol(11, true), 0, AssociativityNone, 101},
}),
RuleEntryTypeNamed,
},
{
"T",
seq({
i_token(12),
i_token(13),
}),
SyntaxVariable("rule1", VariableTypeNamed, {
Production({
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13, true), 0, AssociativityNone, 103},
}),
RuleEntryTypeNamed,
},
Production({
{Symbol(2), 0, AssociativityNone, 104},
})
}),
SyntaxVariable("rule2", VariableTypeNamed, {
Production({
{Symbol(14, true), 0, AssociativityNone, 105},
{Symbol(15, true), 0, AssociativityNone, 106},
})
}),
}, {}, {}};
it("adds items at the beginnings of referenced rules", [&]() {
ParseItemSet item_set = item_set_closure(
ParseItem(Symbol(0), grammar.rules[0].rule, {}),
set<Symbol>({ Symbol(10, true) }),
grammar
);
ParseItemSet item_set({
{
ParseItem(Symbol(0), 0, 0, 100),
set<Symbol>({ Symbol(10, true) }),
}
});
item_set_closure(&item_set, grammar);
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(1), grammar.rules[1].rule, {}),
set<Symbol>({ Symbol(11, true) }),
ParseItem(Symbol(0), 0, 0, 100),
set<Symbol>({ Symbol(10, true) })
},
{
ParseItem(Symbol(0), grammar.rules[0].rule, {}),
set<Symbol>({ Symbol(10, true) }),
ParseItem(Symbol(1), 0, 0, 102),
set<Symbol>({ Symbol(11, true) })
},
{
ParseItem(Symbol(1), 1, 0, 104),
set<Symbol>({ Symbol(11, true) })
},
{
ParseItem(Symbol(2), 0, 0, 105),
set<Symbol>({ Symbol(11, true) })
},
})));
});

View file

@ -1,6 +1,6 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/helpers/rule_helpers.h"
using namespace rules;
@ -17,63 +17,67 @@ describe("char_transitions(LexItemSet)", []() {
});
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
{
CharacterSet().include('a', 'd'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
},
{
CharacterSet().include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
})
},
{
{
CharacterSet().include('a', 'd'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
},
{
CharacterSet().include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
})
},
{
CharacterSet().include('g', 'x'),
LexItemSet({
LexItem(Symbol(2), blank()),
})
},
LexItemSet({
LexItem(Symbol(2), blank()),
})
},
})));
});
});
});
describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
SyntaxGrammar grammar{{
{
"A",
blank(),
RuleEntryTypeNamed
},
{
"B",
i_token(21),
RuleEntryTypeNamed
},
}, {}, {}};
describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
it("computes the closure of the new item sets", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("A", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13, true), 0, AssociativityNone, 103},
{Symbol(1), 0, AssociativityNone, 104},
{Symbol(14, true), 0, AssociativityNone, 105},
})
}),
SyntaxVariable("B", VariableTypeNamed, {
Production({
{Symbol(15, true), 0, AssociativityNone, 106},
})
})
}, {}, {}};
ParseItemSet set1({
{
ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), { Symbol(101) }),
set<Symbol>({ Symbol(23, true) })
},
ParseItem(Symbol(0), 0, 2, 103),
set<Symbol>({ Symbol(16, true) })
}
});
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
{
Symbol(22, true),
Symbol(13, true),
ParseItemSet({
{
ParseItem(Symbol(0), i_sym(1), { Symbol(101), Symbol(22) }),
set<Symbol>({ Symbol(23, true) }),
ParseItem(Symbol(0), 0, 3, 104),
set<Symbol>({ Symbol(16, true) })
},
{
ParseItem(Symbol(1), i_token(21), {}),
set<Symbol>({ Symbol(23, true) })
ParseItem(Symbol(1), 0, 0, 106),
set<Symbol>({ Symbol(14, true) })
},
})
},

View file

@ -2,7 +2,6 @@
#include "compiler/rules/built_in_symbols.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using namespace build_tables;
@ -11,16 +10,8 @@ START_TEST
describe("LexConflictManager", []() {
LexicalGrammar lexical_grammar{{
{
"other_token",
pattern("[a-b]"),
RuleEntryTypeNamed
},
{
"lookahead_token",
pattern("[c-d]"),
RuleEntryTypeNamed
},
Variable("other_token", VariableTypeNamed, pattern("[a-b]")),
Variable("lookahead_token", VariableTypeNamed, pattern("[c-d]"))
}, {}};
LexConflictManager conflict_manager(lexical_grammar);

View file

@ -2,7 +2,6 @@
#include "compiler/rules/built_in_symbols.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using namespace build_tables;
@ -11,31 +10,11 @@ START_TEST
describe("ParseConflictManager", []() {
SyntaxGrammar syntax_grammar{{
{
"in_progress_rule1",
i_token(0),
RuleEntryTypeNamed,
},
{
"in_progress_rule2",
i_token(0),
RuleEntryTypeNamed,
},
{
"reduced_rule",
i_token(0),
RuleEntryTypeNamed,
},
{
"other_rule1",
i_token(0),
RuleEntryTypeNamed,
},
{
"other_rule2",
i_token(0),
RuleEntryTypeNamed,
},
SyntaxVariable("in_progress_rule1", VariableTypeNamed, { Production() }),
SyntaxVariable("in_progress_rule2", VariableTypeNamed, { Production() }),
SyntaxVariable("reduced_rule", VariableTypeNamed, { Production() }),
SyntaxVariable("other_rule1", VariableTypeNamed, { Production() }),
SyntaxVariable("other_rule2", VariableTypeNamed, { Production() }),
}, { Symbol(2, true) }, {}};
pair<bool, ConflictType> result;

View file

@ -1,7 +1,6 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using build_tables::rule_can_be_blank;
@ -54,37 +53,6 @@ describe("rule_can_be_blank", [&]() {
rule = make_shared<rules::Metadata>(sym("one"), map<rules::MetadataKey, int>());
AssertThat(rule_can_be_blank(rule), IsFalse());
});
describe("checking recursively (by expanding non-terminals)", [&]() {
SyntaxGrammar grammar{{
{
"A",
choice({
seq({ i_sym(0), i_token(11) }),
blank()
}),
RuleEntryTypeNamed,
},
{
"B",
choice({
seq({ i_sym(1), i_token(12) }),
i_token(13)
}),
RuleEntryTypeNamed,
},
}, {}, {}};
it("terminates for left-recursive rules that can be blank", [&]() {
rule = i_sym(0);
AssertThat(rule_can_be_blank(rule, grammar), IsTrue());
});
it("terminates for left-recursive rules that can't be blank", [&]() {
rule = i_sym(1);
AssertThat(rule_can_be_blank(rule, grammar), IsFalse());
});
});
});
END_TEST

View file

@ -29,8 +29,9 @@ describe("compiling the example grammars", []() {
string code = result.first;
const GrammarError *error = result.second;
AssertThat(error, Equals((GrammarError *)nullptr));
if (error)
AssertThat(error->message, Equals(""));
ofstream file(example_parser_dir + language + ".c");
file << get<0>(result);
file.close();

View file

@ -39,7 +39,7 @@ namespace tree_sitter {
return make_shared<rules::Metadata>(rule, values);
}
bool operator==(const RuleEntry &left, const RuleEntry &right) {
bool operator==(const Variable &left, const Variable &right) {
return left.name == right.name && left.rule->operator==(*right.rule) &&
left.type == right.type;
}

View file

@ -4,7 +4,7 @@
#include "tree_sitter/compiler.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/metadata.h"
#include "compiler/prepared_grammar.h"
#include "compiler/variable.h"
namespace tree_sitter {
rule_ptr metadata(rule_ptr, std::map<rules::MetadataKey, int>);
@ -13,7 +13,7 @@ namespace tree_sitter {
rule_ptr i_sym(size_t index);
rule_ptr i_token(size_t index);
bool operator==(const RuleEntry &left, const RuleEntry &right);
bool operator==(const Variable &left, const Variable &right);
}
#endif

View file

@ -2,6 +2,7 @@
#include "compiler/compiler_spec_helper.h"
#include "tree_sitter/compiler.h"
#include "compiler/parse_table.h"
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/get_metadata.h"
@ -42,8 +43,12 @@ ostream &operator<<(ostream &stream, const rule_ptr &rule) {
return stream;
}
ostream &operator<<(ostream &stream, const RuleEntry &entry) {
return stream << string("{") << entry.name << string(", ") << entry.rule << string(", ") << to_string(entry.type) << string("}");
ostream &operator<<(ostream &stream, const Variable &variable) {
return stream << string("{") << variable.name << string(", ") << variable.rule << string(", ") << to_string(variable.type) << string("}");
}
ostream &operator<<(ostream &stream, const SyntaxVariable &variable) {
return stream << string("{") << variable.name << string(", ") << variable.productions << string(", ") << to_string(variable.type) << string("}");
}
std::ostream &operator<<(std::ostream &stream, const LexAction &action) {
@ -100,6 +105,10 @@ ostream &operator<<(ostream &stream, const ParseState &state) {
return stream;
}
ostream &operator<<(ostream &stream, const ProductionStep &step) {
return stream << string("(production_step symbol:") << step.symbol << string(" precedence:") << to_string(step.precedence) << ")";
}
namespace build_tables {
ostream &operator<<(ostream &stream, const build_tables::LexItem &item) {
@ -107,8 +116,11 @@ ostream &operator<<(ostream &stream, const build_tables::LexItem &item) {
<< string(")");
}
ostream &operator<<(ostream &stream, const build_tables::ParseItem &item) {
return stream << string("(item ") << item.lhs << string(" ") << *item.rule
ostream &operator<<(ostream &stream, const ParseItem &item) {
return stream << string("(item variable:") << to_string(item.variable_index)
<< string(" production:") << to_string(item.production_index)
<< string(" step:") << to_string(item.step_index)
<< string(" remaining_rule:") << to_string(item.rule_id)
<< string(")");
}

View file

@ -37,8 +37,8 @@ inline std::ostream& operator<<(std::ostream &stream, const std::set<T> &set) {
return stream << ")";
}
template<typename T>
inline std::ostream& operator<<(std::ostream &stream, const std::unordered_set<T> &set) {
template<typename T, typename H, typename E>
inline std::ostream& operator<<(std::ostream &stream, const std::unordered_set<T, H, E> &set) {
stream << std::string("(set: ");
bool started = false;
for (auto item : set) {
@ -89,19 +89,23 @@ namespace tree_sitter {
using std::ostream;
using std::string;
using std::to_string;
struct RuleEntry;
struct Variable;
struct SyntaxVariable;
class LexAction;
class ParseAction;
class ParseState;
struct ProductionStep;
ostream &operator<<(ostream &, const Grammar &);
ostream &operator<<(ostream &, const GrammarError &);
ostream &operator<<(ostream &, const Rule &);
ostream &operator<<(ostream &, const rule_ptr &);
ostream &operator<<(ostream &, const RuleEntry &);
std::ostream &operator<<(ostream &stream, const LexAction &);
std::ostream &operator<<(ostream &stream, const ParseAction &);
std::ostream &operator<<(ostream &stream, const ParseState &);
ostream &operator<<(ostream &, const Variable &);
ostream &operator<<(ostream &, const SyntaxVariable &);
ostream &operator<<(ostream &, const LexAction &);
ostream &operator<<(ostream &, const ParseAction &);
ostream &operator<<(ostream &, const ParseState &);
ostream &operator<<(ostream &, const ProductionStep &);
namespace build_tables {
@ -109,9 +113,9 @@ struct MetadataRange;
class LexItem;
class ParseItem;
ostream &operator<<(ostream &stream, const MetadataRange &);
ostream &operator<<(ostream &stream, const LexItem &);
ostream &operator<<(ostream &stream, const ParseItem &);
ostream &operator<<(ostream &, const MetadataRange &);
ostream &operator<<(ostream &, const LexItem &);
ostream &operator<<(ostream &, const ParseItem &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,232 +1,152 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/expand_repeats.h"
START_TEST
using namespace rules;
using prepare_grammar::InitialSyntaxGrammar;
using prepare_grammar::expand_repeats;
describe("expand_repeats", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
SyntaxGrammar grammar{{
{
"rule0",
repeat(i_token(0)),
RuleEntryTypeNamed,
},
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, repeat(i_token(0))),
}, {}, {}};
auto match = expand_repeats(grammar);
auto result = expand_repeats(grammar);
AssertThat(match.rules, Equals(vector<RuleEntry>({
{
"rule0",
choice({ i_sym(1), blank() }),
RuleEntryTypeNamed,
},
{
"rule0_repeat1",
seq({
i_token(0),
choice({ i_sym(1), blank() })
}),
RuleEntryTypeAuxiliary
},
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, choice({ i_sym(1), blank() })),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(0),
choice({ i_sym(1), blank() })
})),
})));
});
it("replaces repeats inside of sequences", [&]() {
SyntaxGrammar grammar{{
{
"rule0",
seq({
i_token(10),
repeat(i_token(11)),
}),
RuleEntryTypeNamed,
},
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, seq({
i_token(10),
repeat(i_token(11)),
})),
}, {}, {}};
auto match = expand_repeats(grammar);
auto result = expand_repeats(grammar);
AssertThat(match.rules, Equals(vector<RuleEntry>({
{
"rule0",
seq({
i_token(10),
choice({ i_sym(1), blank() })
}),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(11),
choice({ i_sym(1), blank() })
}),
RuleEntryTypeAuxiliary
},
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, seq({
i_token(10),
choice({ i_sym(1), blank() })
})),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(11),
choice({ i_sym(1), blank() })
})),
})));
});
it("replaces repeats inside of choices", [&]() {
SyntaxGrammar grammar{{
{
"rule0",
choice({ i_token(10), repeat(i_token(11)) }),
RuleEntryTypeNamed
},
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, choice({
i_token(10),
repeat(i_token(11))
})),
}, {}, {}};
auto match = expand_repeats(grammar);
auto result = expand_repeats(grammar);
AssertThat(match.rules, Equals(vector<RuleEntry>({
{
"rule0",
choice({ i_token(10), i_sym(1), blank() }),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(11),
choice({ i_sym(1), blank() }),
}),
RuleEntryTypeAuxiliary
},
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, choice({ i_token(10), i_sym(1), blank() })),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(11),
choice({ i_sym(1), blank() }),
})),
})));
});
it("does not create redundant auxiliary rules", [&]() {
SyntaxGrammar grammar{{
{
"rule0",
choice({
seq({ i_token(1), repeat(i_token(4)) }),
seq({ i_token(2), repeat(i_token(4)) }),
}),
RuleEntryTypeNamed
},
{
"rule1",
seq({ i_token(3), repeat(i_token(4)) }),
RuleEntryTypeNamed
},
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, choice({
seq({ i_token(1), repeat(i_token(4)) }),
seq({ i_token(2), repeat(i_token(4)) }),
})),
Variable("rule1", VariableTypeNamed, seq({
i_token(3),
repeat(i_token(4))
})),
}, {}, {}};
auto match = expand_repeats(grammar);
auto result = expand_repeats(grammar);
AssertThat(match.rules, Equals(vector<RuleEntry>({
{
"rule0",
choice({
seq({ i_token(1), choice({ i_sym(2), blank() }) }),
seq({ i_token(2), choice({ i_sym(2), blank() }) }),
}),
RuleEntryTypeNamed
},
{
"rule1",
seq({ i_token(3), choice({ i_sym(2), blank() }) }),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(4),
choice({ i_sym(2), blank() }),
}),
RuleEntryTypeAuxiliary
},
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, choice({
seq({ i_token(1), choice({ i_sym(2), blank() }) }),
seq({ i_token(2), choice({ i_sym(2), blank() }) }),
})),
Variable("rule1", VariableTypeNamed, seq({
i_token(3),
choice({ i_sym(2), blank() })
})),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(4),
choice({ i_sym(2), blank() }),
})),
})));
});
it("can replace multiple repeats in the same rule", [&]() {
SyntaxGrammar grammar{{
{
"rule0",
seq({
repeat(i_token(10)),
repeat(i_token(11)),
}),
RuleEntryTypeNamed
},
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, seq({
repeat(i_token(10)),
repeat(i_token(11)),
})),
}, {}, {}};
auto match = expand_repeats(grammar);
auto result = expand_repeats(grammar);
AssertThat(match.rules, Equals(vector<RuleEntry>({
{
"rule0",
seq({
choice({ i_sym(1), blank() }),
choice({ i_sym(2), blank() }),
}),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(10),
choice({ i_sym(1), blank() }),
}),
RuleEntryTypeAuxiliary
},
{
"rule0_repeat2",
seq({
i_token(11),
choice({ i_sym(2), blank() }),
}),
RuleEntryTypeAuxiliary
},
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, seq({
choice({ i_sym(1), blank() }),
choice({ i_sym(2), blank() }),
})),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(10),
choice({ i_sym(1), blank() }),
})),
Variable("rule0_repeat2", VariableTypeAuxiliary, seq({
i_token(11),
choice({ i_sym(2), blank() }),
})),
})));
});
it("can replace repeats in multiple rules", [&]() {
SyntaxGrammar grammar{{
{
"rule0",
repeat(i_token(10)),
RuleEntryTypeNamed,
},
{
"rule1",
repeat(i_token(11)),
RuleEntryTypeNamed,
},
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, repeat(i_token(10))),
Variable("rule1", VariableTypeNamed, repeat(i_token(11))),
}, {}, {}};
auto match = expand_repeats(grammar);
auto result = expand_repeats(grammar);
AssertThat(match.rules, Equals(vector<RuleEntry>({
{
"rule0",
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, choice({
i_sym(2),
blank(),
})),
Variable("rule1", VariableTypeNamed, choice({
i_sym(3),
blank(),
})),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(10),
choice({ i_sym(2), blank() }),
RuleEntryTypeNamed
},
{
"rule1",
choice({ i_sym(3), blank() }),
RuleEntryTypeNamed
},
{
"rule0_repeat1",
seq({
i_token(10),
choice({ i_sym(2), blank() }),
}),
RuleEntryTypeAuxiliary
},
{
"rule1_repeat1",
seq({
i_token(11),
choice({ i_sym(3), blank() })
}),
RuleEntryTypeAuxiliary
},
})),
Variable("rule1_repeat1", VariableTypeAuxiliary, seq({
i_token(11),
choice({ i_sym(3), blank() })
})),
})));
});
});

View file

@ -1,5 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/expand_tokens.h"
START_TEST
@ -11,64 +11,48 @@ describe("expand_tokens", []() {
describe("string rules", [&]() {
it("replaces strings with sequences of character sets", [&]() {
LexicalGrammar grammar{{
{
"rule_A",
seq({
i_sym(10),
str("xyz"),
i_sym(11),
}),
RuleEntryTypeNamed
},
Variable("rule_A", VariableTypeNamed, seq({
i_sym(10),
str("xyz"),
i_sym(11),
})),
}, {}};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
{
"rule_A",
seq({
i_sym(10),
metadata(seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
}), {
{PRECEDENCE, 1},
{IS_TOKEN, 1},
}),
i_sym(11),
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, seq({
i_sym(10),
metadata(seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
}), {
{PRECEDENCE, 1},
{IS_TOKEN, 1},
}),
RuleEntryTypeNamed
},
i_sym(11),
})),
})));
});
it("handles strings containing non-ASCII UTF8 characters", [&]() {
LexicalGrammar grammar{{
{
"rule_A",
str("\u03B1 \u03B2"), // α β
RuleEntryTypeNamed
},
Variable("rule_A", VariableTypeNamed, str("\u03B1 \u03B2")),
}, {}};
auto result = expand_tokens(grammar);
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
{
"rule_A",
metadata(seq({
character({ 945 }),
character({ ' ' }),
character({ 946 }),
}), {
{PRECEDENCE, 1},
{IS_TOKEN, 1},
}),
RuleEntryTypeNamed
}
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, metadata(seq({
character({ 945 }),
character({ ' ' }),
character({ 946 }),
}), {
{PRECEDENCE, 1},
{IS_TOKEN, 1},
})),
})));
});
});
@ -76,64 +60,44 @@ describe("expand_tokens", []() {
describe("regexp rules", [&]() {
it("replaces regexps with the equivalent rule tree", [&]() {
LexicalGrammar grammar{{
{
"rule_A",
seq({
i_sym(10),
pattern("x*"),
i_sym(11),
}),
RuleEntryTypeNamed
},
Variable("rule_A", VariableTypeNamed, seq({
i_sym(10),
pattern("x*"),
i_sym(11),
})),
}, {}};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
{
"rule_A",
seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11),
}),
RuleEntryTypeNamed
},
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11),
})),
})));
});
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
LexicalGrammar grammar{{
{
"rule_A",
pattern("[^\u03B1-\u03B4]*"), // [^α-δ]
RuleEntryTypeNamed
},
Variable("rule_A", VariableTypeNamed, pattern("[^\u03B1-\u03B4]*")),
}, {}};
auto result = expand_tokens(grammar);
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
{
"rule_A",
repeat(character({ 945, 946, 947, 948 }, false)),
RuleEntryTypeNamed
}
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, repeat(character({ 945, 946, 947, 948 }, false))),
})));
});
it("returns an error when the grammar contains an invalid regex", [&]() {
LexicalGrammar grammar{{
{
"rule_A",
seq({
pattern("("),
str("xyz"),
pattern("["),
}),
RuleEntryTypeNamed
},
Variable("rule_A", VariableTypeNamed, seq({
pattern("("),
str("xyz"),
pattern("["),
}))
}, {}};
auto result = expand_tokens(grammar);

View file

@ -0,0 +1,74 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepare_grammar/extract_choices.h"
START_TEST
using namespace rules;
using prepare_grammar::extract_choices;
class rule_vector : public vector<rule_ptr> {
public:
bool operator==(const vector<rule_ptr> &other) const {
if (this->size() != other.size()) return false;
for (size_t i = 0; i < this->size(); i++) {
auto rule = this->operator[](i);
auto other_rule = other[i];
if (!rule->operator==(*rule))
return false;
}
return true;
}
rule_vector(const initializer_list<rule_ptr> &list) :
vector<rule_ptr>(list) {}
};
describe("extract_choices", []() {
it("expands rules containing choices into multiple rules", [&]() {
auto rule = seq({
sym("a"),
choice({ sym("b"), sym("c"), sym("d") }),
sym("e")
});
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({ sym("a"), sym("b"), sym("e") }),
seq({ sym("a"), sym("c"), sym("e") }),
seq({ sym("a"), sym("d"), sym("e") }),
})));
});
it("handles metadata rules", [&]() {
auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") }));
AssertThat(extract_choices(rule), Equals(rule_vector({
prec(5, sym("b")),
prec(5, sym("c")),
prec(5, sym("d")),
})));
});
it("handles nested choices", [&]() {
auto rule = choice({
seq({ choice({ sym("a"), sym("b") }), sym("c") }),
sym("d")
});
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({ sym("a"), sym("c") }),
seq({ sym("b"), sym("c") }),
sym("d"),
})));
});
it("handles repeats", [&]() {
auto rule = repeat(choice({ sym("a"), sym("b") }));
AssertThat(extract_choices(rule), Equals(rule_vector({
repeat(sym("a")),
repeat(sym("b")),
})));
});
});
END_TEST

View file

@ -1,6 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/extract_tokens.h"
START_TEST
@ -8,238 +9,133 @@ START_TEST
using namespace rules;
using prepare_grammar::extract_tokens;
using prepare_grammar::InternedGrammar;
using prepare_grammar::InitialSyntaxGrammar;
describe("extract_tokens", []() {
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
repeat(seq({
str("ab"),
pattern("cd*"),
choice({
i_sym(1),
i_sym(2),
token(repeat(choice({ str("ef"), str("gh") }))),
}),
})),
RuleEntryTypeNamed,
},
{
"rule_B",
pattern("ij+"),
RuleEntryTypeNamed,
},
{
"rule_C",
choice({ str("kl"), blank() }),
RuleEntryTypeNamed,
},
{
"rule_D",
repeat(i_sym(3)),
RuleEntryTypeNamed,
}
Variable("rule_A", VariableTypeNamed, repeat(seq({
str("ab"),
pattern("cd*"),
choice({
i_sym(1),
i_sym(2),
token(repeat(choice({ str("ef"), str("gh") }))),
}),
}))),
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
Variable("rule_C", VariableTypeNamed, choice({ str("kl"), blank() })),
Variable("rule_D", VariableTypeNamed, repeat(i_sym(3)))
}, {}, {}});
SyntaxGrammar &syntax_grammar = get<0>(result);
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
const GrammarError *error = get<2>(result);
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(syntax_grammar.rules, Equals(vector<RuleEntry>({
{
"rule_A",
repeat(seq({
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, repeat(seq({
// This string is now the first token in the lexical grammar.
i_token(0),
// This string is now the first token in the lexical grammar.
i_token(0),
// This pattern is now the second rule in the lexical grammar.
i_token(1),
// This pattern is now the second rule in the lexical grammar.
i_token(1),
choice({
// Rule 1, which this symbol pointed to, has been moved to the
// lexical grammar.
i_token(3),
choice({
// Rule 1, which this symbol pointed to, has been moved to the
// lexical grammar.
i_token(3),
// This symbol's index has been decremented, because a previous rule
// was moved to the lexical grammar.
i_sym(1),
// This symbol's index has been decremented, because a previous rule
// was moved to the lexical grammar.
i_sym(1),
// This token rule is now the third rule in the lexical grammar.
i_token(2),
}),
})),
RuleEntryTypeNamed,
},
{
"rule_C",
choice({ i_token(4), blank() }),
RuleEntryTypeNamed,
},
{
"rule_D",
repeat(i_sym(2)),
RuleEntryTypeNamed,
}
// This token rule is now the third rule in the lexical grammar.
i_token(2),
}),
}))),
Variable("rule_C", VariableTypeNamed, choice({ i_token(4), blank() })),
Variable("rule_D", VariableTypeNamed, repeat(i_sym(2))),
})));
AssertThat(lexical_grammar.rules, Equals(vector<RuleEntry>({
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
// Strings become anonymous rules.
{
"ab",
str("ab"),
RuleEntryTypeAnonymous,
},
Variable("ab", VariableTypeAnonymous, str("ab")),
// Patterns become hidden rules.
{
"/cd*/",
pattern("cd*"),
RuleEntryTypeAuxiliary,
},
Variable("/cd*/", VariableTypeAuxiliary, pattern("cd*")),
// Rules marked as tokens become hidden rules.
{
"/(ef|gh)*/",
repeat(choice({ str("ef"), str("gh") })),
RuleEntryTypeAuxiliary,
},
Variable("/(ef|gh)*/", VariableTypeAuxiliary, repeat(choice({
str("ef"),
str("gh")
}))),
// This named rule was moved wholesale to the lexical grammar.
{
"rule_B",
pattern("ij+"),
RuleEntryTypeNamed,
},
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
// Strings become anonymous rules.
{
"kl",
str("kl"),
RuleEntryTypeAnonymous,
},
Variable("kl", VariableTypeAnonymous, str("kl")),
})));
});
it("does not create duplicate tokens in the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
seq({
str("ab"),
i_sym(0),
str("ab"),
}),
RuleEntryTypeNamed,
},
Variable("rule_A", VariableTypeNamed, seq({
str("ab"),
i_sym(0),
str("ab"),
})),
}, {}, {}});
SyntaxGrammar &syntax_grammar = get<0>(result);
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
AssertThat(syntax_grammar.rules, Equals(vector<RuleEntry>({
{
"rule_A",
seq({ i_token(0), i_sym(0), i_token(0) }),
RuleEntryTypeNamed
}
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })),
})));
AssertThat(lexical_grammar.rules, Equals(vector<RuleEntry>({
{
"ab",
str("ab"),
RuleEntryTypeAnonymous
},
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
Variable("ab", VariableTypeAnonymous, str("ab")),
})))
});
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
seq({ i_sym(1), str("ab") }),
RuleEntryTypeNamed,
},
{
"rule_B",
str("cd"),
RuleEntryTypeNamed,
},
{
"rule_C",
seq({ str("ef"), str("cd") }),
RuleEntryTypeNamed,
},
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })),
Variable("rule_B", VariableTypeNamed, str("cd")),
Variable("rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })),
}, {}, {}});
SyntaxGrammar &syntax_grammar = get<0>(result);
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
AssertThat(syntax_grammar.rules, Equals(vector<RuleEntry>({
{
"rule_A",
seq({ i_sym(1), i_token(0) }),
RuleEntryTypeNamed
},
{
"rule_B",
i_token(1),
RuleEntryTypeNamed
},
{
"rule_C",
seq({ i_token(2), i_token(1) }),
RuleEntryTypeNamed
},
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })),
Variable("rule_B", VariableTypeNamed, i_token(1)),
Variable("rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })),
})));
AssertThat(lexical_grammar.rules, Equals(vector<RuleEntry>({
{
"ab",
str("ab"),
RuleEntryTypeAnonymous
},
{
"cd",
str("cd"),
RuleEntryTypeAnonymous
},
{
"ef",
str("ef"),
RuleEntryTypeAnonymous
},
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
Variable("ab", VariableTypeAnonymous, str("ab")),
Variable("cd", VariableTypeAnonymous, str("cd")),
Variable("ef", VariableTypeAnonymous, str("ef")),
})));
});
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
str("ok"),
RuleEntryTypeNamed,
},
{
"rule_B",
repeat(i_sym(0)),
RuleEntryTypeNamed,
},
{
"rule_C",
repeat(seq({ i_sym(0), i_sym(0) })),
RuleEntryTypeNamed,
},
Variable("rule_A", VariableTypeNamed, str("ok")),
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
}, { str(" ") }, { { Symbol(1), Symbol(2) } }});
SyntaxGrammar &syntax_grammar = get<0>(result);
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
AssertThat(syntax_grammar.rules.size(), Equals<size_t>(2));
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
{ Symbol(0), Symbol(1) },
})));
@ -248,11 +144,7 @@ describe("extract_tokens", []() {
describe("handling ubiquitous tokens", [&]() {
it("adds inline ubiquitous tokens to the lexical grammar's separators", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
str("x"),
RuleEntryTypeNamed,
},
Variable("rule_A", VariableTypeNamed, str("x")),
}, {
str("y"),
pattern("\\s+"),
@ -268,22 +160,10 @@ describe("extract_tokens", []() {
});
it("updates ubiquitous symbols according to the new symbol numbers", [&]() {
auto result = extract_tokens(InternedGrammar{ {
{
"rule_A",
seq({ str("w"), str("x"), i_sym(1) }),
RuleEntryTypeNamed
},
{
"rule_B",
str("y"),
RuleEntryTypeNamed
},
{
"rule_C",
str("z"),
RuleEntryTypeNamed
},
auto result = extract_tokens(InternedGrammar{{
Variable("rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })),
Variable("rule_B", VariableTypeNamed, str("y")),
Variable("rule_C", VariableTypeNamed, str("z")),
}, {
i_sym(2),
}, {}});
@ -299,16 +179,8 @@ describe("extract_tokens", []() {
it("returns an error if any ubiquitous tokens are non-token symbols", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
seq({ str("x"), i_sym(1) }),
RuleEntryTypeNamed,
},
{
"rule_B",
seq({ str("y"), str("z") }),
RuleEntryTypeNamed,
},
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
}, { i_sym(1) }, {}});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
@ -319,16 +191,8 @@ describe("extract_tokens", []() {
it("returns an error if any ubiquitous tokens are non-token rules", [&]() {
auto result = extract_tokens(InternedGrammar{{
{
"rule_A",
str("x"),
RuleEntryTypeNamed,
},
{
"rule_B",
str("y"),
RuleEntryTypeNamed,
},
Variable("rule_A", VariableTypeNamed, str("x")),
Variable("rule_B", VariableTypeNamed, str("y")),
}, { choice({ i_sym(1), blank() }) }, {}});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));

View file

@ -0,0 +1,179 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepare_grammar/flatten_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/built_in_symbols.h"
// Map `fn` over every element of `input`, returning the transformed values
// in a new vector (order preserved). The element type of the result is
// deduced from the callable's return type.
template<typename T, typename Func>
std::vector<typename std::result_of<Func(T)>::type>
collect(const std::vector<T> &input, Func fn) {
  std::vector<typename std::result_of<Func(T)>::type> output;
  output.reserve(input.size());
  for (const T &element : input)
    output.push_back(fn(element));
  return output;
}
START_TEST
using namespace rules;
using prepare_grammar::flatten_grammar;
using prepare_grammar::InitialSyntaxGrammar;
// Spec for prepare_grammar::flatten_grammar, which converts each variable's
// nested rule tree (seq / choice / prec) into a flat list of Productions —
// sequences of ProductionSteps, each carrying a symbol, a precedence, an
// associativity, and a rule_id.
describe("flatten_grammar", []() {
  InitialSyntaxGrammar input_grammar{{
    // Choices within rules are extracted, resulting in multiple productions.
    Variable("variable0", VariableTypeNamed, seq({
      i_sym(1),
      choice({ i_sym(2), i_sym(3) }),
      i_sym(4),
    })),

    // When multiple precedence values are nested, the inner precedence wins.
    Variable("variable1", VariableTypeNamed, seq({
      i_sym(1),
      prec(101, seq({
        i_sym(2),
        choice({
          prec(102, seq({
            i_sym(3),
            i_sym(4)
          }), AssociativityRight),
          i_sym(5),
        }),
        i_sym(6),
      })),
      i_sym(7),
    })),

    // When a precedence is applied to the end of a rule, its value is assigned
    // to the last step of the corresponding production.
    Variable("variable2", VariableTypeHidden, seq({
      prec(102, seq({
        i_sym(1),
        i_sym(2),
      })),
      prec(103, seq({
        i_sym(3),
        i_sym(4),
      })),
    }))
  }, {}, {}};

  SyntaxGrammar grammar = flatten_grammar(input_grammar);

  // The four helpers below project each production of a variable onto one
  // field of its steps, producing one sequence per production so the specs
  // can compare whole tables of values at once.
  auto get_symbol_sequences = [&](vector<Production> productions) {
    return collect(productions, [](Production p) {
      return collect(p, [](ProductionStep e) {
        return e.symbol;
      });
    });
  };

  auto get_precedence_sequences = [&](vector<Production> productions) {
    return collect(productions, [](Production p) {
      return collect(p, [](ProductionStep e) {
        return e.precedence;
      });
    });
  };

  auto get_associativity_sequences = [&](vector<Production> productions) {
    return collect(productions, [](Production p) {
      return collect(p, [](ProductionStep e) {
        return e.associativity;
      });
    });
  };

  auto get_rule_id_sequences = [&](vector<Production> productions) {
    return collect(productions, [](Production p) {
      return collect(p, [](ProductionStep e) {
        return e.rule_id;
      });
    });
  };

  it("preserves the names and types of the grammar's variables", [&]() {
    AssertThat(grammar.variables[0].name, Equals("variable0"));
    AssertThat(grammar.variables[1].name, Equals("variable1"));
    AssertThat(grammar.variables[2].name, Equals("variable2"));

    AssertThat(grammar.variables[0].type, Equals(VariableTypeNamed));
    AssertThat(grammar.variables[1].type, Equals(VariableTypeNamed));
    AssertThat(grammar.variables[2].type, Equals(VariableTypeHidden));
  });

  it("turns each variable's rule with a vector of possible symbol sequences", [&]() {
    // variable0's two-way choice yields two productions.
    AssertThat(
      get_symbol_sequences(grammar.variables[0].productions),
      Equals(vector<vector<Symbol>>({
        { Symbol(1), Symbol(2), Symbol(4) },
        { Symbol(1), Symbol(3), Symbol(4) }
      })));

    AssertThat(
      get_symbol_sequences(grammar.variables[1].productions),
      Equals(vector<vector<Symbol>>({
        { Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) },
        { Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) }
      })));

    // variable2 has no choices, so it flattens to a single production.
    AssertThat(
      get_symbol_sequences(grammar.variables[2].productions),
      Equals(vector<vector<Symbol>>({
        { Symbol(1), Symbol(2), Symbol(3), Symbol(4) },
      })));
  });

  it("associates each symbol with the precedence binding it to its previous neighbor", [&]() {
    // No precedence annotations in variable0, so every step is 0.
    AssertThat(
      get_precedence_sequences(grammar.variables[0].productions),
      Equals(vector<vector<int>>({
        { 0, 0, 0 },
        { 0, 0, 0 }
      })));

    // Inner prec(102) overrides the surrounding prec(101) for the step it
    // covers; steps outside any prec(...) stay 0.
    AssertThat(
      get_precedence_sequences(grammar.variables[1].productions),
      Equals(vector<vector<int>>({
        { 0, 0, 101, 102, 101, 0 },
        { 0, 0, 101, 101, 0 }
      })));

    AssertThat(
      get_precedence_sequences(grammar.variables[2].productions),
      Equals(vector<vector<int>>({
        { 0, 102, 0, 103 },
      })));
  });

  it("associates each symbol with the correct associativity annotation", [&]() {
    Associativity none = AssociativityNone;

    AssertThat(
      get_associativity_sequences(grammar.variables[1].productions),
      Equals(vector<vector<Associativity>>({
        { none, none, AssociativityLeft, AssociativityRight, AssociativityLeft, none },
        { none, none, AssociativityLeft, AssociativityLeft, none }
      })));
  });

  it("associates each unique remaining subsequence of symbols and precedences with a rule_id", [&]() {
    // Variable 0: only the last symbol is the same for both productions.
    auto variable0_step_ids = get_rule_id_sequences(grammar.variables[0].productions);
    AssertThat(variable0_step_ids[0][0], !Equals(variable0_step_ids[1][0]));
    AssertThat(variable0_step_ids[0][1], !Equals(variable0_step_ids[1][1]));
    AssertThat(variable0_step_ids[0][2], Equals(variable0_step_ids[1][2]));

    // Variable 1: the last *two* symbols are the same for both productions.
    auto variable1_step_ids = get_rule_id_sequences(grammar.variables[1].productions);
    AssertThat(variable1_step_ids[0][0], !Equals(variable1_step_ids[1][0]));
    AssertThat(variable1_step_ids[0][1], !Equals(variable1_step_ids[1][1]));
    AssertThat(variable1_step_ids[0][4], Equals(variable1_step_ids[1][3]));
    AssertThat(variable1_step_ids[0][5], Equals(variable1_step_ids[1][4]));
  });
});
END_TEST

View file

@ -19,22 +19,10 @@ describe("intern_symbols", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
{
"x",
choice({ i_sym(1), i_sym(2) }),
RuleEntryTypeNamed
},
{
"y",
i_sym(2),
RuleEntryTypeNamed,
},
{
"_z",
str("stuff"),
RuleEntryTypeHidden
},
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })),
Variable("y", VariableTypeNamed, i_sym(2)),
Variable("_z", VariableTypeHidden, str("stuff")),
})));
});

View file

@ -300,10 +300,7 @@ extern const Grammar javascript = Grammar({
infix_op(">", "_expression", PREC_REL) }) },
{ "type_op", choice({
prec(PREC_REL, seq({
choice({ sym("_expression"), sym("identifier") }),
str("in"),
sym("_expression") })),
infix_op("in", "_expression", PREC_REL),
infix_op("instanceof", "_expression", PREC_REL),
prefix_op("typeof", "_expression", PREC_TYPE) }) },

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -35,7 +35,7 @@ describe("Languages", [&]() {
describe(("The " + pair.first + " parser").c_str(), [&]() {
before_each([&]() {
ts_document_set_language(doc, pair.second);
// ts_document_set_debugger(doc, log_debugger_make(true));
// ts_document_set_debugger(doc, log_debugger_make(false));
});
for (auto &entry : test_entries_for_language(pair.first)) {

View file

@ -11,7 +11,7 @@
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/parse_table.h"
#include "compiler/prepared_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/metadata.h"
@ -66,7 +66,7 @@ class LexTableBuilder {
else if (symbol.is_token)
result.insert(LexItem(
symbol, after_separators(lex_grammar.rules[symbol.index].rule)));
symbol, after_separators(lex_grammar.variables[symbol.index].rule)));
}
return result;
}

View file

@ -6,7 +6,7 @@
namespace tree_sitter {
class LexicalGrammar;
struct LexicalGrammar;
class ParseTable;
namespace build_tables {

View file

@ -12,7 +12,8 @@
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/get_completion_status.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/prepared_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
@ -35,7 +36,6 @@ class ParseTableBuilder {
const LexicalGrammar lexical_grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
vector<vector<Symbol>> productions;
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
ParseTable parse_table;
std::set<string> conflicts;
@ -48,11 +48,10 @@ class ParseTableBuilder {
conflict_manager(grammar) {}
pair<ParseTable, const GrammarError *> build() {
auto start_symbol = grammar.rules.empty() ? make_shared<Symbol>(0, true)
: make_shared<Symbol>(0);
ParseItem start_item(rules::START(), start_symbol, {});
add_parse_state(
item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
ParseItem start_item(rules::START(), 0, 0, -2);
ParseItemSet start_item_set({ { start_item, { rules::END_OF_INPUT() } } });
item_set_closure(&start_item_set, grammar);
add_parse_state(start_item_set);
while (!item_sets_to_process.empty()) {
auto pair = item_sets_to_process.back();
@ -105,20 +104,41 @@ class ParseTableBuilder {
}
}
// Result of checking whether a parse item has consumed its entire
// production. When is_done is true, precedence and associativity describe
// the production's final step (and are 0 / AssociativityNone for an empty
// production).
struct CompletionStatus {
  bool is_done;
  int precedence;
  Associativity associativity;
};
// Determine whether `item` has consumed every step of its production.
// If it has, also report the precedence and associativity of the last
// step consumed, which callers use when building the resulting Reduce
// action. For an empty production (step_index == 0 == size), the
// defaults { 0, AssociativityNone } are reported.
CompletionStatus get_completion_status(const ParseItem &item) {
  CompletionStatus result{ false, 0, AssociativityNone };

  // Look up the concrete production this item is positioned within.
  const Production &production =
    grammar.productions(item.lhs())[item.production_index];

  if (item.step_index == production.size()) {
    result.is_done = true;
    if (item.step_index > 0) {
      const ProductionStep &step = production[item.step_index - 1];
      result.precedence = step.precedence;
      result.associativity = step.associativity;
    }
  }

  return result;
}
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
CompletionStatus completion_status = get_completion_status(item.rule);
CompletionStatus completion_status = get_completion_status(item);
if (completion_status.is_done) {
ParseAction action =
(item.lhs == rules::START())
(item.lhs() == rules::START())
? ParseAction::Accept()
: ParseAction::Reduce(item.lhs, item.consumed_symbols.size(),
: ParseAction::Reduce(Symbol(item.variable_index), item.step_index,
completion_status.precedence,
completion_status.associativity,
get_production_id(item.consumed_symbols));
item.production_index);
for (const auto &lookahead_sym : lookahead_symbols)
add_action(state_id, lookahead_sym, action, item_set);
@ -157,40 +177,42 @@ class ParseTableBuilder {
}
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead_sym,
const ParseAction &action,
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
const ParseAction &new_action,
const ParseItemSet &item_set) {
auto &current_actions = parse_table.states[state_id].actions;
auto current_entry = current_actions.find(lookahead_sym);
const auto &current_actions = parse_table.states[state_id].actions;
const auto &current_entry = current_actions.find(lookahead);
if (current_entry == current_actions.end())
return &parse_table.set_action(state_id, lookahead_sym, action);
return &parse_table.set_action(state_id, lookahead, new_action);
const ParseAction current_action = current_entry->second[0];
const ParseAction old_action = current_entry->second[0];
auto resolution =
conflict_manager.resolve(action, current_action, lookahead_sym);
conflict_manager.resolve(new_action, old_action, lookahead);
switch (resolution.second) {
case ConflictTypeNone:
if (resolution.first)
return &parse_table.set_action(state_id, lookahead_sym, action);
return &parse_table.set_action(state_id, lookahead, new_action);
break;
case ConflictTypeResolved:
if (action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(action.production_id);
if (current_action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(current_action.production_id);
case ConflictTypeResolved: {
if (resolution.first)
return &parse_table.set_action(state_id, lookahead_sym, action);
return &parse_table.set_action(state_id, lookahead, new_action);
if (old_action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(production_id(old_action));
if (new_action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(production_id(new_action));
break;
}
case ConflictTypeUnresolved: {
set<Symbol> goal_symbols = item_set_goal_symbols(item_set);
if (has_expected_conflict(goal_symbols))
return &parse_table.add_action(state_id, lookahead_sym, action);
auto old_goal_syms = goal_symbols(item_set, old_action, lookahead);
auto new_goal_syms = goal_symbols(item_set, new_action, lookahead);
if (has_expected_conflict(old_goal_syms, new_goal_syms))
return &parse_table.add_action(state_id, lookahead, new_action);
else
conflicts.insert(conflict_description(action, current_action,
lookahead_sym, goal_symbols));
conflicts.insert(conflict_description(
lookahead, old_action, old_goal_syms, new_action, new_goal_syms));
break;
}
}
@ -198,9 +220,14 @@ class ParseTableBuilder {
return nullptr;
}
bool has_expected_conflict(const set<Symbol> &symbols) {
pair<Symbol, int> production_id(const ParseAction &action) {
return { action.symbol, action.production_id };
}
bool has_expected_conflict(set<Symbol> symbols1, const set<Symbol> &symbols2) {
symbols1.insert(symbols2.begin(), symbols2.end());
for (const auto &conflicting_symbols : grammar.expected_conflicts)
if (symbols == conflicting_symbols)
if (symbols1 == conflicting_symbols)
return true;
return false;
}
@ -209,46 +236,55 @@ class ParseTableBuilder {
set<int> result;
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
if (!item.consumed_symbols.empty()) {
auto precedence_range = get_metadata(item.rule, rules::PRECEDENCE);
result.insert(precedence_range.min);
result.insert(precedence_range.max);
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index > 0) {
if (item.step_index < production.size())
result.insert(production[item.step_index].precedence);
else
result.insert(production[item.step_index - 1].precedence);
}
}
return result;
}
set<Symbol> item_set_goal_symbols(const ParseItemSet &item_set) {
set<Symbol> goal_symbols(const ParseItemSet &item_set,
const ParseAction &action,
const Symbol &lookahead_sym) {
set<Symbol> result;
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
if (!item.consumed_symbols.empty())
result.insert(item.lhs);
switch (action.type) {
case ParseActionTypeShift: {
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index < production.size() &&
production[item.step_index].symbol == lookahead_sym)
result.insert(item.lhs());
}
break;
}
case ParseActionTypeReduce:
result.insert(action.symbol);
break;
default:
break;
}
return result;
}
string conflict_description(const ParseAction &new_action,
string conflict_description(const Symbol &lookahead,
const ParseAction &old_action,
const rules::Symbol &symbol,
const set<Symbol> &goal_symbols) const {
string symbols_string;
bool started = false;
for (const auto &symbol : goal_symbols) {
if (started)
symbols_string += ", ";
symbols_string += symbol_name(symbol);
started = true;
}
return "Within: " + symbols_string +
"\n"
"Lookahead: " +
symbol_name(symbol) + "\n" +
const set<Symbol> &old_goal_symbols,
const ParseAction &new_action,
const set<Symbol> &new_goal_symbols) const {
return "Lookahead: " + symbol_name(lookahead) + "\n" +
"Possible Actions:\n"
"* " +
action_description(old_action) + "\n" + "* " +
action_description(new_action);
action_description(old_action, old_goal_symbols) + "\n" + "* " +
action_description(new_action, new_goal_symbols);
}
string symbol_name(const rules::Symbol &symbol) const {
@ -260,20 +296,31 @@ class ParseTableBuilder {
else
return "";
} else if (symbol.is_token) {
return lexical_grammar.rules[symbol.index].name;
return lexical_grammar.variables[symbol.index].name;
} else {
return grammar.rules[symbol.index].name;
return grammar.variables[symbol.index].name;
}
}
string action_description(const ParseAction &action) const {
string action_description(const ParseAction &action,
const set<Symbol> &goal_symbols) const {
string symbols_string;
bool started = false;
for (const auto &symbol : goal_symbols) {
if (started)
symbols_string += ", ";
symbols_string += symbol_name(symbol);
started = true;
}
string result;
switch (action.type) {
case ParseActionTypeReduce: {
result = "Reduce";
for (const rules::Symbol &symbol : productions[action.production_id])
result += " " + symbol_name(symbol);
for (const ProductionStep &step :
grammar.productions(action.symbol)[action.production_id])
result += " " + symbol_name(step.symbol);
result += " -> " + symbol_name(action.symbol);
break;
}
@ -297,17 +344,6 @@ class ParseTableBuilder {
return result;
}
size_t get_production_id(const vector<rules::Symbol> &symbols) {
auto begin = productions.begin();
auto end = productions.end();
auto iter = find(begin, end, symbols);
if (iter == end) {
productions.push_back(symbols);
return productions.size() - 1;
}
return iter - begin;
}
};
pair<ParseTable, const GrammarError *> build_parse_table(

View file

@ -8,8 +8,8 @@
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {

View file

@ -2,7 +2,8 @@
#include <tuple>
#include "compiler/build_tables/build_lex_table.h"
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -10,8 +10,8 @@
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {

View file

@ -1,67 +0,0 @@
#include "compiler/build_tables/first_symbols.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/prepared_grammar.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/visitor.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {
using std::set;
using rules::Symbol;
class FirstSymbols : public rules::RuleFn<set<Symbol>> {
const SyntaxGrammar *grammar;
set<Symbol> visited_symbols;
public:
explicit FirstSymbols(const SyntaxGrammar *grammar) : grammar(grammar) {}
private:
set<Symbol> apply_to(const Symbol *rule) {
auto insertion_result = visited_symbols.insert(*rule);
if (!insertion_result.second)
return set<Symbol>();
set<Symbol> result({ *rule });
if (!rule->is_token) {
set<Symbol> &&symbols = apply(grammar->rules[rule->index].rule);
result.insert(symbols.begin(), symbols.end());
}
return result;
}
set<Symbol> apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
set<Symbol> apply_to(const rules::Choice *rule) {
set<Symbol> result;
for (const auto &element : rule->elements) {
auto &&element_symbols = apply(element);
result.insert(element_symbols.begin(), element_symbols.end());
}
return result;
}
set<Symbol> apply_to(const rules::Seq *rule) {
auto &&result = apply(rule->left);
if (rule_can_be_blank(rule->left, *grammar)) {
auto &&right_symbols = apply(rule->right);
result.insert(right_symbols.begin(), right_symbols.end());
}
return result;
}
};
set<Symbol> first_symbols(const rule_ptr &rule, const SyntaxGrammar &grammar) {
return FirstSymbols(&grammar).apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,24 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
#define COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
#include <set>
#include "compiler/rules/symbol.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace build_tables {
/*
* Returns the set of symbols that can appear at the beginning of a sentential
* form derivable from a given rule in a given grammar.
*/
std::set<rules::Symbol> first_symbols(const rule_ptr &rule,
const SyntaxGrammar &grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_

View file

@ -3,11 +3,10 @@
#include <vector>
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/build_tables/first_symbols.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/build_tables/item.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
namespace build_tables {
@ -17,45 +16,63 @@ using std::vector;
using std::pair;
using rules::Symbol;
const ParseItemSet item_set_closure(const ParseItem &starting_item,
const set<Symbol> &starting_lookahead_symbols,
const SyntaxGrammar &grammar) {
ParseItemSet result;
void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
vector<pair<ParseItem, set<Symbol>>> items_to_process;
items_to_process.push_back({ starting_item, starting_lookahead_symbols });
items_to_process.insert(items_to_process.end(), item_set->begin(),
item_set->end());
item_set->clear();
while (!items_to_process.empty()) {
ParseItem item = items_to_process.back().first;
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
items_to_process.pop_back();
set<Symbol> &lookahead_symbols = result[item];
set<Symbol> &lookahead_symbols = item_set->operator[](item);
size_t previous_size = lookahead_symbols.size();
lookahead_symbols.insert(new_lookahead_symbols.begin(),
new_lookahead_symbols.end());
if (lookahead_symbols.size() == previous_size)
continue;
for (const auto &pair : sym_transitions(item.rule)) {
const Symbol &symbol = pair.first;
const rule_ptr &next_rule = pair.second;
const Production &item_production =
grammar.productions(item.lhs())[item.production_index];
if (symbol.is_token || symbol.is_built_in())
continue;
if (item.step_index == item_production.size())
continue;
set<Symbol> next_lookahead_symbols = first_symbols(next_rule, grammar);
if (rule_can_be_blank(next_rule, grammar))
next_lookahead_symbols.insert(lookahead_symbols.begin(),
lookahead_symbols.end());
Symbol symbol = item_production[item.step_index].symbol;
items_to_process.push_back(
{ ParseItem(symbol, grammar.rules[symbol.index].rule, {}),
next_lookahead_symbols });
if (symbol.is_token || symbol.is_built_in())
continue;
set<Symbol> next_lookahead_symbols;
unsigned int next_step = item.step_index + 1;
if (next_step == item_production.size()) {
next_lookahead_symbols = lookahead_symbols;
} else {
vector<Symbol> symbols_to_process({ item_production[next_step].symbol });
while (!symbols_to_process.empty()) {
Symbol following_symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (!next_lookahead_symbols.insert(following_symbol).second)
continue;
for (const auto &production : grammar.productions(following_symbol))
if (!production.empty())
symbols_to_process.push_back(production[0].symbol);
}
}
size_t i = 0;
for (const Production &production : grammar.productions(symbol)) {
if (!production.empty())
items_to_process.push_back(
{ ParseItem(symbol, i, 0, production[0].rule_id),
next_lookahead_symbols });
i++;
}
}
return result;
}
} // namespace build_tables

View file

@ -1,19 +1,16 @@
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
#define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
#include <set>
#include "compiler/build_tables/parse_item.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
class SyntaxGrammar;
struct SyntaxGrammar;
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &,
const std::set<rules::Symbol> &,
const SyntaxGrammar &);
void item_set_closure(ParseItemSet *, const SyntaxGrammar &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -4,7 +4,7 @@
#include "compiler/build_tables/merge_transitions.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
@ -22,20 +22,23 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
for (auto &transition : sym_transitions(item.rule)) {
vector<Symbol> consumed_symbols(item.consumed_symbols);
consumed_symbols.push_back(transition.first);
ParseItem new_item(item.lhs, transition.second, consumed_symbols);
merge_sym_transition<ParseItemSet>(
&result, { transition.first,
item_set_closure(new_item, lookahead_symbols, grammar) },
[](ParseItemSet *left, const ParseItemSet *right) {
for (auto &pair : *right)
left->operator[](pair.first)
.insert(pair.second.begin(), pair.second.end());
});
}
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size())
continue;
const Symbol &symbol = production[item.step_index].symbol;
unsigned int step = item.step_index + 1;
int rule_id = step < production.size() ? production[step].rule_id : 0;
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
result[symbol][new_item].insert(lookahead_symbols.begin(),
lookahead_symbols.end());
}
for (auto &pair : result)
item_set_closure(&pair.second, grammar);
return result;
}

View file

@ -7,7 +7,7 @@
namespace tree_sitter {
class SyntaxGrammar;
struct SyntaxGrammar;
namespace rules {
class CharacterSet;

View file

@ -2,7 +2,7 @@
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
#include "compiler/lexical_grammar.h"
namespace tree_sitter {

View file

@ -3,7 +3,7 @@
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {

View file

@ -1,34 +1,42 @@
#include "compiler/build_tables/parse_item.h"
#include <string>
#include "compiler/syntax_grammar.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {
using std::string;
using std::vector;
using std::to_string;
using std::ostream;
using rules::Symbol;
ParseItem::ParseItem(const rules::Symbol &lhs, const rule_ptr rule,
const vector<rules::Symbol> &consumed_symbols)
: Item(lhs, rule), consumed_symbols(consumed_symbols) {}
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,
unsigned int step_index, int rule_id)
: variable_index(lhs.index),
production_index(production_index),
step_index(step_index),
rule_id(rule_id) {}
bool ParseItem::operator==(const ParseItem &other) const {
return (lhs == other.lhs) &&
(consumed_symbols.size() == other.consumed_symbols.size()) &&
(rule == other.rule || rule->operator==(*other.rule));
return (variable_index == other.variable_index) &&
(rule_id == other.rule_id) && (step_index == other.step_index);
}
bool ParseItem::operator<(const ParseItem &other) const {
if (lhs < other.lhs)
if (variable_index < other.variable_index)
return true;
if (other.lhs < lhs)
if (variable_index > other.variable_index)
return false;
if (consumed_symbols.size() < other.consumed_symbols.size())
if (step_index < other.step_index)
return true;
if (other.consumed_symbols.size() < consumed_symbols.size())
if (step_index > other.step_index)
return false;
return rule < other.rule;
return rule_id < other.rule_id;
}
Symbol ParseItem::lhs() const {
return Symbol(variable_index);
}
} // namespace build_tables

View file

@ -10,13 +10,17 @@
namespace tree_sitter {
namespace build_tables {
class ParseItem : public Item {
class ParseItem {
public:
ParseItem(const rules::Symbol &lhs, rule_ptr rule,
const std::vector<rules::Symbol> &consumed_symbols);
ParseItem(const rules::Symbol &, unsigned int, unsigned int, int);
bool operator==(const ParseItem &other) const;
bool operator<(const ParseItem &other) const;
std::vector<rules::Symbol> consumed_symbols;
rules::Symbol lhs() const;
int variable_index;
unsigned int production_index;
unsigned int step_index;
int rule_id;
};
typedef std::map<ParseItem, std::set<rules::Symbol>> ParseItemSet;
@ -29,9 +33,8 @@ namespace std {
template <>
struct hash<tree_sitter::build_tables::ParseItem> {
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
return hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rule_ptr>()(item.rule) ^
hash<size_t>()(item.consumed_symbols.size());
return hash<unsigned int>()(item.variable_index) ^
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
}
};

View file

@ -1,7 +1,5 @@
#include "compiler/build_tables/rule_can_be_blank.h"
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
@ -12,8 +10,6 @@
namespace tree_sitter {
namespace build_tables {
using std::set;
class CanBeBlank : public rules::RuleFn<bool> {
protected:
bool apply_to(const rules::Blank *) {
@ -40,35 +36,9 @@ class CanBeBlank : public rules::RuleFn<bool> {
}
};
class CanBeBlankRecursive : public CanBeBlank {
const SyntaxGrammar *grammar;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit;
public:
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar)
: grammar(grammar) {}
private:
using CanBeBlank::apply_to;
bool apply_to(const rules::Symbol *rule) {
if (visited_symbols.find(*rule) == visited_symbols.end()) {
visited_symbols.insert(*rule);
return !rule->is_token && apply(grammar->rules[rule->index].rule);
} else {
return false;
}
}
};
bool rule_can_be_blank(const rule_ptr &rule) {
return CanBeBlank().apply(rule);
}
bool rule_can_be_blank(const rule_ptr &rule, const SyntaxGrammar &grammar) {
return CanBeBlankRecursive(&grammar).apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -4,13 +4,9 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace build_tables {
bool rule_can_be_blank(const rule_ptr &rule);
bool rule_can_be_blank(const rule_ptr &rule, const SyntaxGrammar &grammar);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -2,7 +2,8 @@
#include "compiler/prepare_grammar/prepare_grammar.h"
#include "compiler/build_tables/build_tables.h"
#include "compiler/generate_code/c_code.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
namespace tree_sitter {

View file

@ -7,7 +7,8 @@
#include "compiler/generate_code/c_code.h"
#include "compiler/lex_table.h"
#include "compiler/parse_table.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/util/string_helpers.h"
@ -15,19 +16,15 @@ namespace tree_sitter {
namespace generate_code {
using std::function;
using std::map;
using std::pair;
using std::set;
using std::string;
using std::to_string;
using std::vector;
using util::escape_char;
static RuleEntry ERROR_ENTRY{
"error", rule_ptr(), RuleEntryTypeNamed,
};
static RuleEntry EOF_ENTRY{
"end", rule_ptr(), RuleEntryTypeAuxiliary,
};
static Variable ERROR_ENTRY("error", VariableTypeNamed, rule_ptr());
static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());
static const map<char, string> REPLACEMENTS({
{ '~', "TILDE" },
@ -149,15 +146,15 @@ class CCodeGenerator {
for (const auto &symbol : parse_table.symbols) {
line("[" + symbol_id(symbol) + "] = ");
switch (entry_for_symbol(symbol).type) {
case RuleEntryTypeNamed:
switch (symbol_type(symbol)) {
case VariableTypeNamed:
add("TSNodeTypeNamed,");
break;
case RuleEntryTypeAnonymous:
case VariableTypeAnonymous:
add("TSNodeTypeAnonymous,");
break;
case RuleEntryTypeHidden:
case RuleEntryTypeAuxiliary:
case VariableTypeHidden:
case VariableTypeAuxiliary:
add("TSNodeTypeHidden,");
break;
}
@ -338,15 +335,18 @@ class CCodeGenerator {
}
string symbol_id(const rules::Symbol &symbol) {
RuleEntry entry = entry_for_symbol(symbol);
string name = sanitize_name(entry.name);
if (symbol.is_built_in())
return "ts_builtin_sym_" + name;
if (symbol == rules::ERROR())
return "ts_builtin_sym_error";
if (symbol == rules::END_OF_INPUT())
return "ts_builtin_sym_end";
switch (entry.type) {
case RuleEntryTypeAuxiliary:
auto entry = entry_for_symbol(symbol);
string name = sanitize_name(entry.first);
switch (entry.second) {
case VariableTypeAuxiliary:
return "aux_sym_" + name;
case RuleEntryTypeAnonymous:
case VariableTypeAnonymous:
return "anon_sym_" + name;
default:
return "sym_" + name;
@ -358,26 +358,30 @@ class CCodeGenerator {
return "ERROR";
if (symbol == rules::END_OF_INPUT())
return "END";
return entry_for_symbol(symbol).name;
return entry_for_symbol(symbol).first;
}
const RuleEntry &entry_for_symbol(const rules::Symbol &symbol) {
VariableType symbol_type(const rules::Symbol &symbol) {
if (symbol == rules::ERROR())
return ERROR_ENTRY;
return VariableTypeNamed;
if (symbol == rules::END_OF_INPUT())
return EOF_ENTRY;
if (symbol.is_token)
return lexical_grammar.rules[symbol.index];
else
return syntax_grammar.rules[symbol.index];
return VariableTypeHidden;
return entry_for_symbol(symbol).second;
}
string rule_name(const rules::Symbol &symbol) {
return entry_for_symbol(symbol).name;
pair<string, VariableType> entry_for_symbol(const rules::Symbol &symbol) {
if (symbol.is_token) {
const Variable &variable = lexical_grammar.variables[symbol.index];
return { variable.name, variable.type };
} else {
const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
return { variable.name, variable.type };
}
}
bool reduce_action_is_fragile(const ParseAction &action) const {
return parse_table.fragile_production_ids.find(action.production_id) !=
return parse_table.fragile_production_ids.find(
{ action.symbol, action.production_id }) !=
parse_table.fragile_production_ids.end();
}

View file

@ -5,10 +5,10 @@
namespace tree_sitter {
struct LexicalGrammar;
struct SyntaxGrammar;
class LexTable;
class LexicalGrammar;
class ParseTable;
class SyntaxGrammar;
namespace generate_code {

View file

@ -0,0 +1,19 @@
#ifndef COMPILER_LEXICAL_GRAMMAR_H_
#define COMPILER_LEXICAL_GRAMMAR_H_
#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/variable.h"
namespace tree_sitter {
struct LexicalGrammar {
std::vector<Variable> variables;
std::vector<rule_ptr> separators;
};
} // namespace tree_sitter
#endif // COMPILER_LEXICAL_GRAMMAR_H_

View file

@ -60,7 +60,7 @@ ParseAction ParseAction::ReduceExtra(Symbol symbol) {
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
int precedence, Associativity associativity,
int production_id) {
unsigned int production_id) {
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
{ precedence }, associativity, production_id);
}

View file

@ -35,7 +35,8 @@ class ParseAction {
static ParseAction Shift(ParseStateId state_index,
std::set<int> precedence_values);
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
int precedence, Associativity, int production_id);
int precedence, Associativity,
unsigned int production_id);
static ParseAction ShiftExtra();
static ParseAction ReduceExtra(rules::Symbol symbol);
bool operator==(const ParseAction &) const;
@ -87,7 +88,7 @@ class ParseTable {
std::vector<ParseState> states;
std::set<rules::Symbol> symbols;
std::set<int> fragile_production_ids;
std::set<std::pair<rules::Symbol, unsigned int>> fragile_production_ids;
};
} // namespace tree_sitter

View file

@ -2,7 +2,7 @@
#include <vector>
#include <string>
#include <utility>
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"
@ -42,12 +42,10 @@ class ExpandRepeats : public rules::IdentityRuleFn {
rule_name + string("_repeat") + to_string(++repeat_count);
Symbol repeat_symbol(offset + index);
existing_repeats.push_back({ rule->copy(), repeat_symbol });
aux_rules.push_back({
helper_rule_name,
aux_rules.push_back(Variable(
helper_rule_name, VariableTypeAuxiliary,
Seq::build({ inner_rule, Choice::build({ repeat_symbol.copy(),
make_shared<Blank>() }) }),
RuleEntryTypeAuxiliary,
});
make_shared<Blank>() }) })));
return repeat_symbol.copy();
}
@ -64,21 +62,21 @@ class ExpandRepeats : public rules::IdentityRuleFn {
return apply(rule);
}
vector<RuleEntry> aux_rules;
vector<Variable> aux_rules;
};
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
SyntaxGrammar result;
result.rules = grammar.rules;
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
InitialSyntaxGrammar result;
result.variables = grammar.variables;
result.ubiquitous_tokens = grammar.ubiquitous_tokens;
result.expected_conflicts = grammar.expected_conflicts;
ExpandRepeats expander(result.rules.size());
for (auto &rule_entry : result.rules)
rule_entry.rule = expander.expand(rule_entry.rule, rule_entry.name);
ExpandRepeats expander(result.variables.size());
for (auto &variable : result.variables)
variable.rule = expander.expand(variable.rule, variable.name);
result.rules.insert(result.rules.end(), expander.aux_rules.begin(),
expander.aux_rules.end());
result.variables.insert(result.variables.end(), expander.aux_rules.begin(),
expander.aux_rules.end());
return result;
}

View file

@ -4,12 +4,11 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace prepare_grammar {
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
struct InitialSyntaxGrammar;
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -3,7 +3,7 @@
#include <string>
#include <utility>
#include <map>
#include "compiler/prepared_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/string.h"
@ -68,11 +68,11 @@ pair<LexicalGrammar, const GrammarError *> expand_tokens(
LexicalGrammar result;
ExpandTokens expander;
for (auto &entry : grammar.rules) {
auto rule = expander.apply(entry.rule);
for (const Variable &variable : grammar.variables) {
auto rule = expander.apply(variable.rule);
if (expander.error)
return { result, expander.error };
result.rules.push_back({ entry.name, rule, entry.type });
result.variables.push_back(Variable(variable.name, variable.type, rule));
}
for (auto &sep : grammar.separators) {

View file

@ -6,7 +6,7 @@
namespace tree_sitter {
class LexicalGrammar;
struct LexicalGrammar;
namespace prepare_grammar {

View file

@ -0,0 +1,57 @@
#include "compiler/prepare_grammar/extract_choices.h"
#include <vector>
#include <memory>
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
namespace tree_sitter {
namespace prepare_grammar {
using std::make_shared;
using std::vector;
class ExtractChoices : public rules::RuleFn<vector<rule_ptr>> {
vector<rule_ptr> default_apply(const Rule *rule) {
return vector<rule_ptr>({ rule->copy() });
}
vector<rule_ptr> apply_to(const rules::Seq *rule) {
vector<rule_ptr> result;
for (auto left_entry : apply(rule->left))
for (auto right_entry : apply(rule->right))
result.push_back(rules::Seq::build({ left_entry, right_entry }));
return result;
}
vector<rule_ptr> apply_to(const rules::Metadata *rule) {
vector<rule_ptr> result;
for (auto entry : apply(rule->rule))
result.push_back(make_shared<rules::Metadata>(entry, rule->value));
return result;
}
vector<rule_ptr> apply_to(const rules::Choice *rule) {
vector<rule_ptr> result;
for (auto element : rule->elements)
for (auto entry : apply(element))
result.push_back(entry);
return result;
}
vector<rule_ptr> apply_to(const rules::Repeat *rule) {
vector<rule_ptr> result;
for (auto element : apply(rule->content))
result.push_back(make_shared<rules::Repeat>(element));
return result;
}
};
std::vector<rule_ptr> extract_choices(const rule_ptr &rule) {
return ExtractChoices().apply(rule);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -0,0 +1,15 @@
#ifndef COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
#include <vector>
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace prepare_grammar {
std::vector<rule_ptr> extract_choices(const rule_ptr &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_

View file

@ -5,7 +5,8 @@
#include <string>
#include <tuple>
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/string.h"
@ -56,7 +57,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
class TokenExtractor : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
rule_ptr apply_to_token(const Rule *input, RuleEntryType entry_type) {
rule_ptr apply_to_token(const Rule *input, VariableType entry_type) {
for (size_t i = 0; i < tokens.size(); i++)
if (tokens[i].rule->operator==(*input)) {
token_usage_counts[i]++;
@ -65,31 +66,29 @@ class TokenExtractor : public rules::IdentityRuleFn {
rule_ptr rule = input->copy();
size_t index = tokens.size();
tokens.push_back({
token_description(rule), rule, entry_type,
});
tokens.push_back(Variable(token_description(rule), entry_type, rule));
token_usage_counts.push_back(1);
return make_shared<Symbol>(index, true);
}
rule_ptr apply_to(const rules::String *rule) {
return apply_to_token(rule, RuleEntryTypeAnonymous);
return apply_to_token(rule, VariableTypeAnonymous);
}
rule_ptr apply_to(const rules::Pattern *rule) {
return apply_to_token(rule, RuleEntryTypeAuxiliary);
return apply_to_token(rule, VariableTypeAuxiliary);
}
rule_ptr apply_to(const rules::Metadata *rule) {
if (rule->value_for(rules::IS_TOKEN) > 0)
return apply_to_token(rule->rule.get(), RuleEntryTypeAuxiliary);
return apply_to_token(rule->rule.get(), VariableTypeAuxiliary);
else
return rules::IdentityRuleFn::apply_to(rule);
}
public:
vector<size_t> token_usage_counts;
vector<RuleEntry> tokens;
vector<Variable> tokens;
};
static const GrammarError *ubiq_token_err(const string &message) {
@ -97,9 +96,9 @@ static const GrammarError *ubiq_token_err(const string &message) {
"Not a token: " + message);
}
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
const InternedGrammar &grammar) {
SyntaxGrammar syntax_grammar;
InitialSyntaxGrammar syntax_grammar;
LexicalGrammar lexical_grammar;
SymbolReplacer symbol_replacer;
TokenExtractor extractor;
@ -107,31 +106,30 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
/*
* First, extract all of the grammar's tokens into the lexical grammar.
*/
vector<RuleEntry> processed_rules;
for (const RuleEntry &entry : grammar.rules)
processed_rules.push_back({
entry.name, extractor.apply(entry.rule), entry.type,
});
lexical_grammar.rules = extractor.tokens;
vector<Variable> processed_variables;
for (const Variable &variable : grammar.variables)
processed_variables.push_back(
Variable(variable.name, variable.type, extractor.apply(variable.rule)));
lexical_grammar.variables = extractor.tokens;
/*
* If a rule's entire content was extracted as a token and that token didn't
* appear within any other rule, then remove that rule from the syntax
* If a variable's entire rule was extracted as a token and that token didn't
* appear within any other rule, then remove that variable from the syntax
* grammar, giving its name to the token in the lexical grammar. Any symbols
* that pointed to that rule will need to be updated to point to the rule in
* the lexical grammar. Symbols that pointed to later rules will need to have
* their indices decremented.
* that pointed to that variable will need to be updated to point to the
* variable in the lexical grammar. Symbols that pointed to later variables
* will need to have their indices decremented.
*/
size_t i = 0;
for (const RuleEntry &entry : processed_rules) {
auto symbol = dynamic_pointer_cast<const Symbol>(entry.rule);
for (const Variable &variable : processed_variables) {
auto symbol = dynamic_pointer_cast<const Symbol>(variable.rule);
if (symbol.get() && symbol->is_token && !symbol->is_built_in() &&
extractor.token_usage_counts[symbol->index] == 1) {
lexical_grammar.rules[symbol->index].type = entry.type;
lexical_grammar.rules[symbol->index].name = entry.name;
lexical_grammar.variables[symbol->index].type = variable.type;
lexical_grammar.variables[symbol->index].name = variable.name;
symbol_replacer.replacements.insert({ Symbol(i), *symbol });
} else {
syntax_grammar.rules.push_back(entry);
syntax_grammar.variables.push_back(variable);
}
i++;
}
@ -139,14 +137,14 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
/*
* Perform any replacements of symbols needed based on the previous step.
*/
for (RuleEntry &entry : syntax_grammar.rules)
entry.rule = symbol_replacer.apply(entry.rule);
for (Variable &variable : syntax_grammar.variables)
variable.rule = symbol_replacer.apply(variable.rule);
for (auto &symbol_set : grammar.expected_conflicts) {
set<Symbol> new_symbol_set;
for (const Symbol &symbol : symbol_set)
new_symbol_set.insert(symbol_replacer.replace_symbol(symbol));
syntax_grammar.expected_conflicts.insert(new_symbol_set);
for (const ConflictSet &conflict_set : grammar.expected_conflicts) {
ConflictSet new_conflict_set;
for (const Symbol &symbol : conflict_set)
new_conflict_set.insert(symbol_replacer.replace_symbol(symbol));
syntax_grammar.expected_conflicts.insert(new_conflict_set);
}
/*
@ -171,7 +169,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
if (!new_symbol.is_token)
return make_tuple(
syntax_grammar, lexical_grammar,
ubiq_token_err(syntax_grammar.rules[new_symbol.index].name));
ubiq_token_err(syntax_grammar.variables[new_symbol.index].name));
syntax_grammar.ubiquitous_tokens.insert(new_symbol);
}

View file

@ -3,18 +3,15 @@
#include <tuple>
#include "tree_sitter/compiler.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class Grammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
const InternedGrammar &);
std::tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *>
extract_tokens(const InternedGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -0,0 +1,154 @@
#include "compiler/prepare_grammar/flatten_grammar.h"
#include "compiler/prepare_grammar/extract_choices.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/built_in_symbols.h"
#include <string>
#include <algorithm>
namespace tree_sitter {
namespace prepare_grammar {
using std::find;
using std::string;
using std::vector;
class FlattenRule : public rules::RuleFn<void> {
public:
bool has_pending_precedence;
int pending_precedence;
vector<int> precedence_stack;
bool has_pending_associativity;
Associativity pending_associativity;
vector<Associativity> associativity_stack;
Production production;
FlattenRule()
: has_pending_precedence(false),
pending_precedence(0),
has_pending_associativity(false),
pending_associativity(AssociativityNone) {}
void apply_to(const rules::Symbol *sym) {
production.push_back(
ProductionStep(*sym, current_precedence(), current_associativity()));
if (has_pending_precedence) {
precedence_stack.push_back(pending_precedence);
has_pending_precedence = false;
}
if (has_pending_associativity) {
associativity_stack.push_back(pending_associativity);
has_pending_associativity = false;
}
}
void apply_to(const rules::Metadata *metadata) {
int precedence = metadata->value_for(rules::PRECEDENCE);
int associativity = metadata->value_for(rules::ASSOCIATIVITY);
if (precedence != 0) {
pending_precedence = precedence;
has_pending_precedence = true;
}
if (associativity != 0) {
pending_associativity = static_cast<Associativity>(associativity);
has_pending_associativity = true;
}
apply(metadata->rule);
if (precedence != 0)
precedence_stack.pop_back();
if (associativity != 0)
associativity_stack.pop_back();
}
void apply_to(const rules::Seq *seq) {
apply(seq->left);
apply(seq->right);
}
private:
int current_precedence() {
if (precedence_stack.empty())
return 0;
else
return precedence_stack.back();
}
Associativity current_associativity() {
if (associativity_stack.empty())
return AssociativityNone;
else
return associativity_stack.back();
}
};
Production flatten_rule(const rule_ptr &rule) {
FlattenRule flattener;
flattener.apply(rule);
return flattener.production;
}
struct ProductionSlice {
vector<ProductionStep>::const_iterator start;
vector<ProductionStep>::const_iterator end;
bool operator==(const ProductionSlice &other) const {
if (end - start != other.end - other.start)
return false;
for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
if (!(iter1->symbol == iter2->symbol &&
iter1->precedence == iter2->precedence &&
iter1->associativity == iter2->associativity))
return false;
return true;
}
};
void assign_rule_ids(Production *production,
vector<ProductionSlice> *unique_slices) {
auto end = production->end();
for (auto iter = production->begin(); iter != end; ++iter) {
ProductionSlice slice{ iter, end };
auto existing_id =
find(unique_slices->cbegin(), unique_slices->cend(), slice);
if (existing_id == unique_slices->end()) {
unique_slices->push_back(slice);
iter->rule_id = unique_slices->size();
} else {
iter->rule_id = existing_id - unique_slices->cbegin() + 1;
}
}
}
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
SyntaxGrammar result;
result.expected_conflicts = grammar.expected_conflicts;
result.ubiquitous_tokens = grammar.ubiquitous_tokens;
for (const Variable &variable : grammar.variables) {
vector<Production> productions;
for (const rule_ptr &rule_component : extract_choices(variable.rule))
productions.push_back(flatten_rule(rule_component));
result.variables.push_back(
SyntaxVariable(variable.name, variable.type, productions));
}
vector<ProductionSlice> unique_slices;
for (SyntaxVariable &variable : result.variables)
for (Production &production : variable.productions)
assign_rule_ids(&production, &unique_slices);
return result;
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -0,0 +1,13 @@
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {
struct InitialSyntaxGrammar;
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -0,0 +1,24 @@
#ifndef COMPILER_INITIAL_SYNTAX_GRAMMAR_H_
#define COMPILER_INITIAL_SYNTAX_GRAMMAR_H_
#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/variable.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {
struct InitialSyntaxGrammar {
std::vector<Variable> variables;
std::set<rules::Symbol> ubiquitous_tokens;
std::set<ConflictSet> expected_conflicts;
};
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_INITIAL_SYNTAX_GRAMMAR_H_

View file

@ -56,10 +56,9 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
if (!interner.missing_rule_name.empty())
return { result, missing_rule_error(interner.missing_rule_name) };
result.rules.push_back({
pair.first, new_rule,
pair.first[0] == '_' ? RuleEntryTypeHidden : RuleEntryTypeNamed,
});
result.variables.push_back(Variable(
pair.first, pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
new_rule));
}
for (auto &rule : grammar.ubiquitous_tokens()) {

View file

@ -7,9 +7,6 @@
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class Grammar;
namespace prepare_grammar {
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);

View file

@ -5,15 +5,16 @@
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/variable.h"
namespace tree_sitter {
namespace prepare_grammar {
struct InternedGrammar {
std::vector<RuleEntry> rules;
std::vector<Variable> variables;
std::vector<rule_ptr> ubiquitous_tokens;
std::set<std::set<rules::Symbol>> expected_conflicts;
std::set<ConflictSet> expected_conflicts;
};
} // namespace prepare_grammar

View file

@ -1,10 +1,12 @@
#include "compiler/prepare_grammar/prepare_grammar.h"
#include <tuple>
#include "compiler/prepare_grammar/expand_repeats.h"
#include "compiler/prepare_grammar/expand_tokens.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "compiler/prepare_grammar/intern_symbols.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/flatten_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {
@ -28,7 +30,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
// Replace `Repeat` rules with pairs of recursive rules
SyntaxGrammar syntax_grammar = expand_repeats(get<0>(extract_result));
InitialSyntaxGrammar syntax_grammar = expand_repeats(get<0>(extract_result));
// Expand `String` and `Pattern` rules into full rule trees
auto expand_tokens_result = expand_tokens(get<1>(extract_result));
@ -37,7 +39,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
return make_tuple(syntax_grammar, lex_grammar, nullptr);
return make_tuple(flatten_grammar(syntax_grammar), lex_grammar, nullptr);
}
} // namespace prepare_grammar

View file

@ -2,7 +2,8 @@
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
#include <tuple>
#include "compiler/prepared_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
namespace tree_sitter {

View file

@ -1,40 +0,0 @@
#ifndef COMPILER_PREPARED_GRAMMAR_H_
#define COMPILER_PREPARED_GRAMMAR_H_
#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
enum RuleEntryType {
RuleEntryTypeNamed,
RuleEntryTypeAnonymous,
RuleEntryTypeHidden,
RuleEntryTypeAuxiliary,
};
struct RuleEntry {
std::string name;
rule_ptr rule;
RuleEntryType type;
};
class SyntaxGrammar {
public:
std::vector<RuleEntry> rules;
std::set<rules::Symbol> ubiquitous_tokens;
std::set<std::set<rules::Symbol>> expected_conflicts;
};
class LexicalGrammar {
public:
std::vector<RuleEntry> rules;
std::vector<rule_ptr> separators;
};
} // namespace tree_sitter
#endif // COMPILER_PREPARED_GRAMMAR_H_

View file

@ -15,5 +15,9 @@ Symbol START() {
return Symbol(-3);
}
Symbol NONE() {
return Symbol(-4);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -9,6 +9,7 @@ namespace rules {
Symbol ERROR();
Symbol END_OF_INPUT();
Symbol START();
Symbol NONE();
} // namespace rules
} // namespace tree_sitter

View file

@ -130,6 +130,79 @@ class RuleFn : private Visitor {
T value_;
};
template <>
class RuleFn<void> : private Visitor {
public:
void apply(const rule_ptr &rule) {
rule->accept(this);
}
protected:
virtual void default_apply(const Rule *rule) {}
virtual void apply_to(const Blank *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const CharacterSet *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const Choice *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const Metadata *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const Pattern *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const Repeat *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const Seq *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const String *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const NamedSymbol *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const Symbol *rule) {
return default_apply((const Rule *)rule);
}
void visit(const Blank *rule) {
apply_to(rule);
}
void visit(const CharacterSet *rule) {
apply_to(rule);
}
void visit(const Choice *rule) {
apply_to(rule);
}
void visit(const Metadata *rule) {
apply_to(rule);
}
void visit(const Pattern *rule) {
apply_to(rule);
}
void visit(const Repeat *rule) {
apply_to(rule);
}
void visit(const Seq *rule) {
apply_to(rule);
}
void visit(const String *rule) {
apply_to(rule);
}
void visit(const NamedSymbol *rule) {
apply_to(rule);
}
void visit(const Symbol *rule) {
apply_to(rule);
}
};
class IdentityRuleFn : public RuleFn<rule_ptr> {
protected:
virtual rule_ptr default_apply(const Rule *rule);

View file

@ -0,0 +1,63 @@
#include "compiler/syntax_grammar.h"
#include <vector>
#include <string>
#include <utility>
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::pair;
using std::vector;
using std::set;
// Productions for the built-in START symbol when the grammar has no
// syntactic variables: START expands directly to symbol index 0.
// NOTE(review): the `true` second argument to rules::Symbol presumably
// marks it as a token — confirm against the Symbol constructor.
static const vector<Production> START_PRODUCTIONS_TOKEN_ONLY({
Production({ ProductionStep(rules::Symbol(0, true), 0, AssociativityNone) }),
});
// Productions for START in the normal case: expand to the grammar's
// first (index 0) syntactic variable.
static const vector<Production> START_PRODUCTIONS({
Production({ ProductionStep(rules::Symbol(0), 0, AssociativityNone) }),
});
// Empty production list returned for symbols that cannot be expanded
// (tokens and built-in symbols).
static const vector<Production> NO_PRODUCTIONS({});
// Constructs a syntactic variable with its name, visibility type, and the
// flattened list of productions it can expand to.
SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
const vector<Production> &productions)
: name(name), productions(productions), type(type) {}
// Constructs a production step with the default rule_id of 0.
// Delegates to the four-argument constructor to avoid duplicating the
// member-initializer list (the file already relies on C++11 features).
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
                               Associativity associativity)
    : ProductionStep(symbol, precedence, associativity, 0) {}

// Constructs a production step.
//
// symbol        - the grammar symbol consumed by this step
// precedence    - precedence value used for conflict resolution
// associativity - associativity used for conflict resolution
// rule_id       - id of the originating grammar rule
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
                               Associativity associativity, int rule_id)
    : symbol(symbol),
      precedence(precedence),
      associativity(associativity),
      rule_id(rule_id) {}
// Two production steps are equal when every field matches.
bool ProductionStep::operator==(const ProductionStep &other) const {
  if (!(symbol == other.symbol))
    return false;
  if (precedence != other.precedence)
    return false;
  if (associativity != other.associativity)
    return false;
  return rule_id == other.rule_id;
}
// Returns the productions that the given symbol can expand to.
// The returned reference aliases either this grammar's variable table or
// one of the file-static production lists above; it remains valid for the
// lifetime of the grammar.
const vector<Production> &SyntaxGrammar::productions(
const rules::Symbol &symbol) const {
if (symbol == rules::START()) {
// A grammar with no syntactic variables consists only of tokens, so
// START expands straight to a token symbol.
if (variables.empty())
return START_PRODUCTIONS_TOKEN_ONLY;
else
return START_PRODUCTIONS;
} else if (symbol.is_built_in() || symbol.is_token) {
// Tokens and built-in symbols are terminal: nothing to expand.
return NO_PRODUCTIONS;
} else {
// No bounds check: assumes symbol.index is a valid index into this
// grammar's variables.
return variables[symbol.index].productions;
}
}
} // namespace tree_sitter

View file

@ -0,0 +1,47 @@
#ifndef COMPILER_SYNTAX_GRAMMAR_H_
#define COMPILER_SYNTAX_GRAMMAR_H_
#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/variable.h"
namespace tree_sitter {
// One symbol within a production, annotated with the precedence and
// associativity used when resolving parse conflicts, and the id of the
// grammar rule it originated from.
struct ProductionStep {
  ProductionStep(const rules::Symbol &, int, Associativity);
  ProductionStep(const rules::Symbol &, int, Associativity, int);
  bool operator==(const ProductionStep &) const;
  rules::Symbol symbol;
  int precedence;
  Associativity associativity;
  int rule_id;
};
// A production is a flat sequence of steps (the rule tree has already been
// flattened by this stage of compilation).
typedef std::vector<ProductionStep> Production;
// A named syntactic (non-token) grammar variable and the productions it
// can expand to.
struct SyntaxVariable {
  SyntaxVariable(const std::string &, VariableType,
                 const std::vector<Production> &);
  std::string name;
  std::vector<Production> productions;
  VariableType type;
};
// A set of symbols that the grammar author has declared may conflict.
typedef std::set<rules::Symbol> ConflictSet;
// The grammar's syntactic portion: its variables, the tokens that may
// appear anywhere (e.g. whitespace/comments), and the expected conflicts.
struct SyntaxGrammar {
  const std::vector<Production> &productions(const rules::Symbol &) const;
  std::vector<SyntaxVariable> variables;
  std::set<rules::Symbol> ubiquitous_tokens;
  std::set<ConflictSet> expected_conflicts;
};
}  // namespace tree_sitter
#endif  // COMPILER_SYNTAX_GRAMMAR_H_

11
src/compiler/variable.cc Normal file
View file

@ -0,0 +1,11 @@
#include "compiler/variable.h"
#include <string>
namespace tree_sitter {
using std::string;
// Constructs a grammar variable from its name, visibility type, and the
// (not yet flattened) rule tree that defines it.
Variable::Variable(const string &name, VariableType type, const rule_ptr &rule)
: name(name), rule(rule), type(type) {}
} // namespace tree_sitter

26
src/compiler/variable.h Normal file
View file

@ -0,0 +1,26 @@
#ifndef COMPILER_VARIABLE_H_
#define COMPILER_VARIABLE_H_
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
// How a grammar variable appears in the resulting syntax tree.
// NOTE(review): meanings inferred from the names — confirm against usage:
// Hidden variables produce no visible node, Auxiliary ones are generated
// internally (e.g. by repeat expansion), Anonymous ones come from inline
// string/pattern rules, and Named ones are the user's named rules.
enum VariableType {
VariableTypeHidden,
VariableTypeAuxiliary,
VariableTypeAnonymous,
VariableTypeNamed,
};
// A named grammar variable paired with the rule tree that defines it.
struct Variable {
Variable(const std::string &, VariableType, const rule_ptr &);
std::string name;
rule_ptr rule;
VariableType type;
};
} // namespace tree_sitter
#endif // COMPILER_VARIABLE_H_