From db9966b57c78d87a5b08ef8a8bd8d8f5f3a5da17 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Sat, 10 Oct 2015 12:33:12 -0700
Subject: [PATCH] Simplify lex item set transitions code

---
 project.gyp                                   |   2 +-
 spec/compiler/build_tables/lex_item_spec.cc   | 254 ++++++++++++++++--
 .../build_tables/merge_transitions_spec.cc    |  90 -------
 .../build_tables/rule_transitions_spec.cc     | 173 ------------
 src/compiler/build_tables/build_lex_table.cc  |   4 +-
 src/compiler/build_tables/item_set_closure.cc |   2 -
 src/compiler/build_tables/lex_item.cc         |  15 +-
 .../build_tables/lex_item_transitions.cc      | 127 +++++++++
 ...e_transitions.h => lex_item_transitions.h} |   5 +-
 src/compiler/build_tables/merge_transitions.h |  54 ----
 src/compiler/build_tables/rule_transitions.cc |  74 -----
 src/compiler/lex_table.cc                     |   3 +-
 .../prepare_grammar/prepare_grammar.cc        |   3 +-
 13 files changed, 378 insertions(+), 428 deletions(-)
 delete mode 100644 spec/compiler/build_tables/merge_transitions_spec.cc
 delete mode 100644 spec/compiler/build_tables/rule_transitions_spec.cc
 create mode 100644 src/compiler/build_tables/lex_item_transitions.cc
 rename src/compiler/build_tables/{rule_transitions.h => lex_item_transitions.h} (65%)
 delete mode 100644 src/compiler/build_tables/merge_transitions.h
 delete mode 100644 src/compiler/build_tables/rule_transitions.cc

diff --git a/project.gyp b/project.gyp
index 488f513e..5e79eba6 100644
--- a/project.gyp
+++ b/project.gyp
@@ -17,12 +17,12 @@
         'src/compiler/build_tables/get_metadata.cc',
         'src/compiler/build_tables/item_set_closure.cc',
         'src/compiler/build_tables/lex_item.cc',
+        'src/compiler/build_tables/lex_item_transitions.cc',
         'src/compiler/build_tables/lex_conflict_manager.cc',
         'src/compiler/build_tables/lookahead_set.cc',
         'src/compiler/build_tables/parse_item.cc',
         'src/compiler/build_tables/parse_conflict_manager.cc',
         'src/compiler/build_tables/rule_can_be_blank.cc',
-        'src/compiler/build_tables/rule_transitions.cc',
         'src/compiler/compile.cc',
         'src/compiler/generate_code/c_code.cc',
         'src/compiler/grammar.cc',
diff --git a/spec/compiler/build_tables/lex_item_spec.cc b/spec/compiler/build_tables/lex_item_spec.cc
index 21f5f41a..8dc963f0 100644
--- a/spec/compiler/build_tables/lex_item_spec.cc
+++ b/spec/compiler/build_tables/lex_item_spec.cc
@@ -40,36 +40,256 @@ describe("LexItem", []() {
   });
 });
 
-describe("lex_item_set_transitions", [&]() {
-  describe("when two items in the set have transitions on the same character", [&]() {
-    it("merges the transitions by computing the union of the two item sets", [&]() {
-      LexItemSet set1({
-        LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
-        LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
-      });
+describe("LexItemSet::transitions()", [&]() {
+  it("handles single characters", [&]() {
+    LexItemSet item_set({
+      LexItem(Symbol(1), character({ 'x' })),
+    });
 
-      AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
+    AssertThat(
+      item_set.transitions(),
+      Equals(map<CharacterSet, LexItemSet>({
         {
-          CharacterSet().include('a', 'd'),
+          CharacterSet().include('x'),
           LexItemSet({
             LexItem(Symbol(1), blank()),
           })
+        }
+      })));
+  });
+
+  it("handles sequences", [&]() {
+    LexItemSet item_set({
+      LexItem(Symbol(1), seq({
+        character({ 'w' }),
+        character({ 'x' }),
+        character({ 'y' }),
+        character({ 'z' }),
+      })),
+    });
+
+    AssertThat(
+      item_set.transitions(),
+      Equals(map<CharacterSet, LexItemSet>({
+        {
+          CharacterSet().include('w'),
+          LexItemSet({
+            LexItem(Symbol(1), seq({
+              character({ 'x' }),
+              character({ 'y' }),
+              character({ 'z' }),
+            })),
+          })
+        }
+      })));
+  });
+
+  it("handles sequences where the left hand side can be blank", [&]() {
+    LexItemSet item_set({
+      LexItem(Symbol(1), seq({
+        choice({
+          character({ 'x' }),
+          blank(),
+        }),
+        character({ 'y' }),
+        character({ 'z' }),
+      })),
+    });
+
+    AssertThat(
+      item_set.transitions(),
+      Equals(map<CharacterSet, LexItemSet>({
+        {
+          CharacterSet().include('x'),
+          LexItemSet({
+            LexItem(Symbol(1), seq({
+              character({ 'y' }),
+              character({ 'z' }),
+            })),
+          })
+        },
+        {
+          CharacterSet().include('y'),
+          LexItemSet({
+            LexItem(Symbol(1), character({ 'z' })),
+          })
+        }
+      })));
+  });
+
+  it("handles blanks", [&]() {
+    LexItemSet item_set({
+      LexItem(Symbol(1), blank()),
+    });
+
+    AssertThat(item_set.transitions(), IsEmpty());
+  });
+
+  it("handles repeats", [&]() {
+    LexItemSet item_set({
+      LexItem(Symbol(1), repeat(seq({
+        character({ 'a' }),
+        character({ 'b' }),
+      }))),
+      LexItem(Symbol(2), repeat(character({ 'c' }))),
+    });
+
+    AssertThat(
+      item_set.transitions(),
+      Equals(map<CharacterSet, LexItemSet>({
+        {
+          CharacterSet().include('a'),
+          LexItemSet({
+            LexItem(Symbol(1), seq({
+              character({ 'b' }),
+              repeat(seq({
+                character({ 'a' }),
+                character({ 'b' }),
+              }))
+            }))
+          })
+        },
+        {
+          CharacterSet().include('c'),
+          LexItemSet({
+            LexItem(Symbol(2), repeat(character({ 'c' }))),
+          })
+        }
+      })));
+  });
+
+  it("handles choices between overlapping character sets", [&]() {
+    LexItemSet item_set({
+      LexItem(Symbol(1), choice({
+        seq({
+          character({ 'a', 'b', 'c', 'd'  }),
+          character({ 'x' }),
+        }),
+        seq({
+          character({ 'c', 'd', 'e', 'f' }),
+          character({ 'y' }),
+        }),
+      }))
+    });
+
+    AssertThat(
+      item_set.transitions(),
+      Equals(map<CharacterSet, LexItemSet>({
+        {
+          CharacterSet().include('a', 'b'),
+          LexItemSet({
+            LexItem(Symbol(1), character({ 'x' })),
+          })
+        },
+        {
+          CharacterSet().include('c', 'd'),
+          LexItemSet({
+            LexItem(Symbol(1), character({ 'x' })),
+            LexItem(Symbol(1), character({ 'y' })),
+          })
         },
         {
           CharacterSet().include('e', 'f'),
           LexItemSet({
-            LexItem(Symbol(1), blank()),
-            LexItem(Symbol(2), blank()),
-          })
-        },
-        {
-          CharacterSet().include('g', 'x'),
-          LexItemSet({
-            LexItem(Symbol(2), blank()),
+            LexItem(Symbol(1), character({ 'y' })),
           })
         },
       })));
+  });
+
+  it("handles choices between a subset and a superset of characters", [&]() {
+    LexItemSet item_set({
+      LexItem(Symbol(1), choice({
+        seq({
+          character({ 'b', 'c', 'd' }),
+          character({ 'x' }),
+        }),
+        seq({
+          character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
+          character({ 'y' }),
+        }),
+      })),
     });
+
+    AssertThat(
+      item_set.transitions(),
+      Equals(map<CharacterSet, LexItemSet>({
+        {
+          CharacterSet().include('a').include('e', 'f'),
+          LexItemSet({
+            LexItem(Symbol(1), character({ 'y' })),
+          })
+        },
+        {
+          CharacterSet().include('b', 'd'),
+          LexItemSet({
+            LexItem(Symbol(1), character({ 'x' })),
+            LexItem(Symbol(1), character({ 'y' })),
+          })
+        },
+      })));
+  });
+
+  it("handles choices between whitelisted and blacklisted character sets", [&]() {
+    LexItemSet item_set({
+      LexItem(Symbol(1), seq({
+        choice({
+          character({ '/' }, false),
+          seq({
+            character({ '\\' }),
+            character({ '/' }),
+          }),
+        }),
+        character({ '/' }),
+      }))
+    });
+
+    AssertThat(
+      item_set.transitions(),
+      Equals(map<CharacterSet, LexItemSet>({
+        {
+          CharacterSet().include_all().exclude('/').exclude('\\'),
+          LexItemSet({
+            LexItem(Symbol(1), character({ '/' })),
+          })
+        },
+        {
+          CharacterSet().include('\\'),
+          LexItemSet({
+            LexItem(Symbol(1), character({ '/' })),
+            LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
+          })
+        },
+      })));
+  });
+
+  it("handles different items with overlapping character sets", [&]() {
+    LexItemSet set1({
+      LexItem(Symbol(1), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
+      LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
+    });
+
+    AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
+      {
+        CharacterSet().include('a', 'd'),
+        LexItemSet({
+          LexItem(Symbol(1), blank()),
+        })
+      },
+      {
+        CharacterSet().include('e', 'f'),
+        LexItemSet({
+          LexItem(Symbol(1), blank()),
+          LexItem(Symbol(2), blank()),
+        })
+      },
+      {
+        CharacterSet().include('g', 'i'),
+        LexItemSet({
+          LexItem(Symbol(2), blank()),
+        })
+      },
+    })));
   });
 });
 
diff --git a/spec/compiler/build_tables/merge_transitions_spec.cc b/spec/compiler/build_tables/merge_transitions_spec.cc
deleted file mode 100644
index 58f4fad4..00000000
--- a/spec/compiler/build_tables/merge_transitions_spec.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-#include "compiler/compiler_spec_helper.h"
-#include "compiler/build_tables/merge_transitions.h"
-
-using namespace rules;
-using namespace build_tables;
-
-START_TEST
-
-describe("merge_transition", []() {
-  typedef map<CharacterSet, int> int_map;
-
-  auto do_merge = [&](int_map *left, const pair<CharacterSet, int> &new_pair) {
-    merge_transition<int>(left, new_pair, [](int *l, const int *r) {
-      *l = *l | *r;
-    });
-  };
-
-  describe("when none of the transitions intersect", [&]() {
-    it("returns the union of the two sets of transitions", [&]() {
-      int_map map({
-        { CharacterSet().include('a').include('c'), 1 },
-        { CharacterSet().include('x').include('y'), 2 },
-        { CharacterSet().include('1').include('9'), 4 },
-      });
-
-      do_merge(&map, { CharacterSet().include(' '), 8 });
-      do_merge(&map, { CharacterSet().include('\t'), 16 });
-
-      AssertThat(map, Equals(int_map({
-        { CharacterSet().include('a').include('c'), 1 },
-        { CharacterSet().include('x').include('y'), 2 },
-        { CharacterSet().include('1').include('9'), 4 },
-        { CharacterSet().include(' '), 8 },
-        { CharacterSet().include('\t'), 16 },
-      })));
-    });
-  });
-
-  describe("when transitions intersect", [&]() {
-    it("merges the intersecting transitions using the provided function", [&]() {
-      int_map map({
-        { CharacterSet().include('a', 'f').include('A', 'F'), 1 },
-        { CharacterSet().include('0', '9'), 2 },
-      });
-
-      do_merge(&map, { CharacterSet().include('c'), 4 });
-      do_merge(&map, { CharacterSet().include('3'), 8 });
-
-      AssertThat(map, Equals(int_map({
-        {
-          CharacterSet()
-            .include('a', 'b')
-            .include('d', 'f')
-            .include('A', 'F'),
-          1
-        },
-        {
-          CharacterSet().include('c'),
-          5
-        },
-        {
-          CharacterSet().include('0', '2').include('4', '9'),
-          2
-        },
-        {
-          CharacterSet().include('3'),
-          10
-        },
-      })));
-    });
-  });
-
-  describe("when two of the right transitions intersect the same left transition", [&]() {
-    it("splits the left-hand transition correctly", [&]() {
-      int_map map1({
-        { CharacterSet().include('a').include('c'), 1 },
-      });
-
-      do_merge(&map1, { CharacterSet().include('a'), 2 });
-      do_merge(&map1, { CharacterSet().include('c'), 4 });
-
-      AssertThat(map1, Equals(int_map({
-        { CharacterSet().include('a'), 3 },
-        { CharacterSet().include('c'), 5 },
-      })));
-    });
-  });
-});
-
-END_TEST
diff --git a/spec/compiler/build_tables/rule_transitions_spec.cc b/spec/compiler/build_tables/rule_transitions_spec.cc
deleted file mode 100644
index 57e6fb47..00000000
--- a/spec/compiler/build_tables/rule_transitions_spec.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-#include "compiler/compiler_spec_helper.h"
-#include "compiler/build_tables/rule_transitions.h"
-#include "compiler/rules/metadata.h"
-
-using namespace rules;
-using namespace build_tables;
-
-class transition_map : public std::map<CharacterSet, rule_ptr> {
- public:
-  bool operator==(const std::map<CharacterSet, rule_ptr> &other) const {
-    if (this->size() != other.size()) return false;
-    for (const auto &pair : *this) {
-      auto other_pair = other.find(pair.first);
-      if (other_pair == other.end()) return false;
-      if (!pair.second->operator==(*other_pair->second)) return false;
-    }
-    return true;
-  }
-
-  transition_map(const std::initializer_list<std::pair<const CharacterSet, rule_ptr>> &list) :
-    std::map<CharacterSet, rule_ptr>(list) {}
-};
-
-START_TEST
-
-describe("rule_transitions", []() {
-  it("handles single characters", [&]() {
-    AssertThat(
-      rule_transitions(character({ '1' })),
-      Equals(transition_map({
-        { CharacterSet().include('1'), blank() }
-      })));
-  });
-
-  it("handles sequences", [&]() {
-    AssertThat(
-      rule_transitions(seq({ character({ '1' }), character({ '2' }) })),
-      Equals(transition_map({
-        { CharacterSet().include('1'), character({ '2' }) }
-      })));
-  });
-
-  it("handles long sequences", [&]() {
-    AssertThat(
-      rule_transitions(seq({
-        character({ '1' }),
-        character({ '2' }),
-        character({ '3' }),
-        character({ '4' })
-      })),
-      Equals(transition_map({
-        {
-          CharacterSet().include('1'),
-          seq({ character({ '2' }), character({ '3' }), character({ '4' }) }),
-        }
-      })));
-  });
-
-  it("handles sequences whose left sides can be blank", [&]() {
-    AssertThat(
-      rule_transitions(seq({
-        choice({
-          character({ '1' }),
-          blank() }),
-        seq({
-          character({ '1' }),
-          character({ '2' }) })
-      })), Equals(transition_map({
-        {
-          CharacterSet().include('1'),
-          choice({ seq({ character({ '1' }), character({ '2' }) }), character({ '2' }), }),
-        }
-      })));
-  });
-
-  it("handles choices between overlapping character sets", [&]() {
-    AssertThat(
-      rule_transitions(choice({
-        seq({
-          character({ 'a', 'b', 'c', 'd'  }),
-          sym("x") }),
-        seq({
-          character({ 'c', 'd', 'e', 'f' }),
-          sym("y") }) })),
-      Equals(transition_map({
-        { CharacterSet().include('a', 'b'), sym("x") },
-        { CharacterSet().include('c', 'd'), choice({ sym("x"), sym("y") }) },
-        { CharacterSet().include('e', 'f'), sym("y") },
-      })));
-  });
-
-  it("handles choices between whitelisted and blacklisted character sets", [&]() {
-    AssertThat(
-      rule_transitions(seq({
-        choice({
-          character({ '/' }, false),
-          seq({
-            character({ '\\' }),
-            character({ '/' }) }) }),
-        character({ '/' }) })),
-
-      Equals(transition_map({
-        { CharacterSet()
-            .include_all()
-            .exclude('/')
-            .exclude('\\'),
-          character({ '/' }) },
-        { CharacterSet()
-            .include('\\'),
-          seq({
-            choice({
-              blank(),
-              character({ '/' }) }),
-            character({ '/' }) }) },
-      })));
-  });
-
-  it("handles choices between a subset and a superset of characters", [&]() {
-    AssertThat(
-      rule_transitions(choice({
-        seq({
-          character({ 'b', 'c', 'd' }),
-          sym("x") }),
-        seq({
-          character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
-          sym("y") }) })),
-      Equals(transition_map({
-        { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) },
-        { CharacterSet().include('a').include('e', 'f'), sym("y") },
-      })));
-
-    AssertThat(
-      rule_transitions(choice({
-        seq({
-          character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
-          sym("x") }),
-        seq({
-          character({ 'b', 'c', 'd' }),
-          sym("y") }) })),
-      Equals(transition_map({
-        { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) },
-        { CharacterSet().include('a').include('e', 'f'), sym("x") },
-      })));
-  });
-
-  it("handles blanks", [&]() {
-    AssertThat(rule_transitions(blank()), Equals(transition_map({})));
-  });
-
-  it("handles repeats", [&]() {
-    rule_ptr rule = repeat(seq({ character({ 'a' }), character({ 'b' }) }));
-
-    AssertThat(
-      rule_transitions(rule),
-      Equals(transition_map({
-        {
-          CharacterSet().include('a'),
-          seq({
-            character({ 'b' }),
-            rule })
-        }})));
-
-    rule = repeat(character({ 'a' }));
-
-    AssertThat(
-      rule_transitions(rule),
-      Equals(transition_map({
-        { CharacterSet().include('a'), rule }
-      })));
-  });
-});
-
-END_TEST
diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc
index b9a98615..b74a1a7d 100644
--- a/src/compiler/build_tables/build_lex_table.cc
+++ b/src/compiler/build_tables/build_lex_table.cc
@@ -130,8 +130,8 @@ class LexTableBuilder {
     return rules::Seq::build({
       make_shared<rules::Metadata>(
         separator_rule, map<rules::MetadataKey, int>({
-                            { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
-                          })),
+                          { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
+                        })),
       rule,
     });
   }
diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc
index 6e138798..d23d552a 100644
--- a/src/compiler/build_tables/item_set_closure.cc
+++ b/src/compiler/build_tables/item_set_closure.cc
@@ -3,8 +3,6 @@
 #include <vector>
 #include <utility>
 #include "tree_sitter/compiler.h"
-#include "compiler/build_tables/rule_transitions.h"
-#include "compiler/build_tables/rule_can_be_blank.h"
 #include "compiler/syntax_grammar.h"
 
 namespace tree_sitter {
diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc
index d8f7b464..dae87fd4 100644
--- a/src/compiler/build_tables/lex_item.cc
+++ b/src/compiler/build_tables/lex_item.cc
@@ -1,7 +1,6 @@
 #include "compiler/build_tables/lex_item.h"
 #include "compiler/build_tables/get_metadata.h"
-#include "compiler/build_tables/rule_transitions.h"
-#include "compiler/build_tables/merge_transitions.h"
+#include "compiler/build_tables/lex_item_transitions.h"
 #include "compiler/rules/symbol.h"
 #include <unordered_set>
 
@@ -48,16 +47,8 @@ bool LexItemSet::operator==(const LexItemSet &other) const {
 
 map<CharacterSet, LexItemSet> LexItemSet::transitions() const {
   map<CharacterSet, LexItemSet> result;
-  for (const LexItem &item : entries) {
-    for (auto &transition : rule_transitions(item.rule)) {
-      LexItem next_item(item.lhs, transition.second);
-      merge_transition<LexItemSet>(
-        &result, { transition.first, LexItemSet({ next_item }) },
-        [](LexItemSet *left, const LexItemSet *right) {
-          left->entries.insert(right->entries.begin(), right->entries.end());
-        });
-    }
-  }
+  for (const LexItem &item : entries)
+    lex_item_transitions(&result, item);
   return result;
 }
 
diff --git a/src/compiler/build_tables/lex_item_transitions.cc b/src/compiler/build_tables/lex_item_transitions.cc
new file mode 100644
index 00000000..2fc6e7e6
--- /dev/null
+++ b/src/compiler/build_tables/lex_item_transitions.cc
@@ -0,0 +1,159 @@
+#include "compiler/build_tables/lex_item_transitions.h"
+#include <functional>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+#include "compiler/build_tables/rule_can_be_blank.h"
+#include "compiler/rules/blank.h"
+#include "compiler/rules/choice.h"
+#include "compiler/rules/seq.h"
+#include "compiler/rules/repeat.h"
+#include "compiler/rules/metadata.h"
+#include "compiler/rules/symbol.h"
+#include "compiler/rules/character_set.h"
+#include "compiler/rules/visitor.h"
+#include "compiler/build_tables/lex_item.h"
+
+namespace tree_sitter {
+namespace build_tables {
+
+using std::function;
+using std::make_shared;
+using std::map;
+using std::pair;
+using std::vector;
+using rules::CharacterSet;
+
+/*
+ *  Rule visitor that computes the transitions of a single lex item and
+ *  merges them into a caller-supplied transition map, splitting entries
+ *  whose character sets overlap.
+ */
+class LexItemTransitions : public rules::RuleFn<void> {
+  map<CharacterSet, LexItemSet> *transitions;
+  const rules::Symbol &item_lhs;
+
+  typedef function<rule_ptr(rule_ptr)> RuleTransform;
+
+  // Returns a copy of `item_set` in which every item's rule has been
+  // replaced by `transform(rule)`; each item keeps its lhs.
+  LexItemSet transform_item_set(const LexItemSet &item_set,
+                                const RuleTransform &transform) {
+    LexItemSet new_set;
+    for (const LexItem &item : item_set.entries)
+      new_set.entries.insert(LexItem(item.lhs, transform(item.rule)));
+    return new_set;
+  }
+
+  // Adds the transition `new_char_set -> new_item_set` to `target`. Every
+  // existing entry whose character set overlaps `new_char_set` is split in
+  // two: the non-overlapping remainder keeps the old item set, while the
+  // overlap maps to the old items plus the new ones. Whatever part of
+  // `new_char_set` overlapped nothing is inserted as an entry of its own.
+  void merge_transition(map<CharacterSet, LexItemSet> *target,
+                        CharacterSet new_char_set,
+                        const LexItemSet &new_item_set) {
+    vector<pair<CharacterSet, LexItemSet>> new_entries;
+
+    auto iter = target->begin();
+    while (iter != target->end()) {
+      // Work on a copy of the key: map keys are const and remove_set()
+      // modifies its receiver.
+      CharacterSet existing_char_set = iter->first;
+      LexItemSet &existing_item_set = iter->second;
+
+      // remove_set() strips the overlap out of `existing_char_set` and
+      // hands it back, leaving only the remainder behind.
+      CharacterSet intersection = existing_char_set.remove_set(new_char_set);
+      if (intersection.is_empty()) {
+        ++iter;
+        continue;
+      }
+
+      new_char_set.remove_set(intersection);
+      if (!existing_char_set.is_empty())
+        new_entries.push_back({ existing_char_set, existing_item_set });
+      existing_item_set.entries.insert(new_item_set.entries.begin(),
+                                       new_item_set.entries.end());
+      new_entries.push_back({ intersection, existing_item_set });
+      iter = target->erase(iter);
+    }
+
+    // Insert the split entries only after the scan so they are not
+    // revisited by the loop above.
+    target->insert(new_entries.begin(), new_entries.end());
+
+    if (!new_char_set.is_empty())
+      target->insert({ new_char_set, new_item_set });
+  }
+
+  // Computes the transitions of `inner` in isolation, rewrites the rule of
+  // every resulting item with `wrap`, and merges the outcome into the
+  // shared transition map.
+  void merge_inner_transitions(const rule_ptr &inner,
+                               const RuleTransform &wrap) {
+    map<CharacterSet, LexItemSet> inner_transitions;
+    LexItemTransitions(&inner_transitions, item_lhs).apply(inner);
+    for (const auto &entry : inner_transitions)
+      merge_transition(transitions, entry.first,
+                       transform_item_set(entry.second, wrap));
+  }
+
+  // A character set consumes one character, after which nothing of the
+  // rule remains: the successor item's rule is blank.
+  void apply_to(const CharacterSet *rule) {
+    merge_transition(transitions, *rule,
+                     LexItemSet({
+                       LexItem(item_lhs, rules::Blank::build()),
+                     }));
+  }
+
+  // A choice contributes the transitions of each of its alternatives.
+  void apply_to(const rules::Choice *rule) {
+    for (const rule_ptr &element : rule->elements)
+      apply(element);
+  }
+
+  // A sequence advances through its left side; each successor rule is
+  // followed by the untouched right side. If the left side can match
+  // nothing, the right side's own transitions apply as well.
+  void apply_to(const rules::Seq *rule) {
+    merge_inner_transitions(rule->left, [rule](rule_ptr item_rule) {
+      return rules::Seq::build({ item_rule, rule->right });
+    });
+
+    if (rule_can_be_blank(rule->left))
+      apply(rule->right);
+  }
+
+  // After one step into a repeat's content, the remainder of the content
+  // is followed by a fresh copy of the whole repeat.
+  void apply_to(const rules::Repeat *rule) {
+    merge_inner_transitions(rule->content, [rule](rule_ptr item_rule) {
+      return rules::Seq::build({ item_rule, rule->copy() });
+    });
+  }
+
+  // Metadata is transparent to transitions: each successor rule is wrapped
+  // again in the same metadata values.
+  void apply_to(const rules::Metadata *rule) {
+    merge_inner_transitions(rule->rule, [rule](rule_ptr item_rule) {
+      return make_shared<rules::Metadata>(item_rule, rule->value);
+    });
+  }
+
+ public:
+  LexItemTransitions(map<CharacterSet, LexItemSet> *transitions,
+                     const rules::Symbol &item_lhs)
+      : transitions(transitions), item_lhs(item_lhs) {}
+};
+
+// Merges every transition of `item` into `*transitions`.
+void lex_item_transitions(map<CharacterSet, LexItemSet> *transitions,
+                          const LexItem &item) {
+  LexItemTransitions(transitions, item.lhs).apply(item.rule);
+}
+
+}  // namespace build_tables
+}  // namespace tree_sitter
diff --git a/src/compiler/build_tables/rule_transitions.h b/src/compiler/build_tables/lex_item_transitions.h
similarity index 65%
rename from src/compiler/build_tables/rule_transitions.h
rename to src/compiler/build_tables/lex_item_transitions.h
index b97ded09..b6f24b0a 100644
--- a/src/compiler/build_tables/rule_transitions.h
+++ b/src/compiler/build_tables/lex_item_transitions.h
@@ -2,13 +2,16 @@
 #define COMPILER_BUILD_TABLES_RULE_TRANSITIONS_H_
 
 #include <map>
+#include <set>
 #include "compiler/rules/character_set.h"
 #include "compiler/rules/symbol.h"
+#include "compiler/build_tables/lex_item.h"
 
 namespace tree_sitter {
 namespace build_tables {
 
-std::map<rules::CharacterSet, rule_ptr> rule_transitions(const rule_ptr &);
+void lex_item_transitions(std::map<rules::CharacterSet, LexItemSet> *transitions,
+                          const LexItem &);
 
 }  // namespace build_tables
 }  // namespace tree_sitter
diff --git a/src/compiler/build_tables/merge_transitions.h b/src/compiler/build_tables/merge_transitions.h
deleted file mode 100644
index 0e2ecf7c..00000000
--- a/src/compiler/build_tables/merge_transitions.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
-#define COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
-
-#include <map>
-#include <utility>
-#include "compiler/rules/character_set.h"
-#include "compiler/rules/symbol.h"
-
-namespace tree_sitter {
-namespace build_tables {
-
-/*
- *  Merges two transition maps with character set keys. If the
- *  two maps contain values for overlapping character sets, the
- *  new value for the two sets' intersection will be computed by
- *  merging the old and new values using the given function.
- */
-template <typename T>
-void merge_transition(std::map<rules::CharacterSet, T> *left,
-                      const std::pair<rules::CharacterSet, T> &new_pair,
-                      std::function<void(T *, const T *)> merge_fn) {
-  rules::CharacterSet new_char_set = new_pair.first;
-  T new_value = new_pair.second;
-
-  std::map<rules::CharacterSet, T> pairs_to_insert;
-
-  auto iter = left->begin();
-  while (iter != left->end()) {
-    rules::CharacterSet char_set = iter->first;
-    T value = iter->second;
-
-    rules::CharacterSet intersection = char_set.remove_set(new_char_set);
-    if (!intersection.is_empty()) {
-      new_char_set.remove_set(intersection);
-      if (!char_set.is_empty())
-        pairs_to_insert.insert({ char_set, value });
-      merge_fn(&value, &new_value);
-      pairs_to_insert.insert({ intersection, value });
-      left->erase(iter++);
-    } else {
-      ++iter;
-    }
-  }
-
-  left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
-
-  if (!new_char_set.is_empty())
-    left->insert({ new_char_set, new_value });
-}
-
-}  // namespace build_tables
-}  // namespace tree_sitter
-
-#endif  // COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
diff --git a/src/compiler/build_tables/rule_transitions.cc b/src/compiler/build_tables/rule_transitions.cc
deleted file mode 100644
index c0c1d3ad..00000000
--- a/src/compiler/build_tables/rule_transitions.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include "compiler/build_tables/rule_transitions.h"
-#include "compiler/build_tables/rule_can_be_blank.h"
-#include "compiler/build_tables/merge_transitions.h"
-#include "compiler/rules/blank.h"
-#include "compiler/rules/choice.h"
-#include "compiler/rules/seq.h"
-#include "compiler/rules/repeat.h"
-#include "compiler/rules/metadata.h"
-#include "compiler/rules/symbol.h"
-#include "compiler/rules/character_set.h"
-#include "compiler/rules/visitor.h"
-
-namespace tree_sitter {
-namespace build_tables {
-
-using std::map;
-using std::make_shared;
-using rules::CharacterSet;
-using rules::Choice;
-using rules::Symbol;
-
-class RuleTransitions : public rules::RuleFn<map<CharacterSet, rule_ptr>> {
- private:
-  void merge_transitions(map<CharacterSet, rule_ptr> *left,
-                         const map<CharacterSet, rule_ptr> &right) {
-    for (auto &pair : right)
-      merge_transition<rule_ptr>(left, pair,
-                                 [](rule_ptr *left, const rule_ptr *right) {
-                                   *left = Choice::build({ *left, *right });
-                                 });
-  }
-
-  map<CharacterSet, rule_ptr> apply_to(const CharacterSet *rule) {
-    return map<CharacterSet, rule_ptr>(
-      { { *rule, make_shared<rules::Blank>() } });
-  }
-
-  map<CharacterSet, rule_ptr> apply_to(const rules::Choice *rule) {
-    map<CharacterSet, rule_ptr> result;
-    for (const auto &el : rule->elements)
-      merge_transitions(&result, this->apply(el));
-    return result;
-  }
-
-  map<CharacterSet, rule_ptr> apply_to(const rules::Seq *rule) {
-    auto result = this->apply(rule->left);
-    for (auto &pair : result)
-      pair.second = rules::Seq::build({ pair.second, rule->right });
-    if (rule_can_be_blank(rule->left))
-      merge_transitions(&result, this->apply(rule->right));
-    return result;
-  }
-
-  map<CharacterSet, rule_ptr> apply_to(const rules::Repeat *rule) {
-    auto result = this->apply(rule->content);
-    for (auto &pair : result)
-      pair.second = rules::Seq::build({ pair.second, rule->copy() });
-    return result;
-  }
-
-  map<CharacterSet, rule_ptr> apply_to(const rules::Metadata *rule) {
-    auto result = this->apply(rule->rule);
-    for (auto &pair : result)
-      pair.second = make_shared<rules::Metadata>(pair.second, rule->value);
-    return result;
-  }
-};
-
-map<CharacterSet, rule_ptr> rule_transitions(const rule_ptr &rule) {
-  return RuleTransitions().apply(rule);
-}
-
-}  // namespace build_tables
-}  // namespace tree_sitter
diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc
index d50ecef0..255c24f4 100644
--- a/src/compiler/lex_table.cc
+++ b/src/compiler/lex_table.cc
@@ -27,7 +27,8 @@ LexAction LexAction::Error() {
   return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 });
 }
 
-LexAction LexAction::Advance(size_t state_index, PrecedenceRange precedence_range) {
+LexAction LexAction::Advance(size_t state_index,
+                             PrecedenceRange precedence_range) {
   return LexAction(LexActionTypeAdvance, state_index, Symbol(-1),
                    precedence_range);
 }
diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc
index ad80aaa7..f8ff3a59 100644
--- a/src/compiler/prepare_grammar/prepare_grammar.cc
+++ b/src/compiler/prepare_grammar/prepare_grammar.cc
@@ -40,7 +40,8 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
   if (error)
     return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
 
-  return make_tuple(flatten_grammar(syntax_grammar), normalize_rules(lex_grammar), nullptr);
+  return make_tuple(flatten_grammar(syntax_grammar),
+                    normalize_rules(lex_grammar), nullptr);
 }
 
 }  // namespace prepare_grammar