diff --git a/README.md b/README.md
index 472827a8..15def40e 100644
--- a/README.md
+++ b/README.md
@@ -176,11 +176,11 @@ tokens, like `(` and `+`. This is useful when analyzing the meaning of a documen
 #include "tree_sitter/runtime.h"
 
 // Declare the language function that was generated from your grammar.
-TSLanguage *ts_language_arithmetic();
+TSLanguage *tree_sitter_arithmetic();
 
 int main() {
   TSDocument *document = ts_document_new();
-  ts_document_set_language(document, ts_language_arithmetic());
+  ts_document_set_language(document, tree_sitter_arithmetic());
   ts_document_set_input_string(document, "a + b * 5");
   ts_document_parse(document);
 
diff --git a/doc/grammar-schema.json b/doc/grammar-schema.json
index 5f43b279..f37cd983 100644
--- a/doc/grammar-schema.json
+++ b/doc/grammar-schema.json
@@ -40,6 +40,14 @@
         "pattern": "^[a-zA-Z_]\\w*$"
       }
     }
+  },
+
+  "externals": {
+    "type": "array",
+    "items": {
+      "type": "string",
+      "pattern": "^[a-zA-Z_]\\w*$"
+    }
   }
 },
diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h
index b362e535..1c287fd5 100644
--- a/include/tree_sitter/compiler.h
+++ b/include/tree_sitter/compiler.h
@@ -10,7 +10,8 @@ typedef enum {
   TSCompileErrorTypeInvalidGrammar,
   TSCompileErrorTypeInvalidRegex,
   TSCompileErrorTypeUndefinedSymbol,
-  TSCompileErrorTypeInvalidUbiquitousToken,
+  TSCompileErrorTypeInvalidExtraToken,
+  TSCompileErrorTypeInvalidExternalToken,
   TSCompileErrorTypeLexConflict,
   TSCompileErrorTypeParseConflict,
   TSCompileErrorTypeEpsilonRule,
diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h
index 3a5bab9a..197015f4 100644
--- a/include/tree_sitter/parser.h
+++ b/include/tree_sitter/parser.h
@@ -12,6 +12,8 @@ extern "C" {
 typedef unsigned short TSSymbol;
 typedef unsigned short TSStateId;
 
+typedef uint8_t TSExternalTokenState[16];
+
 #define ts_builtin_sym_error ((TSSymbol)-1)
 #define ts_builtin_sym_end 0
@@ -23,7 +25,7 @@ typedef struct {
 } TSSymbolMetadata;
 
 typedef struct {
-  void (*advance)(void *, TSStateId, bool);
+  void (*advance)(void *, bool);
   int32_t lookahead;
   TSSymbol result_symbol;
 } TSLexer;
@@ -48,6 +50,11 @@ typedef struct {
   bool fragile : 1;
 } TSParseAction;
 
+typedef struct {
+  uint16_t lex_state;
+  uint16_t external_lex_state;
+} TSLexMode;
+
 typedef union {
   TSParseAction action;
   struct {
@@ -58,14 +65,26 @@ typedef union {
 } TSParseActionEntry;
 
 typedef struct TSLanguage {
+  uint32_t version;
   uint32_t symbol_count;
   uint32_t token_count;
+  uint32_t external_token_count;
   const char **symbol_names;
   const TSSymbolMetadata *symbol_metadata;
   const unsigned short *parse_table;
   const TSParseActionEntry *parse_actions;
-  const TSStateId *lex_states;
+  const TSLexMode *lex_modes;
   bool (*lex_fn)(TSLexer *, TSStateId);
+  struct {
+    const bool *states;
+    const TSSymbol *symbol_map;
+    void *(*create)();
+    void (*destroy)(void *);
+    void (*reset)(void *);
+    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+    bool (*serialize)(void *, TSExternalTokenState);
+    void (*deserialize)(void *, const TSExternalTokenState);
+  } external_scanner;
 } TSLanguage;
 
 /*
@@ -79,14 +98,14 @@ typedef struct TSLanguage {
 
 #define ADVANCE(state_value)                        \
   {                                                 \
-    lexer->advance(lexer, state_value, false);      \
+    lexer->advance(lexer, false);                   \
     state = state_value;                            \
    goto next_state;                                 \
   }
 
 #define SKIP(state_value)                           \
   {                                                 \
-    lexer->advance(lexer, state_value, true);       \
+    lexer->advance(lexer, true);                    \
     state = state_value;                            \
     goto next_state;                                \
   }
@@ -146,21 +165,21 @@ typedef struct TSLanguage {
     { .type = TSParseActionTypeAccept }             \
   }
 
-#define EXPORT_LANGUAGE(language_name)                       \
-  static TSLanguage language = {                             \
-    .symbol_count = SYMBOL_COUNT,                            \
-    .token_count = TOKEN_COUNT,                              \
-    .symbol_metadata = ts_symbol_metadata,                   \
-    .parse_table = (const unsigned short *)ts_parse_table,   \
-    .parse_actions = ts_parse_actions,                       \
-    .lex_states = ts_lex_states,                             \
-    .symbol_names = ts_symbol_names,                         \
-    .lex_fn = ts_lex,                                        \
-  };                                                         \
-                                                             \
-  const TSLanguage *language_name() {                        \
-    return &language;                                        \
-  }
+#define GET_LANGUAGE(...)                                    \
+  static TSLanguage language = {                             \
+    .version = LANGUAGE_VERSION,                             \
+    .symbol_count = SYMBOL_COUNT,                            \
+    .token_count = TOKEN_COUNT,                              \
+    .symbol_metadata = ts_symbol_metadata,                   \
+    .parse_table = (const unsigned short *)ts_parse_table,   \
+    .parse_actions = ts_parse_actions,                       \
+    .lex_modes = ts_lex_modes,                               \
+    .symbol_names = ts_symbol_names,                         \
+    .lex_fn = ts_lex,                                        \
+    .external_token_count = EXTERNAL_TOKEN_COUNT,            \
+    .external_scanner = {__VA_ARGS__}                        \
+  };                                                         \
+  return &language
 
 #ifdef __cplusplus
 }
diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h
index 68e804f1..00d8e7c4 100644
--- a/include/tree_sitter/runtime.h
+++ b/include/tree_sitter/runtime.h
@@ -9,6 +9,8 @@ extern "C" {
 #include <stdbool.h>
 #include <stdint.h>
 
+#define TREE_SITTER_LANGUAGE_VERSION 1
+
 typedef unsigned short TSSymbol;
 typedef struct TSLanguage TSLanguage;
 typedef struct TSDocument TSDocument;
@@ -114,6 +116,7 @@ uint32_t ts_document_parse_count(const TSDocument *);
 
 uint32_t ts_language_symbol_count(const TSLanguage *);
 const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
+uint32_t ts_language_version(const TSLanguage *);
 
 #ifdef __cplusplus
 }
diff --git a/script/fetch-fixtures b/script/fetch-fixtures
index bb727298..7009d70f 100755
--- a/script/fetch-fixtures
+++ b/script/fetch-fixtures
@@ -7,6 +7,7 @@ GRAMMARS=(
   json
   c
   cpp
+  python
 )
 
 for grammar in ${GRAMMARS[@]}; do
@@ -21,7 +22,7 @@ for grammar in ${GRAMMARS[@]}; do
 
   (
     cd $grammar_dir;
-    git reset --hard;
-    git pull origin master;
+    git fetch origin
+    git reset --hard origin/master;
   )
 done
diff --git a/spec/compiler/build_tables/distinctive_tokens_spec.cc b/spec/compiler/build_tables/distinctive_tokens_spec.cc
index 104cd721..f01d76cb 100644
--- a/spec/compiler/build_tables/distinctive_tokens_spec.cc
+++ b/spec/compiler/build_tables/distinctive_tokens_spec.cc
@@ -27,7 +27,7 @@ describe("recovery_tokens(rule)", []() {
     })),
   };
 
-  AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({ 1 }));
+  AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) }));
 });
});
diff --git a/spec/compiler/build_tables/lex_conflict_manager_spec.cc b/spec/compiler/build_tables/lex_conflict_manager_spec.cc
index 7f43e175..3aa75a4c 100644
--- a/spec/compiler/build_tables/lex_conflict_manager_spec.cc
+++ b/spec/compiler/build_tables/lex_conflict_manager_spec.cc
@@ -14,10 +14,10 @@ START_TEST
 describe("LexConflictManager::resolve(new_action, old_action)", []() {
   LexConflictManager conflict_manager;
   bool update;
-  Symbol sym1(0, true);
-  Symbol sym2(1, true);
-  Symbol sym3(2, true);
-  Symbol sym4(3, true);
+  Symbol sym1(0, Symbol::Terminal);
+  Symbol sym2(1, Symbol::Terminal);
+  Symbol sym3(2, Symbol::Terminal);
+  Symbol sym4(3, Symbol::Terminal);
   LexItemSet item_set({ LexItem(sym4, blank() )});
 
   it("favors advance actions over empty accept token actions", [&]() {
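For orientation, here is a sketch of how generated parser code would use the new GET_LANGUAGE macro above. Because the macro's expansion ends with `return &language`, it must be invoked (with a trailing semicolon) inside the language function itself, passing the external scanner hooks positionally through `__VA_ARGS__` in struct-field order: states, symbol_map, create, destroy, reset, scan, serialize, deserialize. The `ts_external_scanner_*` table names below are assumptions for illustration, not names taken from this diff; the scanner function names follow the convention the fixture files later in the diff use.

    // Hypothetical generated code (table names assumed).
    extern "C" const TSLanguage *tree_sitter_arithmetic() {
      GET_LANGUAGE(
        ts_external_scanner_states,      // assumed name for the `states` table
        ts_external_scanner_symbol_map,  // assumed name for the `symbol_map` table
        tree_sitter_arithmetic_external_scanner_create,
        tree_sitter_arithmetic_external_scanner_destroy,
        tree_sitter_arithmetic_external_scanner_reset,
        tree_sitter_arithmetic_external_scanner_scan,
        tree_sitter_arithmetic_external_scanner_serialize,
        tree_sitter_arithmetic_external_scanner_deserialize
      );
    }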
diff --git a/spec/compiler/build_tables/lex_item_spec.cc b/spec/compiler/build_tables/lex_item_spec.cc
index 94997956..7042922f 100644
--- a/spec/compiler/build_tables/lex_item_spec.cc
+++ b/spec/compiler/build_tables/lex_item_spec.cc
@@ -14,7 +14,7 @@ START_TEST
 describe("LexItem", []() {
   describe("completion_status()", [&]() {
     it("indicates whether the item is done, its precedence, and whether it is a string", [&]() {
-      LexItem item1(Symbol(0, true), character({ 'a', 'b', 'c' }));
+      LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
       AssertThat(item1.completion_status().is_done, IsFalse());
       AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));
       AssertThat(item1.completion_status().is_string, IsFalse());
@@ -23,7 +23,7 @@ describe("LexItem", []() {
       params.precedence = 3;
       params.has_precedence = true;
       params.is_string = 1;
-      LexItem item2(Symbol(0, true), choice({
+      LexItem item2(Symbol(0, Symbol::Terminal), choice({
         metadata(blank(), params),
         character({ 'a', 'b', 'c' })
       }));
@@ -32,7 +32,7 @@ describe("LexItem", []() {
       AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));
       AssertThat(item2.completion_status().is_string, IsTrue());
 
-      LexItem item3(Symbol(0, true), repeat(character({ ' ', '\t' })));
+      LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
       AssertThat(item3.completion_status().is_done, IsTrue());
       AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
       AssertThat(item3.completion_status().is_string, IsFalse());
@@ -43,7 +43,7 @@
 
 describe("LexItemSet::transitions()", [&]() {
   it("handles single characters", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), character({ 'x' })),
+      LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
     });
 
     AssertThat(
@@ -53,7 +53,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('x'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), blank()),
+            LexItem(Symbol(1, Symbol::NonTerminal), blank()),
           }),
           PrecedenceRange(),
           false
@@ -67,7 +67,7 @@ describe("LexItemSet::transitions()", [&]() {
     params.is_main_token = true;
 
     LexItemSet item_set({
-      LexItem(Symbol(1), metadata(character({ 'x' }), params)),
+      LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)),
     });
 
     AssertThat(
@@ -77,7 +77,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('x'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), metadata(blank(), params)),
+            LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)),
           }),
           PrecedenceRange(),
           true
@@ -88,7 +88,7 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles sequences", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), seq({
+      LexItem(Symbol(1, Symbol::NonTerminal), seq({
         character({ 'w' }),
         character({ 'x' }),
         character({ 'y' }),
@@ -103,7 +103,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('w'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), seq({
+            LexItem(Symbol(1, Symbol::NonTerminal), seq({
               character({ 'x' }),
               character({ 'y' }),
               character({ 'z' }),
@@ -118,7 +118,7 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles sequences with nested precedence", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), seq({
+      LexItem(Symbol(1, Symbol::NonTerminal), seq({
         prec(3, seq({
           character({ 'v' }),
           prec(4, seq({
@@ -140,7 +140,7 @@ describe("LexItemSet::transitions()", [&]() {
           // The outer precedence is now 'active', because we are within its
           // contained rule.
           LexItemSet({
-            LexItem(Symbol(1), seq({
+            LexItem(Symbol(1, Symbol::NonTerminal), seq({
               active_prec(3, seq({
                 prec(4, seq({
                   character({ 'w' }),
@@ -168,7 +168,7 @@ describe("LexItemSet::transitions()", [&]() {
         Transition{
           // The inner precedence is now 'active'
           LexItemSet({
-            LexItem(Symbol(1), seq({
+            LexItem(Symbol(1, Symbol::NonTerminal), seq({
              active_prec(3, seq({ active_prec(4, character({ 'x' })), character({ 'y' }) })),
@@ -193,7 +193,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('x'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), seq({
+            LexItem(Symbol(1, Symbol::NonTerminal), seq({
               active_prec(3, character({ 'y' })),
               character({ 'z' }),
             })),
@@ -216,7 +216,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('y'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), character({ 'z' })),
+            LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
           }),
           PrecedenceRange(3),
           false
@@ -227,7 +227,7 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles sequences where the left hand side can be blank", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), seq({
+      LexItem(Symbol(1, Symbol::NonTerminal), seq({
         choice({
           character({ 'x' }),
           blank(),
@@ -244,7 +244,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('x'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), seq({
+            LexItem(Symbol(1, Symbol::NonTerminal), seq({
               character({ 'y' }),
               character({ 'z' }),
             })),
@@ -257,7 +257,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('y'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), character({ 'z' })),
+            LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
           }),
           PrecedenceRange(),
           false
@@ -268,7 +268,7 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles blanks", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), blank()),
+      LexItem(Symbol(1, Symbol::NonTerminal), blank()),
     });
 
     AssertThat(item_set.transitions(), IsEmpty());
@@ -276,11 +276,11 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles repeats", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), repeat1(seq({
+      LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({
         character({ 'a' }),
         character({ 'b' }),
       }))),
-      LexItem(Symbol(2), repeat1(character({ 'c' }))),
+      LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
     });
 
     AssertThat(
@@ -290,14 +290,14 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('a'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), seq({
+            LexItem(Symbol(1, Symbol::NonTerminal), seq({
               character({ 'b' }),
               repeat1(seq({
                 character({ 'a' }),
                 character({ 'b' }),
               }))
             })),
-            LexItem(Symbol(1), character({ 'b' })),
+            LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })),
           }),
           PrecedenceRange(),
           false
@@ -307,8 +307,8 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('c'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(2), repeat1(character({ 'c' }))),
-            LexItem(Symbol(2), blank()),
+            LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
+            LexItem(Symbol(2, Symbol::NonTerminal), blank()),
           }),
           PrecedenceRange(),
           false
@@ -319,7 +319,7 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles repeats with precedence", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), active_prec(-1, repeat1(character({ 'a' }))))
+      LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' }))))
     });
 
     AssertThat(
@@ -329,8 +329,8 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('a'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), active_prec(-1, repeat1(character({ 'a' })))),
-            LexItem(Symbol(1), active_prec(-1, blank())),
+            LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))),
+            LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())),
           }),
           PrecedenceRange(-1),
           false
@@ -341,7 +341,7 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles choices between overlapping character sets", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), choice({
+      LexItem(Symbol(1, Symbol::NonTerminal), choice({
         active_prec(2, seq({
           character({ 'a', 'b', 'c', 'd' }),
           character({ 'x' }),
@@ -360,7 +360,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('a', 'b'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), active_prec(2, character({ 'x' }))),
+            LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
           }),
           PrecedenceRange(2),
           false
@@ -370,8 +370,8 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('c', 'd'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), active_prec(2, character({ 'x' }))),
-            LexItem(Symbol(1), active_prec(3, character({ 'y' }))),
+            LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
+            LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
           }),
           PrecedenceRange(2, 3),
           false
@@ -381,7 +381,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('e', 'f'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), active_prec(3, character({ 'y' }))),
+            LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
           }),
           PrecedenceRange(3),
           false
@@ -392,7 +392,7 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles choices between a subset and a superset of characters", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), choice({
+      LexItem(Symbol(1, Symbol::NonTerminal), choice({
        seq({
          character({ 'b', 'c', 'd' }),
          character({ 'x' }),
@@ -411,7 +411,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('a').include('e', 'f'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), character({ 'y' })),
+            LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
           }),
           PrecedenceRange(),
           false
@@ -421,8 +421,8 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('b', 'd'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), character({ 'x' })),
-            LexItem(Symbol(1), character({ 'y' })),
+            LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
+            LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
           }),
           PrecedenceRange(),
           false
@@ -433,7 +433,7 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles choices between whitelisted and blacklisted character sets", [&]() {
     LexItemSet item_set({
-      LexItem(Symbol(1), seq({
+      LexItem(Symbol(1, Symbol::NonTerminal), seq({
         choice({
           character({ '/' }, false),
           seq({
@@ -452,7 +452,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include_all().exclude('/').exclude('\\'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), character({ '/' })),
+            LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
           }),
           PrecedenceRange(),
           false
@@ -462,8 +462,8 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('\\'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), character({ '/' })),
-            LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
+            LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
+            LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })),
           }),
           PrecedenceRange(),
           false
@@ -474,8 +474,8 @@ describe("LexItemSet::transitions()", [&]() {
 
   it("handles different items with overlapping character sets", [&]() {
     LexItemSet set1({
-      LexItem(Symbol(1), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
-      LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
+      LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
+      LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' }))
     });
 
     AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
@@ -483,7 +483,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('a', 'd'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), blank()),
+            LexItem(Symbol(1, Symbol::NonTerminal), blank()),
           }),
           PrecedenceRange(),
           false
@@ -493,8 +493,8 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('e', 'f'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(1), blank()),
-            LexItem(Symbol(2), blank()),
+            LexItem(Symbol(1, Symbol::NonTerminal), blank()),
+            LexItem(Symbol(2, Symbol::NonTerminal), blank()),
           }),
           PrecedenceRange(),
           false
@@ -504,7 +504,7 @@ describe("LexItemSet::transitions()", [&]() {
         CharacterSet().include('g', 'i'),
         Transition{
           LexItemSet({
-            LexItem(Symbol(2), blank()),
+            LexItem(Symbol(2, Symbol::NonTerminal), blank()),
           }),
           PrecedenceRange(),
           false
diff --git a/spec/compiler/build_tables/parse_item_set_builder_spec.cc b/spec/compiler/build_tables/parse_item_set_builder_spec.cc
index a1dd2231..6548f37a 100644
--- a/spec/compiler/build_tables/parse_item_set_builder_spec.cc
+++ b/spec/compiler/build_tables/parse_item_set_builder_spec.cc
@@ -27,26 +27,26 @@ describe("ParseItemSetBuilder", []() {
   SyntaxGrammar grammar{{
     SyntaxVariable("rule0", VariableTypeNamed, {
       Production({
-        {Symbol(1), 0, AssociativityNone},
-        {Symbol(11, true), 0, AssociativityNone},
+        {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
+        {Symbol(11, Symbol::Terminal), 0, AssociativityNone},
       }),
     }),
    SyntaxVariable("rule1", VariableTypeNamed, {
      Production({
-        {Symbol(12, true), 0, AssociativityNone},
-        {Symbol(13, true), 0, AssociativityNone},
+        {Symbol(12, Symbol::Terminal), 0, AssociativityNone},
+        {Symbol(13, Symbol::Terminal), 0, AssociativityNone},
      }),
      Production({
-        {Symbol(2), 0, AssociativityNone},
+        {Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
      })
    }),
    SyntaxVariable("rule2", VariableTypeNamed, {
      Production({
-        {Symbol(14, true), 0, AssociativityNone},
-        {Symbol(15, true), 0, AssociativityNone},
+        {Symbol(14, Symbol::Terminal), 0, AssociativityNone},
+        {Symbol(15, Symbol::Terminal), 0, AssociativityNone},
      })
    }),
-  }, {}, {}};
+  }, {}, {}, {}};
 
   auto production = [&](int variable_index, int production_index) -> const Production & {
     return grammar.variables[variable_index].productions[production_index];
@@ -54,8 +54,8 @@ describe("ParseItemSetBuilder", []() {
 
   ParseItemSet item_set({
     {
-      ParseItem(Symbol(0), production(0, 0), 0),
-      LookaheadSet({ 10 }),
+      ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
+      LookaheadSet({ Symbol(10, Symbol::Terminal) }),
     }
   });
 
@@ -64,20 +64,20 @@ describe("ParseItemSetBuilder", []() {
 
   AssertThat(item_set, Equals(ParseItemSet({
     {
-      ParseItem(Symbol(0), production(0, 0), 0),
-      LookaheadSet({ 10 })
+      ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
+      LookaheadSet({ Symbol(10, Symbol::Terminal) })
+    },
+    {
+      ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
+      LookaheadSet({ Symbol(11, Symbol::Terminal) })
     },
     {
-      ParseItem(Symbol(1), production(1, 0), 0),
-      LookaheadSet({ 11 })
+      ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
+      LookaheadSet({ Symbol(11, Symbol::Terminal) })
     },
     {
-      ParseItem(Symbol(1), production(1, 1), 0),
-      LookaheadSet({ 11 })
-    },
-    {
-      ParseItem(Symbol(2), production(2, 0), 0),
-      LookaheadSet({ 11 })
+      ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0),
+      LookaheadSet({ Symbol(11, Symbol::Terminal) })
     },
   })));
 });
@@ -86,18 +86,18 @@ describe("ParseItemSetBuilder", []() {
   SyntaxGrammar grammar{{
     SyntaxVariable("rule0", VariableTypeNamed, {
       Production({
-        {Symbol(1), 0, AssociativityNone},
-        {Symbol(11, true), 0, AssociativityNone},
+        {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
+        {Symbol(11, Symbol::Terminal), 0, AssociativityNone},
       }),
     }),
     SyntaxVariable("rule1", VariableTypeNamed, {
      Production({
-        {Symbol(12, true), 0, AssociativityNone},
-        {Symbol(13, true), 0, AssociativityNone},
+        {Symbol(12, Symbol::Terminal), 0, AssociativityNone},
+        {Symbol(13, Symbol::Terminal), 0, AssociativityNone},
      }),
      Production({})
    }),
-  }, {}, {}};
+  }, {}, {}, {}};
 
   auto production = [&](int variable_index, int production_index) -> const Production & {
     return grammar.variables[variable_index].productions[production_index];
@@ -105,8 +105,8 @@ describe("ParseItemSetBuilder", []() {
 
   ParseItemSet item_set({
     {
-      ParseItem(Symbol(0), production(0, 0), 0),
-      LookaheadSet({ 10 }),
+      ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
+      LookaheadSet({ Symbol(10, Symbol::Terminal) }),
     }
   });
 
@@ -115,16 +115,16 @@ describe("ParseItemSetBuilder", []() {
 
   AssertThat(item_set, Equals(ParseItemSet({
     {
-      ParseItem(Symbol(0), production(0, 0), 0),
-      LookaheadSet({ 10 })
+      ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
+      LookaheadSet({ Symbol(10, Symbol::Terminal) })
     },
     {
-      ParseItem(Symbol(1), production(1, 0), 0),
-      LookaheadSet({ 11 })
+      ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
+      LookaheadSet({ Symbol(11, Symbol::Terminal) })
     },
     {
-      ParseItem(Symbol(1), production(1, 1), 0),
-      LookaheadSet({ 11 })
+      ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
+      LookaheadSet({ Symbol(11, Symbol::Terminal) })
     },
   })));
 });
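A recurring change throughout these specs is the replacement of the old `Symbol(index, bool is_token)` constructor with an explicit symbol kind (`Symbol::NonTerminal` / `Symbol::Terminal`). The shape this implies for the compiler's Symbol type is roughly the following; this is a sketch reconstructed from the call sites above, with field names assumed, not the actual compiler source (which may also define further kinds, e.g. for external tokens):

    // Sketch of the Symbol rule type implied by Symbol(1, Symbol::NonTerminal)
    // and Symbol(11, Symbol::Terminal); member names are assumptions.
    class Symbol {
     public:
      enum Type {
        NonTerminal,
        Terminal,
      };

      Symbol(int index, Type type) : index(index), type(type) {}

      int index;
      Type type;
    };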
diff --git a/spec/compiler/prepare_grammar/expand_repeats_spec.cc b/spec/compiler/prepare_grammar/expand_repeats_spec.cc
index c25ff47c..d8c93a41 100644
--- a/spec/compiler/prepare_grammar/expand_repeats_spec.cc
+++ b/spec/compiler/prepare_grammar/expand_repeats_spec.cc
@@ -13,7 +13,7 @@ describe("expand_repeats", []() {
   it("replaces repeat rules with pairs of recursive rules", [&]() {
     InitialSyntaxGrammar grammar{{
       Variable("rule0", VariableTypeNamed, repeat1(i_token(0))),
-    }, {}, {}};
+    }, {}, {}, {}};
 
     auto result = expand_repeats(grammar);
 
@@ -32,7 +32,7 @@ describe("expand_repeats", []() {
       i_token(10),
       repeat1(i_token(11)),
     })),
-    }, {}, {}};
+    }, {}, {}, {}};
 
     auto result = expand_repeats(grammar);
 
@@ -54,7 +54,7 @@ describe("expand_repeats", []() {
       i_token(10),
       repeat1(i_token(11))
     })),
-    }, {}, {}};
+    }, {}, {}, {}};
 
     auto result = expand_repeats(grammar);
 
@@ -80,7 +80,7 @@ describe("expand_repeats", []() {
       i_token(3),
       repeat1(i_token(4))
    })),
-    }, {}, {}};
+    }, {}, {}, {}};
 
     auto result = expand_repeats(grammar);
 
@@ -106,7 +106,7 @@ describe("expand_repeats", []() {
       repeat1(i_token(10)),
       repeat1(i_token(11)),
     })),
-    }, {}, {}};
+    }, {}, {}, {}};
 
     auto result = expand_repeats(grammar);
 
@@ -130,7 +130,7 @@ describe("expand_repeats", []() {
     InitialSyntaxGrammar grammar{{
       Variable("rule0", VariableTypeNamed, repeat1(i_token(10))),
       Variable("rule1", VariableTypeNamed, repeat1(i_token(11))),
-    }, {}, {}};
+    }, {}, {}, {}};
 
     auto result = expand_repeats(grammar);
diff --git a/spec/compiler/prepare_grammar/extract_tokens_spec.cc b/spec/compiler/prepare_grammar/extract_tokens_spec.cc
index 9f871ec4..3aa576df 100644
--- a/spec/compiler/prepare_grammar/extract_tokens_spec.cc
+++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cc
@@ -5,6 +5,7 @@
 #include "compiler/prepare_grammar/extract_tokens.h"
 #include "helpers/rule_helpers.h"
 #include "helpers/equals_pointer.h"
+#include "helpers/stream_methods.h"
 
 START_TEST
 
@@ -28,7 +29,7 @@ describe("extract_tokens", []() {
     Variable("rule_B", VariableTypeNamed, pattern("ij+")),
     Variable("rule_C", VariableTypeNamed, choice({ str("kl"), blank() })),
     Variable("rule_D", VariableTypeNamed, repeat1(i_sym(3)))
-  }, {}, {}});
+  }, {}, {}, {}});
 
   InitialSyntaxGrammar &syntax_grammar = get<0>(result);
   LexicalGrammar &lexical_grammar = get<1>(result);
@@ -91,7 +92,7 @@ describe("extract_tokens", []() {
       i_sym(0),
       str("ab"),
     })),
-  }, {}, {}});
+  }, {}, {}, {}});
 
   InitialSyntaxGrammar &syntax_grammar = get<0>(result);
   LexicalGrammar &lexical_grammar = get<1>(result);
@@ -110,7 +111,7 @@ describe("extract_tokens", []() {
     Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })),
     Variable("rule_B", VariableTypeNamed, str("cd")),
     Variable("rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })),
-  }, {}, {}});
+  }, {}, {}, {}});
 
   InitialSyntaxGrammar &syntax_grammar = get<0>(result);
   LexicalGrammar &lexical_grammar = get<1>(result);
@@ -129,17 +130,26 @@ describe("extract_tokens", []() {
   });
 
   it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
-    auto result = extract_tokens(InternedGrammar{{
-      Variable("rule_A", VariableTypeNamed, str("ok")),
-      Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
-      Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
-    }, { str(" ") }, { { Symbol(1), Symbol(2) } }});
+    auto result = extract_tokens(InternedGrammar{
+      {
+        Variable("rule_A", VariableTypeNamed, str("ok")),
+        Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
+        Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
+      },
+      {
+        str(" ")
+      },
+      {
+        { Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) }
+      },
+      {}
+    });
 
     InitialSyntaxGrammar &syntax_grammar = get<0>(result);
 
     AssertThat(syntax_grammar.variables.size(), Equals(2));
     AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
-      { Symbol(0), Symbol(1) },
+      { Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) },
     })));
   });
 
@@ -150,7 +160,7 @@ describe("extract_tokens", []() {
   }, {
     str("y"),
     pattern("\\s+"),
-  }, {}});
+  }, {}, {}});
 
   AssertThat(get<2>(result), Equals(CompileError::none()));
 
@@ -167,11 +177,11 @@ describe("extract_tokens", []() {
     Variable("rule_B", VariableTypeNamed, str("y")),
   }, {
     str("y"),
-  }, {}});
+  }, {}, {}});
 
   AssertThat(get<2>(result), Equals(CompileError::none()));
   AssertThat(get<1>(result).separators.size(), Equals(0));
-  AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
+  AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, Symbol::Terminal) })));
 });
 
 it("updates extra symbols according to the new symbol numbers", [&]() {
@@ -181,12 +191,12 @@ describe("extract_tokens", []() {
     Variable("rule_C", VariableTypeNamed, str("z")),
   }, {
     i_sym(2),
-  }, {}});
+  }, {}, {}});
 
   AssertThat(get<2>(result), Equals(CompileError::none()));
 
   AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
-    { Symbol(3, true) },
+    { Symbol(3, Symbol::Terminal) },
   })));
 
   AssertThat(get<1>(result).separators, IsEmpty());
@@ -196,11 +206,11 @@ describe("extract_tokens", []() {
   auto result = extract_tokens(InternedGrammar{{
     Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
     Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
-  }, { i_sym(1) }, {}});
+  }, { i_sym(1) }, {}, {}});
 
   AssertThat(get<2>(result), !Equals(CompileError::none()));
   AssertThat(get<2>(result), Equals(
-    CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
+    CompileError(TSCompileErrorTypeInvalidExtraToken,
       "Not a token: rule_B")));
 });
 
@@ -208,14 +218,34 @@ describe("extract_tokens", []() {
   auto result = extract_tokens(InternedGrammar{{
     Variable("rule_A", VariableTypeNamed, str("x")),
     Variable("rule_B", VariableTypeNamed, str("y")),
-  }, { choice({ i_sym(1), blank() }) }, {}});
+  }, { choice({ i_sym(1), blank() }) }, {}, {}});
 
   AssertThat(get<2>(result), !Equals(CompileError::none()));
-  AssertThat(get<2>(result), Equals(
-    CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
-      "Not a token: (choice (sym 1) (blank))")));
+  AssertThat(get<2>(result), Equals(CompileError(
+    TSCompileErrorTypeInvalidExtraToken,
+    "Not a token: (choice (non-terminal 1) (blank))"
+  )));
   });
 });
+
+it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
+  auto result = extract_tokens(InternedGrammar{
+    {
+      Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
+      Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
+    },
+    {},
+    {},
+    {
+      ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)}
+    }
+  });
+
+  AssertThat(get<2>(result), Equals(CompileError(
+    TSCompileErrorTypeInvalidExternalToken,
+    "Name 'rule_A' cannot be used for both an external token and a non-terminal rule"
+  )));
+});
 });
 
 END_TEST
diff --git a/spec/compiler/prepare_grammar/flatten_grammar_spec.cc b/spec/compiler/prepare_grammar/flatten_grammar_spec.cc
index 3efd4e03..823da8e6 100644
--- a/spec/compiler/prepare_grammar/flatten_grammar_spec.cc
+++ b/spec/compiler/prepare_grammar/flatten_grammar_spec.cc
@@ -36,19 +36,19 @@ describe("flatten_grammar", []() {
   AssertThat(result.type, Equals(VariableTypeNamed));
   AssertThat(result.productions, Equals(vector<Production>({
     Production({
-      {Symbol(1), 0, AssociativityNone},
-      {Symbol(2), 101, AssociativityLeft},
-      {Symbol(3), 102, AssociativityRight},
-      {Symbol(4), 101, AssociativityLeft},
-      {Symbol(6), 0, AssociativityNone},
-      {Symbol(7), 0, AssociativityNone},
+      {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
+      {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
+      {Symbol(3, Symbol::NonTerminal), 102, AssociativityRight},
+      {Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft},
+      {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
+      {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
     }),
     Production({
-      {Symbol(1), 0, AssociativityNone},
-      {Symbol(2), 101, AssociativityLeft},
-      {Symbol(5), 101, AssociativityLeft},
-      {Symbol(6), 0, AssociativityNone},
-      {Symbol(7), 0, AssociativityNone},
+      {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
+      {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
+      {Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft},
+      {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
+      {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
     })
   })));
 });
@@ -65,8 +65,8 @@ describe("flatten_grammar", []() {
 
   AssertThat(result.productions, Equals(vector<Production>({
     Production({
-      {Symbol(1), 101, AssociativityLeft},
-      {Symbol(2), 101, AssociativityLeft},
+      {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
+      {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
     })
   })));
 
@@ -80,7 +80,7 @@ describe("flatten_grammar", []() {
 
   AssertThat(result.productions, Equals(vector<Production>({
     Production({
-      {Symbol(1), 101, AssociativityLeft},
+      {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
     })
   })));
 });
diff --git a/spec/compiler/prepare_grammar/intern_symbols_spec.cc b/spec/compiler/prepare_grammar/intern_symbols_spec.cc
index 4c417e57..9142eab6 100644
--- a/spec/compiler/prepare_grammar/intern_symbols_spec.cc
+++ b/spec/compiler/prepare_grammar/intern_symbols_spec.cc
@@ -3,8 +3,10 @@
 #include "compiler/grammar.h"
 #include "compiler/rules/named_symbol.h"
 #include "compiler/rules/symbol.h"
+#include "compiler/rules/built_in_symbols.h"
 #include "helpers/equals_pointer.h"
 #include "helpers/rule_helpers.h"
+#include "helpers/stream_methods.h"
 
 START_TEST
 
@@ -17,7 +19,7 @@ describe("intern_symbols", []() {
       { "x", choice({ sym("y"), sym("_z") }) },
      { "y", sym("_z") },
      { "_z", str("stuff") }
-    }, {}, {}};
+    }, {}, {}, {}};
 
     auto result = intern_symbols(grammar);
 
@@ -33,7 +35,7 @@ describe("intern_symbols", []() {
     it("returns an error", []() {
       Grammar grammar{{
         { "x", sym("y") },
-      }, {}, {}};
+      }, {}, {}, {}};
 
       auto result = intern_symbols(grammar);
 
@@ -48,7 +50,7 @@ describe("intern_symbols", []() {
       { "z", str("stuff") }
     }, {
       sym("z")
-    }, {}};
+    }, {}, {}};
 
     auto result = intern_symbols(grammar);
 
@@ -56,6 +58,32 @@ describe("intern_symbols", []() {
     AssertThat(result.first.extra_tokens.size(), Equals(1));
     AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
   });
+
+  it("records any rule names that match external token names", [&]() {
+    Grammar grammar{{
+      { "x", choice({ sym("y"), sym("z") }) },
+      { "y", sym("z") },
+      { "z", str("stuff") }
+    }, {}, {}, {
+      "w",
+      "z"
+    }};
+
+    auto result = intern_symbols(grammar);
+
+    AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>({
+      {
+        "w",
+        VariableTypeNamed,
+        rules::NONE()
+      },
+      {
+        "z",
+        VariableTypeNamed,
+        Symbol(2, Symbol::NonTerminal)
+      }
+    })));
+  });
 });
 
 END_TEST
diff --git a/spec/compiler/rules/repeat_spec.cc b/spec/compiler/rules/repeat_spec.cc
index 63680563..9c84c8e5 100644
--- a/spec/compiler/rules/repeat_spec.cc
+++ b/spec/compiler/rules/repeat_spec.cc
@@ -9,7 +9,7 @@ START_TEST
 describe("Repeat", []() {
   describe("constructing repeats", [&]() {
     it("doesn't create redundant repeats", [&]() {
-      auto sym = make_shared<Symbol>(1);
+      auto sym = make_shared<Symbol>(1, Symbol::NonTerminal);
 
       auto repeat = Repeat::build(sym);
       auto outer_repeat = Repeat::build(repeat);
diff --git a/spec/fixtures/error_corpus/python_errors.txt b/spec/fixtures/error_corpus/python_errors.txt
new file mode 100644
index 00000000..7ff9f240
--- /dev/null
+++ b/spec/fixtures/error_corpus/python_errors.txt
@@ -0,0 +1,29 @@
+==========================================
+errors in if statements
+==========================================
+
+if a is:
+  print b
+  print c
+
+---
+
+(module
+  (if_statement (identifier) (ERROR)
+    (print_statement (identifier))
+    (print_statement (identifier))))
+
+==========================================
+errors in function definitions
+==========================================
+
+def a()::
+  b
+  c
+
+---
+
+(module
+  (function_definition (identifier) (parameters) (ERROR)
+    (expression_statement (identifier))
+    (expression_statement (identifier))))
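The next three fixture files implement external scanners in C. Judging by these fixtures, a scanner for a grammar named `<name>` is a set of six functions, `tree_sitter_<name>_external_scanner_{create,destroy,reset,serialize,deserialize,scan}`, matching the function-pointer slots of the `external_scanner` struct added to `TSLanguage` above. A bare-bones stateless skeleton in that shape ("example" standing in for the grammar name; this summarizes the fixtures below rather than adding anything new):

    #include <tree_sitter/parser.h>

    // Minimal stateless scanner skeleton following the fixture convention.
    extern "C" {

    void *tree_sitter_example_external_scanner_create() { return NULL; }
    void tree_sitter_example_external_scanner_destroy(void *payload) {}
    void tree_sitter_example_external_scanner_reset(void *payload) {}

    bool tree_sitter_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
      return true;  // no state to persist
    }

    void tree_sitter_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}

    bool tree_sitter_example_external_scanner_scan(void *payload, TSLexer *lexer,
                                                   const bool *whitelist) {
      return false;  // never produces a token
    }

    }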
diff --git a/spec/fixtures/external_scanners/extra_external_tokens.c b/spec/fixtures/external_scanners/extra_external_tokens.c
new file mode 100644
index 00000000..5c409639
--- /dev/null
+++ b/spec/fixtures/external_scanners/extra_external_tokens.c
@@ -0,0 +1,42 @@
+#include <tree_sitter/parser.h>
+
+enum {
+  COMMENT,
+};
+
+void *tree_sitter_extra_external_tokens_external_scanner_create() {
+  return NULL;
+}
+
+void tree_sitter_extra_external_tokens_external_scanner_reset(void *payload) {
+}
+
+bool tree_sitter_extra_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
+  return true;
+}
+
+void tree_sitter_extra_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
+}
+
+bool tree_sitter_extra_external_tokens_external_scanner_scan(
+  void *payload, TSLexer *lexer, const bool *whitelist) {
+
+  while (lexer->lookahead == ' ') {
+    lexer->advance(lexer, true);
+  }
+
+  if (lexer->lookahead == '#') {
+    lexer->advance(lexer, false);
+    while (lexer->lookahead != '\n') {
+      lexer->advance(lexer, false);
+    }
+
+    lexer->result_symbol = COMMENT;
+    return true;
+  }
+
+  return false;
+}
+
+void tree_sitter_extra_external_tokens_external_scanner_destroy(void *payload) {
+}
diff --git a/spec/fixtures/external_scanners/percent_strings.c b/spec/fixtures/external_scanners/percent_strings.c
new file mode 100644
index 00000000..9f68696e
--- /dev/null
+++ b/spec/fixtures/external_scanners/percent_strings.c
@@ -0,0 +1,118 @@
+#include <tree_sitter/parser.h>
+#include <stdlib.h>
+
+enum {
+  percent_string,
+  percent_string_start,
+  percent_string_end
+};
+
+typedef struct {
+  int32_t open_delimiter;
+  int32_t close_delimiter;
+  uint32_t depth;
+} Scanner;
+
+void *tree_sitter_external_scanner_example_external_scanner_create() {
+  Scanner *scanner = malloc(sizeof(Scanner));
+  *scanner = (Scanner){
+    .open_delimiter = 0,
+    .close_delimiter = 0,
+    .depth = 0
+  };
+  return scanner;
+}
+
+bool tree_sitter_external_scanner_example_external_scanner_scan(
+  void *payload, TSLexer *lexer, const bool *whitelist) {
+  Scanner *scanner = payload;
+
+  if (whitelist[percent_string]) {
+    while (lexer->lookahead == ' ' ||
+           lexer->lookahead == '\t' ||
+           lexer->lookahead == '\n') {
+      lexer->advance(lexer, true);
+    }
+
+    if (lexer->lookahead != '%') return false;
+    lexer->advance(lexer, false);
+
+    switch (lexer->lookahead) {
+      case '(':
+        scanner->open_delimiter = '(';
+        scanner->close_delimiter = ')';
+        scanner->depth = 1;
+        break;
+      case '[':
+        scanner->open_delimiter = '[';
+        scanner->close_delimiter = ']';
+        scanner->depth = 1;
+        break;
+      case '{':
+        scanner->open_delimiter = '{';
+        scanner->close_delimiter = '}';
+        scanner->depth = 1;
+        break;
+      default:
+        return false;
+    }
+
+    lexer->advance(lexer, false);
+
+    for (;;) {
+      if (scanner->depth == 0) {
+        lexer->result_symbol = percent_string;
+        return true;
+      }
+
+      if (lexer->lookahead == scanner->open_delimiter) {
+        scanner->depth++;
+      } else if (lexer->lookahead == scanner->close_delimiter) {
+        scanner->depth--;
+      } else if (lexer->lookahead == '#') {
+        lexer->advance(lexer, false);
+        if (lexer->lookahead == '{') {
+          lexer->advance(lexer, false);
+          lexer->result_symbol = percent_string_start;
+          return true;
+        }
+      }
+
+      lexer->advance(lexer, false);
+    }
+  } else if (whitelist[percent_string_end]) {
+    if (lexer->lookahead != '}') return false;
+    lexer->advance(lexer, false);
+
+    for (;;) {
+      if (scanner->depth == 0) {
+        lexer->result_symbol = percent_string_end;
+        return true;
+      }
+
+      if (lexer->lookahead == scanner->open_delimiter) {
+        scanner->depth++;
+      } else if (lexer->lookahead == scanner->close_delimiter) {
+        scanner->depth--;
+      }
+
+      lexer->advance(lexer, false);
+    }
+  }
+
+  return false;
+}
+
+void tree_sitter_external_scanner_example_external_scanner_reset(void *payload) {
+}
+
+bool tree_sitter_external_scanner_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
+  return true;
+}
+
+void tree_sitter_external_scanner_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
+}
+
+void tree_sitter_external_scanner_example_external_scanner_destroy(void *payload) {
+  free(payload);
+}
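Note that percent_strings.c carries state across separate scan calls (the delimiters and `depth` survive between `percent_string_start` and `percent_string_end`), yet its serialize/deserialize hooks are stubs. If that state needed to survive re-parsing, one would expect it to round-trip through the 16-byte TSExternalTokenState buffer, roughly as follows; this is a sketch under the assumption that the Scanner struct above (12 bytes) fits in the buffer, not code from this diff:

    #include <string.h>

    // Hypothetical stateful serialization for the Scanner struct above.
    bool scanner_serialize(void *payload, TSExternalTokenState state) {
      Scanner *scanner = (Scanner *)payload;
      memcpy(state, scanner, sizeof(Scanner));  // 4 + 4 + 4 bytes <= 16
      return true;
    }

    void scanner_deserialize(void *payload, TSExternalTokenState state) {
      Scanner *scanner = (Scanner *)payload;
      memcpy(scanner, state, sizeof(Scanner));
    }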
diff --git a/spec/fixtures/external_scanners/shared_external_tokens.c b/spec/fixtures/external_scanners/shared_external_tokens.c
new file mode 100644
index 00000000..0bee00d8
--- /dev/null
+++ b/spec/fixtures/external_scanners/shared_external_tokens.c
@@ -0,0 +1,63 @@
+#include <tree_sitter/parser.h>
+#include <stdlib.h>
+
+enum {
+  STRING,
+  LINE_BREAK
+};
+
+void *tree_sitter_shared_external_tokens_external_scanner_create() {
+  return NULL;
+}
+
+void tree_sitter_shared_external_tokens_external_scanner_reset(void *payload) {
+}
+
+bool tree_sitter_shared_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
+  return true;
+}
+
+void tree_sitter_shared_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
+}
+
+bool tree_sitter_shared_external_tokens_external_scanner_scan(
+  void *payload, TSLexer *lexer, const bool *whitelist) {
+
+  // If a line-break is a valid lookahead token, only skip spaces.
+  if (whitelist[LINE_BREAK]) {
+    while (lexer->lookahead == ' ') {
+      lexer->advance(lexer, true);
+    }
+
+    if (lexer->lookahead == '\n') {
+      lexer->advance(lexer, false);
+      lexer->result_symbol = LINE_BREAK;
+      return true;
+    }
+  }
+
+  // If a line-break is not a valid lookahead token, skip line breaks as well
+  // as spaces.
+  if (whitelist[STRING]) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\n') {
+      lexer->advance(lexer, true);
+    }
+
+    if (lexer->lookahead == '\'') {
+      lexer->advance(lexer, false);
+
+      while (lexer->lookahead != '\'') {
+        lexer->advance(lexer, false);
+      }
+
+      lexer->advance(lexer, false);
+      lexer->result_symbol = STRING;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void tree_sitter_shared_external_tokens_external_scanner_destroy(void *payload) {
+}
diff --git a/spec/helpers/dedent.h b/spec/helpers/dedent.h
new file mode 100644
index 00000000..1387acf9
--- /dev/null
+++ b/spec/helpers/dedent.h
@@ -0,0 +1,12 @@
+#include "compiler/util/string_helpers.h"
+#include <string>
+
+static std::string dedent(std::string input) {
+  size_t indent_level = input.find_first_not_of("\n ") - input.find_first_not_of("\n");
+  std::string whitespace = "\n" + std::string(indent_level, ' ');
+  tree_sitter::util::str_replace(&input, whitespace, "\n");
+  return input.substr(
+    input.find_first_not_of("\n "),
+    input.find_last_not_of("\n ") + 1
+  );
+}
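The new dedent helper strips the common leading indentation that raw string literals pick up inside the specs. An illustrative usage, assuming `util::str_replace` replaces every occurrence (a trailing newline survives because the final `substr` length is computed from the last non-blank index):

    #include <cassert>
    #include <string>
    #include "spec/helpers/dedent.h"  // path assumed relative to the repo root

    int main() {
      std::string raw = R"(
        (module
          (comment))
    )";
      // The common four-space indent is removed from every line.
      assert(dedent(raw) == "(module\n  (comment))\n");
    }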
diff --git a/spec/helpers/load_language.cc b/spec/helpers/load_language.cc
index 9409da42..c59eca95 100644
--- a/spec/helpers/load_language.cc
+++ b/spec/helpers/load_language.cc
@@ -28,10 +28,11 @@
 const char *libcompiler_path = "out/Test/libcompiler.a";
 #endif
 
-static std::string run_cmd(const char *cmd, const char *args[]) {
+static std::string run_command(const char *cmd, const char *args[]) {
   int child_pid = fork();
-  if (child_pid < 0)
+  if (child_pid < 0) {
     return "fork failed";
+  }
 
   if (child_pid == 0) {
     close(0);
@@ -39,7 +40,6 @@ static std::string run_command(const char *cmd, const char *args[]) {
     dup2(2, 1);
     dup2(1, 2);
     execvp(cmd, (char * const * )args);
-    return "";
   }
 
   int status;
@@ -47,12 +47,16 @@ static std::string run_command(const char *cmd, const char *args[]) {
     waitpid(child_pid, &status, 0);
   } while (!WIFEXITED(status));
 
-  if (WEXITSTATUS(status) == 0)
+  if (WEXITSTATUS(status) == 0) {
     return "";
-  else
+  } else {
     return "command failed";
+  }
+}
 
-  return "";
+static bool file_exists(const string &path) {
+  struct stat file_stat;
+  return stat(path.c_str(), &file_stat) == 0;
 }
 
 static int get_modified_time(const string &path) {
@@ -67,46 +71,46 @@
 const TSLanguage *load_language(const string &source_filename,
                                 const string &lib_filename,
-                                const string &language_name) {
-  string language_function_name = "ts_language_" + language_name;
+                                const string &language_name,
+                                string external_scanner_filename = "") {
+  string language_function_name = "tree_sitter_" + language_name;
   string header_dir = getenv("PWD") + string("/include");
   int source_mtime = get_modified_time(source_filename);
   int header_mtime = get_modified_time(header_dir + "/tree_sitter/parser.h");
   int lib_mtime = get_modified_time(lib_filename);
+  int external_scanner_mtime = get_modified_time(external_scanner_filename);
 
-  if (!header_mtime || lib_mtime < header_mtime || lib_mtime < source_mtime) {
-    string obj_filename = lib_filename + ".o";
-    const char *compiler_name = getenv("CC");
-    if (!compiler_name) {
-      compiler_name = "gcc";
-    }
+  if (!header_mtime || lib_mtime < header_mtime || lib_mtime < source_mtime ||
+      lib_mtime < external_scanner_mtime) {
+    const char *compiler_name = getenv("CXX");
+    if (!compiler_name) compiler_name = "c++";
 
-    const char *compile_argv[] = {
-      compiler_name,
-      "-x", "c",
-      "-fPIC",
-      "-g",
-      "-I", header_dir.c_str(),
-      "-c", source_filename.c_str(),
-      "-o", obj_filename.c_str(),
-      NULL
-    };
-    string compile_error = run_cmd("gcc", compile_argv);
-    if (!compile_error.empty()) {
-      AssertThat(string(compile_error), IsEmpty());
-      return nullptr;
-    }
-
-    const char *link_argv[] = {
+    vector<const char *> compile_args = {
       compiler_name,
       "-shared",
-      "-Wl", obj_filename.c_str(),
+      "-fPIC",
+      "-I", header_dir.c_str(),
       "-o", lib_filename.c_str(),
-      NULL
+      "-x", "c",
+      source_filename.c_str()
     };
-    string link_error = run_cmd("gcc", link_argv);
-    if (!link_error.empty()) {
-      AssertThat(link_error, IsEmpty());
+
+    if (!external_scanner_filename.empty()) {
+      compile_args.push_back("-g");
+      string extension = external_scanner_filename.substr(external_scanner_filename.rfind("."));
+      if (extension == ".c") {
+        compile_args.push_back("-xc");
+      } else {
+        compile_args.push_back("-xc++");
+      }
+      compile_args.push_back(external_scanner_filename.c_str());
+    }
+
+    compile_args.push_back(nullptr);
+
+    string compile_error = run_command(compiler_name, compile_args.data());
+    if (!compile_error.empty()) {
+      AssertThat(string(compile_error), IsEmpty());
       return nullptr;
     }
   }
@@ -118,19 +122,19 @@ const TSLanguage *load_language(const string &source_filename,
     return nullptr;
   }
 
-  void *symbol_value = dlsym(parser_lib, language_function_name.c_str());
-  if (!symbol_value) {
+  void *language_function = dlsym(parser_lib, language_function_name.c_str());
+  if (!language_function) {
     std::string message(dlerror());
     AssertThat(message, IsEmpty());
     return nullptr;
   }
 
-  typedef TSLanguage * (* LanguageFunction)();
-  LanguageFunction language_fn = reinterpret_cast<LanguageFunction>(symbol_value);
-  return language_fn();
+  return reinterpret_cast<TSLanguage *(*)()>(language_function)();
 }
 
-const TSLanguage *load_compile_result(const string &name, const TSCompileResult &compile_result) {
+const TSLanguage *load_compile_result(const string &name,
+                                      const TSCompileResult &compile_result,
+                                      string external_scanner_path) {
   if (compile_result.error_type != TSCompileErrorTypeNone) {
     Assert::Failure(string("Compilation failed ") + compile_result.error_message);
     return nullptr;
   }
@@ -146,7 +150,7 @@ const TSLanguage *load_compile_result(const string &name,
   source_file << compile_result.code;
   source_file.close();
 
-  const TSLanguage *language = load_language(source_filename, lib_filename, name);
+  auto language = load_language(source_filename, lib_filename, name, external_scanner_path);
   free(compile_result.code);
   return language;
 }
@@ -158,6 +162,10 @@ const TSLanguage *get_test_language(const string &language_name) {
   string language_dir = string("spec/fixtures/grammars/") + language_name;
   string grammar_filename = language_dir + "/src/grammar.json";
   string parser_filename = language_dir + "/src/parser.c";
+  string external_scanner_filename = language_dir + "/src/scanner.cc";
+  if (!file_exists(external_scanner_filename)) {
+    external_scanner_filename = "";
+  }
 
   int grammar_mtime = get_modified_time(grammar_filename);
   if (!grammar_mtime)
@@ -192,7 +200,7 @@ const TSLanguage *get_test_language(const string &language_name) {
 
   mkdir("out/tmp", 0777);
   string lib_filename = "out/tmp/" + language_name + ".so";
-  const TSLanguage *language = load_language(parser_filename, lib_filename, language_name);
+  const TSLanguage *language = load_language(parser_filename, lib_filename, language_name, external_scanner_filename);
   loaded_languages[language_name] = language;
   return language;
 };
diff --git a/spec/helpers/load_language.h b/spec/helpers/load_language.h
index 41b1458e..41d8b739 100644
--- a/spec/helpers/load_language.h
+++ b/spec/helpers/load_language.h
@@ -5,7 +5,8 @@
 #include "tree_sitter/runtime.h"
 #include <string>
 
-const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &);
+const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &,
+                                      std::string external_scanner_path = "");
 const TSLanguage *get_test_language(const std::string &language_name);
 
 #endif  // HELPERS_LOAD_LANGUAGE_H_
diff --git a/spec/helpers/point_helpers.cc b/spec/helpers/point_helpers.cc
index e9c99259..60f4f9a7 100644
--- a/spec/helpers/point_helpers.cc
+++ b/spec/helpers/point_helpers.cc
@@ -15,7 +15,9 @@ bool operator==(const TSRange &left, const TSRange &right) {
 }
 
 bool operator==(const Length &left, const Length &right) {
-  return length_eq(left, right);
+  return left.bytes == right.bytes &&
+    left.chars == right.chars &&
+    left.extent == right.extent;
 }
 
 bool operator<(const TSPoint &left, const TSPoint &right) {
diff --git a/spec/helpers/rule_helpers.cc b/spec/helpers/rule_helpers.cc
index 8bf32360..0b010d2e 100644
--- a/spec/helpers/rule_helpers.cc
+++ b/spec/helpers/rule_helpers.cc
@@ -9,6 +9,7 @@ namespace tree_sitter {
   using std::ostream;
   using std::string;
   using std::to_string;
+  using rules::Symbol;
 
   rule_ptr character(const set<uint32_t> &ranges) {
     return character(ranges, true);
   }
@@ -28,11 +29,11 @@ namespace tree_sitter {
   }
 
   rule_ptr i_sym(size_t index) {
-    return make_shared<Symbol>(index);
+    return make_shared<Symbol>(index, Symbol::NonTerminal);
   }
 
   rule_ptr i_token(size_t index) {
-    return make_shared<Symbol>(index, true);
+    return make_shared<Symbol>(index, Symbol::Terminal);
   }
 
   rule_ptr metadata(rule_ptr rule, rules::MetadataParams params) {
diff --git a/spec/helpers/scope_sequence.cc b/spec/helpers/scope_sequence.cc
index 87e059dc..d6e2e3b1 100644
--- a/spec/helpers/scope_sequence.cc
+++ b/spec/helpers/scope_sequence.cc
@@ -23,20 +23,21 @@ static void append_to_scope_sequence(ScopeSequence *sequence, ScopeStack *current_scopes,
                                      TSNode node, TSDocument *document,
                                      const std::string &text) {
-  append_text_to_scope_sequence(sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size());
+  append_text_to_scope_sequence(
+    sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size()
+  );
 
-  string scope = ts_node_type(node, document);
-  current_scopes->push_back(scope);
-  size_t child_count = ts_node_child_count(node);
-  if (child_count > 0) {
-    for (size_t i = 0; i < child_count; i++) {
-      TSNode child = ts_node_child(node, i);
-      append_to_scope_sequence(sequence, current_scopes, child, document, text);
-    }
-  } else {
-    size_t length = ts_node_end_byte(node) - ts_node_start_byte(node);
-    append_text_to_scope_sequence(sequence, current_scopes, text, length);
+  current_scopes->push_back(ts_node_type(node, document));
+
+  for (size_t i = 0, n = ts_node_child_count(node); i < n; i++) {
+    TSNode child = ts_node_child(node, i);
+    append_to_scope_sequence(sequence, current_scopes, child, document, text);
   }
+
+  append_text_to_scope_sequence(
+    sequence, current_scopes, text, ts_node_end_byte(node) - sequence->size()
+  );
+
   current_scopes->pop_back();
 }
diff --git a/spec/helpers/stream_methods.cc b/spec/helpers/stream_methods.cc
index 4d411d66..a4b275ea 100644
--- a/spec/helpers/stream_methods.cc
+++ b/spec/helpers/stream_methods.cc
@@ -10,16 +10,7 @@ namespace tree_sitter {
 
 ostream &operator<<(ostream &stream, const Grammar &grammar) {
   stream << string("# ");
-    stream << pair.second;
-    started = true;
-  }
+  stream << " rules: " << grammar.rules;
   return stream << string("}>");
 }
@@ -85,6 +76,11 @@ ostream &operator<<(ostream &stream, const ParseState &state) {
   return stream << string(">");
 }
 
+ostream &operator<<(ostream &stream, const ExternalToken &external_token) {
+  return stream << "{" << external_token.name << ", " << external_token.type <<
+    ", " << external_token.corresponding_internal_token << "}";
+}
+
 ostream &operator<<(ostream &stream, const ProductionStep &step) {
   stream << "(symbol: " << step.symbol << ", precedence:" << to_string(step.precedence);
   stream << ", associativity: ";
diff --git a/spec/helpers/stream_methods.h b/spec/helpers/stream_methods.h
index 515060eb..28b201c3 100644
--- a/spec/helpers/stream_methods.h
+++ b/spec/helpers/stream_methods.h
@@ -97,6 +97,7 @@ struct AdvanceAction;
 struct AcceptTokenAction;
 class ParseAction;
 class ParseState;
+struct ExternalToken;
 struct ProductionStep;
 struct PrecedenceRange;
 
@@ -110,6 +111,7 @@ ostream &operator<<(ostream &, const AdvanceAction &);
 ostream &operator<<(ostream &, const AcceptTokenAction &);
 ostream &operator<<(ostream &, const ParseAction &);
 ostream &operator<<(ostream &, const ParseState &);
+ostream &operator<<(ostream &, const ExternalToken &);
 ostream &operator<<(ostream &, const ProductionStep &);
 ostream &operator<<(ostream &, const PrecedenceRange &);
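');">
Because TSLanguage now carries a `version` field and the runtime exposes `ts_language_version` (see runtime.h above), an embedder can refuse a parser that was generated against an incompatible parser.h. This is the check the document_spec test at the end of this diff exercises; a minimal sketch of the guard, using only APIs introduced in this diff:

    #include <tree_sitter/runtime.h>

    // Reject languages built against a different TREE_SITTER_LANGUAGE_VERSION.
    bool language_is_compatible(const TSLanguage *language) {
      return ts_language_version(language) == TREE_SITTER_LANGUAGE_VERSION;
    }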
"spec/fixtures/external_scanners/percent_strings.c" + )); + + ts_document_set_input_string(document, "x + %(sup (external) scanner?)"); + ts_document_parse(document); + assert_root_node("(expression (sum (expression (identifier)) (expression (string))))"); + + ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}"); + ts_document_parse(document); + assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))"); + }); + + it("allows external scanners to refer to tokens that are defined internally", [&]() { + string grammar = R"JSON({ + "name": "shared_external_tokens", + + "externals": [ + "string", + "line_break" + ], + + "extras": [ + {"type": "PATTERN", "value": "\\s"} + ], + + "rules": { + "statement": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "_expression"}, + {"type": "SYMBOL", "name": "_expression"}, + {"type": "SYMBOL", "name": "line_break"} + ] + }, + + "_expression": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "string"}, + {"type": "SYMBOL", "name": "variable"}, + {"type": "SYMBOL", "name": "number"} + ] + }, + + "variable": {"type": "PATTERN", "value": "\\a+"}, + "number": {"type": "PATTERN", "value": "\\d+"}, + "line_break": {"type": "STRING", "value": "\n"} + } + })JSON"; + + TSCompileResult result = ts_compile_grammar(grammar.c_str()); + AssertThat(result.error_message, IsNull()); + + ts_document_set_language(document, load_compile_result( + "shared_external_tokens", + result, + "spec/fixtures/external_scanners/shared_external_tokens.c" + )); + + ts_document_set_input_string(document, "a b\n"); + ts_document_parse(document); + assert_root_node("(statement (variable) (variable) (line_break))"); + + ts_document_set_input_string(document, "a \nb\n"); + ts_document_parse(document); + assert_root_node("(statement (variable) (variable) (line_break))"); + + ts_document_set_input_string(document, "'hello' 'world'\n"); + ts_document_parse(document); + assert_root_node("(statement (string) (string) (line_break))"); + + ts_document_set_input_string(document, "'hello' \n'world'\n"); + ts_document_parse(document); + assert_root_node("(statement (string) (string) (line_break))"); + }); + + it("allows external tokens to be used as extras", [&]() { + string grammar = R"JSON({ + "name": "extra_external_tokens", + + "externals": [ + "comment" + ], + + "extras": [ + {"type": "PATTERN", "value": "\\s"}, + {"type": "SYMBOL", "name": "comment"} + ], + + "rules": { + "assignment": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "variable"}, + {"type": "STRING", "value": "="}, + {"type": "SYMBOL", "name": "variable"} + ] + }, + + "variable": {"type": "PATTERN", "value": "\\a+"} + } + })JSON"; + + TSCompileResult result = ts_compile_grammar(grammar.c_str()); + AssertThat(result.error_message, IsNull()); + + ts_document_set_language(document, load_compile_result( + "extra_external_tokens", + result, + "spec/fixtures/external_scanners/extra_external_tokens.c" + )); + + ts_document_set_input_string(document, "x = # a comment\n y"); + ts_document_parse(document); + assert_root_node("(assignment (variable) (comment) (variable))"); + }); + }); + describe("when the grammar's start symbol is a token", [&]() { it("parses the token", [&]() { TSCompileResult result = ts_compile_grammar(R"JSON( diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index 9d716ed1..c399e8f9 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -84,6 
+84,7 @@ describe("The Corpus", []() { "json", "c", "cpp", + "python", }); for (auto &language_name : test_languages) { diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 0fb7a640..52e65ffb 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -5,6 +5,7 @@ #include "helpers/tree_helpers.h" #include "helpers/point_helpers.h" #include "helpers/spy_logger.h" +#include "helpers/stderr_logger.h" #include "helpers/spy_input.h" #include "helpers/load_language.h" @@ -15,22 +16,22 @@ TSPoint point(size_t row, size_t column) { START_TEST describe("Document", [&]() { - TSDocument *doc; + TSDocument *document; TSNode root; before_each([&]() { record_alloc::start(); - doc = ts_document_new(); + document = ts_document_new(); }); after_each([&]() { - ts_document_free(doc); + ts_document_free(document); record_alloc::stop(); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); auto assert_node_string_equals = [&](TSNode node, const string &expected) { - char *str = ts_node_string(node, doc); + char *str = ts_node_string(node, document); string actual(str); ts_free(str); AssertThat(actual, Equals(expected)); @@ -42,11 +43,11 @@ describe("Document", [&]() { before_each([&]() { spy_input = new SpyInput("{\"key\": [null, 2]}", 3); - ts_document_set_language(doc, get_test_language("json")); - ts_document_set_input_string(doc, "{\"key\": [1, 2]}"); - ts_document_parse(doc); + ts_document_set_language(document, get_test_language("json")); + ts_document_set_input_string(document, "{\"key\": [1, 2]}"); + ts_document_parse(document); - root = ts_document_root_node(doc); + root = ts_document_root_node(document); assert_node_string_equals( root, "(object (pair (string) (array (number) (number))))"); @@ -61,11 +62,11 @@ describe("Document", [&]() { spy_input->content = string((const char *)content, sizeof(content)); spy_input->encoding = TSInputEncodingUTF16; - ts_document_set_input(doc, spy_input->input()); - ts_document_invalidate(doc); - ts_document_parse(doc); + ts_document_set_input(document, spy_input->input()); + ts_document_invalidate(document); + ts_document_parse(document); - root = ts_document_root_node(doc); + root = ts_document_root_node(document); assert_node_string_equals( root, "(array (true) (false))"); @@ -77,27 +78,27 @@ describe("Document", [&]() { spy_input->encoding = TSInputEncodingUTF16; // spy_input->measure_columns_in_bytes - ts_document_set_input(doc, spy_input->input()); - ts_document_invalidate(doc); - ts_document_parse(doc); + ts_document_set_input(document, spy_input->input()); + ts_document_invalidate(document); + ts_document_parse(document); }); it("allows the input to be retrieved later", [&]() { - ts_document_set_input(doc, spy_input->input()); - AssertThat(ts_document_input(doc).payload, Equals(spy_input)); - AssertThat(ts_document_input(doc).read, Equals(spy_input->input().read)); - AssertThat(ts_document_input(doc).seek, Equals(spy_input->input().seek)); + ts_document_set_input(document, spy_input->input()); + AssertThat(ts_document_input(document).payload, Equals(spy_input)); + AssertThat(ts_document_input(document).read, Equals(spy_input->input().read)); + AssertThat(ts_document_input(document).seek, Equals(spy_input->input().seek)); }); it("does not assume that the document's text has changed", [&]() { - ts_document_set_input(doc, spy_input->input()); - AssertThat(ts_document_root_node(doc), Equals(root)); + ts_document_set_input(document, spy_input->input()); + 
+      AssertThat(ts_document_root_node(document), Equals(root));
       AssertThat(ts_node_has_changes(root), IsFalse());
       AssertThat(spy_input->strings_read, Equals(vector<string>({ "" })));
     });
 
     it("reads text from the new input for future parses", [&]() {
-      ts_document_set_input(doc, spy_input->input());
+      ts_document_set_input(document, spy_input->input());
 
       // Insert 'null', delete '1'.
       TSInputEdit edit = {};
@@ -105,28 +106,28 @@ describe("Document", [&]() {
       edit.extent_added.column = edit.bytes_added = 4;
       edit.extent_removed.column = edit.bytes_removed = 1;
 
-      ts_document_edit(doc, edit);
-      ts_document_parse(doc);
+      ts_document_edit(document, edit);
+      ts_document_parse(document);
 
-      TSNode new_root = ts_document_root_node(doc);
+      TSNode new_root = ts_document_root_node(document);
       assert_node_string_equals(
         new_root,
         "(object (pair (string) (array (null) (number))))");
 
-      AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2"})));
+      AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2" })));
     });
 
     it("reads from the new input correctly when the old input was blank", [&]() {
-      ts_document_set_input_string(doc, "");
-      ts_document_parse(doc);
-      TSNode new_root = ts_document_root_node(doc);
+      ts_document_set_input_string(document, "");
+      ts_document_parse(document);
+      TSNode new_root = ts_document_root_node(document);
       AssertThat(ts_node_end_char(new_root), Equals(0));
       assert_node_string_equals(
         new_root,
         "(ERROR)");
 
-      ts_document_set_input_string(doc, "1");
-      ts_document_parse(doc);
-      new_root = ts_document_root_node(doc);
+      ts_document_set_input_string(document, "1");
+      ts_document_parse(document);
+      new_root = ts_document_root_node(document);
       AssertThat(ts_node_end_char(new_root), Equals(1));
       assert_node_string_equals(
         new_root,
@@ -136,33 +137,44 @@ describe("Document", [&]() {
 
   describe("set_language(language)", [&]() {
     before_each([&]() {
-      ts_document_set_input_string(doc, "{\"key\": [1, 2]}\n");
+      ts_document_set_input_string(document, "{\"key\": [1, 2]}\n");
     });
 
     it("uses the given language for future parses", [&]() {
-      ts_document_set_language(doc, get_test_language("json"));
-      ts_document_parse(doc);
+      ts_document_set_language(document, get_test_language("json"));
+      ts_document_parse(document);
 
-      root = ts_document_root_node(doc);
+      root = ts_document_root_node(document);
       assert_node_string_equals(
         root,
         "(object (pair (string) (array (number) (number))))");
     });
 
     it("clears out any previous tree", [&]() {
-      ts_document_set_language(doc, get_test_language("json"));
-      ts_document_parse(doc);
+      ts_document_set_language(document, get_test_language("json"));
+      ts_document_parse(document);
 
-      ts_document_set_language(doc, get_test_language("javascript"));
-      AssertThat(ts_document_root_node(doc).data, Equals(nullptr));
+      ts_document_set_language(document, get_test_language("javascript"));
+      AssertThat(ts_document_root_node(document).data, Equals(nullptr));
 
-      ts_document_parse(doc);
-      root = ts_document_root_node(doc);
+      ts_document_parse(document);
+      root = ts_document_root_node(document);
       assert_node_string_equals(
         root,
         "(program (expression_statement "
         "(object (pair (string) (array (number) (number))))))");
     });
+
+    it("does not allow setting a language with a different version number", [&]() {
+      TSLanguage language = *get_test_language("json");
+      AssertThat(ts_language_version(&language), Equals(TREE_SITTER_LANGUAGE_VERSION));
+
+      language.version++;
+      AssertThat(ts_language_version(&language), !Equals(TREE_SITTER_LANGUAGE_VERSION));
+
+      ts_document_set_language(document, &language);
+      AssertThat(ts_document_language(document), IsNull());
+    });
   });
 
   describe("set_logger(TSLogger)", [&]() {
@@ -170,45 +182,39 @@ describe("Document", [&]() {
 
     before_each([&]() {
       logger = new SpyLogger();
-      ts_document_set_language(doc, get_test_language("json"));
-      ts_document_set_input_string(doc, "[1, 2]");
+      ts_document_set_language(document, get_test_language("json"));
+      ts_document_set_input_string(document, "[1, 2]");
     });
 
     after_each([&]() {
       delete logger;
     });
 
-    it("calls the debugger with a message for each lex action", [&]() {
-      ts_document_set_logger(doc, logger->logger());
-      ts_document_parse(doc);
-
-      AssertThat(logger->messages, Contains("lookahead char:'1'"));
-      AssertThat(logger->messages, Contains("lookahead char:'['"));
-    });
-
     it("calls the debugger with a message for each parse action", [&]() {
-      ts_document_set_logger(doc, logger->logger());
-      ts_document_parse(doc);
+      ts_document_set_logger(document, logger->logger());
+      ts_document_parse(document);
 
       AssertThat(logger->messages, Contains("new_parse"));
-      AssertThat(logger->messages, Contains("lookahead char:'['"));
+      AssertThat(logger->messages, Contains("skip character:' '"));
+      AssertThat(logger->messages, Contains("consume character:'['"));
+      AssertThat(logger->messages, Contains("consume character:'1'"));
       AssertThat(logger->messages, Contains("reduce sym:array, child_count:4"));
       AssertThat(logger->messages, Contains("accept"));
     });
 
     it("allows the debugger to be retrieved later", [&]() {
-      ts_document_set_logger(doc, logger->logger());
-      AssertThat(ts_document_logger(doc).payload, Equals(logger));
+      ts_document_set_logger(document, logger->logger());
+      AssertThat(ts_document_logger(document).payload, Equals(logger));
    });
 
     describe("disabling debugging", [&]() {
       before_each([&]() {
-        ts_document_set_logger(doc, logger->logger());
-        ts_document_set_logger(doc, {NULL, NULL});
+        ts_document_set_logger(document, logger->logger());
+        ts_document_set_logger(document, {NULL, NULL});
       });
 
       it("does not call the debugger any more", [&]() {
-        ts_document_parse(doc);
+        ts_document_parse(document);
         AssertThat(logger->messages, IsEmpty());
       });
     });
@@ -218,12 +224,12 @@ describe("Document", [&]() {
     SpyInput *input;
 
     before_each([&]() {
-      ts_document_set_language(doc, get_test_language("javascript"));
+      ts_document_set_language(document, get_test_language("javascript"));
       input = new SpyInput("{a: null};", 3);
-      ts_document_set_input(doc, input->input());
-      ts_document_parse(doc);
+      ts_document_set_input(document, input->input());
+      ts_document_parse(document);
       assert_node_string_equals(
-        ts_document_root_node(doc),
+        ts_document_root_node(document),
         "(program (expression_statement (object (pair (identifier) (null)))))");
     });
 
@@ -231,26 +237,25 @@ describe("Document", [&]() {
       delete input;
     });
 
-    auto get_ranges = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
+    auto get_invalidated_ranges_for_edit = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
       TSInputEdit edit = callback();
-      ts_document_edit(doc, edit);
+      ts_document_edit(document, edit);
 
       TSRange *ranges;
       uint32_t range_count = 0;
-
-      ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count);
+      ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
 
       vector<TSRange> result;
-      for (size_t i = 0; i < range_count; i++)
+      for (size_t i = 0; i < range_count; i++) {
         result.push_back(ranges[i]);
+      }
 
       ts_free(ranges);
-
       return result;
     };
 
     it("reports changes when one token has been updated", [&]() {
       // Replace `null` with `nothing`
-      auto ranges = get_ranges([&]() {
+      auto ranges = get_invalidated_ranges_for_edit([&]() {
         return input->replace(input->content.find("ull"), 1, "othing");
       });
 
@@ -262,7 +267,7 @@ describe("Document", [&]() {
       })));
 
       // Replace `nothing` with `null` again
-      ranges = get_ranges([&]() {
+      ranges = get_invalidated_ranges_for_edit([&]() {
         return input->undo();
       });
 
@@ -276,7 +281,7 @@ describe("Document", [&]() {
 
     it("reports changes when tokens have been appended", [&]() {
       // Add a second key-value pair
-      auto ranges = get_ranges([&]() {
+      auto ranges = get_invalidated_ranges_for_edit([&]() {
         return input->replace(input->content.find("}"), 0, ", b: false");
       });
 
@@ -288,12 +293,12 @@ describe("Document", [&]() {
       })));
 
       // Add a third key-value pair in between the first two
-      ranges = get_ranges([&]() {
+      ranges = get_invalidated_ranges_for_edit([&]() {
         return input->replace(input->content.find(", b"), 0, ", c: 1");
       });
 
       assert_node_string_equals(
-        ts_document_root_node(doc),
+        ts_document_root_node(document),
         "(program (expression_statement (object "
         "(pair (identifier) (null)) "
         "(pair (identifier) (number)) "
@@ -307,41 +312,39 @@ describe("Document", [&]() {
       })));
 
       // Delete the middle pair.
-      ranges = get_ranges([&]() {
+      ranges = get_invalidated_ranges_for_edit([&]() {
        return input->undo();
       });
 
       assert_node_string_equals(
-        ts_document_root_node(doc),
+        ts_document_root_node(document),
         "(program (expression_statement (object "
         "(pair (identifier) (null)) "
         "(pair (identifier) (false)))))");
 
-      AssertThat(ranges, Equals(vector<TSRange>({
-      })));
+      AssertThat(ranges, IsEmpty());
 
       // Delete the second pair.
-      ranges = get_ranges([&]() {
+      ranges = get_invalidated_ranges_for_edit([&]() {
         return input->undo();
       });
 
      assert_node_string_equals(
-        ts_document_root_node(doc),
+        ts_document_root_node(document),
         "(program (expression_statement (object "
         "(pair (identifier) (null)))))");
 
-      AssertThat(ranges, Equals(vector<TSRange>({
-      })));
+      AssertThat(ranges, IsEmpty());
     });
 
     it("reports changes when trees have been wrapped", [&]() {
       // Wrap the object in an assignment expression.
-      auto ranges = get_ranges([&]() {
+      auto ranges = get_invalidated_ranges_for_edit([&]() {
         return input->replace(input->content.find("null"), 0, "b === ");
       });
 
       assert_node_string_equals(
-        ts_document_root_node(doc),
+        ts_document_root_node(document),
         "(program (expression_statement (object "
         "(pair (identifier) (rel_op (identifier) (null))))))");
 
diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc
index a14fa68e..88633f1f 100644
--- a/spec/runtime/parser_spec.cc
+++ b/spec/runtime/parser_spec.cc
@@ -4,11 +4,13 @@
 #include "helpers/spy_input.h"
 #include "helpers/load_language.h"
 #include "helpers/record_alloc.h"
+#include "helpers/stderr_logger.h"
+#include "helpers/dedent.h"
 
 START_TEST
 
 describe("Parser", [&]() {
-  TSDocument *doc;
+  TSDocument *document;
   SpyInput *input;
   TSNode root;
   size_t chunk_size;
@@ -18,90 +20,76 @@ describe("Parser", [&]() {
     chunk_size = 3;
     input = nullptr;
-
-    doc = ts_document_new();
+    document = ts_document_new();
   });
 
   after_each([&]() {
-    if (doc)
-      ts_document_free(doc);
-
-    if (input)
-      delete input;
+    if (document) ts_document_free(document);
+    if (input) delete input;
 
     record_alloc::stop();
     AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
   });
 
-  auto set_text = [&](const char *text) {
+  auto set_text = [&](string text) {
     input = new SpyInput(text, chunk_size);
-    ts_document_set_input(doc, input->input());
-    ts_document_parse(doc);
+    ts_document_set_input(document, input->input());
+    ts_document_parse(document);
 
-    root = ts_document_root_node(doc);
-    AssertThat(ts_node_end_byte(root), Equals(strlen(text)));
+    root = ts_document_root_node(document);
+    AssertThat(ts_node_end_byte(root), Equals(text.size()));
     input->clear();
   };
 
-  auto insert_text = [&](size_t position, string text) {
-    size_t prev_size = ts_node_end_byte(root);
-    ts_document_edit(doc, input->replace(position, 0, text));
-    ts_document_parse(doc);
-
-    root = ts_document_root_node(doc);
-    size_t new_size = ts_node_end_byte(root);
-    AssertThat(new_size, Equals(prev_size + text.size()));
-  };
-
-  auto delete_text = [&](size_t position, size_t length) {
-    size_t prev_size = ts_node_end_byte(root);
-    ts_document_edit(doc, input->replace(position, length, ""));
-    ts_document_parse(doc);
-
-    root = ts_document_root_node(doc);
-    size_t new_size = ts_node_end_byte(root);
-    AssertThat(new_size, Equals(prev_size - length));
-  };
-
   auto replace_text = [&](size_t position, size_t length, string new_text) {
     size_t prev_size = ts_node_end_byte(root);
-    ts_document_edit(doc, input->replace(position, length, new_text));
-    ts_document_parse(doc);
+    ts_document_edit(document, input->replace(position, length, new_text));
+    ts_document_parse(document);
 
-    root = ts_document_root_node(doc);
+    root = ts_document_root_node(document);
     size_t new_size = ts_node_end_byte(root);
     AssertThat(new_size, Equals(prev_size - length + new_text.size()));
   };
 
+  auto insert_text = [&](size_t position, string text) {
+    replace_text(position, 0, text);
+  };
+
+  auto delete_text = [&](size_t position, size_t length) {
+    replace_text(position, length, "");
+  };
+
+  auto undo = [&]() {
+    ts_document_edit(document, input->undo());
+    ts_document_parse(document);
+  };
+
   auto assert_root_node = [&](const string &expected) {
-    TSNode node = ts_document_root_node(doc);
-    char *str = ts_node_string(node, doc);
-    string actual(str);
-    ts_free(str);
+    TSNode node = ts_document_root_node(document);
+    char *node_string = ts_node_string(node, document);
+    string actual(node_string);
+    ts_free(node_string);
     AssertThat(actual, Equals(expected));
   };
 
+  auto get_node_text = [&](TSNode node) {
+    size_t start = ts_node_start_byte(node);
+    size_t end = ts_node_end_byte(node);
+    return input->content.substr(start, end - start);
+  };
+
   describe("handling errors", [&]() {
-    before_each([&]() {
-      ts_document_set_language(doc, get_test_language("json"));
-    });
-
-    auto get_node_text = [&](TSNode node) {
-      size_t start = ts_node_start_byte(node);
-      size_t end = ts_node_end_byte(node);
-      return input->content.substr(start, end - start);
-    };
-
     describe("when there is an invalid substring right before a valid token", [&]() {
       it("computes the error node's size and position correctly", [&]() {
+        ts_document_set_language(document, get_test_language("json"));
         set_text(" [123, @@@@@, true]");
 
         assert_root_node(
          "(array (number) (ERROR (UNEXPECTED '@')) (true))");
 
         TSNode error = ts_node_named_child(root, 1);
-        AssertThat(ts_node_type(error, doc), Equals("ERROR"));
+        AssertThat(ts_node_type(error, document), Equals("ERROR"));
         AssertThat(get_node_text(error), Equals(", @@@@@"));
         AssertThat(ts_node_child_count(error), Equals(2));
 
@@ -112,56 +100,59 @@ describe("Parser", [&]() {
         AssertThat(get_node_text(garbage), Equals("@@@@@"));
 
         TSNode node_after_error = ts_node_named_child(root, 2);
-        AssertThat(ts_node_type(node_after_error, doc), Equals("true"));
+        AssertThat(ts_node_type(node_after_error, document), Equals("true"));
         AssertThat(get_node_text(node_after_error), Equals("true"));
       });
     });
 
     describe("when there is an unexpected string in the middle of a token", [&]() {
       it("computes the error node's size and position correctly", [&]() {
+        ts_document_set_language(document, get_test_language("json"));
        set_text(" [123, faaaaalse, true]");
 
         assert_root_node(
           "(array (number) (ERROR (UNEXPECTED 'a')) (true))");
 
         TSNode error = ts_node_named_child(root, 1);
-        AssertThat(ts_node_type(error, doc), Equals("ERROR"));
+        AssertThat(ts_node_type(error, document), Equals("ERROR"));
         AssertThat(ts_node_child_count(error), Equals(2));
 
         TSNode comma = ts_node_child(error, 0);
-        AssertThat(ts_node_type(comma, doc), Equals(","));
+        AssertThat(ts_node_type(comma, document), Equals(","));
         AssertThat(get_node_text(comma), Equals(","));
 
         TSNode garbage = ts_node_child(error, 1);
-        AssertThat(ts_node_type(garbage, doc), Equals("ERROR"));
+        AssertThat(ts_node_type(garbage, document), Equals("ERROR"));
         AssertThat(get_node_text(garbage), Equals("faaaaalse"));
 
         TSNode last = ts_node_named_child(root, 2);
-        AssertThat(ts_node_type(last, doc), Equals("true"));
+        AssertThat(ts_node_type(last, document), Equals("true"));
         AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, faaaaalse, ")));
       });
     });
 
     describe("when there is one unexpected token between two valid tokens", [&]() {
       it("computes the error node's size and position correctly", [&]() {
+        ts_document_set_language(document, get_test_language("json"));
         set_text(" [123, true false, true]");
 
         assert_root_node(
           "(array (number) (true) (ERROR (false)) (true))");
 
         TSNode error = ts_node_named_child(root, 2);
-        AssertThat(ts_node_type(error, doc), Equals("ERROR"));
+        AssertThat(ts_node_type(error, document), Equals("ERROR"));
         AssertThat(get_node_text(error), Equals("false"));
         AssertThat(ts_node_child_count(error), Equals(1));
 
         TSNode last = ts_node_named_child(root, 1);
-        AssertThat(ts_node_type(last, doc), Equals("true"));
+        AssertThat(ts_node_type(last, document), Equals("true"));
         AssertThat(get_node_text(last), Equals("true"));
       });
     });
 
     describe("when there is an unexpected string at the end of a token", [&]() {
token", [&]() { it("computes the error's size and position correctly", [&]() { + ts_document_set_language(document, get_test_language("json")); set_text(" [123, \"hi\n, true]"); assert_root_node( @@ -171,7 +162,7 @@ describe("Parser", [&]() { describe("when there is an unterminated error", [&]() { it("maintains a consistent tree", [&]() { - ts_document_set_language(doc, get_test_language("javascript")); + ts_document_set_language(document, get_test_language("javascript")); set_text("a; /* b"); assert_root_node( "(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))"); @@ -180,14 +171,9 @@ describe("Parser", [&]() { }); describe("handling extra tokens", [&]() { - // In the javascript example grammar, ASI works by using newlines as - // terminators in statements, but also as extra tokens. - before_each([&]() { - ts_document_set_language(doc, get_test_language("javascript")); - }); - describe("when the token appears as part of a grammar rule", [&]() { - it("is incorporated into the tree", [&]() { + it("incorporates it into the tree", [&]() { + ts_document_set_language(document, get_test_language("javascript")); set_text("fn()\n"); assert_root_node( @@ -196,7 +182,8 @@ describe("Parser", [&]() { }); describe("when the token appears somewhere else", [&]() { - it("is incorporated into the tree", [&]() { + it("incorporates it into the tree", [&]() { + ts_document_set_language(document, get_test_language("javascript")); set_text( "fn()\n" " .otherFn();"); @@ -211,7 +198,8 @@ describe("Parser", [&]() { }); describe("when several extra tokens appear in a row", [&]() { - it("is incorporated into the tree", [&]() { + it("incorporates them into the tree", [&]() { + ts_document_set_language(document, get_test_language("javascript")); set_text( "fn()\n\n" "// This is a comment" @@ -230,199 +218,219 @@ describe("Parser", [&]() { }); describe("editing", [&]() { - before_each([&]() { - ts_document_set_language(doc, get_test_language("javascript")); + describe("creating new tokens near the end of the input", [&]() { + it("updates the parse tree and re-reads only the changed portion of the text", [&]() { + ts_document_set_language(document, get_test_language("javascript")); + set_text("x * (100 + abc);"); + + assert_root_node( + "(program (expression_statement (math_op " + "(identifier) " + "(math_op (number) (identifier)))))"); + + insert_text(strlen("x * (100 + abc"), ".d"); + + assert_root_node( + "(program (expression_statement (math_op " + "(identifier) " + "(math_op (number) (member_access (identifier) (identifier))))))"); + + AssertThat(input->strings_read, Equals(vector({ " + abc.d)" }))); + }); }); - describe("inserting text", [&]() { - describe("creating new tokens near the end of the input", [&]() { - it("updates the parse tree and re-reads only the changed portion of the text", [&]() { - set_text("x * (100 + abc);"); + describe("creating new tokens near the beginning of the input", [&]() { + it("updates the parse tree and re-reads only the changed portion of the input", [&]() { + chunk_size = 2; - assert_root_node( - "(program (expression_statement (math_op " - "(identifier) " - "(math_op (number) (identifier)))))"); + ts_document_set_language(document, get_test_language("javascript")); + set_text("123 + 456 * (10 + x);"); - insert_text(strlen("x * (100 + abc"), ".d"); + assert_root_node( + "(program (expression_statement (math_op " + "(number) " + "(math_op (number) (math_op (number) (identifier))))))"); - assert_root_node( - "(program (expression_statement (math_op " - 
"(identifier) " - "(math_op (number) (member_access (identifier) (identifier))))))"); + insert_text(strlen("123"), " || 5"); - AssertThat(input->strings_read, Equals(vector({ " + abc.d)" }))); - }); - }); - - describe("creating new tokens near the beginning of the input", [&]() { - it("updates the parse tree and re-reads only the changed portion of the input", [&]() { - chunk_size = 2; - - set_text("123 + 456 * (10 + x);"); - - assert_root_node( - "(program (expression_statement (math_op " + assert_root_node( + "(program (expression_statement (bool_op " + "(number) " + "(math_op " "(number) " - "(math_op (number) (math_op (number) (identifier))))))"); + "(math_op (number) (math_op (number) (identifier)))))))"); - insert_text(strlen("123"), " || 5"); - - assert_root_node( - "(program (expression_statement (bool_op " - "(number) " - "(math_op " - "(number) " - "(math_op (number) (math_op (number) (identifier)))))))"); - - AssertThat(input->strings_read, Equals(vector({ "123 || 5 +" }))); - }); + AssertThat(input->strings_read, Equals(vector({ "123 || 5 +" }))); }); + }); - describe("introducing an error", [&]() { - it("gives the error the right size", [&]() { - ts_document_set_language(doc, get_test_language("javascript")); + describe("introducing an error", [&]() { + it("gives the error the right size", [&]() { + ts_document_set_language(document, get_test_language("javascript")); + set_text("var x = y;"); - set_text("var x = y;"); + assert_root_node( + "(program (var_declaration (var_assignment " + "(identifier) (identifier))))"); - assert_root_node( - "(program (var_declaration (var_assignment " - "(identifier) (identifier))))"); + insert_text(strlen("var x = y"), " *"); - insert_text(strlen("var x = y"), " *"); + assert_root_node( + "(program (var_declaration (var_assignment " + "(identifier) (identifier)) (ERROR)))"); - assert_root_node( - "(program (var_declaration (var_assignment " - "(identifier) (identifier)) (ERROR)))"); + insert_text(strlen("var x = y *"), " z"); - insert_text(strlen("var x = y *"), " z"); - - assert_root_node( - "(program (var_declaration (var_assignment " - "(identifier) (math_op (identifier) (identifier)))))"); - }); + assert_root_node( + "(program (var_declaration (var_assignment " + "(identifier) (math_op (identifier) (identifier)))))"); }); + }); - describe("into the middle of an existing token", [&]() { - it("updates the parse tree", [&]() { - set_text("abc * 123;"); + describe("into the middle of an existing token", [&]() { + it("updates the parse tree", [&]() { + ts_document_set_language(document, get_test_language("javascript")); + set_text("abc * 123;"); - assert_root_node( - "(program (expression_statement (math_op (identifier) (number))))"); + assert_root_node( + "(program (expression_statement (math_op (identifier) (number))))"); - insert_text(strlen("ab"), "XYZ"); + insert_text(strlen("ab"), "XYZ"); - assert_root_node( - "(program (expression_statement (math_op (identifier) (number))))"); + assert_root_node( + "(program (expression_statement (math_op (identifier) (number))))"); - TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1); - AssertThat(ts_node_type(node, doc), Equals("identifier")); - AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc"))); - }); + TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1); + AssertThat(ts_node_type(node, document), Equals("identifier")); + AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc"))); }); + }); - describe("at the end of an existing token", [&]() { - it("updates the 
parse tree", [&]() { - set_text("abc * 123;"); + describe("at the end of an existing token", [&]() { + it("updates the parse tree", [&]() { + ts_document_set_language(document, get_test_language("javascript")); + set_text("abc * 123;"); - assert_root_node( - "(program (expression_statement (math_op (identifier) (number))))"); + assert_root_node( + "(program (expression_statement (math_op (identifier) (number))))"); - insert_text(strlen("abc"), "XYZ"); + insert_text(strlen("abc"), "XYZ"); - assert_root_node( - "(program (expression_statement (math_op (identifier) (number))))"); + assert_root_node( + "(program (expression_statement (math_op (identifier) (number))))"); - TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1); - AssertThat(ts_node_type(node, doc), Equals("identifier")); - AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ"))); - }); + TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1); + AssertThat(ts_node_type(node, document), Equals("identifier")); + AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ"))); }); + }); - describe("into a node containing a extra token", [&]() { - it("updates the parse tree", [&]() { - set_text("123 *\n" + describe("inserting text into a node containing a extra token", [&]() { + it("updates the parse tree", [&]() { + ts_document_set_language(document, get_test_language("javascript")); + set_text("123 *\n" + "// a-comment\n" + "abc;"); + + assert_root_node( + "(program (expression_statement (math_op " + "(number) " + "(comment) " + "(identifier))))"); + + insert_text( + strlen("123 *\n" "// a-comment\n" - "abc;"); + "abc"), + "XYZ"); - assert_root_node( - "(program (expression_statement (math_op " - "(number) " - "(comment) " - "(identifier))))"); - - insert_text( - strlen("123 *\n" - "// a-comment\n" - "abc"), - "XYZ"); - - assert_root_node( - "(program (expression_statement (math_op " - "(number) " - "(comment) " - "(identifier))))"); - }); + assert_root_node( + "(program (expression_statement (math_op " + "(number) " + "(comment) " + "(identifier))))"); }); }); - describe("deleting text", [&]() { - describe("when a critical token is removed", [&]() { - it("updates the parse tree, creating an error", [&]() { - set_text("123 * 456; 789 * 123;"); + describe("when a critical token is removed", [&]() { + it("updates the parse tree, creating an error", [&]() { + ts_document_set_language(document, get_test_language("javascript")); + set_text("123 * 456; 789 * 123;"); - assert_root_node( - "(program " - "(expression_statement (math_op (number) (number))) " - "(expression_statement (math_op (number) (number))))"); + assert_root_node( + "(program " + "(expression_statement (math_op (number) (number))) " + "(expression_statement (math_op (number) (number))))"); - delete_text(strlen("123 "), 2); + delete_text(strlen("123 "), 2); - assert_root_node( - "(program " - "(expression_statement (number) (ERROR (number))) " - "(expression_statement (math_op (number) (number))))"); - }); + assert_root_node( + "(program " + "(expression_statement (number) (ERROR (number))) " + "(expression_statement (math_op (number) (number))))"); }); }); - describe("replacing text", [&]() { - it("does not try to re-use nodes that are within the edited region", [&]() { - ts_document_set_language(doc, get_test_language("javascript")); + describe("with external tokens", [&]() { + it("maintains the external scanner's state during incremental parsing", [&]() { + ts_document_set_language(document, get_test_language("python")); + string text = 
dedent(R"PYTHON( + if a: + print b + return c + )PYTHON"); - set_text("{ x: (b.c) };"); + set_text(text); + assert_root_node("(module " + "(if_statement (identifier) " + "(print_statement (identifier))) " + "(return_statement (expression_list (identifier))))"); - assert_root_node( - "(program (expression_statement (object (pair " - "(identifier) (member_access (identifier) (identifier))))))"); + replace_text(text.find("return"), 0, " "); + assert_root_node("(module " + "(if_statement (identifier) " + "(print_statement (identifier)) " + "(return_statement (expression_list (identifier)))))"); - replace_text(strlen("{ x: "), strlen("(b.c)"), "b.c"); - - assert_root_node( - "(program (expression_statement (object (pair " - "(identifier) (member_access (identifier) (identifier))))))"); + undo(); + assert_root_node("(module " + "(if_statement (identifier) " + "(print_statement (identifier))) " + "(return_statement (expression_list (identifier))))"); }); }); + it("does not try to re-use nodes that are within the edited region", [&]() { + ts_document_set_language(document, get_test_language("javascript")); + set_text("{ x: (b.c) };"); + + assert_root_node( + "(program (expression_statement (object (pair " + "(identifier) (member_access (identifier) (identifier))))))"); + + replace_text(strlen("{ x: "), strlen("(b.c)"), "b.c"); + + assert_root_node( + "(program (expression_statement (object (pair " + "(identifier) (member_access (identifier) (identifier))))))"); + }); + it("updates the document's parse count", [&]() { - ts_document_set_language(doc, get_test_language("javascript")); - AssertThat(ts_document_parse_count(doc), Equals(0)); + ts_document_set_language(document, get_test_language("javascript")); + AssertThat(ts_document_parse_count(document), Equals(0)); set_text("{ x: (b.c) };"); - AssertThat(ts_document_parse_count(doc), Equals(1)); + AssertThat(ts_document_parse_count(document), Equals(1)); insert_text(strlen("{ x"), "yz"); - AssertThat(ts_document_parse_count(doc), Equals(2)); + AssertThat(ts_document_parse_count(document), Equals(2)); }); }); describe("lexing", [&]() { - before_each([&]() { - ts_document_set_language(doc, get_test_language("javascript")); - }); - describe("handling tokens containing wildcard patterns (e.g. 
comments)", [&]() { it("terminates them at the end of the document", [&]() { + ts_document_set_language(document, get_test_language("javascript")); set_text("x; // this is a comment"); assert_root_node( @@ -437,6 +445,7 @@ describe("Parser", [&]() { it("recognizes UTF8 characters as single characters", [&]() { // 'ΩΩΩ — ΔΔ'; + ts_document_set_language(document, get_test_language("javascript")); set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';"); assert_root_node( diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 4d4b01fd..20180843 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -521,6 +521,31 @@ describe("Stack", [&]() { free_slice_array(&pop.slices); }); }); + + describe("setting external token state", [&]() { + TSExternalTokenState external_token_state1, external_token_state2; + + it("allows the state to be retrieved", [&]() { + AssertThat(ts_stack_external_token_state(stack, 0), Equals(nullptr)); + + ts_stack_set_external_token_state(stack, 0, &external_token_state1); + AssertThat(ts_stack_external_token_state(stack, 0), Equals(&external_token_state1)); + + ts_stack_copy_version(stack, 0); + AssertThat(ts_stack_external_token_state(stack, 0), Equals(&external_token_state1)); + }); + + it("does not merge stack versions with different external token states", [&]() { + ts_stack_copy_version(stack, 0); + ts_stack_push(stack, 0, trees[0], false, 5); + ts_stack_push(stack, 1, trees[0], false, 5); + + ts_stack_set_external_token_state(stack, 0, &external_token_state1); + ts_stack_set_external_token_state(stack, 0, &external_token_state2); + + AssertThat(ts_stack_merge(stack, 0, 1), IsFalse()); + }); + }); }); END_TEST diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 9f451829..bdc8145f 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -22,47 +22,32 @@ void assert_consistent(const Tree *tree) { START_TEST -enum { - cat = 1, - dog, - eel, - fox, - goat, - hog, -}; - describe("Tree", []() { - Tree *tree1, *tree2, *parent1; + enum { + symbol1 = 1, + symbol2, + symbol3, + symbol4, + symbol5, + symbol6, + symbol7, + symbol8, + symbol9, + }; + TSSymbolMetadata visible = {true, true, false, true}; TSSymbolMetadata invisible = {false, false, false, true}; - before_each([&]() { - tree1 = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible); - tree2 = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible); - - ts_tree_retain(tree1); - ts_tree_retain(tree2); - parent1 = ts_tree_make_node(dog, 2, tree_array({ - tree1, - tree2, - }), visible); - }); - - after_each([&]() { - ts_tree_release(tree1); - ts_tree_release(tree2); - ts_tree_release(parent1); - }); - - describe("make_leaf(sym, size, padding, is_hidden)", [&]() { - it("does not record that it is fragile", [&]() { - AssertThat(tree1->fragile_left, IsFalse()); - AssertThat(tree1->fragile_right, IsFalse()); + describe("make_leaf", [&]() { + it("does not mark the tree as fragile", [&]() { + Tree *tree = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible); + AssertThat(tree->fragile_left, IsFalse()); + AssertThat(tree->fragile_right, IsFalse()); }); }); - describe("make_error(size, padding, lookahead_char)", [&]() { - it("records that it is fragile", [&]() { + describe("make_error", [&]() { + it("marks the tree as fragile", [&]() { Tree *error_tree = ts_tree_make_error( length_zero(), length_zero(), @@ -75,15 +60,33 @@ describe("Tree", []() { }); }); - describe("make_node(symbol, child_count, children, is_hidden)", 
-    it("computes its size based on its child nodes", [&]() {
-      AssertThat(parent1->size.bytes, Equals(
-        tree1->size.bytes +
-        tree2->padding.bytes + tree2->size.bytes));
-      AssertThat(parent1->size.chars, Equals(
-        tree1->size.chars +
-        tree2->padding.chars + tree2->size.chars));
+  describe("make_node", [&]() {
+    Tree *tree1, *tree2, *parent1;
+
+    before_each([&]() {
+      tree1 = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
+      tree2 = ts_tree_make_leaf(symbol2, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
+
+      ts_tree_retain(tree1);
+      ts_tree_retain(tree2);
+      parent1 = ts_tree_make_node(symbol3, 2, tree_array({
+        tree1,
+        tree2,
+      }), visible);
     });
 
-    it("computes its padding based on its first child", [&]() {
+    after_each([&]() {
+      ts_tree_release(tree1);
+      ts_tree_release(tree2);
+      ts_tree_release(parent1);
+    });
+
+    it("computes its size and padding based on its child nodes", [&]() {
+      AssertThat(parent1->size.bytes, Equals(
+        tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes));
+      AssertThat(parent1->size.chars, Equals(
+        tree1->size.chars + tree2->padding.chars + tree2->size.chars));
+
       AssertThat(parent1->padding.bytes, Equals(tree1->padding.bytes));
       AssertThat(parent1->padding.chars, Equals(tree1->padding.chars));
     });
@@ -97,7 +100,7 @@ describe("Tree", []() {
       ts_tree_retain(tree1);
       ts_tree_retain(tree2);
-      parent = ts_tree_make_node(eel, 2, tree_array({
+      parent = ts_tree_make_node(symbol3, 2, tree_array({
         tree1,
         tree2,
       }), visible);
@@ -121,7 +124,7 @@ describe("Tree", []() {
       ts_tree_retain(tree1);
       ts_tree_retain(tree2);
-      parent = ts_tree_make_node(eel, 2, tree_array({
+      parent = ts_tree_make_node(symbol3, 2, tree_array({
         tree1,
         tree2,
       }), visible);
@@ -145,7 +148,7 @@ describe("Tree", []() {
       ts_tree_retain(tree1);
       ts_tree_retain(tree2);
-      parent = ts_tree_make_node(eel, 2, tree_array({
+      parent = ts_tree_make_node(symbol3, 2, tree_array({
         tree1,
         tree2,
       }), visible);
@@ -162,14 +165,14 @@ describe("Tree", []() {
     });
   });
 
-  describe("edit(InputEdit)", [&]() {
+  describe("edit", [&]() {
     Tree *tree = nullptr;
 
     before_each([&]() {
-      tree = ts_tree_make_node(cat, 3, tree_array({
-        ts_tree_make_leaf(dog, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
-        ts_tree_make_leaf(eel, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
-        ts_tree_make_leaf(fox, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
+      tree = ts_tree_make_node(symbol1, 3, tree_array({
+        ts_tree_make_leaf(symbol2, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
+        ts_tree_make_leaf(symbol3, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
+        ts_tree_make_leaf(symbol4, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
       }), visible);
 
       AssertThat(tree->padding, Equals({2, 2, {0, 2}}));
@@ -180,7 +183,6 @@ describe("Tree", []() {
       ts_tree_release(tree);
     });
 
-
     describe("edits within a tree's padding", [&]() {
       it("resizes the padding of the tree and its leftmost descendants", [&]() {
         TSInputEdit edit;
@@ -312,69 +314,124 @@ describe("Tree", []() {
     });
   });
 
-  describe("equality", [&]() {
+  describe("eq", [&]() {
+    Tree *leaf;
+
+    before_each([&]() {
+      leaf = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
+    });
+
+    after_each([&]() {
+      ts_tree_release(leaf);
+    });
+
     it("returns true for identical trees", [&]() {
-      Tree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {1, 1}}, {5, 4, {1, 4}}, visible);
-      AssertThat(ts_tree_eq(tree1, tree1_copy), IsTrue());
+      Tree *leaf_copy = ts_tree_make_leaf(symbol1, {2, 1, {1, 1}}, {5, 4, {1, 4}}, visible);
+      AssertThat(ts_tree_eq(leaf, leaf_copy), IsTrue());
 
-      Tree *tree2_copy = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
-      AssertThat(ts_tree_eq(tree2, tree2_copy), IsTrue());
-
-      Tree *parent2 = ts_tree_make_node(dog, 2, tree_array({
-        tree1_copy,
-        tree2_copy,
+      Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({
+        leaf,
+        leaf_copy,
       }), visible);
+      ts_tree_retain(leaf);
+      ts_tree_retain(leaf_copy);
 
-      AssertThat(ts_tree_eq(parent1, parent2), IsTrue());
+      Tree *parent_copy = ts_tree_make_node(symbol2, 2, tree_array({
+        leaf,
+        leaf_copy,
+      }), visible);
+      ts_tree_retain(leaf);
+      ts_tree_retain(leaf_copy);
 
-      ts_tree_release(parent2);
+      AssertThat(ts_tree_eq(parent, parent_copy), IsTrue());
+
+      ts_tree_release(leaf_copy);
+      ts_tree_release(parent);
+      ts_tree_release(parent_copy);
     });
 
     it("returns false for trees with different symbols", [&]() {
-      Tree *different_tree = ts_tree_make_leaf(
-        tree1->symbol + 1,
-        tree1->padding,
-        tree1->size,
+      Tree *different_leaf = ts_tree_make_leaf(
+        leaf->symbol + 1,
+        leaf->padding,
+        leaf->size,
         visible);
 
-      AssertThat(ts_tree_eq(tree1, different_tree), IsFalse());
-      ts_tree_release(different_tree);
+      AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
+      ts_tree_release(different_leaf);
     });
 
     it("returns false for trees with different options", [&]() {
-      Tree *tree1_copy = ts_tree_make_leaf(cat, tree1->padding, tree1->size, invisible);
-      AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
-      ts_tree_release(tree1_copy);
+      Tree *different_leaf = ts_tree_make_leaf(symbol1, leaf->padding, leaf->size, invisible);
+      AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
+      ts_tree_release(different_leaf);
    });
 
     it("returns false for trees with different sizes", [&]() {
-      Tree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, tree1->size, invisible);
-      AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
-      ts_tree_release(tree1_copy);
+      Tree *different_leaf = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, leaf->size, invisible);
+      AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
+      ts_tree_release(different_leaf);
 
-      tree1_copy = ts_tree_make_leaf(cat, tree1->padding, {5, 4, {1, 10}}, invisible);
-      AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
-      ts_tree_release(tree1_copy);
+      different_leaf = ts_tree_make_leaf(symbol1, leaf->padding, {5, 4, {1, 10}}, invisible);
+      AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
+      ts_tree_release(different_leaf);
     });
 
     it("returns false for trees with different children", [&]() {
-      Tree *different_tree = ts_tree_make_leaf(
-        tree1->symbol + 1,
-        tree1->padding,
-        tree1->size,
-        visible);
+      Tree *leaf2 = ts_tree_make_leaf(symbol2, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
 
-      ts_tree_retain(different_tree);
-      ts_tree_retain(tree2);
-      Tree *different_parent = ts_tree_make_node(dog, 2, tree_array({
-        different_tree, tree2,
+      Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({
+        leaf,
+        leaf2,
+      }), visible);
+      ts_tree_retain(leaf);
+      ts_tree_retain(leaf2);
+
+      Tree *different_parent = ts_tree_make_node(symbol2, 2, tree_array({
+        leaf2,
+        leaf,
+      }), visible);
+      ts_tree_retain(leaf2);
+      ts_tree_retain(leaf);
+
+      AssertThat(ts_tree_eq(different_parent, parent), IsFalse());
+      AssertThat(ts_tree_eq(parent, different_parent), IsFalse());
+
+      ts_tree_release(leaf2);
+      ts_tree_release(parent);
+      ts_tree_release(different_parent);
+    });
+  });
+
+  describe("last_external_token_state", [&]() {
+    Length padding = {1, 1, {0, 1}};
+    Length size = {2, 2, {0, 2}};
+
+    auto make_external = [](Tree *tree) {
+      tree->has_external_tokens = true;
tree->has_external_token_state = true; + return tree; + }; + + it("returns the last serialized external token state in the given tree", [&]() { + Tree *tree1, *tree2, *tree3, *tree4, *tree5, *tree6, *tree7, *tree8, *tree9; + + tree1 = ts_tree_make_node(symbol1, 2, tree_array({ + (tree2 = ts_tree_make_node(symbol2, 3, tree_array({ + (tree3 = make_external(ts_tree_make_leaf(symbol3, padding, size, visible))), + (tree4 = ts_tree_make_leaf(symbol4, padding, size, visible)), + (tree5 = ts_tree_make_leaf(symbol5, padding, size, visible)), + }), visible)), + (tree6 = ts_tree_make_node(symbol6, 2, tree_array({ + (tree7 = ts_tree_make_node(symbol7, 1, tree_array({ + (tree8 = ts_tree_make_leaf(symbol8, padding, size, visible)), + }), visible)), + (tree9 = ts_tree_make_leaf(symbol9, padding, size, visible)), + }), visible)), }), visible); - AssertThat(ts_tree_eq(different_parent, parent1), IsFalse()); - AssertThat(ts_tree_eq(parent1, different_parent), IsFalse()); - - ts_tree_release(different_tree); - ts_tree_release(different_parent); + auto state = ts_tree_last_external_token_state(tree1); + AssertThat(state, Equals(&tree3->external_token_state)); }); }); }); diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 151da7cf..29d8f4d0 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -64,7 +64,7 @@ class LexTableBuilder { private: void add_lex_state_for_parse_state(ParseState *parse_state) { parse_state->lex_state_id = - add_lex_state(item_set_for_tokens(parse_state->expected_inputs())); + add_lex_state(item_set_for_terminals(parse_state->terminal_entries)); } LexStateId add_lex_state(const LexItemSet &item_set) { @@ -112,24 +112,27 @@ class LexTableBuilder { void mark_fragile_tokens() { for (ParseState &state : parse_table->states) { for (auto &entry : state.terminal_entries) { - auto homonyms = conflict_manager.possible_homonyms.find(entry.first); - if (homonyms != conflict_manager.possible_homonyms.end()) - for (Symbol::Index homonym : homonyms->second) - if (state.terminal_entries.count(homonym)) { - entry.second.reusable = false; - break; - } + Symbol symbol = entry.first; + if (symbol.is_token()) { + auto homonyms = conflict_manager.possible_homonyms.find(symbol.index); + if (homonyms != conflict_manager.possible_homonyms.end()) + for (Symbol::Index homonym : homonyms->second) + if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) { + entry.second.reusable = false; + break; + } - if (!entry.second.reusable) - continue; + if (!entry.second.reusable) + continue; - auto extensions = conflict_manager.possible_extensions.find(entry.first); - if (extensions != conflict_manager.possible_extensions.end()) - for (Symbol::Index extension : extensions->second) - if (state.terminal_entries.count(extension)) { - entry.second.depends_on_lookahead = true; - break; - } + auto extensions = conflict_manager.possible_extensions.find(symbol.index); + if (extensions != conflict_manager.possible_extensions.end()) + for (Symbol::Index extension : extensions->second) + if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) { + entry.second.depends_on_lookahead = true; + break; + } + } } } } @@ -150,24 +153,27 @@ class LexTableBuilder { } } - LexItemSet item_set_for_tokens(const set &symbols) { + LexItemSet item_set_for_terminals(const map &terminals) { LexItemSet result; - for (const Symbol &symbol : symbols) - for (const rule_ptr &rule : rules_for_symbol(symbol)) - for 
(const rule_ptr &separator_rule : separator_rules) - result.entries.insert(LexItem( - symbol, - Metadata::separator( - Seq::build({ - separator_rule, - Metadata::main_token(rule) })))); + for (const auto &pair : terminals) { + Symbol symbol = pair.first; + if (symbol.is_token()) { + for (const rule_ptr &rule : rules_for_symbol(symbol)) { + for (const rule_ptr &separator_rule : separator_rules) { + result.entries.insert(LexItem( + symbol, + Metadata::separator( + Seq::build({ + separator_rule, + Metadata::main_token(rule) })))); + } + } + } + } return result; } vector rules_for_symbol(const rules::Symbol &symbol) { - if (!symbol.is_token) - return {}; - if (symbol == rules::END_OF_INPUT()) return { CharacterSet().include(0).copy() }; diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 91444310..9fb6859f 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -52,7 +52,10 @@ class ParseTableBuilder { allow_any_conflict(false) {} pair build() { - Symbol start_symbol = Symbol(0, grammar.variables.empty()); + Symbol start_symbol = grammar.variables.empty() ? + Symbol(0, Symbol::Terminal) : + Symbol(0, Symbol::NonTerminal); + Production start_production({ ProductionStep(start_symbol, 0, rules::AssociativityNone), }); @@ -63,7 +66,7 @@ class ParseTableBuilder { add_parse_state(ParseItemSet({ { ParseItem(rules::START(), start_production, 0), - LookaheadSet({ END_OF_INPUT().index }), + LookaheadSet({ END_OF_INPUT() }), }, })); @@ -107,21 +110,25 @@ class ParseTableBuilder { void build_error_parse_state() { ParseState error_state; - for (const Symbol::Index index : parse_table.mergeable_symbols) { - add_out_of_context_parse_state(&error_state, Symbol(index, true)); + for (const Symbol symbol : parse_table.mergeable_symbols) { + add_out_of_context_parse_state(&error_state, symbol); } for (const Symbol &symbol : grammar.extra_tokens) { - if (!error_state.terminal_entries.count(symbol.index)) { - error_state.terminal_entries[symbol.index].actions.push_back(ParseAction::ShiftExtra()); + if (!error_state.terminal_entries.count(symbol)) { + error_state.terminal_entries[symbol].actions.push_back(ParseAction::ShiftExtra()); } } - for (size_t i = 0; i < grammar.variables.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol(i, false)); + for (size_t i = 0; i < grammar.external_tokens.size(); i++) { + add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::External)); } - error_state.terminal_entries[END_OF_INPUT().index].actions.push_back(ParseAction::Recover(0)); + for (size_t i = 0; i < grammar.variables.size(); i++) { + add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal)); + } + + error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0)); parse_table.states[0] = error_state; } @@ -130,10 +137,10 @@ class ParseTableBuilder { const ParseItemSet &item_set = recovery_states[symbol]; if (!item_set.entries.empty()) { ParseStateId state = add_parse_state(item_set); - if (symbol.is_token) { - error_state->terminal_entries[symbol.index].actions.assign({ ParseAction::Recover(state) }); - } else { + if (symbol.is_non_terminal()) { error_state->nonterminal_entries[symbol.index] = state; + } else { + error_state->terminal_entries[symbol].actions.assign({ ParseAction::Recover(state) }); } } } @@ -152,9 +159,9 @@ class ParseTableBuilder { } string add_actions(const ParseItemSet &item_set, ParseStateId state_id) { - map 
terminal_successors; + map terminal_successors; map nonterminal_successors; - set lookaheads_with_conflicts; + set lookaheads_with_conflicts; for (const auto &pair : item_set.entries) { const ParseItem &item = pair.first; @@ -168,7 +175,7 @@ class ParseTableBuilder { ParseAction::Reduce(item.lhs(), item.step_index, *item.production); int precedence = item.precedence(); - for (const Symbol::Index lookahead : *lookahead_symbols.entries) { + for (Symbol lookahead : *lookahead_symbols.entries) { ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead]; // Only add the highest-precedence Reduce actions to the parse table. @@ -203,10 +210,10 @@ class ParseTableBuilder { Symbol symbol = item.production->at(item.step_index).symbol; ParseItem new_item(item.lhs(), *item.production, item.step_index + 1); - if (symbol.is_token) { - terminal_successors[symbol.index].entries[new_item] = lookahead_symbols; - } else { + if (symbol.is_non_terminal()) { nonterminal_successors[symbol.index].entries[new_item] = lookahead_symbols; + } else { + terminal_successors[symbol].entries[new_item] = lookahead_symbols; } } } @@ -214,7 +221,7 @@ class ParseTableBuilder { // Add a Shift action for each possible successor state. Shift actions for // terminal lookaheads can conflict with Reduce actions added previously. for (auto &pair : terminal_successors) { - Symbol::Index lookahead = pair.first; + Symbol lookahead = pair.first; ParseItemSet &next_item_set = pair.second; ParseStateId next_state_id = add_parse_state(next_item_set); ParseState &state = parse_table.states[state_id]; @@ -223,7 +230,7 @@ class ParseTableBuilder { if (!allow_any_conflict) { if (had_existing_action) lookaheads_with_conflicts.insert(lookahead); - recovery_states[Symbol(lookahead, true)].add(next_item_set); + recovery_states[lookahead].add(next_item_set); } } @@ -234,10 +241,10 @@ class ParseTableBuilder { ParseStateId next_state = add_parse_state(next_item_set); parse_table.set_nonterminal_action(state_id, lookahead, next_state); if (!allow_any_conflict) - recovery_states[Symbol(lookahead, false)].add(next_item_set); + recovery_states[Symbol(lookahead, Symbol::NonTerminal)].add(next_item_set); } - for (Symbol::Index lookahead : lookaheads_with_conflicts) { + for (Symbol lookahead : lookaheads_with_conflicts) { string conflict = handle_conflict(item_set, state_id, lookahead); if (!conflict.empty()) return conflict; } @@ -245,9 +252,9 @@ class ParseTableBuilder { ParseAction shift_extra = ParseAction::ShiftExtra(); ParseState &state = parse_table.states[state_id]; for (const Symbol &extra_symbol : grammar.extra_tokens) { - if (!state.terminal_entries.count(extra_symbol.index) || + if (!state.terminal_entries.count(extra_symbol) || state.has_shift_action() || allow_any_conflict) { - parse_table.add_terminal_action(state_id, extra_symbol.index, shift_extra); + parse_table.add_terminal_action(state_id, extra_symbol, shift_extra); } } @@ -257,7 +264,6 @@ class ParseTableBuilder { void mark_fragile_actions() { for (ParseState &state : parse_table.states) { for (auto &entry : state.terminal_entries) { - const Symbol symbol(entry.first, true); auto &actions = entry.second.actions; for (ParseAction &action : actions) { @@ -359,7 +365,7 @@ class ParseTableBuilder { } string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id, - Symbol::Index lookahead) { + Symbol lookahead) { ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead]; int reduction_precedence = 
entry.actions.front().precedence(); set shift_items; @@ -468,7 +474,7 @@ class ParseTableBuilder { description += " " + symbol_name(earliest_starting_item.production->at(i).symbol); } - description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026"; + description += " \u2022 " + symbol_name(lookahead) + " \u2026"; description += "\n\n"; description += "Possible interpretations:\n\n"; @@ -487,7 +493,7 @@ class ParseTableBuilder { description += " " + symbol_name(step.symbol); } description += ")"; - description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026"; + description += " \u2022 " + symbol_name(lookahead) + " \u2026"; description += "\n"; } } @@ -564,14 +570,23 @@ class ParseTableBuilder { return "END_OF_INPUT"; else return ""; - } else if (symbol.is_token) { - const Variable &variable = lexical_grammar.variables[symbol.index]; - if (variable.type == VariableTypeNamed) - return variable.name; - else - return "'" + variable.name + "'"; - } else { - return grammar.variables[symbol.index].name; + } + + switch (symbol.type) { + case Symbol::Terminal: { + const Variable &variable = lexical_grammar.variables[symbol.index]; + if (variable.type == VariableTypeNamed) + return variable.name; + else + return "'" + variable.name + "'"; + } + case Symbol::NonTerminal: { + return grammar.variables[symbol.index].name; + } + case Symbol::External: + default: { + return grammar.external_tokens[symbol.index].name; + } } } diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index 1ecb0baf..239bc029 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -12,8 +12,8 @@ using rules::Symbol; LookaheadSet::LookaheadSet() : entries(nullptr) {} -LookaheadSet::LookaheadSet(const set &symbols) - : entries(make_shared>(symbols)) {} +LookaheadSet::LookaheadSet(const set &symbols) + : entries(make_shared>(symbols)) {} bool LookaheadSet::empty() const { return !entries.get() || entries->empty(); @@ -23,7 +23,7 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const { return *entries == *other.entries; } -bool LookaheadSet::contains(const Symbol::Index &symbol) const { +bool LookaheadSet::contains(const Symbol &symbol) const { return entries->find(symbol) != entries->end(); } @@ -31,15 +31,15 @@ bool LookaheadSet::insert_all(const LookaheadSet &other) { if (!other.entries.get()) return false; if (!entries.get()) - entries = make_shared>(); + entries = make_shared>(); size_t previous_size = entries->size(); entries->insert(other.entries->begin(), other.entries->end()); return entries->size() > previous_size; } -bool LookaheadSet::insert(const Symbol::Index &symbol) { +bool LookaheadSet::insert(const Symbol &symbol) { if (!entries.get()) - entries = make_shared>(); + entries = make_shared>(); return entries->insert(symbol).second; } diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h index fe99b4d5..e62ee34d 100644 --- a/src/compiler/build_tables/lookahead_set.h +++ b/src/compiler/build_tables/lookahead_set.h @@ -11,15 +11,15 @@ namespace build_tables { class LookaheadSet { public: LookaheadSet(); - explicit LookaheadSet(const std::set &); + explicit LookaheadSet(const std::set &); bool empty() const; bool operator==(const LookaheadSet &) const; - bool contains(const rules::Symbol::Index &) const; + bool contains(const rules::Symbol &) const; bool insert_all(const LookaheadSet &); - bool insert(const rules::Symbol::Index &); + bool 
diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc
index 39b131cb..b9c3831b 100644
--- a/src/compiler/build_tables/parse_item.cc
+++ b/src/compiler/build_tables/parse_item.cc
@@ -41,7 +41,7 @@ bool ParseItem::operator<(const ParseItem &other) const {
 }
 
 Symbol ParseItem::lhs() const {
-  return Symbol(variable_index);
+  return Symbol(variable_index, Symbol::NonTerminal);
 }
 
 bool ParseItem::is_done() const {
@@ -105,38 +105,6 @@ size_t ParseItemSet::unfinished_item_signature() const {
   return result;
 }
 
-ParseItemSet::ActionMap ParseItemSet::actions() const {
-  ParseItemSet::ActionMap result;
-
-  for (const auto &pair : entries) {
-    const ParseItem &item = pair.first;
-    const LookaheadSet &lookahead_symbols = pair.second;
-
-    if (item.step_index == item.production->size()) {
-      int precedence = item.precedence();
-      for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
-        Action &action = result.terminal_actions[lookahead];
-        if (precedence > action.completion_precedence) {
-          action.completions.assign({ &item });
-        } else if (precedence == action.completion_precedence) {
-          action.completions.push_back({ &item });
-        }
-      }
-    } else {
-      Symbol symbol = item.production->at(item.step_index).symbol;
-      ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);
-
-      if (symbol.is_token) {
-        result.terminal_actions[symbol.index].continuation.entries[new_item] = lookahead_symbols;
-      } else {
-        result.nonterminal_continuations[symbol.index].entries[new_item] = lookahead_symbols;
-      }
-    }
-  }
-
-  return result;
-}
-
 void ParseItemSet::add(const ParseItemSet &other) {
   for (const auto &pair : other.entries)
     entries[pair.first].insert_all(pair.second);
diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h
index a091ac9d..a3785638 100644
--- a/src/compiler/build_tables/parse_item.h
+++ b/src/compiler/build_tables/parse_item.h
@@ -41,16 +41,6 @@ class ParseItemSet {
   ParseItemSet();
   explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
 
-  struct Completion;
-  struct Action;
-
-  struct ActionMap {
-    std::map<rules::Symbol::Index, Action> terminal_actions;
-    std::map<rules::Symbol::Index, ParseItemSet> nonterminal_continuations;
-  };
-
-  ActionMap actions() const;
-
   bool operator==(const ParseItemSet &) const;
   void add(const ParseItemSet &);
   size_t unfinished_item_signature() const;
@@ -58,22 +48,6 @@ class ParseItemSet {
   std::map<ParseItem, LookaheadSet> entries;
 };
 
-struct ParseItemSet::Completion {
-  const ParseItem *item;
-  int precedence;
-  rules::Associativity associativity;
-
-  bool operator<(const ParseItemSet::Completion &other) {
-    return precedence < other.precedence;
-  }
-};
-
-struct ParseItemSet::Action {
-  ParseItemSet continuation;
-  std::vector<Completion> completions;
-  int completion_precedence;
-};
-
 }  // namespace build_tables
}  // namespace tree_sitter
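For orientation: a `ParseItemSet` pairs each dotted production (an LR item) with the lookahead symbols that may follow it. A compressed sketch of that shape, with simplified stand-in types rather than the compiler's real `ParseItem`/`LookaheadSet` classes:

```cpp
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <vector>

struct Item {
  std::string lhs;
  std::vector<std::string> rhs;
  size_t dot;  // like ParseItem::step_index: how much has been consumed
  bool operator<(const Item &o) const {
    return std::tie(lhs, rhs, dot) < std::tie(o.lhs, o.rhs, o.dot);
  }
};

int main() {
  // entries: item -> lookahead symbols, mirroring ParseItemSet::entries.
  std::map<Item, std::set<std::string>> entries;
  entries[{"sum", {"expr", "+", "expr"}, 1}] = {"+", "END"};
  for (auto &pair : entries)
    std::cout << pair.first.lhs << " has " << pair.second.size()
              << " lookaheads\n";
}
```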
diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc
index 34b347fe..0a2039d3 100644
--- a/src/compiler/build_tables/parse_item_set_builder.cc
+++ b/src/compiler/build_tables/parse_item_set_builder.cc
@@ -27,12 +27,17 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
   set<Symbol::Index> processed_non_terminals;
 
   for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
-    Symbol symbol(i, true);
-    first_sets.insert({symbol, LookaheadSet({ static_cast<Symbol::Index>(i) })});
+    Symbol symbol(i, Symbol::Terminal);
+    first_sets.insert({symbol, LookaheadSet({ symbol })});
+  }
+
+  for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) {
+    Symbol symbol(i, Symbol::External);
+    first_sets.insert({symbol, LookaheadSet({ symbol })});
   }
 
   for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
-    Symbol symbol(i);
+    Symbol symbol(i, Symbol::NonTerminal);
     LookaheadSet first_set;
     processed_non_terminals.clear();
@@ -42,10 +47,10 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
       Symbol current_symbol = symbols_to_process.back();
       symbols_to_process.pop_back();
 
-      if (current_symbol.is_token) {
-        first_set.insert(current_symbol.index);
+      if (!current_symbol.is_non_terminal()) {
+        first_set.insert(current_symbol);
       } else if (processed_non_terminals.insert(current_symbol.index).second) {
-        for (const Production &production : grammar.productions(current_symbol)) {
+        for (const Production &production : grammar.variables[current_symbol.index].productions) {
           if (!production.empty()) {
             symbols_to_process.push_back(production[0].symbol);
           }
@@ -59,11 +64,11 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
   vector<ParseItemSetComponent> components_to_process;
 
   for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
-    Symbol symbol(i);
+    Symbol symbol(i, Symbol::NonTerminal);
     map<ParseItem, pair<LookaheadSet, bool>> cache_entry;
 
     components_to_process.clear();
-    for (const Production &production : grammar.productions(symbol)) {
+    for (const Production &production : grammar.variables[i].productions) {
       components_to_process.push_back(ParseItemSetComponent{
         ParseItem(symbol, production, 0),
         LookaheadSet(),
@@ -87,7 +92,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
       if (component_is_new) {
         Symbol next_symbol = item.next_symbol();
 
-        if (next_symbol.is_built_in() || next_symbol.is_token)
+        if (!next_symbol.is_non_terminal() || next_symbol.is_built_in())
           continue;
 
         LookaheadSet next_lookaheads;
@@ -102,7 +107,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
           propagates_lookaheads = false;
         }
 
-        for (const Production &production : grammar.productions(next_symbol)) {
+        for (const Production &production : grammar.variables[next_symbol.index].productions) {
           components_to_process.push_back(ParseItemSetComponent{
             ParseItem(next_symbol, production, 0),
             next_lookaheads,
@@ -130,7 +135,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
     const LookaheadSet &lookaheads = pair.second;
     const Symbol &next_symbol = item.next_symbol();
 
-    if (!next_symbol.is_token && !next_symbol.is_built_in()) {
+    if (next_symbol.is_non_terminal() && !next_symbol.is_built_in()) {
       LookaheadSet next_lookaheads;
       size_t next_step = item.step_index + 1;
       if (next_step == item.production->size()) {
diff --git a/src/compiler/build_tables/recovery_tokens.cc b/src/compiler/build_tables/recovery_tokens.cc
index 479de6b8..84b175bc 100644
--- a/src/compiler/build_tables/recovery_tokens.cc
+++ b/src/compiler/build_tables/recovery_tokens.cc
@@ -47,8 +47,8 @@ class FirstCharacters : public CharacterAggregator {};
 class LastCharacters : public CharacterAggregator {};
 class AllCharacters : public CharacterAggregator {};
 
-set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
-  set<Symbol::Index> result;
+set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
+  set<Symbol> result;
   AllCharacters all_separator_characters;
 
   for (const rule_ptr &separator : grammar.separators)
@@ -79,7 +79,7 @@ set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
       !all_characters.result.intersects(all_separator_characters.result);
 
     if ((has_distinct_start && has_distinct_end) || has_no_separators)
-      result.insert(i);
+      result.insert(Symbol(i, Symbol::Terminal));
   }
 
   return result;
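For context: `recovery_tokens` selects the tokens that are safe to synchronize on during error recovery. A sketch of the criterion applied above, as a simplified model over plain character sets (not the compiler's real `CharacterAggregator` machinery):

```cpp
#include <iostream>
#include <set>

// A token is usable for recovery when its first and last characters cannot
// be confused with separator (e.g. whitespace) characters, or when the
// grammar has no separators at all.
static bool is_recovery_token(const std::set<char> &first_chars,
                              const std::set<char> &last_chars,
                              const std::set<char> &separator_chars) {
  auto disjoint = [](const std::set<char> &a, const std::set<char> &b) {
    for (char c : a)
      if (b.count(c)) return false;
    return true;
  };
  if (separator_chars.empty()) return true;
  return disjoint(first_chars, separator_chars) &&
         disjoint(last_chars, separator_chars);
}

int main() {
  std::set<char> separators = {' ', '\t', '\n'};
  std::cout << is_recovery_token({'{'}, {'{'}, separators) << "\n";  // 1
  std::cout << is_recovery_token({' '}, {'"'}, separators) << "\n";  // 0
}
```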
diff --git a/src/compiler/build_tables/recovery_tokens.h b/src/compiler/build_tables/recovery_tokens.h
index 4873b5a9..c97a8cfd 100644
--- a/src/compiler/build_tables/recovery_tokens.h
+++ b/src/compiler/build_tables/recovery_tokens.h
@@ -11,7 +11,7 @@ struct LexicalGrammar;
 
 namespace build_tables {
 
-std::set<rules::Symbol::Index> recovery_tokens(const LexicalGrammar &);
+std::set<rules::Symbol> recovery_tokens(const LexicalGrammar &);
 
 }  // namespace build_tables
 }  // namespace tree_sitter
diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc
index b7058603..bc84e557 100644
--- a/src/compiler/generate_code/c_code.cc
+++ b/src/compiler/generate_code/c_code.cc
@@ -11,9 +11,11 @@
 #include "compiler/lexical_grammar.h"
 #include "compiler/rules/built_in_symbols.h"
 #include "compiler/util/string_helpers.h"
+#include "tree_sitter/runtime.h"
 
 namespace tree_sitter {
 namespace generate_code {
+
 using std::function;
 using std::map;
 using std::pair;
@@ -22,6 +24,7 @@ using std::string;
 using std::to_string;
 using std::vector;
 using util::escape_char;
+using rules::Symbol;
 
 static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());
 
@@ -73,9 +76,8 @@ class CCodeGenerator {
   const LexicalGrammar lexical_grammar;
   map<string, string> sanitized_names;
   vector<pair<size_t, ParseTableEntry>> parse_table_entries;
-  vector<pair<size_t, set<Symbol::Index>>> in_progress_symbols;
+  vector<set<Symbol::Index>> external_scanner_states;
   size_t next_parse_action_list_index;
-  size_t next_in_progress_symbol_list_index;
 
  public:
   CCodeGenerator(string name, const ParseTable &parse_table,
@@ -87,19 +89,26 @@ class CCodeGenerator {
       lex_table(lex_table),
       syntax_grammar(syntax_grammar),
       lexical_grammar(lexical_grammar),
-      next_parse_action_list_index(0),
-      next_in_progress_symbol_list_index(0) {}
+      next_parse_action_list_index(0) {}
 
   string code() {
     buffer = "";
 
     add_includes();
-    add_state_and_symbol_counts();
+    add_warning_pragma();
+    add_stats();
     add_symbol_enum();
     add_symbol_names_list();
-    add_symbol_node_types_list();
+    add_symbol_metadata_list();
     add_lex_function();
-    add_lex_states_list();
+    add_lex_modes_list();
+
+    if (!syntax_grammar.external_tokens.empty()) {
+      add_external_token_enum();
+      add_external_scanner_symbol_map();
+      add_external_scanner_states_list();
+    }
+
     add_parse_table();
     add_parser_export();
 
@@ -112,10 +121,25 @@ class CCodeGenerator {
     line();
   }
 
-  void add_state_and_symbol_counts() {
+  void add_warning_pragma() {
+    line("#pragma GCC diagnostic push");
+    line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
+    line();
+  }
+
+  void add_stats() {
+    size_t token_count = 1 + lexical_grammar.variables.size();
+    for (const ExternalToken &external_token : syntax_grammar.external_tokens) {
+      if (external_token.corresponding_internal_token == rules::NONE()) {
+        token_count++;
+      }
+    }
+
+    line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION));
     line("#define STATE_COUNT " + to_string(parse_table.states.size()));
     line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
-    line("#define TOKEN_COUNT " + to_string(lexical_grammar.variables.size() + 1));
+    line("#define TOKEN_COUNT " + to_string(token_count));
+    line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size()));
     line();
   }
+ ","); i++; @@ -146,11 +170,11 @@ class CCodeGenerator { line(); } - void add_symbol_node_types_list() { + void add_symbol_metadata_list() { line("static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {"); indent([&]() { for (const auto &entry : parse_table.symbols) { - const rules::Symbol &symbol = entry.first; + const Symbol &symbol = entry.first; line("[" + symbol_id(symbol) + "] = {"); indent([&]() { switch (symbol_type(symbol)) { @@ -198,13 +222,102 @@ class CCodeGenerator { line(); } - void add_lex_states_list() { - line("static TSStateId ts_lex_states[STATE_COUNT] = {"); + void add_lex_modes_list() { + add_external_scanner_state({}); + + map external_tokens_by_corresponding_internal_token; + for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) { + for (size_t j = 0; j < syntax_grammar.external_tokens.size(); j++) { + const ExternalToken &external_token = syntax_grammar.external_tokens[j]; + if (external_token.corresponding_internal_token.index == Symbol::Index(i)) { + external_tokens_by_corresponding_internal_token.insert({i, j}); + break; + } + } + } + + line("static TSLexMode ts_lex_modes[STATE_COUNT] = {"); indent([&]() { size_t state_id = 0; - for (const auto &state : parse_table.states) - line("[" + to_string(state_id++) + "] = " + - to_string(state.lex_state_id) + ","); + + for (const auto &state : parse_table.states) { + line("[" + to_string(state_id++) + "] = {.lex_state = "); + add(to_string(state.lex_state_id)); + + bool needs_external_scanner = false; + set external_token_indices; + for (const auto &pair : state.terminal_entries) { + Symbol symbol = pair.first; + if (symbol.is_external()) { + needs_external_scanner = true; + external_token_indices.insert(symbol.index); + } else if (symbol.is_token()) { + auto corresponding_external_token = + external_tokens_by_corresponding_internal_token.find(symbol.index); + if (corresponding_external_token != external_tokens_by_corresponding_internal_token.end()) { + external_token_indices.insert(corresponding_external_token->second); + } + } + } + + if (needs_external_scanner) { + add(", .external_lex_state = " + add_external_scanner_state(external_token_indices)); + } + + add("},"); + } + }); + line("};"); + line(); + } + + string add_external_scanner_state(set external_token_ids) { + for (size_t i = 0, n = external_scanner_states.size(); i < n; i++) + if (external_scanner_states[i] == external_token_ids) + return to_string(i); + external_scanner_states.push_back(external_token_ids); + return to_string(external_scanner_states.size() - 1); + } + + void add_external_token_enum() { + line("enum {"); + indent([&]() { + for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) + line(external_token_id(i) + ","); + }); + line("};"); + line(); + } + + void add_external_scanner_symbol_map() { + line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {"); + indent([&]() { + for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) { + line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ","); + } + }); + line("};"); + line(); + } + + void add_external_scanner_states_list() { + line("static bool ts_external_scanner_states["); + add(to_string(external_scanner_states.size())); + add("][EXTERNAL_TOKEN_COUNT] = {"); + indent([&]() { + size_t i = 0; + for (const auto &valid_external_lookaheads : external_scanner_states) { + if (!valid_external_lookaheads.empty()) { + line("[" + to_string(i) + "] = {"); + indent([&]() { + for (Symbol::Index id : 
+
+  void add_external_scanner_states_list() {
+    line("static bool ts_external_scanner_states[");
+    add(to_string(external_scanner_states.size()));
+    add("][EXTERNAL_TOKEN_COUNT] = {");
+    indent([&]() {
+      size_t i = 0;
+      for (const auto &valid_external_lookaheads : external_scanner_states) {
+        if (!valid_external_lookaheads.empty()) {
+          line("[" + to_string(i) + "] = {");
+          indent([&]() {
+            for (Symbol::Index id : valid_external_lookaheads) {
+              line("[" + external_token_id(id) + "] = true,");
+            }
+          });
+          line("},");
+        }
+        i++;
+      }
    });
     line("};");
     line();
@@ -214,9 +327,6 @@ class CCodeGenerator {
     add_parse_action_list_id(ParseTableEntry{ {}, false, false });
     size_t state_id = 0;
 
-    line("#pragma GCC diagnostic push");
-    line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
-    line();
     line("static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
     indent([&]() {
@@ -224,12 +334,12 @@ class CCodeGenerator {
         line("[" + to_string(state_id++) + "] = {");
         indent([&]() {
           for (const auto &entry : state.nonterminal_entries) {
-            line("[" + symbol_id(rules::Symbol(entry.first)) + "] = STATE(");
+            line("[" + symbol_id(Symbol(entry.first, Symbol::NonTerminal)) + "] = STATE(");
             add(to_string(entry.second));
             add("),");
           }
           for (const auto &entry : state.terminal_entries) {
-            line("[" + symbol_id(rules::Symbol(entry.first, true)) + "] = ACTIONS(");
+            line("[" + symbol_id(entry.first) + "] = ACTIONS(");
             add(to_string(add_parse_action_list_id(entry.second)));
             add("),");
           }
@@ -242,12 +352,42 @@ class CCodeGenerator {
     line();
     add_parse_action_list();
     line();
-    line("#pragma GCC diagnostic pop");
-    line();
   }
 
   void add_parser_export() {
-    line("EXPORT_LANGUAGE(ts_language_" + name + ");");
+    string language_function_name = "tree_sitter_" + name;
+    string external_scanner_name = language_function_name + "_external_scanner";
+
+    if (!syntax_grammar.external_tokens.empty()) {
+      line("void *" + external_scanner_name + "_create();");
+      line("void " + external_scanner_name + "_destroy();");
+      line("void " + external_scanner_name + "_reset(void *);");
+      line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
+      line("bool " + external_scanner_name + "_serialize(void *, TSExternalTokenState);");
+      line("void " + external_scanner_name + "_deserialize(void *, const TSExternalTokenState);");
+      line();
+    }
+
+    line("const TSLanguage *" + language_function_name + "() {");
+    indent([&]() {
+      line("GET_LANGUAGE(");
+      if (syntax_grammar.external_tokens.empty()) {
+        add(");");
+      } else {
+        indent([&]() {
+          line("(const bool *)ts_external_scanner_states,");
+          line("ts_external_scanner_symbol_map,");
+          line(external_scanner_name + "_create,");
+          line(external_scanner_name + "_destroy,");
+          line(external_scanner_name + "_reset,");
+          line(external_scanner_name + "_scan,");
+          line(external_scanner_name + "_serialize,");
+          line(external_scanner_name + "_deserialize,");
+        });
+        line(");");
+      }
+    });
+    line("}");
     line();
   }
@@ -379,22 +519,13 @@ class CCodeGenerator {
     return result;
   }
 
-  size_t add_in_progress_symbol_list_id(const set<Symbol::Index> &symbols) {
-    for (const auto &pair : in_progress_symbols) {
-      if (pair.second == symbols) {
-        return pair.first;
-      }
-    }
-
-    size_t result = next_in_progress_symbol_list_index;
-    in_progress_symbols.push_back({ result, symbols });
-    next_in_progress_symbol_list_index += 1 + symbols.size();
-    return result;
-  }
-
   // Helper functions
 
-  string symbol_id(const rules::Symbol &symbol) {
+  string external_token_id(Symbol::Index index) {
+    return "ts_external_token_" + syntax_grammar.external_tokens[index].name;
+  }
+
+  string symbol_id(const Symbol &symbol) {
     if (symbol == rules::END_OF_INPUT())
       return "ts_builtin_sym_end";
 
@@ -411,25 +542,33 @@ class CCodeGenerator {
     }
   }
 
-  string symbol_name(const rules::Symbol &symbol) {
+  string symbol_name(const Symbol &symbol) {
     if (symbol == rules::END_OF_INPUT())
       return "END";
 
     return entry_for_symbol(symbol).first;
   }
 
-  VariableType symbol_type(const rules::Symbol &symbol) {
+  VariableType symbol_type(const Symbol &symbol) {
     if (symbol == rules::END_OF_INPUT())
       return VariableTypeHidden;
 
     return entry_for_symbol(symbol).second;
   }
 
-  pair<string, VariableType> entry_for_symbol(const rules::Symbol &symbol) {
-    if (symbol.is_token) {
-      const Variable &variable = lexical_grammar.variables[symbol.index];
-      return { variable.name, variable.type };
-    } else {
-      const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
-      return { variable.name, variable.type };
+  pair<string, VariableType> entry_for_symbol(const Symbol &symbol) {
+    switch (symbol.type) {
+      case Symbol::NonTerminal: {
+        const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
+        return { variable.name, variable.type };
+      }
+      case Symbol::Terminal: {
+        const Variable &variable = lexical_grammar.variables[symbol.index];
+        return { variable.name, variable.type };
+      }
+      case Symbol::External:
+      default: {
+        const ExternalToken &token = syntax_grammar.external_tokens[symbol.index];
+        return { token.name, token.type };
+      }
     }
   }
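For context: `add_parser_export()` above emits forward declarations for six external-scanner functions that the grammar author must supply by hand. A sketch of what such a hand-written scanner could look like for a hypothetical grammar named "mylang" with a single external token (the grammar name and token are assumptions; the function signatures follow the generated declarations and `tree_sitter/parser.h`):

```cpp
#include <string.h>
#include "tree_sitter/parser.h"

enum { MY_EXTERNAL_TOKEN };  // must match the order of the "externals" array

void *tree_sitter_mylang_external_scanner_create() { return NULL; }
void tree_sitter_mylang_external_scanner_destroy(void *payload) {}
void tree_sitter_mylang_external_scanner_reset(void *payload) {}

bool tree_sitter_mylang_external_scanner_scan(void *payload, TSLexer *lexer,
                                              const bool *whitelist) {
  // Only produce a token the parse table considers valid in this state.
  if (!whitelist[MY_EXTERNAL_TOKEN]) return false;
  if (lexer->lookahead == '$') {
    lexer->advance(lexer, false);
    lexer->result_symbol = MY_EXTERNAL_TOKEN;
    return true;
  }
  return false;
}

bool tree_sitter_mylang_external_scanner_serialize(void *payload,
                                                   TSExternalTokenState state) {
  // This sketch keeps no state; real scanners pack their state into the
  // 16-byte TSExternalTokenState buffer here.
  memset(state, 0, sizeof(TSExternalTokenState));
  return true;
}

void tree_sitter_mylang_external_scanner_deserialize(
    void *payload, const TSExternalTokenState state) {}
```

The `GET_LANGUAGE(...)` call emitted above then wires these functions, together with `ts_external_scanner_states` and `ts_external_scanner_symbol_map`, into the `TSLanguage::external_scanner` struct.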
diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h
index a8955c02..0a07280c 100644
--- a/src/compiler/grammar.h
+++ b/src/compiler/grammar.h
@@ -12,6 +12,7 @@ struct Grammar {
   std::vector<std::pair<std::string, rule_ptr>> rules;
   std::vector<rule_ptr> extra_tokens;
   std::vector<std::set<std::string>> expected_conflicts;
+  std::vector<std::string> external_tokens;
 };
 
 }  // namespace tree_sitter
diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc
index 185d919b..327c0f31 100644
--- a/src/compiler/parse_grammar.cc
+++ b/src/compiler/parse_grammar.cc
@@ -210,7 +210,7 @@ ParseGrammarResult parse_grammar(const string &input) {
   string error_message;
   string name;
   Grammar grammar;
-  json_value name_json, rules_json, extras_json, conflicts_json;
+  json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json;
 
   json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
   char parse_error[json_error_max];
@@ -302,6 +302,25 @@ ParseGrammarResult parse_grammar(const string &input) {
     }
   }
 
+  external_tokens_json = grammar_json->operator[]("externals");
+  if (external_tokens_json.type != json_none) {
+    if (external_tokens_json.type != json_array) {
+      error_message = "External tokens must be an array";
+      goto error;
+    }
+
+    for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
+      json_value *token_name_json = external_tokens_json.u.array.values[i];
+      if (token_name_json->type != json_string) {
+        error_message = "External token values must be strings";
+        goto error;
+      }
+
+      string token_name = token_name_json->u.string.ptr;
+      grammar.external_tokens.push_back(token_name);
+    }
+  }
+
   json_value_free(grammar_json);
   return { name, grammar, "" };
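A hypothetical usage sketch of `parse_grammar()` with an "externals" array, assuming the `ParseGrammarResult` fields shown above (`name`, `grammar`, `error_message`) and an assumed header path:

```cpp
#include <cassert>
#include <string>
#include "compiler/parse_grammar.h"  // assumed header location

int main() {
  std::string input = R"JSON({
    "name": "mylang",
    "externals": ["raw_string"],
    "rules": { "program": { "type": "SYMBOL", "name": "raw_string" } }
  })JSON";

  auto result = tree_sitter::parse_grammar(input);
  assert(result.error_message.empty());
  // Each entry in "externals" becomes a name in Grammar::external_tokens;
  // later passes resolve those names against the grammar's rules.
  assert(result.grammar.external_tokens.size() == 1);
  return 0;
}
```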
diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc
index e6e4badd..a04eec8c 100644
--- a/src/compiler/parse_table.cc
+++ b/src/compiler/parse_table.cc
@@ -1,6 +1,7 @@
 #include "compiler/parse_table.h"
 #include 
 #include "compiler/precedence_range.h"
+#include "compiler/rules/built_in_symbols.h"
 
 namespace tree_sitter {
 
@@ -28,7 +29,7 @@ ParseAction::ParseAction()
     extra(false),
     fragile(false),
     state_index(-1),
-    symbol(Symbol(-1)),
+    symbol(rules::NONE()),
     consumed_symbol_count(0),
     production(nullptr) {}
 
@@ -43,11 +44,11 @@ ParseAction ParseAction::Accept() {
 }
 
 ParseAction ParseAction::Shift(ParseStateId state_index) {
-  return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, nullptr);
+  return ParseAction(ParseActionTypeShift, state_index, rules::NONE(), 0, nullptr);
 }
 
 ParseAction ParseAction::Recover(ParseStateId state_index) {
-  return ParseAction(ParseActionTypeRecover, state_index, Symbol(-1), 0,
+  return ParseAction(ParseActionTypeRecover, state_index, rules::NONE(), 0,
                      nullptr);
 }
 
@@ -150,9 +151,7 @@ bool ParseState::has_shift_action() const {
 set<Symbol> ParseState::expected_inputs() const {
   set<Symbol> result;
   for (auto &entry : terminal_entries)
-    result.insert(Symbol(entry.first, true));
-  for (auto &entry : nonterminal_entries)
-    result.insert(Symbol(entry.first, false));
+    result.insert(entry.first);
   return result;
 }
 
@@ -182,33 +181,24 @@ ParseStateId ParseTable::add_state() {
   return states.size() - 1;
 }
 
-ParseAction &ParseTable::set_terminal_action(ParseStateId state_id,
-                                             Symbol::Index index,
-                                             ParseAction action) {
-  states[state_id].terminal_entries[index].actions.clear();
-  return add_terminal_action(state_id, index, action);
-}
-
 ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
-                                             Symbol::Index index,
+                                             Symbol lookahead,
                                              ParseAction action) {
-  Symbol symbol(index, true);
   if (action.type == ParseActionTypeShift && action.extra)
-    symbols[symbol].extra = true;
+    symbols[lookahead].extra = true;
   else
-    symbols[symbol].structural = true;
+    symbols[lookahead].structural = true;
 
-  ParseTableEntry &entry = states[state_id].terminal_entries[index];
+  ParseTableEntry &entry = states[state_id].terminal_entries[lookahead];
   entry.actions.push_back(action);
   return *entry.actions.rbegin();
 }
 
 void ParseTable::set_nonterminal_action(ParseStateId state_id,
-                                        Symbol::Index index,
+                                        Symbol::Index lookahead,
                                         ParseStateId next_state_id) {
-  Symbol symbol(index, false);
-  symbols[symbol].structural = true;
-  states[state_id].nonterminal_entries[index] = next_state_id;
+  symbols[Symbol(lookahead, Symbol::NonTerminal)].structural = true;
+  states[state_id].nonterminal_entries[lookahead] = next_state_id;
 }
 
 static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
@@ -226,12 +216,12 @@ bool ParseTable::merge_state(size_t i, size_t j) {
     return false;
 
   for (auto &entry : state.terminal_entries) {
-    Symbol::Index index = entry.first;
+    Symbol lookahead = entry.first;
     const vector<ParseAction> &actions = entry.second.actions;
 
-    const auto &other_entry = other.terminal_entries.find(index);
+    const auto &other_entry = other.terminal_entries.find(lookahead);
     if (other_entry == other.terminal_entries.end()) {
-      if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
+      if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
         return false;
       if (actions.back().type != ParseActionTypeReduce)
         return false;
@@ -242,25 +232,25 @@ bool ParseTable::merge_state(size_t i, size_t j) {
     }
   }
 
-  set<Symbol::Index> symbols_to_merge;
+  set<Symbol> symbols_to_merge;
 
   for (auto &entry : other.terminal_entries) {
-    Symbol::Index index = entry.first;
+    Symbol lookahead = entry.first;
     const vector<ParseAction> &actions = entry.second.actions;
 
-    if (!state.terminal_entries.count(index)) {
-      if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
+    if (!state.terminal_entries.count(lookahead)) {
+      if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
         return false;
       if (actions.back().type != ParseActionTypeReduce)
         return false;
       if (!has_entry(state, entry.second))
         return false;
-      symbols_to_merge.insert(index);
+      symbols_to_merge.insert(lookahead);
     }
   }
 
-  for (const Symbol::Index &index : symbols_to_merge)
-    state.terminal_entries[index] = other.terminal_entries.find(index)->second;
+  for (const Symbol &lookahead : symbols_to_merge)
+    state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
 
   return true;
 }
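For context: keying `terminal_entries` by the full `Symbol` rather than a bare index is what lets external and internal tokens coexist in a single action map. A sketch of the consequence (a simplified `Symbol` stand-in, not the compiler's class):

```cpp
#include <iostream>
#include <map>

struct Symbol {
  enum Type { External, Terminal, NonTerminal };
  int index;
  Type type;
  bool operator<(const Symbol &o) const {
    return type != o.type ? type < o.type : index < o.index;
  }
};

int main() {
  std::map<Symbol, const char *> terminal_entries;
  // An external token and an internal token may share index 0, but they
  // occupy distinct columns because the key includes the symbol type.
  terminal_entries[{0, Symbol::Terminal}] = "SHIFT 5";
  terminal_entries[{0, Symbol::External}] = "SHIFT 9";
  std::cout << terminal_entries.size() << "\n";  // 2
}
```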
diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h
index 59eee4a8..79eec4fc 100644
--- a/src/compiler/parse_table.h
+++ b/src/compiler/parse_table.h
@@ -76,7 +76,7 @@ class ParseState {
   void each_referenced_state(std::function<void(ParseStateId *)>);
   bool has_shift_action() const;
 
-  std::map<rules::Symbol::Index, ParseTableEntry> terminal_entries;
+  std::map<rules::Symbol, ParseTableEntry> terminal_entries;
   std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
   LexStateId lex_state_id;
   size_t shift_actions_signature;
@@ -91,15 +91,14 @@ class ParseTable {
  public:
   std::set<rules::Symbol> all_symbols() const;
   ParseStateId add_state();
-  ParseAction &add_terminal_action(ParseStateId state_id, int, ParseAction);
-  ParseAction &set_terminal_action(ParseStateId state_id, int index, ParseAction);
-  void set_nonterminal_action(ParseStateId state_id, int index, ParseStateId);
+  ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
+  void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
   bool merge_state(size_t i, size_t j);
 
   std::vector<ParseState> states;
   std::map symbols;
-  std::set<rules::Symbol::Index> mergeable_symbols;
+  std::set<rules::Symbol> mergeable_symbols;
 };
 
 }  // namespace tree_sitter
diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc
index 7963e94b..331c9cea 100644
--- a/src/compiler/prepare_grammar/expand_repeats.cc
+++ b/src/compiler/prepare_grammar/expand_repeats.cc
@@ -39,7 +39,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
     rule_ptr inner_rule = apply(rule->content);
     size_t index = aux_rules.size();
     string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
-    Symbol repeat_symbol(offset + index);
+    Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
     existing_repeats.push_back({ rule->copy(), repeat_symbol });
     aux_rules.push_back(
       Variable(helper_rule_name, VariableTypeAuxiliary,
@@ -65,6 +65,7 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
   result.variables = grammar.variables;
   result.extra_tokens = grammar.extra_tokens;
   result.expected_conflicts = grammar.expected_conflicts;
+  result.external_tokens = grammar.external_tokens;
 
   ExpandRepeats expander(result.variables.size());
   for (auto &variable : result.variables)
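For context on the `extract_tokens.cc` change that follows: when some rules are converted to tokens and removed from the syntax grammar, `SymbolReplacer` shifts every surviving non-terminal's index down by the number of removed variables that preceded it. A sketch of that rule in isolation (plain ints, not the real classes):

```cpp
#include <iostream>
#include <set>

static int replace_index(int index, const std::set<int> &removed_indices) {
  int new_index = index;
  for (int removed : removed_indices)
    if (removed < index) new_index--;
  return new_index;
}

int main() {
  // Variables 1 and 3 were extracted as tokens, so variable 4 becomes 2.
  std::set<int> removed = {1, 3};
  std::cout << replace_index(4, removed) << "\n";  // 2
}
```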
diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc
index bf7ac514..9d161ca8 100644
--- a/src/compiler/prepare_grammar/extract_tokens.cc
+++ b/src/compiler/prepare_grammar/extract_tokens.cc
@@ -38,7 +38,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
   map<Symbol, Symbol> replacements;
 
   Symbol replace_symbol(const Symbol &symbol) {
-    if (symbol.is_built_in() || symbol.is_token)
+    if (!symbol.is_non_terminal())
       return symbol;
 
     auto replacement_pair = replacements.find(symbol);
@@ -49,7 +49,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
     for (const auto &pair : replacements)
       if (pair.first.index < symbol.index)
         new_index--;
-    return Symbol(new_index);
+    return Symbol(new_index, Symbol::NonTerminal);
   }
 };
 
@@ -60,14 +60,14 @@ class TokenExtractor : public rules::IdentityRuleFn {
     for (size_t i = 0; i < tokens.size(); i++)
       if (tokens[i].rule->operator==(*input)) {
         token_usage_counts[i]++;
-        return make_shared<Symbol>(i, true);
+        return make_shared<Symbol>(i, Symbol::Terminal);
       }
 
     rule_ptr rule = input->copy();
     size_t index = tokens.size();
     tokens.push_back(Variable(token_description(rule), entry_type, rule));
     token_usage_counts.push_back(1);
-    return make_shared<Symbol>(index, true);
+    return make_shared<Symbol>(index, Symbol::Terminal);
   }
 
   rule_ptr apply_to(const rules::String *rule) {
@@ -90,9 +90,8 @@ class TokenExtractor : public rules::IdentityRuleFn {
   vector<Variable> tokens;
 };
 
-static CompileError ubiq_token_err(const string &message) {
-  return CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
-                      "Not a token: " + message);
+static CompileError extra_token_error(const string &message) {
+  return CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: " + message);
 }
 
 tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
@@ -122,11 +121,10 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
   size_t i = 0;
   for (const Variable &variable : processed_variables) {
     auto symbol = variable.rule->as<Symbol>();
-    if (symbol && symbol->is_token && !symbol->is_built_in() &&
-        extractor.token_usage_counts[symbol->index] == 1) {
+    if (symbol && symbol->is_token() && extractor.token_usage_counts[symbol->index] == 1) {
       lexical_grammar.variables[symbol->index].type = variable.type;
       lexical_grammar.variables[symbol->index].name = variable.name;
-      symbol_replacer.replacements.insert({ Symbol(i), *symbol });
+      symbol_replacer.replacements.insert({ Symbol(i, Symbol::NonTerminal), *symbol });
     } else {
       syntax_grammar.variables.push_back(variable);
     }
@@ -158,7 +156,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
     bool used_elsewhere_in_grammar = false;
     for (const Variable &variable : lexical_grammar.variables) {
       if (variable.rule->operator==(*rule)) {
-        syntax_grammar.extra_tokens.insert(Symbol(i, true));
+        syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal));
        used_elsewhere_in_grammar = true;
       }
       i++;
@@ -175,17 +173,39 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
     auto symbol = rule->as<Symbol>();
     if (!symbol)
       return make_tuple(syntax_grammar, lexical_grammar,
-                        ubiq_token_err(rule->to_string()));
+                        extra_token_error(rule->to_string()));
 
     Symbol new_symbol = symbol_replacer.replace_symbol(*symbol);
-    if (!new_symbol.is_token)
+    if (new_symbol.is_non_terminal()) {
       return make_tuple(
         syntax_grammar, lexical_grammar,
-        ubiq_token_err(syntax_grammar.variables[new_symbol.index].name));
+        extra_token_error(syntax_grammar.variables[new_symbol.index].name));
+    }
 
     syntax_grammar.extra_tokens.insert(new_symbol);
   }
 
+  for (const ExternalToken &external_token : grammar.external_tokens) {
+    Symbol internal_token = symbol_replacer.replace_symbol(external_token.corresponding_internal_token);
+
+    if (internal_token.is_non_terminal()) {
+      return make_tuple(
+        syntax_grammar,
+        lexical_grammar,
+        CompileError(
+          TSCompileErrorTypeInvalidExternalToken,
+          "Name '" + external_token.name + "' cannot be used for both an external token and a non-terminal rule"
+        )
+      );
+    }
+
+    syntax_grammar.external_tokens.push_back({
+      external_token.name,
+      external_token.type,
+      internal_token
+    });
+  }
+
   return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
 }
diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc
index ddba9a5f..8ac0e33c 100644
--- a/src/compiler/prepare_grammar/flatten_grammar.cc
+++ b/src/compiler/prepare_grammar/flatten_grammar.cc
@@ -92,6 +92,7 @@ pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &gr
   SyntaxGrammar result;
   result.expected_conflicts = grammar.expected_conflicts;
   result.extra_tokens = grammar.extra_tokens;
+  result.external_tokens = grammar.external_tokens;
 
   bool is_start = true;
   for (const Variable &variable : grammar.variables) {
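For context: the new loop in `extract_tokens` enforces that an external token's corresponding internal rule must have become a token (or be absent); a name that still refers to a surviving non-terminal is a compile error. A simplified sketch of just that rule:

```cpp
#include <iostream>
#include <string>

enum SymbolType { External, Terminal, NonTerminal, None };

// Returns an error message, or "" when the external token is acceptable
// (mirroring TSCompileErrorTypeInvalidExternalToken above).
static std::string check_external(const std::string &name, SymbolType resolved) {
  if (resolved == NonTerminal)
    return "Name '" + name + "' cannot be used for both an external token "
           "and a non-terminal rule";
  return "";
}

int main() {
  std::cout << check_external("raw_string", Terminal).empty() << "\n";   // 1
  std::cout << check_external("expression", NonTerminal) << "\n";        // error
}
```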
diff --git a/src/compiler/prepare_grammar/initial_syntax_grammar.h b/src/compiler/prepare_grammar/initial_syntax_grammar.h
index fe1ff37d..1ac319cb 100644
--- a/src/compiler/prepare_grammar/initial_syntax_grammar.h
+++ b/src/compiler/prepare_grammar/initial_syntax_grammar.h
@@ -1,13 +1,12 @@
 #ifndef COMPILER_PREPARE_GRAMMAR_INITIAL_SYNTAX_GRAMMAR_H_
 #define COMPILER_PREPARE_GRAMMAR_INITIAL_SYNTAX_GRAMMAR_H_
 
-#include 
-#include 
 #include 
+#include 
 #include "tree_sitter/compiler.h"
 #include "compiler/rules/symbol.h"
-#include "compiler/variable.h"
 #include "compiler/syntax_grammar.h"
+#include "compiler/variable.h"
 
 namespace tree_sitter {
 namespace prepare_grammar {
@@ -16,6 +15,7 @@ struct InitialSyntaxGrammar {
   std::vector<Variable> variables;
   std::set<rules::Symbol> extra_tokens;
   std::set<ConflictSet> expected_conflicts;
+  std::vector<ExternalToken> external_tokens;
 };
 
 }  // namespace prepare_grammar
diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc
index cd01719c..0786982b 100644
--- a/src/compiler/prepare_grammar/intern_symbols.cc
+++ b/src/compiler/prepare_grammar/intern_symbols.cc
@@ -8,6 +8,7 @@
 #include "compiler/rules/blank.h"
 #include "compiler/rules/named_symbol.h"
 #include "compiler/rules/symbol.h"
+#include "compiler/rules/built_in_symbols.h"
 
 namespace tree_sitter {
 namespace prepare_grammar {
@@ -17,8 +18,9 @@ using std::vector;
 using std::set;
 using std::pair;
 using std::make_shared;
+using rules::Symbol;
 
-class InternSymbols : public rules::IdentityRuleFn {
+class SymbolInterner : public rules::IdentityRuleFn {
   using rules::IdentityRuleFn::apply_to;
 
   rule_ptr apply_to(const rules::NamedSymbol *rule) {
@@ -34,11 +36,14 @@ class SymbolInterner : public rules::IdentityRuleFn {
   std::shared_ptr<Symbol> symbol_for_rule_name(string rule_name) {
     for (size_t i = 0; i < grammar.rules.size(); i++)
       if (grammar.rules[i].first == rule_name)
-        return make_shared<Symbol>(i);
+        return make_shared<Symbol>(i, Symbol::NonTerminal);
+    for (size_t i = 0; i < grammar.external_tokens.size(); i++)
+      if (grammar.external_tokens[i] == rule_name)
+        return make_shared<Symbol>(i, Symbol::External);
     return nullptr;
   }
 
-  explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
+  explicit SymbolInterner(const Grammar &grammar) : grammar(grammar) {}
   const Grammar grammar;
   string missing_rule_name;
 };
@@ -50,16 +55,35 @@ CompileError missing_rule_error(string rule_name) {
 
 pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
   InternedGrammar result;
-  InternSymbols interner(grammar);
+
+  for (auto &external_token_name : grammar.external_tokens) {
+    Symbol corresponding_internal_token = rules::NONE();
+    for (size_t i = 0, n = grammar.rules.size(); i < n; i++) {
+      if (grammar.rules[i].first == external_token_name) {
+        corresponding_internal_token = Symbol(i, Symbol::NonTerminal);
+        break;
+      }
+    }
+
+    result.external_tokens.push_back(ExternalToken{
+      external_token_name,
+      external_token_name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
+      corresponding_internal_token
+    });
+  }
+
+  SymbolInterner interner(grammar);
 
   for (auto &pair : grammar.rules) {
     auto new_rule = interner.apply(pair.second);
     if (!interner.missing_rule_name.empty())
       return { result, missing_rule_error(interner.missing_rule_name) };
 
-    result.variables.push_back(Variable(
-      pair.first, pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
-      new_rule));
+    result.variables.push_back(Variable{
+      pair.first,
+      pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
+      new_rule
+    });
   }
 
   for (auto &rule : grammar.extra_tokens) {
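For context: `SymbolInterner::symbol_for_rule_name` above resolves names in two passes, with grammar rules taking precedence over external tokens. A sketch of that resolution order (simplified stand-ins, not the real classes):

```cpp
#include <iostream>
#include <string>
#include <vector>

enum SymbolType { External, Terminal, NonTerminal };

static int resolve(const std::string &name,
                   const std::vector<std::string> &rule_names,
                   const std::vector<std::string> &external_names,
                   SymbolType *type) {
  for (size_t i = 0; i < rule_names.size(); i++)
    if (rule_names[i] == name) { *type = NonTerminal; return (int)i; }
  for (size_t i = 0; i < external_names.size(); i++)
    if (external_names[i] == name) { *type = External; return (int)i; }
  return -1;  // missing rule: reported as a compile error
}

int main() {
  SymbolType type;
  // "string" is declared both as a rule and as an external token: the rule
  // wins here, and the external token separately records that rule as its
  // corresponding internal token (see the loop in intern_symbols above).
  int index = resolve("string", {"program", "string"}, {"string"}, &type);
  std::cout << index << " " << (type == NonTerminal) << "\n";  // 1 1
}
```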
diff --git a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h
index c08c07dd..c8a14647 100644
--- a/src/compiler/prepare_grammar/interned_grammar.h
+++ b/src/compiler/prepare_grammar/interned_grammar.h
@@ -15,6 +15,7 @@ struct InternedGrammar {
   std::vector<Variable> variables;
   std::vector<rule_ptr> extra_tokens;
   std::set<ConflictSet> expected_conflicts;
+  std::vector<ExternalToken> external_tokens;
 };
 
 }  // namespace prepare_grammar
diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc
index a7a877ec..0fe45f68 100644
--- a/src/compiler/rules/built_in_symbols.cc
+++ b/src/compiler/rules/built_in_symbols.cc
@@ -4,15 +4,15 @@ namespace tree_sitter {
 namespace rules {
 
 Symbol END_OF_INPUT() {
-  return Symbol(-1, true);
+  return Symbol(-1, Symbol::Terminal);
 }
 
 Symbol START() {
-  return Symbol(-2);
+  return Symbol(-2, Symbol::NonTerminal);
 }
 
 Symbol NONE() {
-  return Symbol(-3);
+  return Symbol(-3, Symbol::Type(-1));
 }
 
 }  // namespace rules
"token" : "sym"; - return "(" + name + " " + std::to_string(index) + ")"; + switch (type) { + case Symbol::Terminal: + return "(terminal " + std::to_string(index) + ")"; + case Symbol::NonTerminal: + return "(non-terminal " + std::to_string(index) + ")"; + case Symbol::External: + return "(external " + std::to_string(index) + ")"; + default: + return "(none)"; + } } bool Symbol::operator<(const Symbol &other) const { - if (is_token && !other.is_token) + if (type < other.type) return true; - if (!is_token && other.is_token) + if (other.type < type) return false; return (index < other.index); } @@ -56,6 +62,18 @@ bool Symbol::is_built_in() const { return is_built_in(index); } +bool Symbol::is_token() const { + return type == Symbol::Terminal; +} + +bool Symbol::is_external() const { + return type == Symbol::External; +} + +bool Symbol::is_non_terminal() const { + return type == Symbol::NonTerminal; +} + void Symbol::accept(Visitor *visitor) const { visitor->visit(this); } diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index 4ae9ece3..a963433c 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -11,9 +11,13 @@ class Symbol : public Rule { public: typedef int Index; + typedef enum { + External, + Terminal, + NonTerminal, + } Type; - explicit Symbol(Index index); - Symbol(Index index, bool is_token); + Symbol(Index index, Type type); bool operator==(const Symbol &other) const; bool operator==(const Rule &other) const; @@ -26,9 +30,12 @@ class Symbol : public Rule { bool operator<(const Symbol &other) const; static bool is_built_in(Index); bool is_built_in() const; + bool is_token() const; + bool is_external() const; + bool is_non_terminal() const; Index index; - bool is_token; + Type type; }; } // namespace rules diff --git a/src/compiler/rules/visitor.h b/src/compiler/rules/visitor.h index b8301183..c75e31dc 100644 --- a/src/compiler/rules/visitor.h +++ b/src/compiler/rules/visitor.h @@ -16,6 +16,7 @@ class String; class Symbol; class Pattern; class Metadata; +class ExternalToken; class Visitor { public: @@ -29,6 +30,7 @@ class Visitor { virtual void visit(const String *rule) = 0; virtual void visit(const NamedSymbol *rule) = 0; virtual void visit(const Symbol *rule) = 0; + virtual void visit(const ExternalToken *rule) = 0; virtual ~Visitor(); }; @@ -86,6 +88,10 @@ class RuleFn : private Visitor { return default_apply((const Rule *)rule); } + virtual T apply_to(const ExternalToken *rule) { + return default_apply((const Rule *)rule); + } + void visit(const Blank *rule) { value_ = apply_to(rule); } @@ -126,6 +132,10 @@ class RuleFn : private Visitor { value_ = apply_to(rule); } + void visit(const ExternalToken *rule) { + value_ = apply_to(rule); + } + private: T value_; }; @@ -170,6 +180,9 @@ class RuleFn : private Visitor { virtual void apply_to(const Symbol *rule) { return default_apply((const Rule *)rule); } + virtual void apply_to(const ExternalToken *rule) { + return default_apply((const Rule *)rule); + } void visit(const Blank *rule) { apply_to(rule); @@ -201,6 +214,9 @@ class RuleFn : private Visitor { void visit(const Symbol *rule) { apply_to(rule); } + void visit(const ExternalToken *rule) { + apply_to(rule); + } }; class IdentityRuleFn : public RuleFn { diff --git a/src/compiler/syntax_grammar.cc b/src/compiler/syntax_grammar.cc index 706ec828..aa3074e8 100644 --- a/src/compiler/syntax_grammar.cc +++ b/src/compiler/syntax_grammar.cc @@ -13,8 +13,6 @@ using std::pair; using std::vector; using std::set; -static const vector 
diff --git a/src/compiler/syntax_grammar.cc b/src/compiler/syntax_grammar.cc
index 706ec828..aa3074e8 100644
--- a/src/compiler/syntax_grammar.cc
+++ b/src/compiler/syntax_grammar.cc
@@ -13,8 +13,6 @@ using std::pair;
 using std::vector;
 using std::set;
 
-static const vector<Production> NO_PRODUCTIONS;
-
 SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
                                const vector<Production> &productions)
     : name(name), productions(productions), type(type) {}
@@ -23,18 +21,14 @@ ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
                                rules::Associativity associativity)
     : symbol(symbol), precedence(precedence), associativity(associativity) {}
 
+bool ExternalToken::operator==(const ExternalToken &other) const {
+  return name == other.name && type == other.type &&
+    corresponding_internal_token == other.corresponding_internal_token;
+}
+
 bool ProductionStep::operator==(const ProductionStep &other) const {
   return symbol == other.symbol && precedence == other.precedence &&
          associativity == other.associativity;
 }
 
-const vector<Production> &SyntaxGrammar::productions(
-  const rules::Symbol &symbol) const {
-  if (symbol.is_built_in() || symbol.is_token) {
-    return NO_PRODUCTIONS;
-  } else {
-    return variables[symbol.index].productions;
-  }
-}
-
 }  // namespace tree_sitter
diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h
index 89745fa5..e3af8f28 100644
--- a/src/compiler/syntax_grammar.h
+++ b/src/compiler/syntax_grammar.h
@@ -10,6 +10,14 @@
 
 namespace tree_sitter {
 
+struct ExternalToken {
+  std::string name;
+  VariableType type;
+  rules::Symbol corresponding_internal_token;
+
+  bool operator==(const ExternalToken &) const;
+};
+
 struct ProductionStep {
   ProductionStep(const rules::Symbol &, int, rules::Associativity);
   bool operator==(const ProductionStep &) const;
@@ -33,11 +41,10 @@ struct SyntaxVariable {
 typedef std::set<rules::Symbol> ConflictSet;
 
 struct SyntaxGrammar {
-  const std::vector<Production> &productions(const rules::Symbol &) const;
-
   std::vector<SyntaxVariable> variables;
   std::set<rules::Symbol> extra_tokens;
   std::set<ConflictSet> expected_conflicts;
+  std::vector<ExternalToken> external_tokens;
 };
 
 }  // namespace tree_sitter
diff --git a/src/runtime/document.c b/src/runtime/document.c
index 65f9e435..8c1eb779 100644
--- a/src/runtime/document.c
+++ b/src/runtime/document.c
@@ -36,8 +36,9 @@ const TSLanguage *ts_document_language(TSDocument *self) {
 }
 
 void ts_document_set_language(TSDocument *self, const TSLanguage *language) {
+  if (language->version != TREE_SITTER_LANGUAGE_VERSION) return;
   ts_document_invalidate(self);
-  self->parser.language = language;
+  parser_set_language(&self->parser, language);
   if (self->tree) {
     ts_tree_release(self->tree);
     self->tree = NULL;
diff --git a/src/runtime/language.c b/src/runtime/language.c
index af08bb38..7f1bdefa 100644
--- a/src/runtime/language.c
+++ b/src/runtime/language.c
@@ -34,6 +34,10 @@ uint32_t ts_language_symbol_count(const TSLanguage *language) {
   return language->symbol_count;
 }
 
+uint32_t ts_language_version(const TSLanguage *language) {
+  return language->version;
+}
+
 TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language,
                                              TSSymbol symbol) {
   if (symbol == ts_builtin_sym_error)
diff --git a/src/runtime/language.h b/src/runtime/language.h
index a4f44b11..20e6ec5d 100644
--- a/src/runtime/language.h
+++ b/src/runtime/language.h
@@ -19,6 +19,10 @@ void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry
 
 TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
 
+static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
+  return 0 < symbol && symbol < self->external_token_count + 1;
+}
+
 static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
                                                        TSStateId state,
                                                        TSSymbol symbol,
@@ -49,6 +53,16 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
} } +static inline const bool * +ts_language_enabled_external_tokens(const TSLanguage *self, + unsigned external_scanner_state) { + if (external_scanner_state == 0) { + return NULL; + } else { + return self->external_scanner.states + self->external_token_count * external_scanner_state; + } +} + #ifdef __cplusplus } #endif diff --git a/src/runtime/length.h b/src/runtime/length.h index 2477bbe1..352215d2 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -21,12 +21,11 @@ static inline void length_set_unknown_chars(Length *self) { } static inline Length length_min(Length len1, Length len2) { - return (len1.chars < len2.chars) ? len1 : len2; + return (len1.bytes < len2.bytes) ? len1 : len2; } static inline Length length_add(Length len1, Length len2) { Length result; - result.chars = len1.chars + len2.chars; result.bytes = len1.bytes + len2.bytes; result.extent = point_add(len1.extent, len2.extent); @@ -57,10 +56,4 @@ static inline Length length_zero() { return (Length){ 0, 0, {0, 0} }; } -static inline bool length_eq(Length self, Length other) { - return self.bytes == other.bytes && self.chars == other.chars && - self.extent.row == other.extent.row && - self.extent.column == other.extent.column; -} - #endif diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 32910935..902c2d3b 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -11,11 +11,8 @@ self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer); \ } -#define LOG_LOOKAHEAD() \ - LOG((0 < self->data.lookahead && self->data.lookahead < 256) \ - ? "lookahead char:'%c'" \ - : "lookahead char:%d", \ - self->data.lookahead); +#define LOG_CHARACTER(message, character) \ + LOG(character < 255 ? message " character:'%c'" : message " character:%d", character) static const char empty_chunk[2] = { 0, 0 }; @@ -42,11 +39,9 @@ static void ts_lexer__get_lookahead(Lexer *self) { utf8proc_iterate(chunk, size, &self->data.lookahead); else self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead); - - LOG_LOOKAHEAD(); } -static void ts_lexer__advance(void *payload, TSStateId state, bool skip) { +static void ts_lexer__advance(void *payload, bool skip) { Lexer *self = (Lexer *)payload; if (self->chunk == empty_chunk) return; @@ -63,10 +58,10 @@ static void ts_lexer__advance(void *payload, TSStateId state, bool skip) { } if (skip) { - LOG("skip_separator state:%d", state); + LOG_CHARACTER("skip", self->data.lookahead); self->token_start_position = self->current_position; } else { - LOG("advance state:%d", state); + LOG_CHARACTER("consume", self->data.lookahead); } if (self->current_position.bytes >= self->chunk_start + self->chunk_size) @@ -93,6 +88,7 @@ void ts_lexer_init(Lexer *self) { .payload = NULL, .log = NULL }, + .last_external_token_state = NULL, }; ts_lexer_reset(self, length_zero()); } @@ -115,17 +111,16 @@ static inline void ts_lexer__reset(Lexer *self, Length position) { void ts_lexer_set_input(Lexer *self, TSInput input) { self->input = input; ts_lexer__reset(self, length_zero()); + self->last_external_token_state = NULL; } void ts_lexer_reset(Lexer *self, Length position) { - if (!length_eq(position, self->current_position)) + if (position.bytes != self->current_position.bytes) { ts_lexer__reset(self, position); - return; + } } -void ts_lexer_start(Lexer *self, TSStateId lex_state) { - LOG("start_lex state:%d, pos:%u", lex_state, self->current_position.chars); - +void ts_lexer_start(Lexer *self) { self->token_start_position = self->current_position; self->data.result_symbol = 0; diff 
--git a/src/runtime/lexer.h b/src/runtime/lexer.h index 1b047e5b..67470f6f 100644 --- a/src/runtime/lexer.h +++ b/src/runtime/lexer.h @@ -25,12 +25,13 @@ typedef struct { TSInput input; TSLogger logger; char debug_buffer[TS_DEBUG_BUFFER_SIZE]; + const TSExternalTokenState *last_external_token_state; } Lexer; void ts_lexer_init(Lexer *); void ts_lexer_set_input(Lexer *, TSInput); void ts_lexer_reset(Lexer *, Length); -void ts_lexer_start(Lexer *, TSStateId); +void ts_lexer_start(Lexer *); #ifdef __cplusplus } diff --git a/src/runtime/node.c b/src/runtime/node.c index d5bcb1a0..15e2d5cf 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -39,7 +39,15 @@ static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { static inline uint32_t ts_node__relevant_child_count(TSNode self, bool include_anonymous) { const Tree *tree = ts_node__tree(self); - return include_anonymous ? tree->visible_child_count : tree->named_child_count; + if (tree->child_count > 0) { + if (include_anonymous) { + return tree->visible_child_count; + } else { + return tree->named_child_count; + } + } else { + return 0; + } } static inline TSNode ts_node__direct_parent(TSNode self, uint32_t *index) { @@ -324,11 +332,21 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) { } uint32_t ts_node_child_count(TSNode self) { - return ts_node__tree(self)->visible_child_count; + const Tree *tree = ts_node__tree(self); + if (tree->child_count > 0) { + return tree->visible_child_count; + } else { + return 0; + } } uint32_t ts_node_named_child_count(TSNode self) { - return ts_node__tree(self)->named_child_count; + const Tree *tree = ts_node__tree(self); + if (tree->child_count > 0) { + return tree->named_child_count; + } else { + return 0; + } } TSNode ts_node_next_sibling(TSNode self) { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 2f5879a4..191354a3 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -109,28 +109,6 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { return did_break_down; } -static void parser__pop_reusable_node(ReusableNode *reusable_node) { - reusable_node->byte_index += ts_tree_total_bytes(reusable_node->tree); - while (reusable_node->tree) { - Tree *parent = reusable_node->tree->context.parent; - uint32_t next_index = reusable_node->tree->context.index + 1; - if (parent && parent->child_count > next_index) { - reusable_node->tree = parent->children[next_index]; - return; - } - reusable_node->tree = parent; - } -} - -static bool parser__breakdown_reusable_node(ReusableNode *reusable_node) { - if (reusable_node->tree->child_count == 0) { - return false; - } else { - reusable_node->tree = reusable_node->tree->children[0]; - return true; - } -} - static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead, TSStateId state, ReusableNode *reusable_node) { @@ -140,12 +118,11 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead, reusable_node->tree->fragile_left || reusable_node->tree->fragile_right)) { LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol)); - parser__breakdown_reusable_node(reusable_node); + reusable_node_breakdown(reusable_node); result = true; } if (result) { - LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol)); ts_tree_release(*lookahead); ts_tree_retain(*lookahead = reusable_node->tree); } @@ -153,16 +130,20 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead, return result; } -static void 
parser__pop_reusable_node_leaf(ReusableNode *reusable_node) { - while (reusable_node->tree->child_count > 0) - reusable_node->tree = reusable_node->tree->children[0]; - parser__pop_reusable_node(reusable_node); +static inline bool ts_lex_mode_eq(TSLexMode self, TSLexMode other) { + return self.lex_state == other.lex_state && + self.external_lex_state == other.external_lex_state; } static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree, TableEntry *table_entry) { - if (tree->first_leaf.lex_state == self->language->lex_states[state]) + TSLexMode current_lex_mode = self->language->lex_modes[state]; + if (ts_lex_mode_eq(tree->first_leaf.lex_mode, current_lex_mode)) return true; + if (current_lex_mode.external_lex_state != 0) + return false; + if (tree->size.bytes == 0) + return false; if (!table_entry->is_reusable) return false; if (!table_entry->depends_on_lookahead) @@ -208,28 +189,76 @@ static bool parser__condense_stack(Parser *self) { return result; } -static Tree *parser__lex(Parser *self, TSStateId parse_state) { - TSStateId start_state = self->language->lex_states[parse_state]; - TSStateId current_state = start_state; - Length start_position = self->lexer.current_position; - LOG("lex state:%d", start_state); +static void parser__restore_external_scanner(Parser *self, StackVersion version) { + const TSExternalTokenState *state = ts_stack_external_token_state(self->stack, version); + if (self->lexer.last_external_token_state != state) { + LOG("restore_external_scanner"); + self->lexer.last_external_token_state = state; + if (state) { + self->language->external_scanner.deserialize( + self->external_scanner_payload, + *state + ); + } else { + self->language->external_scanner.reset(self->external_scanner_payload); + } + } +} +static Tree *parser__lex(Parser *self, StackVersion version) { + TSStateId parse_state = ts_stack_top_state(self->stack, version); + Length start_position = ts_stack_top_position(self->stack, version); + TSLexMode lex_mode = self->language->lex_modes[parse_state]; + const bool *valid_external_tokens = ts_language_enabled_external_tokens( + self->language, + lex_mode.external_lex_state + ); + + bool found_external_token = false; + bool found_error = false; bool skipped_error = false; int32_t first_error_character = 0; Length error_start_position, error_end_position; + ts_lexer_reset(&self->lexer, start_position); - ts_lexer_start(&self->lexer, start_state); + for (;;) { + Length current_position = self->lexer.current_position; - while (!self->language->lex_fn(&self->lexer.data, current_state)) { - if (current_state != ERROR_STATE) { + if (valid_external_tokens) { + LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state, + current_position.extent.row, current_position.extent.column); + parser__restore_external_scanner(self, version); + ts_lexer_start(&self->lexer); + if (self->language->external_scanner.scan(self->external_scanner_payload, + &self->lexer.data, valid_external_tokens)) { + found_external_token = true; + break; + } + ts_lexer_reset(&self->lexer, current_position); + } + + LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state, + current_position.extent.row, current_position.extent.column); + ts_lexer_start(&self->lexer); + if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) { + break; + } + + if (!found_error) { LOG("retry_in_error_mode"); - current_state = ERROR_STATE; + found_error = true; + lex_mode = self->language->lex_modes[ERROR_STATE]; + valid_external_tokens = 
ts_language_enabled_external_tokens( + self->language, + lex_mode.external_lex_state + ); ts_lexer_reset(&self->lexer, start_position); - ts_lexer_start(&self->lexer, current_state); continue; } if (!skipped_error) { + LOG("skip_unrecognized_character"); + skipped_error = true; error_start_position = self->lexer.token_start_position; first_error_character = self->lexer.data.lookahead; } @@ -239,15 +268,13 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) { self->lexer.data.result_symbol = ts_builtin_sym_error; break; } - self->lexer.data.advance(&self->lexer, ERROR_STATE, false); + self->lexer.data.advance(&self->lexer, false); } - skipped_error = true; error_end_position = self->lexer.current_position; } Tree *result; - if (skipped_error) { Length padding = length_sub(error_start_position, start_position); Length size = length_sub(error_end_position, error_start_position); @@ -255,20 +282,28 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) { result = ts_tree_make_error(size, padding, first_error_character); } else { TSSymbol symbol = self->lexer.data.result_symbol; - Length padding = - length_sub(self->lexer.token_start_position, start_position); - Length size = length_sub(self->lexer.current_position, - self->lexer.token_start_position); - result = - ts_tree_make_leaf(symbol, padding, size, - ts_language_symbol_metadata(self->language, symbol)); + if (found_external_token) { + symbol = self->language->external_scanner.symbol_map[symbol]; + } + + Length padding = length_sub(self->lexer.token_start_position, start_position); + Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position); + TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol); + result = ts_tree_make_leaf(symbol, padding, size, metadata); + + if (found_external_token) { + result->has_external_tokens = true; + result->has_external_token_state = true; + memset(result->external_token_state, 0, sizeof(TSExternalTokenState)); + self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state); + self->lexer.last_external_token_state = &result->external_token_state; + } } - if (!result) - return NULL; - result->parse_state = parse_state; - result->first_leaf.lex_state = start_state; + result->first_leaf.lex_mode = lex_mode; + + LOG("lexed_lookahead sym:%s, size:%u", SYM_NAME(result->symbol), result->size.bytes); return result; } @@ -277,21 +312,31 @@ static void parser__clear_cached_token(Parser *self) { self->cached_token = NULL; } +static inline bool ts_external_token_state_eq(const TSExternalTokenState *self, + const TSExternalTokenState *other) { + if (self == other) { + return true; + } else if (!self || !other) { + return false; + } else { + return memcmp(self, other, sizeof(TSExternalTokenState)) == 0; + } +} + static Tree *parser__get_lookahead(Parser *self, StackVersion version, - ReusableNode *reusable_node) { + ReusableNode *reusable_node, + bool *is_fresh) { Length position = ts_stack_top_position(self->stack, version); while (reusable_node->tree) { if (reusable_node->byte_index > position.bytes) { - LOG("before_reusable sym:%s, pos:%u", - SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index); + LOG("before_reusable_node sym:%s", SYM_NAME(reusable_node->tree->symbol)); break; } if (reusable_node->byte_index < position.bytes) { - LOG("past_reusable sym:%s, pos:%u", - SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index); - parser__pop_reusable_node(reusable_node); + 
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol)); + reusable_node_pop(reusable_node); continue; } @@ -299,8 +344,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, LOG("cant_reuse_changed tree:%s, size:%u", SYM_NAME(reusable_node->tree->symbol), reusable_node->tree->size.bytes); - if (!parser__breakdown_reusable_node(reusable_node)) { - parser__pop_reusable_node(reusable_node); + if (!reusable_node_breakdown(reusable_node)) { + reusable_node_pop(reusable_node); parser__breakdown_top_of_stack(self, version); } continue; @@ -310,8 +355,21 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, LOG("cant_reuse_error tree:%s, size:%u", SYM_NAME(reusable_node->tree->symbol), reusable_node->tree->size.bytes); - if (!parser__breakdown_reusable_node(reusable_node)) { - parser__pop_reusable_node(reusable_node); + if (!reusable_node_breakdown(reusable_node)) { + reusable_node_pop(reusable_node); + parser__breakdown_top_of_stack(self, version); + } + continue; + } + + if (!ts_external_token_state_eq( + reusable_node->preceding_external_token_state, + ts_stack_external_token_state(self->stack, version))) { + LOG("cant_reuse_external_tokens tree:%s, size:%u", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.bytes); + if (!reusable_node_breakdown(reusable_node)) { + reusable_node_pop(reusable_node); parser__breakdown_top_of_stack(self, version); } continue; @@ -327,9 +385,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, return self->cached_token; } - ts_lexer_reset(&self->lexer, position); - TSStateId parse_state = ts_stack_top_state(self->stack, version); - return parser__lex(self, parse_state); + *is_fresh = true; + return parser__lex(self, version); } static bool parser__select_tree(Parser *self, Tree *left, Tree *right) { @@ -407,6 +464,10 @@ static void parser__shift(Parser *self, StackVersion version, TSStateId state, bool is_pending = lookahead->child_count > 0; ts_stack_push(self->stack, version, lookahead, is_pending, state); + if (lookahead->has_external_token_state) { + ts_stack_set_external_token_state( + self->stack, version, ts_tree_last_external_token_state(lookahead)); + } ts_tree_release(lookahead); } @@ -729,9 +790,13 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) { LOG("new_parse"); } + if (self->language->external_scanner.reset) { + self->language->external_scanner.reset(self->external_scanner_payload); + } + ts_lexer_set_input(&self->lexer, input); ts_stack_clear(self->stack); - self->reusable_node = (ReusableNode){ previous_tree, 0 }; + self->reusable_node = reusable_node_new(previous_tree); self->cached_token = NULL; self->finished_tree = NULL; } @@ -950,30 +1015,29 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state, static void parser__advance(Parser *self, StackVersion version, ReusableNode *reusable_node) { bool validated_lookahead = false; - Tree *lookahead = parser__get_lookahead(self, version, reusable_node); + Tree *lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead); for (;;) { TSStateId state = ts_stack_top_state(self->stack, version); TableEntry table_entry; - ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, - &table_entry); + ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry); if (!validated_lookahead) { if (!parser__can_reuse(self, state, lookahead, &table_entry)) { - if (lookahead == 
@@ -729,9 +790,13 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) {
     LOG("new_parse");
   }
 
+  if (self->language->external_scanner.reset) {
+    self->language->external_scanner.reset(self->external_scanner_payload);
+  }
+
   ts_lexer_set_input(&self->lexer, input);
   ts_stack_clear(self->stack);
-  self->reusable_node = (ReusableNode){ previous_tree, 0 };
+  self->reusable_node = reusable_node_new(previous_tree);
   self->cached_token = NULL;
   self->finished_tree = NULL;
 }
@@ -950,30 +1015,29 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
 
 static void parser__advance(Parser *self, StackVersion version,
                             ReusableNode *reusable_node) {
   bool validated_lookahead = false;
-  Tree *lookahead = parser__get_lookahead(self, version, reusable_node);
+  Tree *lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
 
   for (;;) {
     TSStateId state = ts_stack_top_state(self->stack, version);
 
     TableEntry table_entry;
-    ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol,
-                            &table_entry);
+    ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry);
 
     if (!validated_lookahead) {
       if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
-        if (lookahead == reusable_node->tree)
-          parser__pop_reusable_node_leaf(reusable_node);
-        else
+        if (lookahead == reusable_node->tree) {
+          reusable_node_pop_leaf(reusable_node);
+        } else {
           parser__clear_cached_token(self);
+        }
 
         ts_tree_release(lookahead);
-        lookahead = parser__get_lookahead(self, version, reusable_node);
+        lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
         continue;
       }
 
       validated_lookahead = true;
-      LOG("lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol),
-          lookahead->size.bytes);
+      LOG("reused_lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol), lookahead->size.bytes);
     }
 
     bool reduction_stopped_at_error = false;
@@ -996,12 +1060,11 @@ static void parser__advance(Parser *self, StackVersion version,
     }
 
     if (lookahead->child_count > 0) {
-      if (parser__breakdown_lookahead(self, &lookahead, state,
-                                      reusable_node)) {
+      if (parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
         if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
-          parser__pop_reusable_node(reusable_node);
+          reusable_node_pop(reusable_node);
           ts_tree_release(lookahead);
-          lookahead = parser__get_lookahead(self, version, reusable_node);
+          lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
         }
       }
 
@@ -1011,7 +1074,7 @@
       parser__shift(self, version, next_state, lookahead, extra);
 
       if (lookahead == reusable_node->tree)
-        parser__pop_reusable_node(reusable_node);
+        reusable_node_pop(reusable_node);
 
       ts_tree_release(lookahead);
       return;
@@ -1053,7 +1116,7 @@
       case TSParseActionTypeRecover: {
         while (lookahead->child_count > 0) {
-          parser__breakdown_reusable_node(reusable_node);
+          reusable_node_breakdown(reusable_node);
           ts_tree_release(lookahead);
           lookahead = reusable_node->tree;
           ts_tree_retain(lookahead);
@@ -1061,7 +1124,7 @@
         parser__recover(self, version, action.params.to_state, lookahead);
 
         if (lookahead == reusable_node->tree)
-          reusable_node_pop(reusable_node);
+          reusable_node_pop(reusable_node);
 
         ts_tree_release(lookahead);
         return;
       }
@@ -1103,6 +1166,18 @@ bool parser_init(Parser *self) {
   return true;
 }
 
+void parser_set_language(Parser *self, const TSLanguage *language) {
+  if (self->external_scanner_payload && self->language->external_scanner.destroy)
+    self->language->external_scanner.destroy(self->external_scanner_payload);
+
+  if (language && language->external_scanner.create)
+    self->external_scanner_payload = language->external_scanner.create();
+  else
+    self->external_scanner_payload = NULL;
+
+  self->language = language;
+}
+
 void parser_destroy(Parser *self) {
   if (self->stack)
     ts_stack_delete(self->stack);
@@ -1112,6 +1187,7 @@ void parser_destroy(Parser *self) {
     array_delete(&self->tree_path1);
   if (self->tree_path2.contents)
     array_delete(&self->tree_path2);
+  parser_set_language(self, NULL);
 }
 
 Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree) {
@@ -1128,15 +1204,14 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree) {
 
     while (!ts_stack_is_halted(self->stack, version)) {
       position = ts_stack_top_position(self->stack, version).chars;
-      if (position > last_position ||
-          (version > 0 && position == last_position))
+      if (position > last_position || (version > 0 && position == last_position))
         break;
 
       LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
          version, ts_stack_version_count(self->stack),
          ts_stack_top_state(self->stack, version),
-          ts_stack_top_position(self->stack, version).extent.row + 1,
-          ts_stack_top_position(self->stack, version).extent.column + 1);
+          ts_stack_top_position(self->stack, version).extent.row,
+          ts_stack_top_position(self->stack, version).extent.column);
 
       parser__advance(self, version, &reusable_node);
       LOG_STACK();
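The new parser_set_language pairs every language switch with teardown of the previous external scanner payload, and parser_destroy releases the final payload by passing NULL. A usage sketch, assuming hypothetical language constructors (following this repo's naming convention) and a zero-initialized Parser:

    #include <string.h>
    #include "runtime/parser.h"

    // Hypothetical language constructors; not part of this patch.
    const TSLanguage *tree_sitter_python();
    const TSLanguage *tree_sitter_c();

    int main() {
      Parser parser;
      memset(&parser, 0, sizeof(parser));  // assumes the runtime hands parser_init zeroed memory
      parser_init(&parser);
      parser_set_language(&parser, tree_sitter_python());  // external_scanner.create runs here
      parser_set_language(&parser, tree_sitter_c());       // destroys the Python payload first
      parser_destroy(&parser);  // ends by calling parser_set_language(&parser, NULL)
      return 0;
    }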
diff --git a/src/runtime/parser.h b/src/runtime/parser.h
index 41512e12..a7b8dde3 100644
--- a/src/runtime/parser.h
+++ b/src/runtime/parser.h
@@ -8,13 +8,9 @@ extern "C" {
 #include "runtime/stack.h"
 #include "runtime/array.h"
 #include "runtime/lexer.h"
+#include "runtime/reusable_node.h"
 #include "runtime/reduce_action.h"
 
-typedef struct {
-  Tree *tree;
-  uint32_t byte_index;
-} ReusableNode;
-
 typedef struct {
   Lexer lexer;
   Stack *stack;
@@ -29,11 +25,14 @@ typedef struct {
   ReusableNode reusable_node;
   TreePath tree_path1;
   TreePath tree_path2;
+  void *external_scanner_payload;
+  Tree *last_external_token;
 } Parser;
 
 bool parser_init(Parser *);
 void parser_destroy(Parser *);
 Tree *parser_parse(Parser *, TSInput, Tree *);
+void parser_set_language(Parser *, const TSLanguage *);
 
 #ifdef __cplusplus
 }
diff --git a/src/runtime/reusable_node.h b/src/runtime/reusable_node.h
new file mode 100644
index 00000000..b9777638
--- /dev/null
+++ b/src/runtime/reusable_node.h
@@ -0,0 +1,50 @@
+#include "runtime/tree.h"
+
+typedef struct {
+  Tree *tree;
+  uint32_t byte_index;
+  bool has_preceding_external_token;
+  const TSExternalTokenState *preceding_external_token_state;
+} ReusableNode;
+
+static inline ReusableNode reusable_node_new(Tree *tree) {
+  return (ReusableNode){
+    .tree = tree,
+    .byte_index = 0,
+    .has_preceding_external_token = false,
+    .preceding_external_token_state = NULL,
+  };
+}
+
+static inline void reusable_node_pop(ReusableNode *self) {
+  self->byte_index += ts_tree_total_bytes(self->tree);
+  if (self->tree->has_external_tokens) {
+    self->has_preceding_external_token = true;
+    self->preceding_external_token_state = ts_tree_last_external_token_state(self->tree);
+  }
+
+  while (self->tree) {
+    Tree *parent = self->tree->context.parent;
+    uint32_t next_index = self->tree->context.index + 1;
+    if (parent && parent->child_count > next_index) {
+      self->tree = parent->children[next_index];
+      return;
+    }
+    self->tree = parent;
+  }
+}
+
+static inline void reusable_node_pop_leaf(ReusableNode *self) {
+  while (self->tree->child_count > 0)
+    self->tree = self->tree->children[0];
+  reusable_node_pop(self);
+}
+
+static inline bool reusable_node_breakdown(ReusableNode *self) {
+  if (self->tree->child_count == 0) {
+    return false;
+  } else {
+    self->tree = self->tree->children[0];
+    return true;
+  }
+}
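ReusableNode is a preorder cursor over the previous tree: reusable_node_pop advances byte_index past the current subtree and moves to the next sibling (or an ancestor's sibling), while reusable_node_pop_leaf first descends to the leftmost leaf. As an illustration only, the cursor can enumerate a tree's leaves:

    #include <stdio.h>
    #include "runtime/reusable_node.h"

    // Prints the byte offset at which each leaf (including its padding) begins.
    // Assumes parent pointers have been assigned (see ts_tree_assign_parents).
    static void print_leaf_offsets(Tree *root) {
      ReusableNode cursor = reusable_node_new(root);
      while (cursor.tree) {
        printf("leaf at byte %u\n", cursor.byte_index);
        reusable_node_pop_leaf(&cursor);  // descend to the leftmost leaf, then step past it
      }
    }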
diff --git a/src/runtime/stack.c b/src/runtime/stack.c
index bdc5945c..fc875396 100644
--- a/src/runtime/stack.c
+++ b/src/runtime/stack.c
@@ -50,6 +50,7 @@ typedef struct {
   StackNode *node;
   bool is_halted;
   unsigned push_count;
+  const TSExternalTokenState *external_token_state;
 } StackHead;
 
 struct Stack {
@@ -168,11 +169,13 @@ static void stack_node_add_link(StackNode *self, StackLink link) {
 }
 
 static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
-                                          unsigned push_count) {
+                                          unsigned push_count,
+                                          const TSExternalTokenState *external_token_state) {
   StackHead head = {
     .node = node,
     .is_halted = false,
     .push_count = push_count,
+    .external_token_state = external_token_state,
   };
   array_push(&self->heads, head);
   stack_node_retain(node);
@@ -180,7 +183,8 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
 }
 
 static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
-                                unsigned push_count) {
+                                unsigned push_count,
+                                const TSExternalTokenState *external_token_state) {
   for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
     StackVersion version = self->slices.contents[i].version;
     if (self->heads.contents[version].node == node) {
@@ -190,7 +194,7 @@ static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
     }
   }
 
-  StackVersion version = ts_stack__add_version(self, node, push_count);
+  StackVersion version = ts_stack__add_version(self, node, push_count, external_token_state);
   StackSlice slice = { *trees, version };
   array_push(&self->slices, slice);
 }
@@ -202,6 +206,7 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
   StackHead *head = array_get(&self->heads, version);
 
   unsigned push_count = head->push_count;
+  const TSExternalTokenState *external_token_state = head->external_token_state;
   Iterator iterator = {
     .node = head->node,
     .trees = array_new(),
@@ -229,7 +234,8 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
       if (!should_stop)
         ts_tree_array_copy(trees, &trees);
       array_reverse(&trees);
-      ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count);
+      ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count,
+                          external_token_state);
     }
 
     if (should_stop) {
@@ -288,7 +294,12 @@ Stack *ts_stack_new() {
   self->base_node =
     stack_node_new(NULL, NULL, false, 1, length_zero(), &self->node_pool);
   stack_node_retain(self->base_node);
-  array_push(&self->heads, ((StackHead){ self->base_node, false, 0 }));
+  array_push(&self->heads, ((StackHead){
+    self->base_node,
+    false,
+    0,
+    NULL
+  }));
 
   return self;
 }
@@ -327,11 +338,19 @@ unsigned ts_stack_push_count(const Stack *self, StackVersion version) {
   return array_get(&self->heads, version)->push_count;
 }
 
-void ts_stack_decrease_push_count(const Stack *self, StackVersion version,
+void ts_stack_decrease_push_count(Stack *self, StackVersion version,
                                   unsigned decrement) {
   array_get(&self->heads, version)->push_count -= decrement;
 }
 
+const TSExternalTokenState *ts_stack_external_token_state(const Stack *self, StackVersion version) {
+  return array_get(&self->heads, version)->external_token_state;
+}
+
+void ts_stack_set_external_token_state(Stack *self, StackVersion version, const TSExternalTokenState *state) {
+  array_get(&self->heads, version)->external_token_state = state;
+}
+
 ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) {
   StackHead *head = array_get(&self->heads, version);
   return (ErrorStatus){
@@ -480,7 +499,8 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version)
     if (new_node->state == node->state &&
         new_node->position.chars == node->position.chars &&
         new_node->error_count == node->error_count &&
-        new_node->error_cost == node->error_cost) {
+        new_node->error_cost == node->error_cost &&
+        new_head->external_token_state == head->external_token_state) {
       for (uint32_t j = 0; j < new_node->link_count; j++)
         stack_node_add_link(node, new_node->links[j]);
       if (new_head->push_count > head->push_count)
@@ -505,7 +525,12 @@ void ts_stack_clear(Stack *self) {
   for (uint32_t i = 0; i < self->heads.size; i++)
     stack_node_release(self->heads.contents[i].node, &self->node_pool);
   array_clear(&self->heads);
-  array_push(&self->heads, ((StackHead){ self->base_node, false, 0 }));
+  array_push(&self->heads, ((StackHead){
+    self->base_node,
+    false,
+    0,
+    NULL
+  }));
 }
 
 bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
@@ -528,8 +553,20 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
     fprintf(
       f,
       "node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, "
-      "labeltooltip=\"push_count: %u\"]\n",
+      "labeltooltip=\"push_count: %u",
      i, head->node, i, head->push_count);
+
+    if (head->external_token_state) {
+      const TSExternalTokenState *s = head->external_token_state;
+      fprintf(f,
+        "\nexternal_token_state: "
+        "%2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X",
+        (*s)[0], (*s)[1], (*s)[2], (*s)[3], (*s)[4], (*s)[5], (*s)[6], (*s)[7],
+        (*s)[8], (*s)[9], (*s)[10], (*s)[11], (*s)[12], (*s)[13], (*s)[14], (*s)[15]
+      );
+    }
+
+    fprintf(f, "\"]\n");
 
     array_push(&self->iterators, ((Iterator){.node = head->node }));
   }
diff --git a/src/runtime/stack.h b/src/runtime/stack.h
index 64d9842b..2e88d72a 100644
--- a/src/runtime/stack.h
+++ b/src/runtime/stack.h
@@ -65,7 +65,11 @@ TSStateId ts_stack_top_state(const Stack *, StackVersion);
 
 unsigned ts_stack_push_count(const Stack *, StackVersion);
 
-void ts_stack_decrease_push_count(const Stack *, StackVersion, unsigned);
+void ts_stack_decrease_push_count(Stack *, StackVersion, unsigned);
+
+const TSExternalTokenState *ts_stack_external_token_state(const Stack *, StackVersion);
+
+void ts_stack_set_external_token_state(Stack *, StackVersion, const TSExternalTokenState *);
 
 /*
  * Get the position at the top of the given version of the stack. If the stack
diff --git a/src/runtime/tree.c b/src/runtime/tree.c
index c94b1f9f..858ad90e 100644
--- a/src/runtime/tree.c
+++ b/src/runtime/tree.c
@@ -25,10 +25,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
     .visible = metadata.visible,
     .named = metadata.named,
     .has_changes = false,
-    .first_leaf = {
-      .symbol = sym,
-      .lex_state = 0
-    }
+    .first_leaf.symbol = sym,
   };
   return result;
 }
@@ -111,6 +108,8 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
   self->named_child_count = 0;
   self->visible_child_count = 0;
   self->error_cost = 0;
+  self->has_external_tokens = false;
+  self->has_external_token_state = false;
 
   for (uint32_t i = 0; i < child_count; i++) {
     Tree *child = children[i];
@@ -128,11 +127,14 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
         self->visible_child_count++;
       if (child->named)
         self->named_child_count++;
-    } else {
+    } else if (child->child_count > 0) {
       self->visible_child_count += child->visible_child_count;
       self->named_child_count += child->named_child_count;
     }
 
+    if (child->has_external_tokens) self->has_external_tokens = true;
+    if (child->has_external_token_state) self->has_external_token_state = true;
+
     if (child->symbol == ts_builtin_sym_error) {
       self->fragile_left = self->fragile_right = true;
       self->parse_state = TS_TREE_STATE_NONE;
@@ -377,6 +379,21 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
   }
 }
 
+const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *tree) {
+  while (tree->child_count > 0) {
+    for (uint32_t i = tree->child_count - 1; i + 1 > 0; i--) {
+      Tree *child = tree->children[i];
+      if (child->has_external_token_state) {
+        tree = child;
+        break;
+      } else if (child->has_external_tokens) {
+        return NULL;
+      }
+    }
+  }
+  return &tree->external_token_state;
+}
+
 static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) {
   if (c == 0)
     return snprintf(s, n, "EOF");
diff --git a/src/runtime/tree.h b/src/runtime/tree.h
index c37d61ab..d5916e31 100644
--- a/src/runtime/tree.h
+++ b/src/runtime/tree.h
@@ -22,10 +22,13 @@ typedef struct Tree {
   } context;
 
   uint32_t child_count;
-  uint32_t visible_child_count;
-  uint32_t named_child_count;
   union {
-    struct Tree **children;
+    struct {
+      uint32_t visible_child_count;
+      uint32_t named_child_count;
+      struct Tree **children;
+    };
+    TSExternalTokenState external_token_state;
     int32_t lookahead_char;
   };
 
@@ -38,7 +41,7 @@ typedef struct Tree {
 
   struct {
     TSSymbol symbol;
-    TSStateId lex_state;
+    TSLexMode lex_mode;
   } first_leaf;
 
   unsigned short ref_count;
@@ -48,6 +51,8 @@ typedef struct Tree {
   bool fragile_left : 1;
   bool fragile_right : 1;
   bool has_changes : 1;
+  bool has_external_tokens : 1;
+  bool has_external_token_state : 1;
 } Tree;
 
 typedef struct {
@@ -81,6 +86,7 @@ void ts_tree_assign_parents(Tree *, TreePath *);
 void ts_tree_edit(Tree *, const TSInputEdit *edit);
 char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all);
 void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *);
+const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *);
 
 static inline uint32_t ts_tree_total_bytes(const Tree *self) {
   return self->padding.bytes + self->size.bytes;
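One note on the union above: a leaf has no children, so its two child counts and its children pointer are dead storage, and the serialized external token state is overlaid onto exactly that space. An illustrative compile-time check, resting on a layout assumption about common LP64 targets rather than anything guaranteed by the patch:

    #include <assert.h>
    #include "tree_sitter/parser.h"

    // Two uint32_t counts plus a 64-bit pointer occupy 16 bytes, the same as
    // the serialized external-token state, so leaves pay no extra memory.
    static_assert(sizeof(TSExternalTokenState) == 16,
                  "serialized external token state fits the leaf's unused child fields");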
diff --git a/src/runtime/tree_path.h b/src/runtime/tree_path.h
index bba32718..f64dd02f 100644
--- a/src/runtime/tree_path.h
+++ b/src/runtime/tree_path.h
@@ -21,20 +21,20 @@ static void range_array_add(RangeArray *results, TSPoint start, TSPoint end) {
   }
 }
 
-static bool tree_path_descend(TreePath *path, TSPoint position) {
+static bool tree_path_descend(TreePath *path, Length position) {
   uint32_t original_size = path->size;
+  bool did_descend;
 
   do {
     did_descend = false;
     TreePathEntry entry = *array_back(path);
-    Length child_position = entry.position;
+    Length child_left = entry.position;
     for (uint32_t i = 0; i < entry.tree->child_count; i++) {
       Tree *child = entry.tree->children[i];
-      Length child_right_position =
-        length_add(child_position, ts_tree_total_size(child));
-      if (point_lt(position, child_right_position.extent)) {
-        TreePathEntry child_entry = { child, child_position, i };
-        if (child->visible) {
+      Length child_right = length_add(child_left, ts_tree_total_size(child));
+      if (position.bytes < child_right.bytes) {
+        TreePathEntry child_entry = { child, child_left, i };
+        if (child->visible || child->child_count == 0) {
           array_push(path, child_entry);
           return true;
         } else if (child->visible_child_count > 0) {
@@ -43,39 +43,44 @@ static bool tree_path_descend(TreePath *path, TSPoint position) {
           break;
         }
       }
-      child_position = child_right_position;
+      child_left = child_right;
     }
   } while (did_descend);
 
+  path->size = original_size;
   return false;
 }
 
 static uint32_t tree_path_advance(TreePath *path) {
   uint32_t ascend_count = 0;
+
   while (path->size > 0) {
     TreePathEntry entry = array_pop(path);
-    if (path->size == 0)
-      break;
+    if (path->size == 0) break;
 
     TreePathEntry parent_entry = *array_back(path);
     if (parent_entry.tree->visible)
       ascend_count++;
-    Length position =
-      length_add(entry.position, ts_tree_total_size(entry.tree));
+
+    Length position = length_add(entry.position, ts_tree_total_size(entry.tree));
     for (uint32_t i = entry.child_index + 1; i < parent_entry.tree->child_count;
          i++) {
       Tree *next_child = parent_entry.tree->children[i];
-      if (next_child->visible || next_child->visible_child_count > 0) {
+      if (next_child->visible ||
+          next_child->child_count == 0 ||
+          next_child->visible_child_count > 0) {
         if (parent_entry.tree->visible)
           ascend_count--;
         array_push(path, ((TreePathEntry){
                            .tree = next_child,
                            .child_index = i,
                            .position = position,
                          }));
-        if (!next_child->visible)
-          tree_path_descend(path, (TSPoint){ 0, 0 });
+        if (!next_child->visible) {
+          tree_path_descend(path, length_zero());
+        }
         return ascend_count;
       }
       position = length_add(position, ts_tree_total_size(next_child));
     }
   }
+
   return ascend_count;
 }
@@ -94,8 +99,27 @@ static void tree_path_init(TreePath *path, Tree *tree) {
     .position = { 0, 0, { 0, 0 } },
     .child_index = 0,
   }));
-  if (!tree->visible)
-    tree_path_descend(path, (TSPoint){ 0, 0 });
+  if (!tree->visible) {
+    tree_path_descend(path, length_zero());
+  }
+}
+
+Tree *tree_path_visible_tree(TreePath *self) {
+  for (uint32_t i = self->size - 1; i + 1 > 0; i--) {
+    Tree *tree = self->contents[i].tree;
+    if (tree->visible) return tree;
+  }
+  return NULL;
+}
+
+Length tree_path_start_position(TreePath *self) {
+  TreePathEntry entry = *array_back(self);
+  return length_add(entry.position, entry.tree->padding);
+}
+
+Length tree_path_end_position(TreePath *self) {
+  TreePathEntry entry = *array_back(self);
+  return length_add(length_add(entry.position, entry.tree->padding), entry.tree->size);
 }
 
 static bool tree_must_eq(Tree *old_tree, Tree *new_tree) {
@@ -112,67 +136,59 @@ static void tree_path_get_changes(TreePath *old_path, TreePath *new_path,
                                   TSRange **ranges, uint32_t *range_count) {
-  TSPoint position = { 0, 0 };
+  Length position = length_zero();
   RangeArray results = array_new();
 
   while (old_path->size && new_path->size) {
     bool is_changed = false;
-    TSPoint next_position = position;
+    Length next_position = position;
 
-    TreePathEntry old_entry = *array_back(old_path);
-    TreePathEntry new_entry = *array_back(new_path);
-    Tree *old_tree = old_entry.tree;
-    Tree *new_tree = new_entry.tree;
-    uint32_t old_start_byte = old_entry.position.bytes + old_tree->padding.bytes;
-    uint32_t new_start_byte = new_entry.position.bytes + new_tree->padding.bytes;
-    TSPoint old_start_point =
-      point_add(old_entry.position.extent, old_tree->padding.extent);
-    TSPoint new_start_point =
-      point_add(new_entry.position.extent, new_tree->padding.extent);
-    TSPoint old_end_point = point_add(old_start_point, old_tree->size.extent);
-    TSPoint new_end_point = point_add(new_start_point, new_tree->size.extent);
+    Tree *old_tree = tree_path_visible_tree(old_path);
+    Tree *new_tree = tree_path_visible_tree(new_path);
+    Length old_start = tree_path_start_position(old_path);
+    Length new_start = tree_path_start_position(new_path);
+    Length old_end = tree_path_end_position(old_path);
+    Length new_end = tree_path_end_position(new_path);
 
     // #define NAME(t) (ts_language_symbol_name(language, ((Tree *)(t))->symbol))
-    // printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\n",
-    //        position.row, position.column, NAME(old_tree), old_start_point.row,
-    //        old_start_point.column, old_end_point.row, old_end_point.column,
-    //        NAME(new_tree), new_start_point.row, new_start_point.column,
-    //        new_end_point.row, new_end_point.column);
+    // printf("At [%-2u, %-2u] Compare (%-20s\t [%-2u, %-2u] - [%u, %u])\tvs\t(%-20s\t [%u, %u] - [%u, %u])\n",
+    //        position.extent.row, position.extent.column,
+    //        NAME(old_tree), old_start.extent.row, old_start.extent.column, old_end.extent.row, old_end.extent.column,
+    //        NAME(new_tree), new_start.extent.row, new_start.extent.column, new_end.extent.row, new_end.extent.column);
 
-    if (point_lt(position, old_start_point)) {
-      if (point_lt(position, new_start_point)) {
-        next_position = point_min(old_start_point, new_start_point);
+    if (position.bytes < old_start.bytes) {
+      if (position.bytes < new_start.bytes) {
+        next_position = length_min(old_start, new_start);
       } else {
         is_changed = true;
-        next_position = old_start_point;
+        next_position = old_start;
       }
-    } else if (point_lt(position, new_start_point)) {
+    } else if (position.bytes < new_start.bytes) {
       is_changed = true;
-      next_position = new_start_point;
-    } else if (old_start_byte == new_start_byte &&
-               tree_must_eq(old_tree, new_tree)) {
-      next_position = old_end_point;
+      next_position = new_start;
+    } else if (old_start.bytes == new_start.bytes && tree_must_eq(old_tree, new_tree)) {
+      next_position = old_end;
     } else if (old_tree->symbol == new_tree->symbol) {
       if (tree_path_descend(old_path, position)) {
         if (!tree_path_descend(new_path, position)) {
           tree_path_ascend(old_path, 1);
           is_changed = true;
-          next_position = new_end_point;
+          next_position = new_end;
         }
       } else if (tree_path_descend(new_path, position)) {
         tree_path_ascend(new_path, 1);
         is_changed = true;
-        next_position = old_end_point;
+        next_position = old_end;
       } else {
-        next_position = point_min(old_end_point, new_end_point);
+        next_position = length_min(old_end, new_end);
       }
     } else {
       is_changed = true;
-      next_position = point_min(old_end_point, new_end_point);
+      next_position = length_min(old_end, new_end);
     }
 
-    bool at_old_end = point_lte(old_end_point, next_position);
-    bool at_new_end = point_lte(new_end_point, next_position);
+    bool at_old_end = old_end.bytes <= next_position.bytes;
+    bool at_new_end = new_end.bytes <= next_position.bytes;
 
     if (at_new_end && at_old_end) {
       uint32_t old_ascend_count = tree_path_advance(old_path);
@@ -190,7 +206,7 @@ static void tree_path_get_changes(TreePath *old_path, TreePath *new_path,
       tree_path_ascend(new_path, ascend_count);
     }
 
-    if (is_changed) range_array_add(&results, position, next_position);
+    if (is_changed) range_array_add(&results, position.extent, next_position.extent);
 
     position = next_position;
   }
diff --git a/todo.md b/todo.md
deleted file mode 100644
index 0fd7f7b0..00000000
--- a/todo.md
+++ /dev/null
@@ -1,32 +0,0 @@
-TODO
-====
-
-### Handling ambiguity (GLR)
-* Add a simple way to specify syntactic ambiguity resolutions in the Grammar (e.g. 'prefer declarations to statements' in C), similar to bison's `dprec`
-construct.
-
-### Runtime System
-* Refactoring: make separate symbol for unexpected characters than for interior error nodes.
-
-### Testing / Quality
-* Start running the clang-analyzer on the codebase on Travis-CI.
-* Use the Valgrind leak checker to fix the memory leaks in the runtime library.
-* Randomize the editing in the language tests, using a seed that can be specified in order to reproduce failures.
-
-### Ubiquitous token handling
-* Fix the unintuitive tree that results when ubiquitous tokens are last child of their parent node.
-
-### Error handling
-* Use information about nesting depth of tokens like '(' and ')' to make error recovery more accurate.
-
-### Grammar Features
-* Regexp assertions
-  - [ ] '^'
-  - [ ] '$'
-  - [ ] '\b'
-* Composing languages
-  - [ ] Rule for referencing named grammar
-  - [ ] Grammar registry object in runtime
-  - [ ] Parsing returns control to parent language
-* Indentation tokens