diff --git a/spec/fixtures/external_scanners/extra_external_tokens.c b/spec/fixtures/external_scanners/extra_external_tokens.c
deleted file mode 100644
index 5c409639..00000000
--- a/spec/fixtures/external_scanners/extra_external_tokens.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <tree_sitter/parser.h>
-
-enum {
-  COMMENT,
-};
-
-void *tree_sitter_extra_external_tokens_external_scanner_create() {
-  return NULL;
-}
-
-void tree_sitter_extra_external_tokens_external_scanner_reset(void *payload) {
-}
-
-bool tree_sitter_extra_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
-  return true;
-}
-
-void tree_sitter_extra_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
-}
-
-bool tree_sitter_extra_external_tokens_external_scanner_scan(
-  void *payload, TSLexer *lexer, const bool *whitelist) {
-
-  while (lexer->lookahead == ' ') {
-    lexer->advance(lexer, true);
-  }
-
-  if (lexer->lookahead == '#') {
-    lexer->advance(lexer, false);
-    while (lexer->lookahead != '\n') {
-      lexer->advance(lexer, false);
-    }
-
-    lexer->result_symbol = COMMENT;
-    return true;
-  }
-
-  return false;
-}
-
-void tree_sitter_extra_external_tokens_external_scanner_destroy(void *payload) {
-}
diff --git a/spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt b/spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt
new file mode 100644
index 00000000..06a7bf0b
--- /dev/null
+++ b/spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt
@@ -0,0 +1,32 @@
+================================================
+anonymous tokens defined with character classes
+================================================
+1234
+---
+
+(first_rule)
+
+=================================================
+anonymous tokens defined with LF escape sequence
+=================================================
+
+
+---
+
+(first_rule)
+
+=================================================
+anonymous tokens defined with CR escape sequence
+=================================================
+
+---
+
+(first_rule)
+
+================================================
+anonymous tokens with quotes
+================================================
+'hello'
+---
+
+(first_rule)
diff --git a/spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json b/spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json
new file mode 100644
index 00000000..d2613776
--- /dev/null
+++ b/spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json
@@ -0,0 +1,14 @@
+{
+  "name": "anonymous_tokens_with_escaped_chars",
+  "rules": {
+    "first_rule": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "STRING", "value": "\n"},
+        {"type": "STRING", "value": "\r"},
+        {"type": "STRING", "value": "'hello'"},
+        {"type": "PATTERN", "value": "\\d+"}
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/associativity_left/corpus.txt b/spec/fixtures/test_grammars/associativity_left/corpus.txt
new file mode 100644
index 00000000..4ab8e0db
--- /dev/null
+++ b/spec/fixtures/test_grammars/associativity_left/corpus.txt
@@ -0,0 +1,8 @@
+===================
+chained operations
+===================
+x+y+z
+---
+(expression (math_operation
+  (expression (math_operation (expression (identifier)) (expression (identifier))))
+  (expression (identifier))))
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/associativity_left/grammar.json b/spec/fixtures/test_grammars/associativity_left/grammar.json
new file mode 100644
index 00000000..b1a25914
--- /dev/null
+++ b/spec/fixtures/test_grammars/associativity_left/grammar.json
@@ -0,0 +1,31 @@
+{
+  "name": "associativity_left",
+
+  "rules": {
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "math_operation"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "math_operation": {
+      "type": "PREC_LEFT",
+      "value": 0,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "+"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-zA-Z]+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/associativity_missing/expected_error.txt b/spec/fixtures/test_grammars/associativity_missing/expected_error.txt
new file mode 100644
index 00000000..f9cc955d
--- /dev/null
+++ b/spec/fixtures/test_grammars/associativity_missing/expected_error.txt
@@ -0,0 +1,13 @@
+Unresolved conflict for symbol sequence:
+
+  expression  '+'  expression  •  '+'  …
+
+Possible interpretations:
+
+  1:  (math_operation  expression  '+'  expression)  •  '+'  …
+  2:  expression  '+'  (math_operation  expression  •  '+'  expression)
+
+Possible resolutions:
+
+  1:  Specify a left or right associativity in `math_operation`
+  2:  Add a conflict for these rules: `math_operation`
diff --git a/spec/fixtures/test_grammars/associativity_missing/grammar.json b/spec/fixtures/test_grammars/associativity_missing/grammar.json
new file mode 100644
index 00000000..e5bd9d83
--- /dev/null
+++ b/spec/fixtures/test_grammars/associativity_missing/grammar.json
@@ -0,0 +1,27 @@
+{
+  "name": "associativity_missing",
+
+  "rules": {
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "math_operation"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "math_operation": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "expression"},
+        {"type": "STRING", "value": "+"},
+        {"type": "SYMBOL", "name": "expression"}
+      ]
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-zA-Z]+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/associativity_right/corpus.txt b/spec/fixtures/test_grammars/associativity_right/corpus.txt
new file mode 100644
index 00000000..280bbc31
--- /dev/null
+++ b/spec/fixtures/test_grammars/associativity_right/corpus.txt
@@ -0,0 +1,8 @@
+===================
+chained operations
+===================
+x+y+z
+---
+(expression (math_operation
+  (expression (identifier))
+  (expression (math_operation (expression (identifier)) (expression (identifier))))))
diff --git a/spec/fixtures/test_grammars/associativity_right/grammar.json b/spec/fixtures/test_grammars/associativity_right/grammar.json
new file mode 100644
index 00000000..80ce1ebb
--- /dev/null
+++ b/spec/fixtures/test_grammars/associativity_right/grammar.json
@@ -0,0 +1,31 @@
+{
+  "name": "associativity_right",
+
+  "rules": {
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "math_operation"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "math_operation": {
+      "type": "PREC_RIGHT",
+      "value": 0,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "+"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-zA-Z]+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/conflicting_precedence/expected_error.txt b/spec/fixtures/test_grammars/conflicting_precedence/expected_error.txt
new file mode 100644
index 00000000..a38dd8b5
--- /dev/null
+++ b/spec/fixtures/test_grammars/conflicting_precedence/expected_error.txt
@@ -0,0 +1,15 @@
+Unresolved conflict for symbol sequence:
+
+  expression  '+'  expression  •  '*'  …
+
+Possible interpretations:
+
+  1:  (sum  expression  '+'  expression)  •  '*'  …
+  2:  expression  '+'  (product  expression  •  '*'  expression)
+  3:  expression  '+'  (other_thing  expression  •  '*'  '*')
+
+Possible resolutions:
+
+  1:  Specify a higher precedence in `product` and `other_thing` than in the other rules.
+  2:  Specify a higher precedence in `sum` than in the other rules.
+  3:  Add a conflict for these rules: `sum` `product` `other_thing`
diff --git a/spec/fixtures/test_grammars/conflicting_precedence/grammar.json b/spec/fixtures/test_grammars/conflicting_precedence/grammar.json
new file mode 100644
index 00000000..4e82de64
--- /dev/null
+++ b/spec/fixtures/test_grammars/conflicting_precedence/grammar.json
@@ -0,0 +1,58 @@
+{
+  "name": "conflicting_precedence",
+
+  "rules": {
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "sum"},
+        {"type": "SYMBOL", "name": "product"},
+        {"type": "SYMBOL", "name": "other_thing"}
+      ]
+    },
+
+    "sum": {
+      "type": "PREC_LEFT",
+      "value": 0,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "+"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    "product": {
+      "type": "PREC_LEFT",
+      "value": 1,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "*"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    "other_thing": {
+      "type": "PREC_LEFT",
+      "value": -1,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "*"},
+          {"type": "STRING", "value": "*"}
+        ]
+      }
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-zA-Z]+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/epsilon_rules/expected_error.txt b/spec/fixtures/test_grammars/epsilon_rules/expected_error.txt
new file mode 100644
index 00000000..39b3d5fa
--- /dev/null
+++ b/spec/fixtures/test_grammars/epsilon_rules/expected_error.txt
@@ -0,0 +1,2 @@
+The rule `rule_2` matches the empty string.
+Tree-sitter currently does not support syntactic rules that match the empty string.
diff --git a/spec/fixtures/test_grammars/epsilon_rules/grammar.json b/spec/fixtures/test_grammars/epsilon_rules/grammar.json
new file mode 100644
index 00000000..5be5b983
--- /dev/null
+++ b/spec/fixtures/test_grammars/epsilon_rules/grammar.json
@@ -0,0 +1,15 @@
+{
+  "name": "epsilon_rules",
+
+  "rules": {
+    "rule_1": {"type": "SYMBOL", "name": "rule_2"},
+
+    "rule_2": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "rule_1"},
+        {"type": "BLANK"}
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/external_and_internal_tokens/corpus.txt b/spec/fixtures/test_grammars/external_and_internal_tokens/corpus.txt
new file mode 100644
index 00000000..4d691420
--- /dev/null
+++ b/spec/fixtures/test_grammars/external_and_internal_tokens/corpus.txt
@@ -0,0 +1,41 @@
+=========================================
+single-line statements - internal tokens
+=========================================
+
+a b
+
+---
+
+(statement (variable) (variable) (line_break))
+
+=========================================
+multi-line statements - internal tokens
+=========================================
+
+a
+b
+
+---
+
+(statement (variable) (variable) (line_break))
+
+=========================================
+single-line statements - external tokens
+=========================================
+
+'hello' 'world'
+
+---
+
+(statement (string) (string) (line_break))
+
+=========================================
+multi-line statements - external tokens
+=========================================
+
+'hello'
+'world'
+
+---
+
+(statement (string) (string) (line_break))
diff --git a/spec/fixtures/test_grammars/external_and_internal_tokens/grammar.json b/spec/fixtures/test_grammars/external_and_internal_tokens/grammar.json
new file mode 100644
index 00000000..f24e1c1c
--- /dev/null
+++ b/spec/fixtures/test_grammars/external_and_internal_tokens/grammar.json
@@ -0,0 +1,36 @@
+{
+  "name": "external_and_internal_tokens",
+
+  "externals": [
+    "string",
+    "line_break"
+  ],
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "statement": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "_expression"},
+        {"type": "SYMBOL", "name": "_expression"},
+        {"type": "SYMBOL", "name": "line_break"}
+      ]
+    },
+
+    "_expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "string"},
+        {"type": "SYMBOL", "name": "variable"},
+        {"type": "SYMBOL", "name": "number"}
+      ]
+    },
+
+    "variable": {"type": "PATTERN", "value": "\\a+"},
+    "number": {"type": "PATTERN", "value": "\\d+"},
+    "line_break": {"type": "STRING", "value": "\n"}
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/external_and_internal_tokens/readme.md b/spec/fixtures/test_grammars/external_and_internal_tokens/readme.md
new file mode 100644
index 00000000..14ae934f
--- /dev/null
+++ b/spec/fixtures/test_grammars/external_and_internal_tokens/readme.md
@@ -0,0 +1 @@
+This grammar has an external scanner whose `scan` method needs to be able to check for the validity of an *internal* token. This is done by including the name of that internal token (`line_break`) in the grammar's `externals` field.
\ No newline at end of file
diff --git a/spec/fixtures/external_scanners/shared_external_tokens.c b/spec/fixtures/test_grammars/external_and_internal_tokens/scanner.c
similarity index 62%
rename from spec/fixtures/external_scanners/shared_external_tokens.c
rename to spec/fixtures/test_grammars/external_and_internal_tokens/scanner.c
index 0bee00d8..4d0acd0a 100644
--- a/spec/fixtures/external_scanners/shared_external_tokens.c
+++ b/spec/fixtures/test_grammars/external_and_internal_tokens/scanner.c
@@ -1,4 +1,3 @@
-#include <stdbool.h>
 #include <tree_sitter/parser.h>
 
 enum {
@@ -6,21 +5,17 @@ enum {
   LINE_BREAK
 };
 
-void *tree_sitter_shared_external_tokens_external_scanner_create() {
-  return NULL;
-}
+void *tree_sitter_external_and_internal_tokens_external_scanner_create() { return NULL; }
 
-void tree_sitter_shared_external_tokens_external_scanner_reset(void *payload) {
-}
+void tree_sitter_external_and_internal_tokens_external_scanner_destroy(void *payload) {}
 
-bool tree_sitter_shared_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
-  return true;
-}
+void tree_sitter_external_and_internal_tokens_external_scanner_reset(void *payload) {}
 
-void tree_sitter_shared_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
-}
+bool tree_sitter_external_and_internal_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
 
-bool tree_sitter_shared_external_tokens_external_scanner_scan(
+void tree_sitter_external_and_internal_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
+
+bool tree_sitter_external_and_internal_tokens_external_scanner_scan(
   void *payload, TSLexer *lexer, const bool *whitelist) {
 
   // If a line-break is a valid lookahead token, only skip spaces.
@@ -58,6 +53,3 @@ bool tree_sitter_shared_external_tokens_external_scanner_scan(
 
   return false;
 }
-
-void tree_sitter_shared_external_tokens_external_scanner_destroy(void *payload) {
-}
diff --git a/spec/fixtures/test_grammars/external_extra_tokens/corpus.txt b/spec/fixtures/test_grammars/external_extra_tokens/corpus.txt
new file mode 100644
index 00000000..ceac4b8a
--- /dev/null
+++ b/spec/fixtures/test_grammars/external_extra_tokens/corpus.txt
@@ -0,0 +1,10 @@
+========================
+extra external tokens
+========================
+
+x = # a comment
+y
+
+---
+
+(assignment (variable) (comment) (variable))
diff --git a/spec/fixtures/test_grammars/external_extra_tokens/grammar.json b/spec/fixtures/test_grammars/external_extra_tokens/grammar.json
new file mode 100644
index 00000000..ed13b34a
--- /dev/null
+++ b/spec/fixtures/test_grammars/external_extra_tokens/grammar.json
@@ -0,0 +1,25 @@
+{
+  "name": "external_extra_tokens",
+
+  "externals": [
+    "comment"
+  ],
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"},
+    {"type": "SYMBOL", "name": "comment"}
+  ],
+
+  "rules": {
+    "assignment": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "variable"},
+        {"type": "STRING", "value": "="},
+        {"type": "SYMBOL", "name": "variable"}
+      ]
+    },
+
+    "variable": {"type": "PATTERN", "value": "\\a+"}
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/external_extra_tokens/scanner.c b/spec/fixtures/test_grammars/external_extra_tokens/scanner.c
new file mode 100644
index 00000000..4bd3e22e
--- /dev/null
+++ b/spec/fixtures/test_grammars/external_extra_tokens/scanner.c
@@ -0,0 +1,36 @@
+#include <tree_sitter/parser.h>
+
+enum {
+  COMMENT,
+};
+// Lifecycle hooks: create returns NULL (no payload), serialize just returns true, and the rest are no-ops.
+void *tree_sitter_external_extra_tokens_external_scanner_create() { return NULL; }
+
+void tree_sitter_external_extra_tokens_external_scanner_destroy(void *payload) {}
+
+void tree_sitter_external_extra_tokens_external_scanner_reset(void *payload) {}
+
+bool tree_sitter_external_extra_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
+
+void tree_sitter_external_extra_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
+
+bool tree_sitter_external_extra_tokens_external_scanner_scan(
+  void *payload, TSLexer *lexer, const bool *whitelist) {
+  // Recognizes a '#' comment: sets result_symbol to COMMENT and returns true, else returns false.
+  while (lexer->lookahead == ' ') {
+    lexer->advance(lexer, true);
+  }
+  // The comment body ends at '\n' or at end of input (lookahead 0), so an unterminated comment cannot loop forever.
+  if (lexer->lookahead == '#') {
+    lexer->advance(lexer, false);
+    while (lexer->lookahead != '\n' && lexer->lookahead != 0) {
+      lexer->advance(lexer, false);
+    }
+
+    lexer->result_symbol = COMMENT;
+    return true;
+  }
+
+  return false;
+}
+
diff --git a/spec/fixtures/test_grammars/external_tokens/corpus.txt b/spec/fixtures/test_grammars/external_tokens/corpus.txt
new file mode 100644
index 00000000..94153c16
--- /dev/null
+++ b/spec/fixtures/test_grammars/external_tokens/corpus.txt
@@ -0,0 +1,22 @@
+========================
+simple external tokens
+=========================
+
+x + %(sup (external) scanner?)
+
+---
+
+(expression (sum (expression (identifier)) (expression (string))))
+
+==================================
+external tokens that require state
+==================================
+
+%{sup {} #{x + y} {} scanner?}
+
+---
+
+(expression (string
+  (expression (sum
+    (expression (identifier))
+    (expression (identifier))))))
diff --git a/spec/fixtures/test_grammars/external_tokens/grammar.json b/spec/fixtures/test_grammars/external_tokens/grammar.json
new file mode 100644
index 00000000..8a175404
--- /dev/null
+++ b/spec/fixtures/test_grammars/external_tokens/grammar.json
@@ -0,0 +1,57 @@
+{
+  "name": "external_tokens",
+
+  "externals": [
+    "_percent_string",
+    "_percent_string_start",
+    "_percent_string_end"
+  ],
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "string"},
+        {"type": "SYMBOL", "name": "sum"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "sum": {
+      "type": "PREC_LEFT",
+      "value": 0,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "+"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    "string": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "_percent_string"},
+        {
+          "type": "SEQ",
+          "members": [
+            {"type": "SYMBOL", "name": "_percent_string_start"},
+            {"type": "SYMBOL", "name": "expression"},
+            {"type": "SYMBOL", "name": "_percent_string_end"}
+          ]
+        }
+      ]
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "\\a+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/external_scanners/percent_strings.c b/spec/fixtures/test_grammars/external_tokens/scanner.c
similarity index 80%
rename from spec/fixtures/external_scanners/percent_strings.c
rename to spec/fixtures/test_grammars/external_tokens/scanner.c
index 9f68696e..7622e74d 100644
--- a/spec/fixtures/external_scanners/percent_strings.c
+++ b/spec/fixtures/test_grammars/external_tokens/scanner.c
@@ -1,4 +1,3 @@
-#include <stdbool.h>
 #include <tree_sitter/parser.h>
 
 enum {
@@ -13,7 +12,7 @@ typedef struct {
   uint32_t depth;
 } Scanner;
 
-void *tree_sitter_external_scanner_example_external_scanner_create() {
+void *tree_sitter_external_tokens_external_scanner_create() {
   Scanner *scanner = malloc(sizeof(Scanner));
   *scanner = (Scanner){
     .open_delimiter = 0,
@@ -23,7 +22,17 @@ void *tree_sitter_external_scanner_example_external_scanner_create() {
   return scanner;
 }
 
-bool tree_sitter_external_scanner_example_external_scanner_scan(
+void tree_sitter_external_tokens_external_scanner_destroy(void *payload) {
+  free(payload);
+}
+
+void tree_sitter_external_tokens_external_scanner_reset(void *payload) {}
+
+bool tree_sitter_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
+
+void tree_sitter_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
+
+bool tree_sitter_external_tokens_external_scanner_scan(
   void *payload, TSLexer *lexer, const bool *whitelist) {
   Scanner *scanner = payload;
 
@@ -103,16 +112,3 @@ bool tree_sitter_external_scanner_example_external_scanner_scan(
   return false;
 }
 
-void tree_sitter_external_scanner_example_external_scanner_reset(void *payload) {
-}
-
-bool tree_sitter_external_scanner_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
-  return true;
-}
-
-void tree_sitter_external_scanner_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
-}
-
-void tree_sitter_external_scanner_example_external_scanner_destroy(void *payload) {
-  free(payload);
-}
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt b/spec/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt
new file mode 100644
index 00000000..b1be0828
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt
@@ -0,0 +1,15 @@
+Unresolved conflict for symbol sequence:
+
+  identifier  •  '{'  …
+
+Possible interpretations:
+
+  1:  (expression  identifier)  •  '{'  …
+  2:  (function_call  identifier  •  block)
+
+Possible resolutions:
+
+  1:  Specify a higher precedence in `function_call` than in the other rules.
+  2:  Specify a higher precedence in `expression` than in the other rules.
+  3:  Specify a left or right associativity in `expression`
+  4:  Add a conflict for these rules: `expression` `function_call`
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json b/spec/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json
new file mode 100644
index 00000000..19852708
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json
@@ -0,0 +1,63 @@
+{
+  "name": "precedence_on_single_child_missing",
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "function_call"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "function_call": {
+      "type": "PREC_RIGHT",
+      "value": 0,
+      "content": {
+        "type": "CHOICE",
+        "members": [
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "expression"}
+            ]
+          },
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "block"}
+            ]
+          },
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "expression"},
+              {"type": "SYMBOL", "name": "block"}
+            ]
+          }
+        ]
+      }
+    },
+
+    "block": {
+      "type": "SEQ",
+      "members": [
+        {"type": "STRING", "value": "{"},
+        {"type": "SYMBOL", "name": "expression"},
+        {"type": "STRING", "value": "}"}
+      ]
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-zA-Z]+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_missing/readme.md b/spec/fixtures/test_grammars/precedence_on_single_child_missing/readme.md
new file mode 100644
index 00000000..9db7345f
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_missing/readme.md
@@ -0,0 +1,14 @@
+This language has function calls similar to Ruby's, with no parentheses required, and optional blocks.
+
+There is a shift/reduce conflict here:
+
+```
+foo bar { baz }
+       ^
+```
+
+The possible actions are:
+1. `reduce(expression, 1)` - `bar` is an expression being passed to the `foo` function.
+2. `shift` - `bar` is a function being called with the block `{ baz }`
+
+The grammars `precedence_on_single_child_negative` and `precedence_on_single_child_positive` show possible resolutions to this conflict.
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt b/spec/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt
new file mode 100644
index 00000000..69678dae
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt
@@ -0,0 +1,12 @@
+===========================
+function calls with blocks
+===========================
+
+foo bar { baz }
+
+---
+
+(expression (function_call
+  (identifier)
+  (expression (identifier))
+  (block (expression (identifier)))))
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json b/spec/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json
new file mode 100644
index 00000000..fc237f54
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json
@@ -0,0 +1,63 @@
+{
+  "name": "precedence_on_single_child_negative",
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "function_call"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "function_call": {
+      "type": "PREC_RIGHT",
+      "value": -1,
+      "content": {
+        "type": "CHOICE",
+        "members": [
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "expression"}
+            ]
+          },
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "block"}
+            ]
+          },
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "expression"},
+              {"type": "SYMBOL", "name": "block"}
+            ]
+          }
+        ]
+      }
+    },
+
+    "block": {
+      "type": "SEQ",
+      "members": [
+        {"type": "STRING", "value": "{"},
+        {"type": "SYMBOL", "name": "expression"},
+        {"type": "STRING", "value": "}"}
+      ]
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-zA-Z]+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_negative/readme.md b/spec/fixtures/test_grammars/precedence_on_single_child_negative/readme.md
new file mode 100644
index 00000000..5b2cd804
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_negative/readme.md
@@ -0,0 +1 @@
+This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a negative precedence. This causes reducing the `bar` variable to an expression to be preferred over shifting the `{` token as part of `function_call`.
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt b/spec/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt
new file mode 100644
index 00000000..ee01d488
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt
@@ -0,0 +1,13 @@
+===========================
+function calls with blocks
+===========================
+
+foo bar { baz }
+
+---
+
+(expression (function_call
+  (identifier)
+  (expression (function_call
+    (identifier)
+    (block (expression (identifier)))))))
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json b/spec/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json
new file mode 100644
index 00000000..7ffa73ed
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json
@@ -0,0 +1,63 @@
+{
+  "name": "precedence_on_single_child_positive",
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "function_call"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "function_call": {
+      "type": "PREC_RIGHT",
+      "value": 1,
+      "content": {
+        "type": "CHOICE",
+        "members": [
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "expression"}
+            ]
+          },
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "block"}
+            ]
+          },
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "expression"},
+              {"type": "SYMBOL", "name": "block"}
+            ]
+          }
+        ]
+      }
+    },
+
+    "block": {
+      "type": "SEQ",
+      "members": [
+        {"type": "STRING", "value": "{"},
+        {"type": "SYMBOL", "name": "expression"},
+        {"type": "STRING", "value": "}"}
+      ]
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-zA-Z]+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/precedence_on_single_child_positive/readme.md b/spec/fixtures/test_grammars/precedence_on_single_child_positive/readme.md
new file mode 100644
index 00000000..3bb78e41
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_single_child_positive/readme.md
@@ -0,0 +1 @@
+This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a positive precedence. This causes shifting the `{` token as part of `function_call` to be preferred over reducing the `bar` variable to an expression.
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/precedence_on_subsequence/corpus.txt b/spec/fixtures/test_grammars/precedence_on_subsequence/corpus.txt
new file mode 100644
index 00000000..1b3666f6
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_subsequence/corpus.txt
@@ -0,0 +1,24 @@
+==========================================
+curly brace blocks with high precedence
+==========================================
+
+a b {}
+
+---
+
+(expression (function_call
+  (identifier)
+  (expression (function_call (identifier) (block)))))
+
+==========================================
+do blocks with low precedence
+==========================================
+
+a b do end
+
+---
+
+(expression (function_call
+  (identifier)
+  (expression (identifier))
+  (do_block)))
diff --git a/spec/fixtures/test_grammars/precedence_on_subsequence/grammar.json b/spec/fixtures/test_grammars/precedence_on_subsequence/grammar.json
new file mode 100644
index 00000000..d05db765
--- /dev/null
+++ b/spec/fixtures/test_grammars/precedence_on_subsequence/grammar.json
@@ -0,0 +1,135 @@
+{
+  "name": "precedence_on_subsequence",
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "expression": {
+      "type": "PREC_LEFT",
+      "value": 0,
+      "content": {
+        "type": "CHOICE",
+        "members": [
+          {"type": "SYMBOL", "name": "function_call"},
+          {"type": "SYMBOL", "name": "identifier"},
+          {"type": "SYMBOL", "name": "scope_resolution"}
+        ]
+      }
+    },
+
+    "function_call": {
+      "type": "CHOICE",
+      "members": [
+        {
+          "type": "SEQ",
+          "members": [
+            {"type": "SYMBOL", "name": "identifier"},
+            {"type": "SYMBOL", "name": "expression"}
+          ]
+        },
+
+        {
+          "type": "PREC",
+          "value": 1,
+          "content": {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "block"}
+            ]
+          }
+        },
+
+        {
+          "type": "PREC",
+          "value": -1,
+          "content": {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "SYMBOL", "name": "do_block"}
+            ]
+          }
+        },
+
+        {
+          "type": "SEQ",
+          "members": [
+            {"type": "SYMBOL", "name": "identifier"},
+            {
+              "type": "PREC",
+              "value": 1,
+              "content": {
+                "type": "SEQ",
+                "members": [
+                  {"type": "SYMBOL", "name": "expression"},
+                  {"type": "SYMBOL", "name": "block"}
+                ]
+              }
+            }
+          ]
+        },
+
+        {
+          "type": "SEQ",
+          "members": [
+            {"type": "SYMBOL", "name": "identifier"},
+            {
+              "type": "PREC",
+              "value": -1,
+              "content": {
+                "type": "SEQ",
+                "members": [
+                  {"type": "SYMBOL", "name": "expression"},
+                  {"type": "SYMBOL", "name": "do_block"}
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+
+    "scope_resolution": {
+      "type": "PREC_LEFT",
+      "value": 1,
+      "content": {
+        "type": "CHOICE",
+        "members": [
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "SYMBOL", "name": "expression"},
+              {"type": "STRING", "value": "::"},
+              {"type": "SYMBOL", "name": "expression"}
+            ]
+          },
+          {
+            "type": "SEQ",
+            "members": [
+              {"type": "STRING", "value": "::"},
+              {"type": "SYMBOL", "name": "expression"}
+            ]
+          }
+        ]
+      }
+    },
+
+    "block": {
+      "type": "STRING",
+      "value": "{}"
+    },
+
+    "do_block": {
+      "type": "STRING",
+      "value": "do end"
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-zA-Z]+"
+    }
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/readme.md b/spec/fixtures/test_grammars/readme.md
new file mode 100644
index 00000000..a8f0449d
--- /dev/null
+++ b/spec/fixtures/test_grammars/readme.md
@@ -0,0 +1,3 @@
+These small grammars demonstrate specific features or test for certain specific regressions.
+
+For some of them, compilation is expected to fail with a given error message. For others, the resulting parser is expected to produce certain trees.
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/readme_grammar/corpus.txt b/spec/fixtures/test_grammars/readme_grammar/corpus.txt
new file mode 100644
index 00000000..df339f20
--- /dev/null
+++ b/spec/fixtures/test_grammars/readme_grammar/corpus.txt
@@ -0,0 +1,13 @@
+==================================
+the readme example
+==================================
+
+a + b * c
+
+---
+
+(expression (sum
+  (expression (variable))
+  (expression (product
+     (expression (variable))
+     (expression (variable))))))
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/readme_grammar/grammar.json b/spec/fixtures/test_grammars/readme_grammar/grammar.json
new file mode 100644
index 00000000..fd496068
--- /dev/null
+++ b/spec/fixtures/test_grammars/readme_grammar/grammar.json
@@ -0,0 +1,67 @@
+{
+  "name": "readme_grammar",
+
+  // Things that can appear anywhere in the language, like comments
+  // and whitespace, are expressed as 'extras'.
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"},
+    {"type": "SYMBOL", "name": "comment"}
+  ],
+
+  "rules": {
+
+    // The first rule listed in the grammar becomes the 'start rule'.
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "sum"},
+        {"type": "SYMBOL", "name": "product"},
+        {"type": "SYMBOL", "name": "number"},
+        {"type": "SYMBOL", "name": "variable"},
+        {
+          "type": "SEQ",
+          "members": [
+            {"type": "STRING", "value": "("},
+            {"type": "SYMBOL", "name": "expression"},
+            {"type": "STRING", "value": ")"}
+          ]
+        }
+      ]
+    },
+
+    // Tokens like '+' and '*' are described directly within the
+    // grammar's rules, as opposed to in a separate lexer description.
+    "sum": {
+      "type": "PREC_LEFT",
+      "value": 1,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "+"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    // Ambiguities can be resolved at compile time by assigning precedence
+    // values to rule subtrees.
+    "product": {
+      "type": "PREC_LEFT",
+      "value": 2,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "*"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    // Tokens can be specified using ECMAScript regexps.
+    "number": {"type": "PATTERN", "value": "\\d+"},
+    "comment": {"type": "PATTERN", "value": "#.*"},
+    "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/start_rule_is_blank/corpus.txt b/spec/fixtures/test_grammars/start_rule_is_blank/corpus.txt
new file mode 100644
index 00000000..2b028562
--- /dev/null
+++ b/spec/fixtures/test_grammars/start_rule_is_blank/corpus.txt
@@ -0,0 +1,7 @@
+========================
+the empty string
+=======================
+
+---
+
+(first_rule)
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/start_rule_is_blank/grammar.json b/spec/fixtures/test_grammars/start_rule_is_blank/grammar.json
new file mode 100644
index 00000000..94b6c6c4
--- /dev/null
+++ b/spec/fixtures/test_grammars/start_rule_is_blank/grammar.json
@@ -0,0 +1,6 @@
+{
+  "name": "start_rule_is_blank",
+  "rules": {
+    "first_rule": {"type": "BLANK"}
+  }
+}
\ No newline at end of file
diff --git a/spec/fixtures/test_grammars/start_rule_is_token/corpus.txt b/spec/fixtures/test_grammars/start_rule_is_token/corpus.txt
new file mode 100644
index 00000000..aaa4e20b
--- /dev/null
+++ b/spec/fixtures/test_grammars/start_rule_is_token/corpus.txt
@@ -0,0 +1,6 @@
+===========================
+the single token
+==========================
+the-value
+---
+(first_rule)
diff --git a/spec/fixtures/test_grammars/start_rule_is_token/grammar.json b/spec/fixtures/test_grammars/start_rule_is_token/grammar.json
new file mode 100644
index 00000000..9b60c0d4
--- /dev/null
+++ b/spec/fixtures/test_grammars/start_rule_is_token/grammar.json
@@ -0,0 +1,6 @@
+{
+  "name": "start_rule_is_token",
+  "rules": {
+    "first_rule": {"type": "STRING", "value": "the-value"}
+  }
+}
\ No newline at end of file
diff --git a/spec/helpers/file_helpers.cc b/spec/helpers/file_helpers.cc
new file mode 100644
index 00000000..3c08bec2
--- /dev/null
+++ b/spec/helpers/file_helpers.cc
@@ -0,0 +1,70 @@
+#include "helpers/file_helpers.h"
+#include <sys/stat.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fstream>
+#include <dirent.h>
+
+using std::string;
+using std::ifstream;
+using std::istreambuf_iterator;
+using std::ofstream;
+using std::vector;
+
+// Returns true when stat() succeeds for `path`.
+bool file_exists(const string &path) {
+  struct stat file_stat;
+  return stat(path.c_str(), &file_stat) == 0;
+}
+
+// Returns the st_mtime reported by stat() for `path`, or 0 when stat() fails.
+// A failure other than ENOENT is also reported on stderr.
+int get_modified_time(const string &path) {
+  struct stat file_stat;
+  if (stat(path.c_str(), &file_stat) != 0) {
+    if (errno != ENOENT)
+      fprintf(stderr, "Error in stat() for path: %s\n", path.c_str());
+    return 0;
+  }
+  return file_stat.st_mtime;
+}
+
+// Returns everything that can be read from the file at `path`.
+string read_file(const string &path) {
+  ifstream file(path);
+  istreambuf_iterator<char> file_iterator(file), end_iterator;
+  string content(file_iterator, end_iterator);
+  file.close();
+  return content;
+}
+
+// Writes `content` to the file at `path`.
+void write_file(const string &path, const string &content) {
+  ofstream file(path);
+  file << content;
+  file.close();
+}
+
+// Returns the names of the entries in the directory at `path`, without the
+// "." and ".." entries. If the directory cannot be opened, prints a message
+// and returns an empty vector.
+vector<string> list_directory(const string &path) {
+  vector<string> result;
+
+  DIR *dir = opendir(path.c_str());
+  if (!dir) {
+    printf("\nTest error - no such directory '%s'", path.c_str());
+    return result;
+  }
+
+  struct dirent *dir_entry;
+  while ((dir_entry = readdir(dir))) {
+    string name(dir_entry->d_name);
+    if (name != "." && name != "..") {
+      result.push_back(name);
+    }
+  }
+
+  closedir(dir);
+  return result;
+}
\ No newline at end of file
diff --git a/spec/helpers/file_helpers.h b/spec/helpers/file_helpers.h
new file mode 100644
index 00000000..c3d798ea
--- /dev/null
+++ b/spec/helpers/file_helpers.h
@@ -0,0 +1,14 @@
+#ifndef HELPERS_FILE_HELPERS_H_
+#define HELPERS_FILE_HELPERS_H_
+
+#include <string>
+#include <vector>
+#include <sys/stat.h>
+
+bool file_exists(const std::string &path);  // true when stat() succeeds for path
+int get_modified_time(const std::string &path);  // st_mtime from stat(), or 0 when stat() fails
+std::string read_file(const std::string &path);  // everything readable from the file at path
+void write_file(const std::string &path, const std::string &content);  // writes content to the file at path
+std::vector<std::string> list_directory(const std::string &path);  // entry names, excluding "." and ".."
+
+#endif  // HELPERS_FILE_HELPERS_H_
diff --git a/spec/helpers/load_language.cc b/spec/helpers/load_language.cc
index c59eca95..71829c5d 100644
--- a/spec/helpers/load_language.cc
+++ b/spec/helpers/load_language.cc
@@ -1,12 +1,12 @@
 #include "spec_helper.h"
 #include "helpers/load_language.h"
+#include "helpers/file_helpers.h"
 #include <unistd.h>
 #include <dlfcn.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <map>
 #include <string>
-#include <sys/stat.h>
 #include <fstream>
 #include <stdlib.h>
 #include "tree_sitter/compiler.h"
@@ -54,25 +54,10 @@ static std::string run_command(const char *cmd, const char *args[]) {
   }
 }
 
-static bool file_exists(const string &path) {
-  struct stat file_stat;
-  return stat(path.c_str(), &file_stat) == 0;
-}
-
-static int get_modified_time(const string &path) {
-  struct stat file_stat;
-  if (stat(path.c_str(), &file_stat) != 0) {
-    if (errno != ENOENT)
-      fprintf(stderr, "Error in stat() for path: %s\n", + path.c_str());
-    return 0;
-  }
-  return file_stat.st_mtime;
-}
-
-const TSLanguage *load_language(const string &source_filename,
-                                const string &lib_filename,
-                                const string &language_name,
-                                string external_scanner_filename = "") {
+static const TSLanguage *load_language(const string &source_filename,
+                                       const string &lib_filename,
+                                       const string &language_name,
+                                       string external_scanner_filename = "") {
   string language_function_name = "tree_sitter_" + language_name;
   string header_dir = getenv("PWD") + string("/include");
   int source_mtime = get_modified_time(source_filename);
@@ -132,9 +117,9 @@ const TSLanguage *load_language(const string &source_filename,
   return reinterpret_cast<TSLanguage *(*)()>(language_function)();
 }
 
-const TSLanguage *load_compile_result(const string &name,
-                                      const TSCompileResult &compile_result,
-                                      string external_scanner_path) {
+const TSLanguage *load_test_language(const string &name,
+                                     const TSCompileResult &compile_result,
+                                     string external_scanner_path) {
   if (compile_result.error_type != TSCompileErrorTypeNone) {
     Assert::Failure(string("Compilation failed ") + compile_result.error_message);
     return nullptr;
@@ -155,7 +140,7 @@ const TSLanguage *load_compile_result(const string &name,
   return language;
 }
 
-const TSLanguage *get_test_language(const string &language_name) {
+const TSLanguage *load_real_language(const string &language_name) {
   if (loaded_languages[language_name])
     return loaded_languages[language_name];
 
@@ -182,20 +167,14 @@ const TSLanguage *get_test_language(const string &language_name) {
   if (parser_mtime < grammar_mtime || parser_mtime < libcompiler_mtime) {
     printf("\n" "Regenerating the %s parser...\n", language_name.c_str());
 
-    ifstream grammar_file(grammar_filename);
-    istreambuf_iterator<char> grammar_file_iterator(grammar_file), end_iterator;
-    string grammar_json(grammar_file_iterator, end_iterator);
-    grammar_file.close();
-
+    string grammar_json = read_file(grammar_filename);
     TSCompileResult result = ts_compile_grammar(grammar_json.c_str());
     if (result.error_type != TSCompileErrorTypeNone) {
       fprintf(stderr, "Failed to compile %s grammar: %s\n", language_name.c_str(), result.error_message);
       return nullptr;
     }
 
-    ofstream parser_file(parser_filename);
-    parser_file << result.code;
-    parser_file.close();
+    write_file(parser_filename, result.code);
   }
 
   mkdir("out/tmp", 0777);
diff --git a/spec/helpers/load_language.h b/spec/helpers/load_language.h
index 41d8b739..c34a33ca 100644
--- a/spec/helpers/load_language.h
+++ b/spec/helpers/load_language.h
@@ -5,8 +5,10 @@
 #include "tree_sitter/runtime.h"
 #include <string>
 
-const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &,
-                                      std::string external_scanner_path = "");
-const TSLanguage *get_test_language(const std::string &language_name);
+const TSLanguage *load_real_language(const std::string &name);
+
+const TSLanguage *load_test_language(const std::string &name,
+                                     const TSCompileResult &compile_result,
+                                     std::string external_scanner_path = "");
 
 #endif  // HELPERS_LOAD_LANGUAGE_H_
diff --git a/spec/helpers/read_test_entries.cc b/spec/helpers/read_test_entries.cc
index 970b7c57..e743253f 100644
--- a/spec/helpers/read_test_entries.cc
+++ b/spec/helpers/read_test_entries.cc
@@ -1,20 +1,18 @@
 #include "helpers/read_test_entries.h"
+#include <assert.h>
 #include <string>
-#include <fstream>
-#include <streambuf>
-#include <dirent.h>
-
 #include <regex>
+#include "helpers/file_helpers.h"
+
 using std::regex;
 using std::regex_search;
 using std::regex_replace;
-using std::smatch;
 using std::regex_constants::extended;
-
+using std::smatch;
 using std::string;
 using std::vector;
-using std::ifstream;
-using std::istreambuf_iterator;
+
+string fixtures_dir = "spec/fixtures/";
 
 static string trim_output(const string &input) {
   string result(input);
@@ -27,7 +25,7 @@ static string trim_output(const string &input) {
 
 static vector<TestEntry> parse_test_entries(string content) {
   regex header_pattern("===+\n"  "([^=]+)\n"  "===+\n", extended);
-  regex separator_pattern("---+\n", extended);
+  regex separator_pattern("---+\r?\n", extended);
   vector<string> descriptions;
   vector<string> bodies;
 
@@ -55,51 +53,42 @@ static vector<TestEntry> parse_test_entries(string content) {
         body.substr(0, matches.position() - 1),
         trim_output(body.substr(matches.position() + matches[0].length()))
       });
+    } else {
+      puts(("Invalid corpus entry with description: " + descriptions[i]).c_str());
+      abort();
     }
   }
 
   return result;
 }
 
-static vector<string> list_directory(string dir_name) {
-  vector<string> result;
-
-  DIR *dir = opendir(dir_name.c_str());
-  if (!dir) {
-    printf("\nTest error - no such directory '%s'", dir_name.c_str());
-    return result;
-  }
-
-  struct dirent *dir_entry;
-  while ((dir_entry = readdir(dir))) {
-    string name(dir_entry->d_name);
-    if (name != "." && name != "..")
-      result.push_back(dir_name + "/" + name);
-  }
-
-  closedir(dir);
-  return result;
-}
-
-static string read_file(string filename) {
-  ifstream file(filename);
-  string result((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());
-  return result;
-}
-
-vector<TestEntry> read_corpus_entries(string language_name) {
+vector<TestEntry> read_real_language_corpus(string language_name) {
   vector<TestEntry> result;
 
-  string fixtures_dir = "spec/fixtures/";
-
   string test_directory = fixtures_dir + "grammars/" + language_name + "/grammar_test";
-  for (string &test_filename : list_directory(test_directory))
-    for (TestEntry &entry : parse_test_entries(read_file(test_filename)))
+  for (string &test_filename : list_directory(test_directory)) {
+    for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
       result.push_back(entry);
+    }
+  }
 
   string error_test_filename = fixtures_dir + "/error_corpus/" + language_name + "_errors.txt";
-  for (TestEntry &entry : parse_test_entries(read_file(error_test_filename)))
+  for (TestEntry &entry : parse_test_entries(read_file(error_test_filename))) {
     result.push_back(entry);
+  }
 
   return result;
 }
+
+vector<TestEntry> read_test_language_corpus(string language_name) {  // gathers the entries parsed from every file under fixtures_dir + "test_grammars/" + language_name
+  vector<TestEntry> result;
+
+  string test_directory = fixtures_dir + "test_grammars/" + language_name;
+  for (string &test_filename : list_directory(test_directory)) {  // every directory entry is read and parsed; there is no filter by file name here
+    for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
+      result.push_back(entry);
+    }
+  }
+
+  return result;
+}
\ No newline at end of file
diff --git a/spec/helpers/read_test_entries.h b/spec/helpers/read_test_entries.h
index 69f949fc..3de397f1 100644
--- a/spec/helpers/read_test_entries.h
+++ b/spec/helpers/read_test_entries.h
@@ -10,6 +10,7 @@ struct TestEntry {
 	std::string tree_string;
 };
 
-std::vector<TestEntry> read_corpus_entries(std::string directory);
+std::vector<TestEntry> read_real_language_corpus(std::string name);
+std::vector<TestEntry> read_test_language_corpus(std::string name);
 
 #endif
diff --git a/spec/integration/compile_grammar_spec.cc b/spec/integration/compile_grammar_spec.cc
deleted file mode 100644
index ed2109c2..00000000
--- a/spec/integration/compile_grammar_spec.cc
+++ /dev/null
@@ -1,847 +0,0 @@
-#include "spec_helper.h"
-#include "runtime/alloc.h"
-#include "helpers/load_language.h"
-#include "helpers/stderr_logger.h"
-#include "helpers/dedent.h"
-#include "compiler/util/string_helpers.h"
-#include <map>
-
-static string fill_template(string input, map<string, string> parameters) {
-  string result = input;
-  for (const auto &pair : parameters) {
-    util::str_replace(&result, "{{" + pair.first + "}}", pair.second);
-  }
-  return result;
-}
-
-START_TEST
-
-describe("compile_grammar", []() {
-  TSDocument *document;
-
-  before_each([&]() {
-    document = ts_document_new();
-  });
-
-  after_each([&]() {
-    ts_document_free(document);
-  });
-
-  auto assert_root_node = [&](const string &expected_string) {
-    TSNode root_node = ts_document_root_node(document);
-    char *node_string = ts_node_string(root_node, document);
-    AssertThat(node_string, Equals(expected_string));
-    ts_free(node_string);
-  };
-
-  describe("conflicts", [&]() {
-    it("can resolve shift/reduce conflicts using associativities", [&]() {
-      string grammar_template = R"JSON({
-        "name": "associativity_example",
-
-        "rules": {
-          "expression": {
-            "type": "CHOICE",
-            "members": [
-              {"type": "SYMBOL", "name": "math_operation"},
-              {"type": "SYMBOL", "name": "identifier"}
-            ]
-          },
-
-          "math_operation": {
-            "type": "{{math_operation_prec_type}}",
-            "value": 0,
-            "content": {
-              "type": "SEQ",
-              "members": [
-                {"type": "SYMBOL", "name": "expression"},
-                {"type": "STRING", "value": "+"},
-                {"type": "SYMBOL", "name": "expression"}
-              ]
-            }
-          },
-
-          "identifier": {
-            "type": "PATTERN",
-            "value": "[a-zA-Z]+"
-          }
-        }
-      })JSON";
-
-      // Ambiguity, which '+' applies first?
-      ts_document_set_input_string(document, "x+y+z");
-
-      TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
-        {"math_operation_prec_type", "PREC"}
-      }).c_str());
-
-      AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
-        Unresolved conflict for symbol sequence:
-
-          expression  '+'  expression  •  '+'  …
-
-        Possible interpretations:
-
-          1:  (math_operation  expression  '+'  expression)  •  '+'  …
-          2:  expression  '+'  (math_operation  expression  •  '+'  expression)
-
-        Possible resolutions:
-
-          1:  Specify a left or right associativity in `math_operation`
-          2:  Add a conflict for these rules: `math_operation`
-      )MESSAGE")));
-
-      result = ts_compile_grammar(fill_template(grammar_template, {
-        {"math_operation_prec_type", "PREC_LEFT"}
-      }).c_str());
-
-      ts_document_set_language(document, load_compile_result("associativity_example", result));
-      ts_document_parse(document);
-      assert_root_node("(expression (math_operation "
-        "(expression (math_operation (expression (identifier)) (expression (identifier)))) "
-        "(expression (identifier))))");
-
-      result = ts_compile_grammar(fill_template(grammar_template, {
-        {"math_operation_prec_type", "PREC_RIGHT"}
-      }).c_str());
-
-      ts_document_set_language(document, load_compile_result("associativity_example", result));
-      ts_document_parse(document);
-      assert_root_node("(expression (math_operation "
-        "(expression (identifier)) "
-        "(expression (math_operation (expression (identifier)) (expression (identifier))))))");
-    });
-
-    it("can resolve shift/reduce conflicts involving single-child rules using precedence", [&]() {
-      string grammar_template = R"JSON({
-        "name": "associativity_example",
-
-        "extras": [
-          {"type": "PATTERN", "value": "\\s"}
-        ],
-
-        "rules": {
-          "expression": {
-            "type": "CHOICE",
-            "members": [
-              {"type": "SYMBOL", "name": "function_call"},
-              {"type": "SYMBOL", "name": "identifier"}
-            ]
-          },
-
-          "function_call": {
-            "type": "PREC_RIGHT",
-            "value": {{function_call_precedence}},
-            "content": {
-              "type": "CHOICE",
-              "members": [
-                {
-                  "type": "SEQ",
-                  "members": [
-                    {"type": "SYMBOL", "name": "identifier"},
-                    {"type": "SYMBOL", "name": "expression"}
-                  ]
-                },
-                {
-                  "type": "SEQ",
-                  "members": [
-                    {"type": "SYMBOL", "name": "identifier"},
-                    {"type": "SYMBOL", "name": "block"}
-                  ]
-                },
-                {
-                  "type": "SEQ",
-                  "members": [
-                    {"type": "SYMBOL", "name": "identifier"},
-                    {"type": "SYMBOL", "name": "expression"},
-                    {"type": "SYMBOL", "name": "block"}
-                  ]
-                }
-              ]
-            }
-          },
-
-          "block": {
-            "type": "SEQ",
-            "members": [
-              {"type": "STRING", "value": "{"},
-              {"type": "SYMBOL", "name": "expression"},
-              {"type": "STRING", "value": "}"}
-            ]
-          },
-
-          "identifier": {
-            "type": "PATTERN",
-            "value": "[a-zA-Z]+"
-          }
-        }
-      })JSON";
-
-      // Ambiguity: is the trailing block associated with `bar` or `foo`?
-      ts_document_set_input_string(document, "foo bar { baz }");
-
-      TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
-        {"function_call_precedence", "0"}
-      }).c_str());
-
-      AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
-        Unresolved conflict for symbol sequence:
-
-          identifier  •  '{'  …
-
-        Possible interpretations:
-
-          1:  (expression  identifier)  •  '{'  …
-          2:  (function_call  identifier  •  block)
-
-        Possible resolutions:
-
-          1:  Specify a higher precedence in `function_call` than in the other rules.
-          2:  Specify a higher precedence in `expression` than in the other rules.
-          3:  Specify a left or right associativity in `expression`
-          4:  Add a conflict for these rules: `expression` `function_call`
-      )MESSAGE")));
-
-      // Giving function calls lower precedence than expressions causes `bar`
-      // to be treated as an expression passed to `foo`, not as a function
-      // that's being called with a block.
-      result = ts_compile_grammar(fill_template(grammar_template, {
-        {"function_call_precedence", "-1"}
-      }).c_str());
-
-      AssertThat(result.error_message, IsNull());
-      ts_document_set_language(document, load_compile_result("associativity_example", result));
-      ts_document_parse(document);
-      assert_root_node("(expression (function_call "
-        "(identifier) "
-        "(expression (identifier)) "
-        "(block (expression (identifier)))))");
-
-      // Giving function calls higher precedence than expressions causes `bar`
-      // to be treated as a function that's being called with a block, not as
-      // an expression passed to `foo`.
-      result = ts_compile_grammar(fill_template(grammar_template, {
-        {"function_call_precedence", "1"}
-      }).c_str());
-
-      AssertThat(result.error_message, IsNull());
-      ts_document_set_language(document, load_compile_result("associativity_example", result));
-      ts_document_set_input_string(document, "foo bar { baz }");
-      ts_document_parse(document);
-      assert_root_node("(expression (function_call "
-        "(identifier) "
-        "(expression (function_call "
-          "(identifier) "
-          "(block (expression (identifier)))))))");
-    });
-
-    it("handles precedence applied to specific rule subsequences (regression)", [&]() {
-      TSCompileResult result = ts_compile_grammar(R"JSON({
-        "name": "precedence_on_subsequence",
-
-        "extras": [
-          {"type": "STRING", "value": " "}
-        ],
-
-        "rules": {
-          "expression": {
-            "type": "PREC_LEFT",
-            "value": 0,
-            "content": {
-              "type": "CHOICE",
-              "members": [
-                {"type": "SYMBOL", "name": "function_call"},
-                {"type": "SYMBOL", "name": "identifier"},
-                {"type": "SYMBOL", "name": "scope_resolution"}
-              ]
-            }
-          },
-
-          "function_call": {
-            "type": "CHOICE",
-            "members": [
-              {
-                "type": "SEQ",
-                "members": [
-                  {"type": "SYMBOL", "name": "identifier"},
-                  {"type": "SYMBOL", "name": "expression"}
-                ]
-              },
-
-              {
-                "type": "PREC",
-                "value": 1,
-                "content": {
-                  "type": "SEQ",
-                  "members": [
-                    {"type": "SYMBOL", "name": "identifier"},
-                    {"type": "SYMBOL", "name": "block"}
-                  ]
-                }
-              },
-
-              {
-                "type": "PREC",
-                "value": -1,
-                "content": {
-                  "type": "SEQ",
-                  "members": [
-                    {"type": "SYMBOL", "name": "identifier"},
-                    {"type": "SYMBOL", "name": "do_block"}
-                  ]
-                }
-              },
-
-              {
-                "type": "SEQ",
-                "members": [
-                  {"type": "SYMBOL", "name": "identifier"},
-                  {
-                    "type": "PREC",
-                    "value": 1,
-                    "content": {
-                      "type": "SEQ",
-                      "members": [
-                        {"type": "SYMBOL", "name": "expression"},
-                        {"type": "SYMBOL", "name": "block"}
-                      ]
-                    }
-                  }
-                ]
-              },
-
-              {
-                "type": "SEQ",
-                "members": [
-                  {"type": "SYMBOL", "name": "identifier"},
-                  {
-                    "type": "PREC",
-                    "value": -1,
-                    "content": {
-                      "type": "SEQ",
-                      "members": [
-                        {"type": "SYMBOL", "name": "expression"},
-                        {"type": "SYMBOL", "name": "do_block"}
-                      ]
-                    }
-                  }
-                ]
-              }
-            ]
-          },
-
-          "scope_resolution": {
-            "type": "PREC_LEFT",
-            "value": 1,
-            "content": {
-              "type": "CHOICE",
-              "members": [
-                {
-                  "type": "SEQ",
-                  "members": [
-                    {"type": "SYMBOL", "name": "expression"},
-                    {"type": "STRING", "value": "::"},
-                    {"type": "SYMBOL", "name": "expression"}
-                  ]
-                },
-                {
-                  "type": "SEQ",
-                  "members": [
-                    {"type": "STRING", "value": "::"},
-                    {"type": "SYMBOL", "name": "expression"},
-                  ]
-                }
-              ]
-            }
-          },
-
-          "block": {
-            "type": "STRING",
-            "value": "{}"
-          },
-
-          "do_block": {
-            "type": "STRING",
-            "value": "do end"
-          },
-
-          "identifier": {
-            "type": "PATTERN",
-            "value": "[a-zA-Z]+"
-          }
-        }
-      })JSON");
-
-      auto language = load_compile_result("precedence_on_subsequence", result);
-      ts_document_set_language(document, language);
-
-      ts_document_set_input_string(document, "a b {}");
-      ts_document_parse(document);
-      assert_root_node("(expression (function_call "
-        "(identifier) "
-        "(expression (function_call (identifier) (block)))))");
-
-      ts_document_set_input_string(document, "a b do end");
-      ts_document_parse(document);
-      assert_root_node("(expression (function_call "
-        "(identifier) "
-        "(expression (identifier)) "
-        "(do_block)))");
-    });
-
-    it("does not allow conflicting precedences", [&]() {
-      string grammar_template = R"JSON({
-        "name": "conflicting_precedence_example",
-
-        "rules": {
-          "expression": {
-            "type": "CHOICE",
-            "members": [
-              {"type": "SYMBOL", "name": "sum"},
-              {"type": "SYMBOL", "name": "product"},
-              {"type": "SYMBOL", "name": "other_thing"}
-            ]
-          },
-
-          "sum": {
-            "type": "PREC_LEFT",
-            "value": 0,
-            "content": {
-              "type": "SEQ",
-              "members": [
-                {"type": "SYMBOL", "name": "expression"},
-                {"type": "STRING", "value": "+"},
-                {"type": "SYMBOL", "name": "expression"}
-              ]
-            }
-          },
-
-          "product": {
-            "type": "PREC_LEFT",
-            "value": 1,
-            "content": {
-              "type": "SEQ",
-              "members": [
-                {"type": "SYMBOL", "name": "expression"},
-                {"type": "STRING", "value": "*"},
-                {"type": "SYMBOL", "name": "expression"}
-              ]
-            }
-          },
-
-          "other_thing": {
-            "type": "PREC_LEFT",
-            "value": -1,
-            "content": {
-              "type": "SEQ",
-              "members": [
-                {"type": "SYMBOL", "name": "expression"},
-                {"type": "STRING", "value": "*"},
-                {"type": "STRING", "value": "*"}
-              ]
-            }
-          },
-
-          "identifier": {
-            "type": "PATTERN",
-            "value": "[a-zA-Z]+"
-          }
-        }
-      })JSON";
-
-      TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
-      }).c_str());
-
-      AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
-        Unresolved conflict for symbol sequence:
-
-          expression  '+'  expression  •  '*'  …
-
-        Possible interpretations:
-
-          1:  (sum  expression  '+'  expression)  •  '*'  …
-          2:  expression  '+'  (product  expression  •  '*'  expression)
-          3:  expression  '+'  (other_thing  expression  •  '*'  '*')
-
-        Possible resolutions:
-
-          1:  Specify a higher precedence in `product` and `other_thing` than in the other rules.
-          2:  Specify a higher precedence in `sum` than in the other rules.
-          3:  Add a conflict for these rules: `sum` `product` `other_thing`
-      )MESSAGE")));
-    });
-  });
-
-  describe("when the grammar contains rules that match the empty string", [&]() {
-    it("reports an error", [&]() {
-      TSCompileResult result = ts_compile_grammar(R"JSON(
-        {
-          "name": "empty_rules",
-
-          "rules": {
-            "rule_1": {"type": "SYMBOL", "name": "rule_2"},
-
-            "rule_2": {
-              "type": "CHOICE",
-              "members": [
-                {"type": "SYMBOL", "name": "rule_1"},
-                {"type": "BLANK"}
-              ]
-            }
-          }
-        }
-      )JSON");
-
-      AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
-        The rule `rule_2` matches the empty string.
-        Tree-sitter currently does not support syntactic rules that match the empty string.
-      )MESSAGE")));
-    });
-  });
-
-  describe("external scanners", [&]() {
-    it("can tokenize using arbitrary user-defined scanner functions", [&]() {
-      string grammar = R"JSON({
-        "name": "external_scanner_example",
-
-        "externals": [
-          "_percent_string",
-          "_percent_string_start",
-          "_percent_string_end"
-        ],
-
-        "extras": [
-          {"type": "PATTERN", "value": "\\s"}
-        ],
-
-        "rules": {
-          "expression": {
-            "type": "CHOICE",
-            "members": [
-              {"type": "SYMBOL", "name": "string"},
-              {"type": "SYMBOL", "name": "sum"},
-              {"type": "SYMBOL", "name": "identifier"}
-            ]
-          },
-
-          "sum": {
-            "type": "PREC_LEFT",
-            "value": 0,
-            "content": {
-              "type": "SEQ",
-              "members": [
-                {"type": "SYMBOL", "name": "expression"},
-                {"type": "STRING", "value": "+"},
-                {"type": "SYMBOL", "name": "expression"}
-              ]
-            }
-          },
-
-          "string": {
-            "type": "CHOICE",
-            "members": [
-              {"type": "SYMBOL", "name": "_percent_string"},
-              {
-                "type": "SEQ",
-                "members": [
-                  {"type": "SYMBOL", "name": "_percent_string_start"},
-                  {"type": "SYMBOL", "name": "expression"},
-                  {"type": "SYMBOL", "name": "_percent_string_end"}
-                ]
-              },
-            ]
-          },
-
-          "identifier": {
-            "type": "PATTERN",
-            "value": "\\a+"
-          }
-        }
-      })JSON";
-
-      TSCompileResult result = ts_compile_grammar(grammar.c_str());
-      AssertThat(result.error_message, IsNull());
-
-      ts_document_set_language(document, load_compile_result(
-        "external_scanner_example",
-        result,
-        "spec/fixtures/external_scanners/percent_strings.c"
-      ));
-
-      ts_document_set_input_string(document, "x + %(sup (external) scanner?)");
-      ts_document_parse(document);
-      assert_root_node("(expression (sum (expression (identifier)) (expression (string))))");
-
-      ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}");
-      ts_document_parse(document);
-      assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))");
-    });
-
-    it("allows external scanners to refer to tokens that are defined internally", [&]() {
-      string grammar = R"JSON({
-        "name": "shared_external_tokens",
-
-        "externals": [
-          "string",
-          "line_break"
-        ],
-
-        "extras": [
-          {"type": "PATTERN", "value": "\\s"}
-        ],
-
-        "rules": {
-          "statement": {
-            "type": "SEQ",
-            "members": [
-              {"type": "SYMBOL", "name": "_expression"},
-              {"type": "SYMBOL", "name": "_expression"},
-              {"type": "SYMBOL", "name": "line_break"}
-            ]
-          },
-
-          "_expression": {
-            "type": "CHOICE",
-            "members": [
-              {"type": "SYMBOL", "name": "string"},
-              {"type": "SYMBOL", "name": "variable"},
-              {"type": "SYMBOL", "name": "number"}
-            ]
-          },
-
-          "variable": {"type": "PATTERN", "value": "\\a+"},
-          "number": {"type": "PATTERN", "value": "\\d+"},
-          "line_break": {"type": "STRING", "value": "\n"}
-        }
-      })JSON";
-
-      TSCompileResult result = ts_compile_grammar(grammar.c_str());
-      AssertThat(result.error_message, IsNull());
-
-      ts_document_set_language(document, load_compile_result(
-        "shared_external_tokens",
-        result,
-        "spec/fixtures/external_scanners/shared_external_tokens.c"
-      ));
-
-      ts_document_set_input_string(document, "a b\n");
-      ts_document_parse(document);
-      assert_root_node("(statement (variable) (variable) (line_break))");
-
-      ts_document_set_input_string(document, "a \nb\n");
-      ts_document_parse(document);
-      assert_root_node("(statement (variable) (variable) (line_break))");
-
-      ts_document_set_input_string(document, "'hello' 'world'\n");
-      ts_document_parse(document);
-      assert_root_node("(statement (string) (string) (line_break))");
-
-      ts_document_set_input_string(document, "'hello' \n'world'\n");
-      ts_document_parse(document);
-      assert_root_node("(statement (string) (string) (line_break))");
-    });
-
-    it("allows external tokens to be used as extras", [&]() {
-      string grammar = R"JSON({
-        "name": "extra_external_tokens",
-
-        "externals": [
-          "comment"
-        ],
-
-        "extras": [
-          {"type": "PATTERN", "value": "\\s"},
-          {"type": "SYMBOL", "name": "comment"}
-        ],
-
-        "rules": {
-          "assignment": {
-            "type": "SEQ",
-            "members": [
-              {"type": "SYMBOL", "name": "variable"},
-              {"type": "STRING", "value": "="},
-              {"type": "SYMBOL", "name": "variable"}
-            ]
-          },
-
-          "variable": {"type": "PATTERN", "value": "\\a+"}
-        }
-      })JSON";
-
-      TSCompileResult result = ts_compile_grammar(grammar.c_str());
-      AssertThat(result.error_message, IsNull());
-
-      ts_document_set_language(document, load_compile_result(
-        "extra_external_tokens",
-        result,
-        "spec/fixtures/external_scanners/extra_external_tokens.c"
-      ));
-
-      ts_document_set_input_string(document, "x = # a comment\n y");
-      ts_document_parse(document);
-      assert_root_node("(assignment (variable) (comment) (variable))");
-    });
-  });
-
-  describe("when the grammar's start symbol is a token", [&]() {
-    it("parses the token", [&]() {
-      TSCompileResult result = ts_compile_grammar(R"JSON(
-        {
-          "name": "one_token_language",
-          "rules": {
-            "first_rule": {"type": "STRING", "value": "the-value"}
-          }
-        }
-      )JSON");
-
-      ts_document_set_language(document, load_compile_result("one_token_language", result));
-
-      ts_document_set_input_string(document, "the-value");
-      ts_document_parse(document);
-      assert_root_node("(first_rule)");
-    });
-  });
-
-  describe("when the grammar's start symbol is blank", [&]() {
-    it("parses the empty string", [&]() {
-      TSCompileResult result = ts_compile_grammar(R"JSON(
-        {
-          "name": "blank_language",
-          "rules": {
-            "first_rule": {"type": "BLANK"}
-          }
-        }
-      )JSON");
-
-      ts_document_set_language(document, load_compile_result("blank_language", result));
-
-      ts_document_set_input_string(document, "");
-      ts_document_parse(document);
-      assert_root_node("(first_rule)");
-    });
-  });
-
-  describe("when the grammar contains anonymous tokens with escaped characters", [&]() {
-    it("escapes the escaped characters properly in the generated parser", [&]() {
-      TSCompileResult result = ts_compile_grammar(R"JSON(
-        {
-          "name": "escaped_char_language",
-          "rules": {
-            "first_rule": {
-              "type": "CHOICE",
-              "members": [
-                {"type": "STRING", "value": "\n"},
-                {"type": "STRING", "value": "\r"},
-                {"type": "STRING", "value": "'hello'"},
-                {"type": "PATTERN", "value": "\\d+"}
-              ]
-            }
-          }
-        }
-      )JSON");
-
-      ts_document_set_language(document, load_compile_result("escaped_char_language", result));
-
-      ts_document_set_input_string(document, "1234");
-      ts_document_parse(document);
-      assert_root_node("(first_rule)");
-
-      ts_document_set_input_string(document, "\n");
-      ts_document_parse(document);
-      assert_root_node("(first_rule)");
-
-      ts_document_set_input_string(document, "'hello'");
-      ts_document_parse(document);
-      assert_root_node("(first_rule)");
-    });
-  });
-
-  describe("the grammar in the README", [&]() {
-    it("parses the input in the README", [&]() {
-      TSCompileResult result = ts_compile_grammar(R"JSON(
-        {
-          "name": "arithmetic",
-
-          // Things that can appear anywhere in the language, like comments
-          // and whitespace, are expressed as 'extras'.
-          "extras": [
-            {"type": "PATTERN", "value": "\\s"},
-            {"type": "SYMBOL", "name": "comment"}
-          ],
-
-          "rules": {
-
-            // The first rule listed in the grammar becomes the 'start rule'.
-            "expression": {
-              "type": "CHOICE",
-              "members": [
-                {"type": "SYMBOL", "name": "sum"},
-                {"type": "SYMBOL", "name": "product"},
-                {"type": "SYMBOL", "name": "number"},
-                {"type": "SYMBOL", "name": "variable"},
-                {
-                  "type": "SEQ",
-                  "members": [
-                    {"type": "STRING", "value": "("},
-                    {"type": "SYMBOL", "name": "expression"},
-                    {"type": "STRING", "value": ")"}
-                  ]
-                }
-              ]
-            },
-
-            // Tokens like '+' and '*' are described directly within the
-            // grammar's rules, as opposed to in a seperate lexer description.
-            "sum": {
-              "type": "PREC_LEFT",
-              "value": 1,
-              "content": {
-                "type": "SEQ",
-                "members": [
-                  {"type": "SYMBOL", "name": "expression"},
-                  {"type": "STRING", "value": "+"},
-                  {"type": "SYMBOL", "name": "expression"}
-                ]
-              }
-            },
-
-            // Ambiguities can be resolved at compile time by assigning precedence
-            // values to rule subtrees.
-            "product": {
-              "type": "PREC_LEFT",
-              "value": 2,
-              "content": {
-                "type": "SEQ",
-                "members": [
-                  {"type": "SYMBOL", "name": "expression"},
-                  {"type": "STRING", "value": "*"},
-                  {"type": "SYMBOL", "name": "expression"}
-                ]
-              }
-            },
-
-            // Tokens can be specified using ECMAScript regexps.
-            "number": {"type": "PATTERN", "value": "\\d+"},
-            "comment": {"type": "PATTERN", "value": "#.*"},
-            "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
-          }
-        }
-      )JSON");
-
-      const TSLanguage *language = load_compile_result("arithmetic", result);
-
-      ts_document_set_language(document, language);
-      ts_document_set_input_string(document, "a + b * c");
-      ts_document_parse(document);
-
-      assert_root_node(
-        "(expression (sum "
-          "(expression (variable)) "
-          "(expression (product "
-             "(expression (variable)) "
-             "(expression (variable))))))");
-    });
-  });
-});
-
-END_TEST
diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc
deleted file mode 100644
index c399e8f9..00000000
--- a/spec/integration/corpus_specs.cc
+++ /dev/null
@@ -1,185 +0,0 @@
-#include "spec_helper.h"
-#include "runtime/alloc.h"
-#include "helpers/load_language.h"
-#include "helpers/read_test_entries.h"
-#include "helpers/spy_input.h"
-#include "helpers/stderr_logger.h"
-#include "helpers/point_helpers.h"
-#include "helpers/encoding_helpers.h"
-#include "helpers/record_alloc.h"
-#include "helpers/random_helpers.h"
-#include "helpers/scope_sequence.h"
-#include <set>
-
-static void assert_correct_tree_shape(const TSDocument *document, string tree_string) {
-  TSNode root_node = ts_document_root_node(document);
-  const char *node_string = ts_node_string(root_node, document);
-  string result(node_string);
-  ts_free((void *)node_string);
-  AssertThat(result, Equals(tree_string));
-}
-
-static void assert_consistent_sizes(TSNode node) {
-  size_t child_count = ts_node_child_count(node);
-  size_t start_byte = ts_node_start_byte(node);
-  size_t end_byte = ts_node_end_byte(node);
-  TSPoint start_point = ts_node_start_point(node);
-  TSPoint end_point = ts_node_end_point(node);
-  bool some_child_has_changes = false;
-
-  AssertThat(start_byte, !IsGreaterThan(end_byte));
-  AssertThat(start_point, !IsGreaterThan(end_point));
-
-  size_t last_child_end_byte = start_byte;
-  TSPoint last_child_end_point = start_point;
-
-  for (size_t i = 0; i < child_count; i++) {
-    TSNode child = ts_node_child(node, i);
-    size_t child_start_byte = ts_node_start_byte(child);
-    TSPoint child_start_point = ts_node_start_point(child);
-
-    AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
-    AssertThat(child_start_point, !IsLessThan(last_child_end_point));
-    assert_consistent_sizes(child);
-    if (ts_node_has_changes(child))
-      some_child_has_changes = true;
-
-    last_child_end_byte = ts_node_end_byte(child);
-    last_child_end_point = ts_node_end_point(child);
-  }
-
-  if (child_count > 0) {
-    AssertThat(end_byte, !IsLessThan(last_child_end_byte));
-    AssertThat(end_point, !IsLessThan(last_child_end_point));
-  }
-
-  if (some_child_has_changes) {
-    AssertThat(ts_node_has_changes(node), IsTrue());
-  }
-}
-
-static void assert_correct_tree_size(TSDocument *document, string content) {
-  TSNode root_node = ts_document_root_node(document);
-  size_t expected_size = content.size();
-
-  // In the JSON grammar, the start rule (`_value`) is hidden, so the node
-  // returned from `ts_document_root_node` (e.g. an `object` node), does not
-  // actually point to the root of the tree. In this weird case, trailing
-  // whitespace is not included in the root node's size.
-  //
-  // TODO: Fix this inconsistency. Maybe disallow the start rule being hidden?
-  if (ts_document_language(document) == get_test_language("json") &&
-      string(ts_node_type(root_node, document)) != "ERROR")
-    expected_size = content.find_last_not_of("\n ") + 1;
-
-  AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
-  assert_consistent_sizes(root_node);
-}
-
-START_TEST
-
-describe("The Corpus", []() {
-  vector<string> test_languages({
-    "javascript",
-    "json",
-    "c",
-    "cpp",
-    "python",
-  });
-
-  for (auto &language_name : test_languages) {
-    describe(("the " + language_name + " language").c_str(), [&]() {
-      TSDocument *document;
-
-      before_each([&]() {
-        record_alloc::start();
-        document = ts_document_new();
-        ts_document_set_language(document, get_test_language(language_name));
-
-        // ts_document_set_logger(document, stderr_logger_new(true));
-        // ts_document_print_debugging_graphs(document, true);
-      });
-
-      after_each([&]() {
-        ts_document_free(document);
-        AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
-      });
-
-      for (auto &entry : read_corpus_entries(language_name)) {
-        SpyInput *input;
-
-        auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
-          it(("parses " + entry.description + ": " + name).c_str(), [&]() {
-            input = new SpyInput(entry.input, 3);
-            ts_document_set_input(document, input->input());
-            edit_sequence();
-            assert_correct_tree_shape(document, entry.tree_string);
-            assert_correct_tree_size(document, input->content);
-            delete input;
-          });
-        };
-
-        it_handles_edit_sequence("initial parse", [&]() {
-          ts_document_parse(document);
-        });
-
-        std::set<std::pair<size_t, size_t>> deletions;
-        std::set<std::pair<size_t, string>> insertions;
-
-        for (size_t i = 0; i < 60; i++) {
-          size_t edit_position = random() % utf8_char_count(entry.input);
-          size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);
-          string inserted_text = random_words(random() % 4 + 1);
-
-          if (insertions.insert({edit_position, inserted_text}).second) {
-            string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
-
-            it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
-              ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
-              ts_document_parse(document);
-              assert_correct_tree_size(document, input->content);
-
-              ts_document_edit(document, input->undo());
-              assert_correct_tree_size(document, input->content);
-
-              TSRange *ranges;
-              uint32_t range_count;
-              ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
-              ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
-
-              ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
-              verify_changed_ranges(old_scope_sequence, new_scope_sequence,
-                                    input->content, ranges, range_count);
-              ts_free(ranges);
-            });
-          }
-
-          if (deletions.insert({edit_position, deletion_size}).second) {
-            string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
-
-            it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
-              ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
-              ts_document_parse(document);
-              assert_correct_tree_size(document, input->content);
-
-              ts_document_edit(document, input->undo());
-              assert_correct_tree_size(document, input->content);
-
-              TSRange *ranges;
-              uint32_t range_count;
-              ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
-              ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
-
-              ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
-              verify_changed_ranges(old_scope_sequence, new_scope_sequence,
-                                    input->content, ranges, range_count);
-              ts_free(ranges);
-            });
-          }
-        }
-      }
-    });
-  }
-});
-
-END_TEST
diff --git a/spec/integration/real_grammars.cc b/spec/integration/real_grammars.cc
new file mode 100644
index 00000000..a7c2137d
--- /dev/null
+++ b/spec/integration/real_grammars.cc
@@ -0,0 +1,181 @@
+#include "spec_helper.h"
+#include "runtime/alloc.h"
+#include "helpers/load_language.h"
+#include "helpers/read_test_entries.h"
+#include "helpers/spy_input.h"
+#include "helpers/stderr_logger.h"
+#include "helpers/point_helpers.h"
+#include "helpers/encoding_helpers.h"
+#include "helpers/record_alloc.h"
+#include "helpers/random_helpers.h"
+#include "helpers/scope_sequence.h"
+#include <set>
+
+static void assert_consistent_sizes(TSNode node) {  // Recursively asserts that the extents of `node` and its descendants are consistent.
+  size_t child_count = ts_node_child_count(node);
+  size_t start_byte = ts_node_start_byte(node);
+  size_t end_byte = ts_node_end_byte(node);
+  TSPoint start_point = ts_node_start_point(node);
+  TSPoint end_point = ts_node_end_point(node);
+  bool some_child_has_changes = false;
+  // A node must not start after it ends, measured in bytes or in points.
+  AssertThat(start_byte, !IsGreaterThan(end_byte));
+  AssertThat(start_point, !IsGreaterThan(end_point));
+  // Seeded with the node's own start so the first child is checked against it.
+  size_t last_child_end_byte = start_byte;
+  TSPoint last_child_end_point = start_point;
+
+  for (size_t i = 0; i < child_count; i++) {
+    TSNode child = ts_node_child(node, i);
+    size_t child_start_byte = ts_node_start_byte(child);
+    TSPoint child_start_point = ts_node_start_point(child);
+    // Each child must begin at or after the end of the previous sibling.
+    AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
+    AssertThat(child_start_point, !IsLessThan(last_child_end_point));
+    assert_consistent_sizes(child);
+    if (ts_node_has_changes(child))
+      some_child_has_changes = true;
+
+    last_child_end_byte = ts_node_end_byte(child);
+    last_child_end_point = ts_node_end_point(child);
+  }
+  // The last child must not extend past the end of its parent.
+  if (child_count > 0) {
+    AssertThat(end_byte, !IsLessThan(last_child_end_byte));
+    AssertThat(end_point, !IsLessThan(last_child_end_point));
+  }
+  // If any direct child reports changes, this node must report them too.
+  if (some_child_has_changes) {
+    AssertThat(ts_node_has_changes(node), IsTrue());
+  }
+}
+
+static void assert_correct_tree_size(TSDocument *document, string content) {  // Asserts the root node ends at the expected byte, then checks the whole tree's extents.
+  TSNode root_node = ts_document_root_node(document);
+  size_t expected_size = content.size();  // default expectation: the root node ends at the end of `content`
+
+  // In the JSON grammar, the start rule (`_value`) is hidden, so the node
+  // returned from `ts_document_root_node` (e.g. an `object` node) does not
+  // actually point to the root of the tree. In this weird case, trailing
+  // whitespace is not included in the root node's size.
+  //
+  // TODO: Fix this inconsistency. Maybe disallow the start rule being hidden?
+  if (ts_document_language(document) == load_real_language("json") &&
+      string(ts_node_type(root_node, document)) != "ERROR")
+    expected_size = content.find_last_not_of("\n ") + 1;  // length of `content` without trailing newlines and spaces
+
+  AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
+  assert_consistent_sizes(root_node);
+}
+
+START_TEST
+// Language names passed to load_real_language and read_real_language_corpus below.
+vector<string> test_languages({
+  "javascript",
+  "json",
+  "c",
+  "cpp",
+  "python",
+});
+// Registers one describe group per language.
+for (auto &language_name : test_languages) {
+  describe(("the " + language_name + " language").c_str(), [&]() {
+    TSDocument *document;
+    // Every spec starts with a fresh document configured for this language.
+    before_each([&]() {
+      record_alloc::start();
+      document = ts_document_new();
+      ts_document_set_language(document, load_real_language(language_name));
+      // Debugging aids, disabled by default.
+      // ts_document_set_logger(document, stderr_logger_new(true));
+      // ts_document_print_debugging_graphs(document, true);
+    });
+    // Frees the document, then asserts that record_alloc reports no outstanding allocations.
+    after_each([&]() {
+      ts_document_free(document);
+      AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
+    });
+
+    for (auto &entry : read_real_language_corpus(language_name)) {
+      SpyInput *input;  // allocated and deleted inside each spec; the edit lambdas reach it by reference
+      // Registers a spec that feeds the entry's input to the document, runs `edit_sequence`, then checks the tree.
+      auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
+        it(("parses " + entry.description + ": " + name).c_str(), [&]() {
+          input = new SpyInput(entry.input, 3);
+          ts_document_set_input(document, input->input());
+          edit_sequence();
+          // The root node's string form must equal the tree recorded in the corpus entry.
+          TSNode root_node = ts_document_root_node(document);
+          const char *node_string = ts_node_string(root_node, document);
+          string result(node_string);
+          ts_free((void *)node_string);
+          AssertThat(result, Equals(entry.tree_string));
+
+          assert_correct_tree_size(document, input->content);
+          delete input;  // NOTE(review): not reached if an assertion above throws — confirm whether that matters
+        });
+      };
+      // Baseline spec: a single parse with no edits.
+      it_handles_edit_sequence("initial parse", [&]() {
+        ts_document_parse(document);
+      });
+      // Edits generated so far; a repeated edit does not register a second spec.
+      std::set<std::pair<size_t, size_t>> deletions;
+      std::set<std::pair<size_t, string>> insertions;
+      // 60 random edit candidates per corpus entry.
+      for (size_t i = 0; i < 60; i++) {
+        size_t edit_position = random() % utf8_char_count(entry.input);  // NOTE(review): assumes a non-empty input; the divisor would otherwise be zero
+        size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);  // edit_position + deletion_size stays below the character count
+        string inserted_text = random_words(random() % 4 + 1);  // argument is in 1..4
+
+        if (insertions.insert({edit_position, inserted_text}).second) {
+          string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
+          // Insert the text and reparse, then undo the edit and reparse while collecting the changed ranges.
+          it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
+            ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
+            ts_document_parse(document);
+            assert_correct_tree_size(document, input->content);
+            // The undo is applied and the sizes are checked before the document is reparsed.
+            ts_document_edit(document, input->undo());
+            assert_correct_tree_size(document, input->content);
+            // Scope sequences are built before and after the reparse and handed to verify_changed_ranges.
+            TSRange *ranges;
+            uint32_t range_count;
+            ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
+            ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
+
+            ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
+            verify_changed_ranges(old_scope_sequence, new_scope_sequence,
+                                  input->content, ranges, range_count);
+            ts_free(ranges);
+          });
+        }
+
+        if (deletions.insert({edit_position, deletion_size}).second) {
+          string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
+          // Same flow as the insertion spec, but the edit removes `deletion_size` characters instead.
+          it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
+            ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
+            ts_document_parse(document);
+            assert_correct_tree_size(document, input->content);
+
+            ts_document_edit(document, input->undo());
+            assert_correct_tree_size(document, input->content);
+
+            TSRange *ranges;
+            uint32_t range_count;
+            ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
+            ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
+
+            ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
+            verify_changed_ranges(old_scope_sequence, new_scope_sequence,
+                                  input->content, ranges, range_count);
+            ts_free(ranges);
+          });
+        }
+      }
+    }
+  });
+}
+
+END_TEST
diff --git a/spec/integration/test_grammars.cc b/spec/integration/test_grammars.cc
new file mode 100644
index 00000000..128dd6cc
--- /dev/null
+++ b/spec/integration/test_grammars.cc
@@ -0,0 +1,78 @@
+#include "spec_helper.h"
+#include "helpers/read_test_entries.h"
+#include "helpers/load_language.h"
+#include "helpers/stderr_logger.h"
+#include "helpers/file_helpers.h"
+#include "runtime/alloc.h"
+
+START_TEST
+
+string grammars_dir_path = "spec/fixtures/test_grammars";
+vector<string> test_languages = list_directory(grammars_dir_path);
+
+for (auto &language_name : test_languages) {
+  if (language_name == "readme.md") continue;
+  // Register one describe group per remaining entry of the test grammars directory.
+  describe(("test language: " + language_name).c_str(), [&]() {
+    string directory_path = grammars_dir_path + "/" + language_name;
+    string grammar_path = directory_path + "/grammar.json";
+    string external_scanner_path = directory_path + "/scanner.c";
+    string expected_error_path = directory_path + "/expected_error.txt";
+    string corpus_path = directory_path + "/corpus.txt";  // NOTE(review): not referenced below; the corpus is read via read_test_language_corpus(language_name)
+    // The scanner is optional: pass an empty path when the fixture has no scanner.c.
+    if (!file_exists(external_scanner_path)) {
+      external_scanner_path = "";
+    }
+    // Compile the grammar once here; the specs registered below share the result.
+    string grammar_json = read_file(grammar_path);
+    TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
+    // A fixture with expected_error.txt only checks the compile error message.
+    if (file_exists(expected_error_path)) {
+      it("fails with the correct error message", [&]() {
+        string expected_error = read_file(expected_error_path);
+        AssertThat((void *)compile_result.error_message, !IsNull());
+        AssertThat(compile_result.error_message, Equals(expected_error));
+      });
+      // No corpus specs are registered for this fixture.
+      return;
+    } else {
+      TSDocument *document = nullptr;
+      const TSLanguage *language = nullptr;
+      // Load the language on first use, then give every spec a fresh document.
+      before_each([&]() {
+        if (!language) {
+          language = load_test_language(
+            language_name,
+            compile_result,
+            external_scanner_path
+          );
+        }
+
+        document = ts_document_new();
+        ts_document_set_language(document, language);
+        // Debugging aids, disabled by default:
+        // ts_document_set_logger(document, stderr_logger_new(true));
+        // ts_document_print_debugging_graphs(document, true);
+      });
+
+      after_each([&]() {
+        if (document) ts_document_free(document);
+      });
+      // One spec per corpus entry: parse the input and compare the printed tree.
+      for (auto &entry : read_test_language_corpus(language_name)) {
+        it(("parses " + entry.description).c_str(), [&]() {
+          ts_document_set_input_string_with_length(document, entry.input.c_str(), entry.input.size());
+          ts_document_parse(document);
+          // Copy the node string so the C buffer is released before the assertion runs.
+          TSNode root_node = ts_document_root_node(document);
+          const char *node_string = ts_node_string(root_node, document);
+          string result(node_string);
+          ts_free((void *)node_string);
+          AssertThat(result, Equals(entry.tree_string));
+        });
+      }
+    }
+  });
+}
+
+END_TEST
\ No newline at end of file
diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc
index f80419dc..1863e210 100644
--- a/spec/runtime/document_spec.cc
+++ b/spec/runtime/document_spec.cc
@@ -43,7 +43,7 @@ describe("Document", [&]() {
     before_each([&]() {
       spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
 
-      ts_document_set_language(document, get_test_language("json"));
+      ts_document_set_language(document, load_real_language("json"));
       ts_document_set_input_string(document, "{\"key\": [1, 2]}");
       ts_document_parse(document);
 
@@ -152,7 +152,7 @@ describe("Document", [&]() {
     });
 
     it("uses the given language for future parses", [&]() {
-      ts_document_set_language(document, get_test_language("json"));
+      ts_document_set_language(document, load_real_language("json"));
       ts_document_parse(document);
 
       root = ts_document_root_node(document);
@@ -162,10 +162,10 @@ describe("Document", [&]() {
     });
 
     it("clears out any previous tree", [&]() {
-      ts_document_set_language(document, get_test_language("json"));
+      ts_document_set_language(document, load_real_language("json"));
       ts_document_parse(document);
 
-      ts_document_set_language(document, get_test_language("javascript"));
+      ts_document_set_language(document, load_real_language("javascript"));
       AssertThat(ts_document_root_node(document).data, Equals<void *>(nullptr));
 
       ts_document_parse(document);
@@ -177,7 +177,7 @@ describe("Document", [&]() {
     });
 
     it("does not allow setting a language with a different version number", [&]() {
-      TSLanguage language = *get_test_language("json");
+      TSLanguage language = *load_real_language("json");
       AssertThat(ts_language_version(&language), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
 
       language.version++;
@@ -193,7 +193,7 @@ describe("Document", [&]() {
 
     before_each([&]() {
       logger = new SpyLogger();
-      ts_document_set_language(document, get_test_language("json"));
+      ts_document_set_language(document, load_real_language("json"));
       ts_document_set_input_string(document, "[1, 2]");
     });
 
@@ -235,7 +235,7 @@ describe("Document", [&]() {
     SpyInput *input;
 
     before_each([&]() {
-      ts_document_set_language(document, get_test_language("javascript"));
+      ts_document_set_language(document, load_real_language("javascript"));
       input = new SpyInput("{a: null};", 3);
       ts_document_set_input(document, input->input());
       ts_document_parse(document);
diff --git a/spec/runtime/node_spec.cc b/spec/runtime/node_spec.cc
index 085e4d31..f01a862f 100644
--- a/spec/runtime/node_spec.cc
+++ b/spec/runtime/node_spec.cc
@@ -40,7 +40,7 @@ describe("Node", []() {
     record_alloc::start();
 
     document = ts_document_new();
-    ts_document_set_language(document, get_test_language("json"));
+    ts_document_set_language(document, load_real_language("json"));
     ts_document_set_input_string(document, input_string.c_str());
     ts_document_parse(document);
 
diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc
index 0b4c0a3a..c1c3a547 100644
--- a/spec/runtime/parser_spec.cc
+++ b/spec/runtime/parser_spec.cc
@@ -83,7 +83,7 @@ describe("Parser", [&]() {
   describe("handling errors", [&]() {
     describe("when there is an invalid substring right before a valid token", [&]() {
       it("computes the error node's size and position correctly", [&]() {
-        ts_document_set_language(document, get_test_language("json"));
+        ts_document_set_language(document, load_real_language("json"));
         set_text("  [123,  @@@@@,   true]");
 
         assert_root_node(
@@ -108,7 +108,7 @@ describe("Parser", [&]() {
 
     describe("when there is an unexpected string in the middle of a token", [&]() {
       it("computes the error node's size and position correctly", [&]() {
-        ts_document_set_language(document, get_test_language("json"));
+        ts_document_set_language(document, load_real_language("json"));
         set_text("  [123, faaaaalse, true]");
 
         assert_root_node(
@@ -134,7 +134,7 @@ describe("Parser", [&]() {
 
     describe("when there is one unexpected token between two valid tokens", [&]() {
       it("computes the error node's size and position correctly", [&]() {
-        ts_document_set_language(document, get_test_language("json"));
+        ts_document_set_language(document, load_real_language("json"));
         set_text("  [123, true false, true]");
 
         assert_root_node(
@@ -153,7 +153,7 @@ describe("Parser", [&]() {
 
     describe("when there is an unexpected string at the end of a token", [&]() {
       it("computes the error's size and position correctly", [&]() {
-        ts_document_set_language(document, get_test_language("json"));
+        ts_document_set_language(document, load_real_language("json"));
         set_text("  [123, \"hi\n, true]");
 
         assert_root_node(
@@ -163,7 +163,7 @@ describe("Parser", [&]() {
 
     describe("when there is an unterminated error", [&]() {
       it("maintains a consistent tree", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("a; /* b");
         assert_root_node(
           "(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))");
@@ -172,7 +172,7 @@ describe("Parser", [&]() {
 
     describe("when there are extra tokens at the end of the viable prefix", [&]() {
       it("does not include them in the error node", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text(
           "var x;\n"
           "\n"
@@ -192,7 +192,7 @@ describe("Parser", [&]() {
   describe("handling extra tokens", [&]() {
     describe("when the token appears as part of a grammar rule", [&]() {
       it("incorporates it into the tree", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("fn()\n");
 
         assert_root_node(
@@ -202,7 +202,7 @@ describe("Parser", [&]() {
 
     describe("when the token appears somewhere else", [&]() {
       it("incorporates it into the tree", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text(
           "fn()\n"
           "  .otherFn();");
@@ -218,7 +218,7 @@ describe("Parser", [&]() {
 
     describe("when several extra tokens appear in a row", [&]() {
       it("incorporates them into the tree", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text(
           "fn()\n\n"
           "// This is a comment"
@@ -239,7 +239,7 @@ describe("Parser", [&]() {
   describe("editing", [&]() {
     describe("creating new tokens near the end of the input", [&]() {
       it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("x * (100 + abc);");
 
         assert_root_node(
@@ -262,7 +262,7 @@ describe("Parser", [&]() {
       it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
         chunk_size = 2;
 
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("123 + 456 * (10 + x);");
 
         assert_root_node(
@@ -285,7 +285,7 @@ describe("Parser", [&]() {
 
     describe("introducing an error", [&]() {
       it("gives the error the right size", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("var x = y;");
 
         assert_root_node(
@@ -308,7 +308,7 @@ describe("Parser", [&]() {
 
     describe("into the middle of an existing token", [&]() {
       it("updates the parse tree", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("abc * 123;");
 
         assert_root_node(
@@ -327,7 +327,7 @@ describe("Parser", [&]() {
 
     describe("at the end of an existing token", [&]() {
       it("updates the parse tree", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("abc * 123;");
 
         assert_root_node(
@@ -346,7 +346,7 @@ describe("Parser", [&]() {
 
     describe("inserting text into a node containing a extra token", [&]() {
       it("updates the parse tree", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("123 *\n"
           "// a-comment\n"
           "abc;");
@@ -373,7 +373,7 @@ describe("Parser", [&]() {
 
     describe("when a critical token is removed", [&]() {
       it("updates the parse tree, creating an error", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("123 * 456; 789 * 123;");
 
         assert_root_node(
@@ -392,7 +392,7 @@ describe("Parser", [&]() {
 
     describe("with external tokens", [&]() {
       it("maintains the external scanner's state during incremental parsing", [&]() {
-        ts_document_set_language(document, get_test_language("python"));
+        ts_document_set_language(document, load_real_language("python"));
         string text = dedent(R"PYTHON(
           if a:
               print b
@@ -420,7 +420,7 @@ describe("Parser", [&]() {
     });
 
     it("does not try to re-use nodes that are within the edited region", [&]() {
-      ts_document_set_language(document, get_test_language("javascript"));
+      ts_document_set_language(document, load_real_language("javascript"));
       set_text("{ x: (b.c) };");
 
       assert_root_node(
@@ -435,7 +435,7 @@ describe("Parser", [&]() {
     });
 
     it("updates the document's parse count", [&]() {
-      ts_document_set_language(document, get_test_language("javascript"));
+      ts_document_set_language(document, load_real_language("javascript"));
       AssertThat(ts_document_parse_count(document), Equals<size_t>(0));
 
       set_text("{ x: (b.c) };");
@@ -449,7 +449,7 @@ describe("Parser", [&]() {
   describe("lexing", [&]() {
     describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
       it("terminates them at the end of the document", [&]() {
-        ts_document_set_language(document, get_test_language("javascript"));
+        ts_document_set_language(document, load_real_language("javascript"));
         set_text("x; // this is a comment");
 
         assert_root_node(
@@ -464,7 +464,7 @@ describe("Parser", [&]() {
 
     it("recognizes UTF8 characters as single characters", [&]() {
       // 'ΩΩΩ — ΔΔ';
-      ts_document_set_language(document, get_test_language("javascript"));
+      ts_document_set_language(document, load_real_language("javascript"));
       set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");
 
       assert_root_node(